讀x86_64 內核stack 的代碼, 留下筆記
1. 內核使用獨立的 irq stack, 由軟件完成切換: 無論中斷發生時 cpu 運行的是 user 代碼還是 kernel 代碼, 最終都會切換到獨立的 irq stack。這個 irq stack 可以通過 per-cpu 變量 irq_stack_ptr 來追蹤, 具體查看文件 arch/x86/kernel/cpu/common.c (約第 1781 行, 行號隨內核版本變化).
DEFINE_PER_CPU(char *, irq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
2. 4 個獨立的 exception stack: double fault, mce, debug (hw and int3), nmi. x86_64 的 TSS 段中有 7 個 IST stack 地址 (ist[7]), stack 的切換是由 cpu 硬件完成的: 將 IDT 中斷門 (interrupt gate) 的 ist 字段設置為對應 exception stack 的下標即可。而所有的 4 種 stack 都被放在了一個大數組當中. arch/x86/include/asm/processor.h struct x86_hw_tss { ...... /* * We store cpu_current_top_of_stack in sp1 so it's always accessible. * Linux does not use ring 1, so sp1 is not otherwise needed. */ ...... } __attribute__((packed));
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); struct cpu_entry_area { char gdt[PAGE_SIZE]; struct entry_stack_page entry_stack_page; struct tss_struct tss; char entry_trampoline[PAGE_SIZE]; char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; ......
} arch/x86/include/asm/desc.h struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
門描述符 /* Must be page-aligned because the real IDT is used in a fixmap. */ gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; struct desc_ptr idt_descr __ro_after_init = { .size = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1, .address = (unsigned long) idt_table,
/home/chuhu/Git/linux//arch/x86/include/asm/desc_defs.h } __attribute__((packed)); typedef struct gate_struct gate_desc;
內核異常棧的位置: /home/chuhu/Git/linux//arch/x86/kernel/idt.c static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), SISTG(X86_TRAP_BP, int3, DEBUG_STACK), ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
task->stack 保存了內核棧的極限位置 (can't push after this, or overflow). 這是 stack 的最低地址, 因為內核棧是向下增長 (grow down) 的. 內核棧的分配路徑: copy_process -> dup_task_struct -> alloc_thread_stack_node
/* * This is the structure pointed to by thread.sp for an inactive task. The * order of the fields must match the code in __switch_to_asm(). */ struct inactive_task_frame { #ifdef CONFIG_X86_64 unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; #else unsigned long si; unsigned long di; #endif unsigned long bx;
/* * These two fields must be together. They form a stack frame header, * needed by get_frame_pointer(). */ unsigned long bp; unsigned long ret_addr; };
struct fork_frame { struct inactive_task_frame frame; struct pt_regs regs; };
|