; x86-64 interrupt & syscall entry stubs (NASM).
; GDT selectors and MSR numbers — must match the GDT built by the kernel.
;
; BUGFIX: user-mode selectors must carry RPL = 3. `iretq` to ring 3 #GPs
; when the stacked SS's RPL != 3, and `sysretq` loads SS = STAR.user_base
; + 8 with RPL forced to 3 (i.e. 0x20 | 3 = 0x23). USR_CODE_X64 already
; had the +3; USR_DATA_X64 and USR_CODE_X86 were missing it, which makes
; the `push qword USR_DATA_X64` in rout_syscall build an iretq frame that
; faults on the .fault_iret path.
KRL_CODE_X64    equ 0x08        ; ring-0 64-bit code
KRL_DATA_X64    equ 0x10        ; ring-0 data
USR_CODE_X86    equ 0x18 + 3    ; ring-3 32-bit (compat) code, RPL = 3
USR_DATA_X64    equ 0x20 + 3    ; ring-3 data, RPL = 3 (pushed as SS)
USR_CODE_X64    equ 0x28 + 3    ; ring-3 64-bit code, RPL = 3
GS_BASE         equ 0xc0000101  ; IA32_GS_BASE MSR
KERNEL_GS_BASE  equ 0xc0000102  ; IA32_KERNEL_GS_BASE MSR (swapped by `swapgs`)
; Interrupt/exception stack frame.
;
; Offsets ascend from .gs_base because the registers are pushed in the
; reverse order: the CPU pushes ss..rip, the stub (or rout_syscall)
; supplies .errc, and `push_regs` pushes rax down to .gs_base.
; Must stay in sync with the Rust-side `Frame` definition — TODO confirm
; against [`x86_64::Frame`] referenced in rout_syscall.
struc Frame
    .gs_base resq 1     ; saved gs base of the interrupted context
    .fs_base resq 1     ; saved fs base of the interrupted context
    .r15 resq 1
    .r14 resq 1
    .r13 resq 1
    .r12 resq 1
    .r11 resq 1
    .r10 resq 1
    .r9 resq 1
    .r8 resq 1
    .rsi resq 1
    .rdi resq 1
    .rbp resq 1
    .rbx resq 1
    .rdx resq 1
    .rcx resq 1
    .rax resq 1
    .errc resq 1        ; hardware error code, -1, or the vector number
    .rip resq 1         ; --- pushed by the CPU from here up to .ss ---
    .cs resq 1
    .rflags resq 1
    .rsp resq 1
    .ss resq 1
endstruc
; Per-CPU kernel data, reached via `[gs:...]` once gs holds the kernel base.
; Must stay in sync with the Rust-side `KernelGs` definition.
struc KernelGs
    .tss_rsp0 resq 1            ; TSS RSP0: kernel stack top of the current task
    .syscall_user_stack resq 1  ; scratch slot: user rsp parked on `syscall` entry
    .syscall_stack resq 1       ; NOTE(review): not referenced in this file — verify users elsewhere
    .kernel_fs resq 1           ; kernel fs base, loaded via wrfsbase on kernel entry
endstruc
; align_rsp(flag_reg)
; Align RSP for a following `call`, recording in %1 whether an 8-byte pad
; was inserted (1) or not (0) so `recover_rsp` can undo it. The SysV ABI
; requires RSP % 16 == 0 at the call site (entry RSP % 16 == 8), and the
; Rust handlers called through `align_call` rely on that.
;
; BUGFIX: the branch was `jnz %%next`, i.e. it padded exactly when RSP was
; ALREADY 16-byte aligned and skipped when it was misaligned — normalizing
; RSP to 8 mod 16 at the call, the opposite of the SysV requirement.
%macro align_rsp 1
    mov %1, 0
    test rsp, 0xF       ; low 4 bits clear <=> already 16-byte aligned
    jz %%next           ; aligned: no pad needed
    mov %1, 1           ; remember the pad for recover_rsp
    sub rsp, 8          ; stack is 8-byte granular here, one pad suffices
%%next:
%endmacro
; recover_rsp(flag_reg)
; Undo the padding recorded by `align_rsp`: when the flag register is
; non-zero an 8-byte pad was inserted and is dropped again here.
%macro recover_rsp 1
    test %1, %1         ; was a pad inserted?
    jz %%done
    add rsp, 8          ; drop the alignment pad
%%done:
%endmacro
; align_call(func, flag_reg)
; Call `func` with RSP alignment handled by align_rsp/recover_rsp.
; %2 must be a register whose value is dead across the call (callers here
; use r12, which push_regs has already saved and zeroed).
%macro align_call 2
    align_rsp %2
    call %1
    recover_rsp %2
%endmacro
; push_xs(msr)
; Push the 64-bit value of MSR %1 onto the stack.
; Preserves rcx; clobbers rax and rdx (rdmsr outputs).
%macro push_xs 1
    push rcx            ; reserve the stack slot and save rcx
    mov rcx, %1
    rdmsr               ; edx:eax = MSR value
    shl rdx, 32
    add rax, rdx        ; rax = full 64-bit value
    mov rcx, [rsp]      ; restore rcx from the slot...
    mov [rsp], rax      ; ...and overwrite the slot with the MSR value
%endmacro
; pop_xs(msr)
; Pop a 64-bit value off the stack and write it into MSR %1.
; Preserves rcx; clobbers rax and rdx (wrmsr inputs).
%macro pop_xs 1
    mov rax, [rsp]      ; rax = value to write
    mov [rsp], rcx      ; park rcx in the slot so the final pop restores it
    mov rdx, rax
    shr rdx, 32         ; edx:eax = value split for wrmsr
    mov rcx, %1
    wrmsr
    pop rcx
%endmacro
- ; push_regs(bool save_ret_addr, bool gs_swapped)
- %macro push_regs 2
- %if %1 == 1
- push rcx ; | ret_addr |<-rsp | ret_addr | | rax |
- mov rcx, [rsp + 8] ; |-----------| ---> |-----------| ---> |-----------|, rcx=ret_addr
- mov [rsp + 8], rax ; | | | rcx |<-rsp | rcx |<-rsp
- %else
- push rax
- push rcx
- %endif
- push rdx
- push rbx
- push rbp
- push rdi
- push rsi
- push r8
- push r9
- push r10
- push r11
- push r12
- push r13
- push r14
- push r15
- push rcx
- rdfsbase rcx
- xchg [rsp], rcx
- %if %2 == 1
- push_xs KERNEL_GS_BASE
- %else
- push rcx
- rdgsbase rcx
- xchg [rsp], rcx
- %endif
- %if %1 == 1
- push rcx
- %endif
- xor rcx, rcx
- xor rdx, rdx
- xor rbx, rbx
- xor rbp, rbp
- xor r8, r8
- xor r9, r9
- xor r10, r10
- xor r11, r11
- xor r12, r12
- xor r13, r13
- xor r14, r14
- xor r15, r15
- %endmacro
; pop_regs(bool gs_swapped)
;
; Tear down the GPR half of a [`Frame`]: load the fs selector appropriate
; for the privilege level we return to, restore the fs/gs bases and all
; GPRs. On exit the stack holds only errc plus the iretq frame.
;
; %1 (gs_swapped): 1 => write the saved gs base into KERNEL_GS_BASE so a
;     later `swapgs` activates it; 0 => write it directly via wrgsbase.
%macro pop_regs 1
    cmp word [rsp + Frame.cs], KRL_CODE_X64
    jne %%set_user
    mov ax, KRL_DATA_X64        ; returning to ring 0
    jmp %%pop
%%set_user:
    mov ax, USR_DATA_X64        ; returning to ring 3
%%pop:
    mov fs, ax
%if %1 == 1
    pop_xs KERNEL_GS_BASE       ; park saved gs base for the final swapgs
%else
    xchg rcx, [rsp]             ; rcx <-> saved gs base
    wrgsbase rcx
    pop rcx
%endif
    xchg rcx, [rsp]             ; rcx <-> saved fs base
    wrfsbase rcx
    pop rcx
    pop r15
    pop r14
    pop r13
    pop r12
    pop r11
    pop r10
    pop r9
    pop r8
    pop rsi
    pop rdi
    pop rbp
    pop rbx
    pop rdx
    pop rcx
    pop rax
%endmacro
; ---------------------------------------------------------------------------------------
; Interrupts

; Exception vectors (Intel SDM Vol. 3A, ch. 6). Vectors 8, 10-14, 17, 21
; and 29 push a hardware error code; the others do not.
ExVec_DivideBy0 equ 0           ; #DE
ExVec_Debug equ 1               ; #DB
ExVec_Nmi equ 2                 ; NMI
ExVec_Breakpoint equ 3          ; #BP
ExVec_Overflow equ 4            ; #OF
ExVec_Bound equ 5               ; #BR
ExVec_InvalidOp equ 6           ; #UD
ExVec_DeviceNa equ 7            ; #NM
ExVec_DoubleFault equ 8         ; #DF — error code (always 0)
ExVec_CoprocOverrun equ 9       ; legacy coprocessor segment overrun
ExVec_InvalidTss equ 10         ; #TS — error code
ExVec_SegmentNa equ 11          ; #NP — error code
ExVec_StackFault equ 12         ; #SS — error code
ExVec_GeneralProt equ 13        ; #GP — error code
ExVec_PageFault equ 14          ; #PF — error code
ExVec_FloatPoint equ 16         ; #MF
ExVec_Alignment equ 17          ; #AC — error code (always 0)
ExVec_MachineCheck equ 18       ; #MC
ExVec_SimdExcep equ 19          ; #XM
ExVec_Virtual equ 20            ; #VE
ExVec_ControlProt equ 21        ; #CP — error code
ExVec_VmmComm equ 29            ; #VC — error code

; Local APIC vectors (kernel-chosen values).
ApicVec_Timer equ 0x20
ApicVec_Error equ 0x21
ApicVec_IpiTaskMigrate equ 0x22
ApicVec_Spurious equ 0xFF
; define_intr(vec, asm_name, name, err_vec)
;
; Emit the exported interrupt routine %2, forwarding to the extern Rust
; handler %3. %1 (the vector) is unused by the macro and kept for
; readability. %4 selects the errc policy for the Frame:
;   0        => the CPU pushes a real error code for this vector;
;   non-zero => %4 itself is pushed into the Frame.errc slot (-1 for
;               exceptions without an error code, or the vector number
;               for external interrupts).
%macro define_intr 4
global %2:function
extern %3
%2:
%if %4 != 0
    push %4                 ; dummy errc / vector number
%endif
    call intr_entry         ; completes the Frame, then returns here
    mov rdi, rsp            ; arg0 = &Frame
    align_call %3, r12      ; r12 is dead: saved and zeroed by push_regs
    jmp intr_exit
%endmacro
[section .text]

global switch_kframe:function
; Switch the kernel frame of the current thread.
;
; # Safety
;
; The layout in the stack must match [`h2o::sched::task::ctx::x86_64::Kframe`].
;
; switch_kframe(rdi *old_kframe, rsi new_kframe)
; NOTE(review): the code also dereferences rdx and rcx (two extra qword
; slots saved/restored through them) — presumably additional pointer
; arguments; confirm against the Rust-side signature.
switch_kframe:
    push rbp
    push rbx
    push r12
    push r13
    push r14
    push r15
    pushfq
    push qword [rdx]        ; NOTE(review): extra saved slot via rdx — confirm
    push qword [rcx]        ; NOTE(review): extra saved slot via rcx — confirm
    xor rax, rax
    mov ax, cs
    push rax                ; save cs so the retfq below can reload it
    ; Save the current stack context.
    cmp rdi, 0
    je .switch              ; old_kframe may be null: nothing to save
    mov [rdi], rsp
.switch:
    ; Switch the stack (a.k.a. the context).
    mov rsp, rsi
    push .pop_regs          ; retfq pops rip then cs -> resume at .pop_regs
    retfq
.pop_regs:
    ; Unwind the mirror image of the save sequence on the new stack.
    pop qword [rcx]
    pop qword [rdx]
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbx
    pop rbp
    ret
global task_fresh:function
extern switch_finishing
; The entry into the interrupt context of a new task.
; Finishes the task switch (with null arguments) and falls into the
; interrupt-return path, which pops the prepared Frame into the task's
; initial register state.
task_fresh:
    xor rdi, rdi
    xor rsi, rsi
    align_call switch_finishing, r12
    cli                     ; interrupts must stay off through intr_exit
    jmp intr_exit
; define_intr(vec, asm_name, name, err_vec)
; err_vec: 0 => the CPU pushes an error code; -1 => push a dummy errc.
; (Per Intel SDM Vol. 3A ch. 6, vectors 8, 10-14, 17, 21 and 29 push an
; error code; the rest do not.)
; x86 exceptions
define_intr ExVec_DivideBy0, rout_div_0, hdl_div_0, -1
define_intr ExVec_Debug, rout_debug, hdl_debug, -1
define_intr ExVec_Nmi, rout_nmi, hdl_nmi, -1
define_intr ExVec_Breakpoint, rout_breakpoint, hdl_breakpoint, -1
define_intr ExVec_Overflow, rout_overflow, hdl_overflow, -1
define_intr ExVec_Bound, rout_bound, hdl_bound, -1
define_intr ExVec_InvalidOp, rout_invalid_op, hdl_invalid_op, -1
define_intr ExVec_DeviceNa, rout_device_na, hdl_device_na, -1
define_intr ExVec_DoubleFault, rout_double_fault, hdl_double_fault, 0
define_intr ExVec_CoprocOverrun, rout_coproc_overrun, hdl_coproc_overrun, -1
define_intr ExVec_InvalidTss, rout_invalid_tss, hdl_invalid_tss, 0
define_intr ExVec_SegmentNa, rout_segment_na, hdl_segment_na, 0
define_intr ExVec_StackFault, rout_stack_fault, hdl_stack_fault, 0
define_intr ExVec_GeneralProt, rout_general_prot, hdl_general_prot, 0
define_intr ExVec_PageFault, rout_page_fault, hdl_page_fault, 0
define_intr ExVec_FloatPoint, rout_fp_excep, hdl_fp_excep, -1
; BUGFIX: #AC (17) *does* push a (always-zero) error code, while #XM (19)
; does *not* — the two err_vec arguments were swapped, which shifted the
; whole Frame by one qword for both vectors.
define_intr ExVec_Alignment, rout_alignment, hdl_alignment, 0
; define_intr ExVec_MachineCheck, rout_mach_check, hdl_mach_check, 0
define_intr ExVec_SimdExcep, rout_simd, hdl_simd, -1
; Local APIC interrupts
define_intr ApicVec_Timer, rout_lapic_timer, hdl_lapic_timer, -1
define_intr ApicVec_Error, rout_lapic_error, hdl_lapic_error, -1
define_intr ApicVec_IpiTaskMigrate, rout_lapic_ipi_task_migrate, hdl_lapic_ipi_task_migrate, -1
define_intr ApicVec_Spurious, rout_lapic_spurious, hdl_lapic_spurious, -1
; All other interrupts: one stub per vector 0x40..0xFE, all forwarding to
; `common_interrupt` with the vector number in the errc slot.
%define rout_name(x) rout_ %+ x
%assign i 0x40
%rep (0xFF - 0x40)
define_intr i, rout_name(i), common_interrupt, i
%assign i (i + 1)
%endrep
%undef rout_name
extern save_regs; Save the GPRs from the current stack and switch to the task's `intr_stack`.

; Common interrupt entry, reached via `call` from every define_intr stub
; once the errc slot is in place. Stack on entry:
;   [ret_addr][errc][rip][cs][rflags][rsp][ss]   ([rsp + 8*3] = cs)
; Completes the Frame, switches to kernel fs (and gs via swapgs), hands
; the saved GPRs to `save_regs`, then returns to the stub.
intr_entry:
    cld
    cmp qword [rsp + 8 * 3], 0xc; Test if it's a reentrancy.
    ; NOTE(review): 0xc matches no selector defined in this file (kernel
    ; cs is 0x08) — presumably the cs value marking interrupt-context
    ; frames; confirm against the GDT/IDT setup.
    je .reent
    swapgs                      ; enter the kernel gs base
    lfence                      ; speculation barrier after the branch/swapgs
    ; If we came from a kernel task, manually switch to its kernel stack.
    cmp qword [rsp + 8 * 3], 0x8
    jne .push_regs
    push rdi
    mov rdi, rsp                ; rdi = old stack: [rdi][ret][errc][iretq frame]
    mov rsp, [gs:KernelGs.tss_rsp0]
    push qword [rdi + 8 * 7]    ; ss      -- replay the 7 qwords above rdi
    push qword [rdi + 8 * 6]    ; rsp
    push qword [rdi + 8 * 5]    ; rflags
    push qword [rdi + 8 * 4]    ; cs
    push qword [rdi + 8 * 3]    ; rip
    push qword [rdi + 8 * 2]    ; errc
    push qword [rdi + 8]        ; ret_addr
    mov rdi, [rdi]              ; restore rdi
.push_regs:
    push_regs 1, 1; The routine has a return address, so we must preserve it.
    lea rbp, [rsp + 8 + 1]
    ; NOTE(review): rbp = frame base + 1 — the set low bit presumably tags
    ; this as an interrupt frame for stack walkers; confirm.
    mov rax, [gs:(KernelGs.kernel_fs)]
    wrfsbase rax                ; kernel fs for per-CPU accesses in Rust code
    align_call save_regs, r12
    jmp .ret
.reent:
    ; A preemption happens.
    lfence
    push_regs 1, 0; The routine has a return address, so we must preserve it.
    lea rbp, [rsp + 8 + 1]
.ret:
    ret
; Common interrupt exit: unwinds the Frame built via intr_entry/push_regs
; and returns with iretq.
intr_exit:
    cmp qword [rsp + Frame.cs], 0xc; Test if it's a reentrancy.
    ; NOTE(review): same 0xc sentinel as in intr_entry — confirm.
    je .reent
    pop_regs 1                  ; saved gs base parked in KERNEL_GS_BASE
    ; The stack now consists of errc and 'iretq stuff'
    add rsp, 8                  ; drop errc
    swapgs                      ; activate the interrupted context's gs base
    jmp .ret
.reent:
    ; A preemption happens.
    pop_regs 0                  ; gs was never swapped: restore it directly
    add rsp, 8
.ret:
    iretq
; ---------------------------------------------------------------------------------------
; Syscalls

global rout_syscall:function
extern hdl_syscall
; `syscall` entry point. On entry: rcx = user rip, r11 = user rflags, gs
; still holds the user base until `swapgs`. Builds a full [`Frame`] so
; syscalls share the interrupt context layout, calls the Rust handler,
; and returns with `sysretq` — or `iretq` when sysret would be unsafe.
rout_syscall:
    swapgs
    mov [gs:(KernelGs.syscall_user_stack)], rsp
    mov rsp, [gs:(KernelGs.tss_rsp0)]       ; switch to the kernel stack
    ; Fit the context to [`x86_64::Frame`]
    push qword USR_DATA_X64 ; ss
    push qword [gs:(KernelGs.syscall_user_stack)] ; rsp
    push r11 ; rflags
    push qword USR_CODE_X64 ; cs
    push rcx ; rip
    push -1 ; errc_vec
    push_regs 0, 1              ; user gs base currently in KERNEL_GS_BASE
    lea rbp, [rsp + 8 + 1]      ; NOTE(review): tagged frame pointer as in intr_entry — confirm
    mov rax, [gs:(KernelGs.kernel_fs)]
    wrfsbase rax
    ; Copy the kernel gs base into KERNEL_GS_BASE so that the swapgs done
    ; by a nested interrupt keeps gs pointing at kernel data.
    mov rcx, GS_BASE
    rdmsr
    mov rcx, KERNEL_GS_BASE
    wrmsr
    align_call save_regs, r12
    mov rdi, rsp                ; arg0 = &Frame
    align_call hdl_syscall, r12
    pop_regs 1                  ; re-parks the user gs base in KERNEL_GS_BASE
    add rsp, 8 ; errc_vec
    ; Here we must test the return address because of Intel's mistake in the
    ; `sysret` mechanism. Normally, only code on ring 3 (lower half) can
    ; execute `syscall`.
    ; See https://xenproject.org/2012/06/13/the-intel-sysret-privilege-escalation/
    test dword [rsp + 4], 0xFFFF8000 ; test if the return address is on the higher half
    jnz .fault_iret
    cmp qword [rsp + 8], 0x8 ; test if the return segment is kernel.
    je .fault_iret
    pop rcx ; rip
    add rsp, 8 ; cs
    pop r11 ; rflags
    ; pop qword [gs:(KernelGs.syscall_user_stack)] ; rsp
    ; ;add rsp, 8 ; ss
    ; mov rsp, [gs:(KernelGs.syscall_user_stack)]
    pop rsp ; simplify 3 instructions above
    swapgs                      ; activate the user gs base
    o64 sysret
.fault_iret:
    ; Unsafe for sysret: scrub the scratch regs and return via iretq using
    ; the full frame (rip/cs/rflags/rsp/ss) still on the stack.
    xor rcx, rcx
    xor r11, r11
    swapgs
    iretq
|