Merge branch 'toolchains' of akucxy/oceanic into master

Good.
akucxy 2 years ago
Parent
Commit
cdb14a142e
100 changed files with 2029 additions and 928 deletions
  1. 3 0
      .cargo/config.toml
  2. 2 2
      .cargo/x86_64-pc-oceanic.json
  3. 1 1
      h2o/boot/Cargo.toml
  4. 2 9
      h2o/boot/src/file/elf.rs
  5. 5 1
      h2o/boot/src/main.rs
  6. 1 1
      h2o/kernel/.cargo/x86_64-h2o-kernel.json
  7. 1 1
      h2o/kernel/Cargo.toml
  8. 1 1
      h2o/kernel/build.rs
  9. 15 13
      h2o/kernel/entry/x86_64/stub.asm
  10. 1 1
      h2o/kernel/src/cpu.rs
  11. 16 14
      h2o/kernel/src/cpu/intr/imp.rs
  12. 17 3
      h2o/kernel/src/cpu/time.rs
  13. 6 8
      h2o/kernel/src/cpu/time/chip.rs
  14. 199 107
      h2o/kernel/src/cpu/time/timer.rs
  15. 28 3
      h2o/kernel/src/cpu/x86_64/apic.rs
  16. 7 7
      h2o/kernel/src/cpu/x86_64/apic/ipi.rs
  17. 3 3
      h2o/kernel/src/cpu/x86_64/apic/timer.rs
  18. 6 13
      h2o/kernel/src/cpu/x86_64/intr.rs
  19. 2 6
      h2o/kernel/src/cpu/x86_64/mod.rs
  20. 6 6
      h2o/kernel/src/cpu/x86_64/seg.rs
  21. 3 6
      h2o/kernel/src/cpu/x86_64/syscall.rs
  22. 21 24
      h2o/kernel/src/cpu/x86_64/tsc.rs
  23. 5 5
      h2o/kernel/src/dev.rs
  24. 2 2
      h2o/kernel/src/dev/acpi.rs
  25. 6 7
      h2o/kernel/src/dev/res.rs
  26. 8 8
      h2o/kernel/src/dev/x86_64/ioapic.rs
  27. 0 3
      h2o/kernel/src/dev/x86_64/lpic.rs
  28. 1 2
      h2o/kernel/src/kmain.rs
  29. 11 8
      h2o/kernel/src/log.rs
  30. 1 1
      h2o/kernel/src/mem.rs
  31. 5 5
      h2o/kernel/src/mem/arena.rs
  32. 4 2
      h2o/kernel/src/mem/arena/atomic.rs
  33. 7 7
      h2o/kernel/src/mem/space.rs
  34. 3 3
      h2o/kernel/src/mem/space/phys.rs
  35. 38 62
      h2o/kernel/src/mem/space/virt.rs
  36. 3 7
      h2o/kernel/src/mem/space/x86_64/mod.rs
  37. 22 22
      h2o/kernel/src/mem/syscall.rs
  38. 5 1
      h2o/kernel/src/sched.rs
  39. 122 34
      h2o/kernel/src/sched/imp.rs
  40. 166 12
      h2o/kernel/src/sched/imp/waiter.rs
  41. 126 67
      h2o/kernel/src/sched/ipc.rs
  42. 67 64
      h2o/kernel/src/sched/ipc/channel.rs
  43. 39 15
      h2o/kernel/src/sched/ipc/channel/syscall.rs
  44. 6 6
      h2o/kernel/src/sched/task.rs
  45. 21 3
      h2o/kernel/src/sched/task/boot.rs
  46. 1 1
      h2o/kernel/src/sched/task/ctx.rs
  47. 10 9
      h2o/kernel/src/sched/task/ctx/x86_64.rs
  48. 6 6
      h2o/kernel/src/sched/task/elf.rs
  49. 2 2
      h2o/kernel/src/sched/task/excep.rs
  50. 29 10
      h2o/kernel/src/sched/task/hdl.rs
  51. 40 15
      h2o/kernel/src/sched/task/hdl/node.rs
  52. 3 5
      h2o/kernel/src/sched/task/idle.rs
  53. 5 18
      h2o/kernel/src/sched/task/sm.rs
  54. 10 15
      h2o/kernel/src/sched/task/space.rs
  55. 32 30
      h2o/kernel/src/sched/task/syscall.rs
  56. 2 2
      h2o/kernel/src/sched/task/tid.rs
  57. 2 5
      h2o/kernel/src/sched/wait.rs
  58. 30 17
      h2o/kernel/src/sched/wait/futex.rs
  59. 27 9
      h2o/kernel/src/syscall.rs
  60. 4 4
      h2o/kernel/src/syscall/user_ptr.rs
  61. 7 3
      h2o/kernel/syscall/channel.json
  62. 56 0
      h2o/kernel/syscall/dispatcher.json
  63. 0 32
      h2o/kernel/syscall/object.json
  64. 7 0
      h2o/kernel/syscall/task.json
  65. 27 0
      h2o/kernel/syscall/time.json
  66. 1 0
      h2o/libs/archop/src/x86_64/lock.rs
  67. 9 0
      h2o/libs/archop/src/x86_64/msr.rs
  68. 5 0
      h2o/libs/archop/src/x86_64/rand.rs
  69. 36 0
      h2o/libs/archop/src/x86_64/reg.rs
  70. 21 13
      h2o/libs/bitop_ex/src/lib.rs
  71. 3 2
      h2o/libs/canary/src/lib.rs
  72. 82 4
      h2o/libs/collection_ex/src/chash_map.rs
  73. 29 0
      h2o/libs/collection_ex/src/chash_map/inner.rs
  74. 1 0
      h2o/libs/heap/Cargo.toml
  75. 105 36
      h2o/libs/heap/src/alloc.rs
  76. 11 4
      h2o/libs/heap/src/lib.rs
  77. 19 3
      h2o/libs/heap/src/page.rs
  78. 4 1
      h2o/libs/heap/src/pool.rs
  79. 142 0
      h2o/libs/heap/src/tcache.rs
  80. 14 0
      h2o/libs/paging/src/addr.rs
  81. 1 1
      h2o/libs/pmm/src/buddy.rs
  82. 1 0
      h2o/libs/syscall/Cargo.toml
  83. 14 15
      h2o/libs/syscall/build.rs
  84. 1 1
      h2o/libs/syscall/cbindgen.toml
  85. 0 8
      h2o/libs/syscall/rxx.rs.in
  86. 66 0
      h2o/libs/syscall/src/call.rs
  87. 1 1
      h2o/libs/syscall/src/call/hdl.rs
  88. 15 0
      h2o/libs/syscall/src/call/raw.rs
  89. 49 51
      h2o/libs/syscall/src/error.rs
  90. 4 4
      h2o/libs/syscall/src/error/c_ty.rs
  91. 4 3
      h2o/libs/syscall/src/ipc.rs
  92. 64 3
      h2o/libs/syscall/src/lib.rs
  93. 0 4
      h2o/libs/syscall/src/mem.rs
  94. 1 3
      h2o/libs/syscall/src/stub.rs
  95. 3 0
      h2o/libs/syscall/syscall.ld
  96. 1 1
      h2o/libs/targs/src/lib.rs
  97. 1 1
      h2o/tinit/.cargo/x86_64-h2o-tinit.json
  98. 1 1
      h2o/tinit/build.rs
  99. 4 9
      h2o/tinit/src/load.rs
  100. 2 0
      h2o/tinit/src/test.rs

+ 3 - 0
.cargo/config.toml

@@ -1,2 +1,5 @@
 [alias]
 xtask = "run --release --package xtask --"
+
+[build]
+rustflags = ["-Zshare-generics=n"]
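
Note: -Zshare-generics is a nightly rustc flag controlling whether monomorphized generic code is shared across crates. Setting it to "n" forces every crate to instantiate its own generics; a plausible motive is the dynamically linked x86_64-pc-oceanic target below, where shared instantiations would have to cross dylib boundaries, but the commit itself doesn't say.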

+ 2 - 2
.cargo/x86_64-pc-oceanic.json

@@ -1,12 +1,12 @@
 {
     "llvm-target": "x86_64-pc-oceanic",
-    "data-layout": "e-m:e-i64:64-f80:128-n8:16:32:64-S128",
+    "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     "dynamic-linking": true,
     "arch": "x86_64",
     "target-endian": "little",
     "target-pointer-width": "64",
     "target-c-int-width": "32",
-    "os": "h2o",
+    "os": "oceanic",
     "has-rpath": true,
     "has-thread-local": true,
     "executables": true,

+ 1 - 1
h2o/boot/Cargo.toml

@@ -12,7 +12,7 @@ bitop_ex = {path = "../libs/bitop_ex"}
 minfo = {path = "../libs/minfo"}
 paging = {path = "../libs/paging"}
 # External crates
-goblin = {version = "0.4", default-features = false, features = ["elf32", "elf64", "endian_fd"]}
+goblin = {version = "0.5", default-features = false, features = ["elf32", "elf64", "endian_fd"]}
 log = "0.4"
 raw-cpuid = "9.0"
 static_assertions = "1.1"

+ 2 - 9
h2o/boot/src/file/elf.rs

@@ -1,4 +1,4 @@
-use core::{alloc::Layout, arch::asm, mem::size_of};
+use core::{alloc::Layout, mem::size_of};
 
 use bitop_ex::BitOpEx;
 use goblin::elf::*;
@@ -94,14 +94,7 @@ fn load_pls(syst: &SystemTable<Boot>, size: usize, align: usize) -> Layout {
         // and therefore should be modified in the kernel.
         self_ptr.write(self_ptr as usize);
 
-        const FS_BASE: u64 = 0xC0000100;
-        asm!(
-              "wrmsr",
-              in("ecx") FS_BASE,
-              in("eax") self_ptr,
-              in("edx") self_ptr as u64 >> 32,
-              options(nostack)
-        );
+        archop::reg::write_fs(self_ptr as u64);
     }
 
     layout

+ 5 - 1
h2o/boot/src/main.rs

@@ -10,7 +10,6 @@
 #![no_main]
 #![feature(abi_efiapi)]
 #![feature(alloc_error_handler)]
-#![feature(bool_to_option)]
 #![feature(box_syntax)]
 #![feature(nonnull_slice_from_raw_parts)]
 #![feature(panic_info_message)]
@@ -85,6 +84,11 @@ unsafe fn init_services(img: Handle, syst: &SystemTable<Boot>) {
     file::init(img, syst);
 
     mem::init(syst);
+    {
+        use archop::reg::cr4;
+        cr4::set(cr4::FSGSBASE);
+        cr4::unset(cr4::TSD);
+    }
 }
 
 #[entry]
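
Note: the RDFSBASE/WRFSBASE family raises #UD until CR4.FSGSBASE (bit 16) is set, so flipping it here in the bootloader is what legalizes archop::reg::write_fs above and the rdfsbase/wrfsbase usage in stub.asm below; clearing CR4.TSD (bit 2) additionally allows RDTSC from user mode. A minimal sketch of what the cr4 helpers could look like (the actual reg.rs added by this commit, + 36 - 0 in the file list, may differ):

    pub mod cr4 {
        use core::arch::asm;

        pub const TSD: u64 = 1 << 2; // when set, RDTSC faults outside CPL 0
        pub const FSGSBASE: u64 = 1 << 16; // enables RD/WR{FS,GS}BASE

        /// # Safety
        /// Must run at CPL 0.
        pub unsafe fn set(flags: u64) {
            let mut val: u64;
            asm!("mov {}, cr4", out(reg) val, options(nomem, nostack));
            asm!("mov cr4, {}", in(reg) val | flags, options(nomem, nostack));
        }

        /// # Safety
        /// Must run at CPL 0.
        pub unsafe fn unset(flags: u64) {
            let mut val: u64;
            asm!("mov {}, cr4", out(reg) val, options(nomem, nostack));
            asm!("mov cr4, {}", in(reg) val & !flags, options(nomem, nostack));
        }
    }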

+ 1 - 1
h2o/kernel/.cargo/x86_64-h2o-kernel.json

@@ -1,6 +1,6 @@
 {
     "llvm-target": "x86_64-unknown-none",
-    "data-layout": "e-m:e-i64:64-f80:128-n8:16:32:64-S128",
+    "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     "arch": "x86_64",
     "target-endian": "little",
     "target-pointer-width": "64",

+ 1 - 1
h2o/kernel/Cargo.toml

@@ -35,7 +35,7 @@ crossbeam-queue = {version = "0.3", default-features = false, features = ["alloc
 crossbeam-utils = {version = "0.8", default-features = false}
 cty = "0.2"
 derive_builder = {version = "0.10", default-features = false}
-goblin = {version = "0.4", default-features = false, features = ["elf32", "elf64", "endian_fd"]}
+goblin = {version = "0.5", default-features = false, features = ["elf32", "elf64", "endian_fd"]}
 log = "0.4"
 modular-bitfield = "0.11"
 paste = "1.0"

+ 1 - 1
h2o/kernel/build.rs

@@ -7,7 +7,7 @@ fn asm_build(input: &str, output: &str, flags: &[&str]) -> Result<(), Box<dyn Er
 
     println!("cargo:rerun-if-changed={}", input);
     let mut cmd = Command::new("nasm");
-    cmd.args(&[input, "-o", output])
+    cmd.args([input, "-o", output])
         .args(flags)
         .status()?
         .exit_ok()?;

+ 15 - 13
h2o/kernel/entry/x86_64/stub.asm

@@ -4,7 +4,6 @@ USR_CODE_X86 equ 0x18
 USR_DATA_X64 equ 0x20
 USR_CODE_X64 equ 0x28 + 3
 
-FS_BASE           equ 0xc0000100
 GS_BASE           equ 0xc0000101
 KERNEL_GS_BASE    equ 0xc0000102
 
@@ -110,11 +109,15 @@ endstruc
       push  r14
       push  r15
 
-      push_xs FS_BASE
+      push  rcx
+      rdfsbase  rcx
+      xchg  [rsp], rcx
 %if %2 == 1
       push_xs KERNEL_GS_BASE
 %else
-      push_xs GS_BASE
+      push  rcx
+      rdgsbase  rcx
+      xchg  [rsp], rcx
 %endif
 %if %1 == 1
       push  rcx
@@ -147,9 +150,14 @@ endstruc
 %if %1 == 1
       pop_xs KERNEL_GS_BASE
 %else
-      pop_xs GS_BASE
+      xchg  rcx, [rsp]
+      wrgsbase  rcx
+      pop   rcx
 %endif
-      pop_xs FS_BASE
+      xchg  rcx, [rsp]
+      wrfsbase  rcx
+      pop   rcx
+
       pop   r15
       pop   r14
       pop   r13
@@ -339,11 +347,8 @@ intr_entry:
       push_regs   1, 1; The routine has a return address, so we must preserve it.
       lea   rbp, [rsp + 8 + 1]
 
-      mov   rcx, FS_BASE
       mov   rax, [gs:(KernelGs.kernel_fs)]
-      mov   rdx, rax
-      shr   rdx, 32
-      wrmsr
+      wrfsbase rax
 
       align_call  save_regs, r12
 
@@ -398,11 +403,8 @@ rout_syscall:
       push_regs   0, 1
       lea   rbp, [rsp + 8 + 1]
 
-      mov   rcx, FS_BASE
       mov   rax, [gs:(KernelGs.kernel_fs)]
-      mov   rdx, rax
-      shr   rdx, 32
-      wrmsr
+      wrfsbase rax
 
       mov   rcx, GS_BASE
       rdmsr
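
Note: this hunk is the payoff of enabling FSGSBASE: rdfsbase/wrfsbase move the full 64-bit base through one register, so the edx:eax split around wrmsr (mov rdx, rax; shr rdx, 32) disappears and the costlier serializing MSR accesses are avoided on every interrupt and syscall entry. KERNEL_GS_BASE keeps its push_xs/pop_xs path because that MSR has no dedicated base instruction.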

+ 1 - 1
h2o/kernel/src/cpu.rs

@@ -2,7 +2,7 @@ pub mod intr;
 pub mod time;
 
 // `Lazy` stands for Local (non-atomic) AZY, while `Azy` stands for Atomic laZY.
-pub use core::lazy::Lazy;
+pub use core::cell::LazyCell as Lazy;
 
 use bitvec::prelude::*;
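
Note: nightly moved the lazy types out of core::lazy, renaming core::lazy::Lazy to core::cell::LazyCell, so this alias keeps the rest of the kernel compiling unchanged. Usage is identical (sketch):

    use core::cell::LazyCell;

    let lazy = LazyCell::new(|| 42u64); // initializer not yet run
    assert_eq!(*lazy, 42); // first dereference runs it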
 

+ 16 - 14
h2o/kernel/src/cpu/intr/imp.rs

@@ -46,14 +46,14 @@ impl Interrupt {
     #[inline]
     pub fn new(res: &Resource<u32>, gsi: u32, level_triggered: bool) -> sv_call::Result<Arc<Self>> {
         if res.magic_eq(super::gsi_resource()) && res.range().contains(&gsi) {
-            Ok(Arc::new(Interrupt {
+            Ok(Arc::try_new(Interrupt {
                 gsi,
                 last_time: Mutex::new(None),
                 level_triggered,
                 event_data: EventData::new(0),
-            }))
+            })?)
         } else {
-            Err(sv_call::Error::EPERM)
+            Err(sv_call::EPERM)
         }
     }
 
@@ -68,7 +68,7 @@ impl Interrupt {
     }
 }
 
-unsafe impl DefaultFeature for Arc<Interrupt> {
+unsafe impl DefaultFeature for Interrupt {
     fn default_features() -> Feature {
         Feature::SEND | Feature::WAIT
     }
@@ -111,7 +111,7 @@ mod syscall {
 
         let intr = SCHED.with_current(|cur| {
             let handles = cur.space().handles();
-            let res = handles.get::<Arc<Resource<u32>>>(res)?;
+            let res = handles.get::<Resource<u32>>(res)?;
             Interrupt::new(&res, gsi, level_triggered)
         })?;
 
@@ -123,7 +123,7 @@ mod syscall {
         MANAGER.mask(gsi, false)?;
 
         let event = Arc::downgrade(&intr) as _;
-        SCHED.with_current(|cur| unsafe { cur.space().handles().insert(intr, Some(event)) })
+        SCHED.with_current(|cur| unsafe { cur.space().handles().insert_raw(intr, Some(event)) })
     }
 
     #[syscall]
@@ -132,18 +132,20 @@ mod syscall {
         last_time.check()?;
 
         let pree = PREEMPT.lock();
-        let intr = unsafe { (*SCHED.current()).as_ref().ok_or(Error::ESRCH)? }
+        let intr = unsafe { (*SCHED.current()).as_ref().ok_or(ESRCH)? }
             .space()
             .handles()
-            .get::<Arc<Interrupt>>(hdl)?;
+            .get::<Interrupt>(hdl)?;
         if !intr.features().contains(Feature::WAIT) {
-            return Err(Error::EPERM);
+            return Err(EPERM);
         }
 
-        let blocker = crate::sched::Blocker::new(&(Arc::clone(&intr) as _), false, SIG_GENERIC);
-        blocker.wait(pree, time::from_us(timeout_us))?;
-        if !blocker.detach().0 {
-            return Err(Error::ETIME);
+        if timeout_us > 0 {
+            let blocker = crate::sched::Blocker::new(&(Arc::clone(&intr) as _), false, SIG_GENERIC);
+            blocker.wait(Some(pree), time::from_us(timeout_us))?;
+            if !blocker.detach().0 {
+                return Err(ETIME);
+            }
         }
 
         unsafe { last_time.write(intr.last_time().unwrap().raw()) }?;
@@ -154,7 +156,7 @@ mod syscall {
     fn intr_drop(hdl: Handle) -> Result {
         hdl.check_null()?;
         SCHED.with_current(|cur| {
-            let intr = cur.space().handles().remove::<Arc<Interrupt>>(hdl)?;
+            let intr = cur.space().handles().remove::<Interrupt>(hdl)?;
             intr.cancel();
             MANAGER.register(intr.gsi, None)?;
             Ok(())
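
Note: two patterns recur in this file and below. Arc::new becomes Arc::try_new, so kernel out-of-memory surfaces as an Err through ? instead of aborting (this presumes sv_call::Error converts from the allocation error type), and the error constants (EPERM, ESRCH, ETIME and the rest) lose their Error:: prefix, apparently re-exported at the crate root by the + 64 - 3 change to h2o/libs/syscall/src/lib.rs shown in the file list. The handle-map type parameters also shed their Arc wrappers (get::<Interrupt>), matching the DefaultFeature impl moving from Arc<Interrupt> to Interrupt.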

+ 17 - 3
h2o/kernel/src/cpu/time.rs

@@ -6,9 +6,7 @@ use core::{
     time::Duration,
 };
 
-pub use self::timer::{
-    tick as timer_tick, Callback as TimerCallback, CallbackArg, Timer, Type as TimerType,
-};
+pub use self::timer::{tick as timer_tick, Timer};
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
 #[repr(transparent)]
@@ -42,6 +40,22 @@ impl Instant {
     pub const unsafe fn from_raw(data: u128) -> Self {
         Instant(data)
     }
+
+    #[inline]
+    pub fn duration_since(&self, earlier: Instant) -> Duration {
+        *self - earlier
+    }
+
+    #[inline]
+    pub fn checked_duration_since(&self, earlier: Instant) -> Option<Duration> {
+        (self >= &earlier).then(|| *self - earlier)
+    }
+
+    #[inline]
+    pub fn saturating_duration_since(&self, earlier: Instant) -> Duration {
+        self.checked_duration_since(earlier)
+            .unwrap_or(Duration::ZERO)
+    }
 }
 
 impl Add<Duration> for Instant {
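
Note: the three new methods mirror std::time::Instant's API, so kernel code can rely on the familiar contract. A usage sketch:

    let start = Instant::now();
    // ... do work ...
    let elapsed = Instant::now().duration_since(start);
    // checked_duration_since returns None when `earlier` is in fact later;
    // saturating_duration_since clamps that case to Duration::ZERO.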

+ 6 - 8
h2o/kernel/src/cpu/time/chip.rs

@@ -1,15 +1,14 @@
+use core::sync::atomic::Ordering::Release;
+
 use archop::Azy;
 
 use super::Instant;
 use crate::{cpu::arch::tsc::TSC_CLOCK, dev::hpet::HPET_CLOCK};
 
 pub static CLOCK: Azy<&'static dyn ClockChip> = Azy::new(|| {
-    let ret: &'static dyn ClockChip = match *TSC_CLOCK {
-        Some(ref tsc) => tsc,
-        None => HPET_CLOCK.as_ref().expect("No available clock"),
-    };
-    *crate::log::HAS_TIME.write() = true;
-    ret
+    let ret: &crate::cpu::arch::tsc::TscClock = &TSC_CLOCK;
+    crate::log::HAS_TIME.store(true, Release);
+    ret as _
 });
 
 static CALIB_CLOCK: Azy<&'static dyn CalibrationClock> =
@@ -41,7 +40,7 @@ pub fn calibrate(
     let tries = 3;
     let iter_ms = [10u64, 20];
     let mut best = [u64::MAX, u64::MAX];
-    for (i, &duration) in iter_ms.iter().enumerate() {
+    for (best, &duration) in best.iter_mut().zip(iter_ms.iter()) {
         for _ in 0..tries {
             unsafe {
                 CALIB_CLOCK.prepare(duration);
@@ -49,7 +48,6 @@ pub fn calibrate(
 
                 let start = get_start();
                 CALIB_CLOCK.cycle(duration);
-                let best = best.get_unchecked_mut(i);
                 *best = (*best).min(get_end() - start);
 
                 CALIB_CLOCK.cleanup();
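
Note: zipping best.iter_mut() with iter_ms replaces the manual index and, with it, the unsafe best.get_unchecked_mut(i); each duration still keeps the minimum of three tries. The selection logic above also changes meaning: TSC_CLOCK is no longer an Option (see the tsc.rs diff below), so the HPET fallback is gone and the TSC always backs the system clock.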

+ 199 - 107
h2o/kernel/src/cpu/time/timer.rs

@@ -1,120 +1,144 @@
-use alloc::collections::LinkedList;
+use alloc::{collections::BinaryHeap, sync::Weak};
 use core::{
-    cell::UnsafeCell,
-    ptr::NonNull,
+    cell::{LazyCell, RefCell},
+    cmp,
     sync::atomic::{AtomicBool, Ordering::*},
     time::Duration,
 };
 
+use spin::RwLock;
+use sv_call::ipc::SIG_TIMER;
+
 use super::Instant;
-use crate::sched::{ipc::Arsc, task, PREEMPT};
+use crate::sched::{ipc::Arsc, task, Event, PREEMPT, SCHED};
 
 #[thread_local]
-static TIMER_QUEUE: TimerQueue = TimerQueue::new();
+static TIMER_QUEUE: LazyCell<TimerQueue> = LazyCell::new(TimerQueue::new);
+
+#[derive(Debug, Clone)]
+struct TimerEntry(Arsc<Timer>);
+
+impl PartialEq for TimerEntry {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.0.deadline == other.0.deadline
+    }
+}
+
+impl Eq for TimerEntry {}
+
+impl PartialOrd for TimerEntry {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        self.0
+            .deadline
+            .partial_cmp(&other.0.deadline)
+            .map(|c| c.reverse())
+    }
+}
+
+impl Ord for TimerEntry {
+    #[inline]
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        self.0.deadline.cmp(&other.0.deadline).reverse()
+    }
+}
 
 struct TimerQueue {
-    inner: UnsafeCell<LinkedList<Arsc<Timer>>>,
+    inner: RefCell<BinaryHeap<TimerEntry>>,
 }
 
 impl TimerQueue {
-    const fn new() -> Self {
+    fn new() -> Self {
         TimerQueue {
-            inner: UnsafeCell::new(LinkedList::new()),
+            inner: RefCell::new(BinaryHeap::new()),
         }
     }
 
-    fn push(&self, timer: Arsc<Timer>) {
-        let ddl = timer.deadline;
-        PREEMPT.scope(|| {
-            let queue = unsafe { &mut *self.inner.get() };
-            let mut cur = queue.cursor_front_mut();
-            loop {
-                match cur.current() {
-                    Some(t) if t.deadline >= ddl => {
-                        cur.insert_before(timer);
-                        break;
-                    }
-                    None => {
-                        cur.insert_before(timer);
-                        break;
-                    }
-                    Some(_) => cur.move_next(),
-                }
-            }
-        })
+    #[inline]
+    #[track_caller]
+    fn with_inner<F, R>(&self, func: F) -> R
+    where
+        F: FnOnce(&mut BinaryHeap<TimerEntry>) -> R,
+    {
+        PREEMPT.scope(|| func(&mut self.inner.borrow_mut()))
     }
 
-    fn pop(&self, timer: &Arsc<Timer>) -> bool {
-        PREEMPT.scope(|| {
-            let queue = unsafe { &mut *self.inner.get() };
-            let mut cur = queue.cursor_front_mut();
-            loop {
-                match cur.current() {
-                    Some(t) if Arsc::ptr_eq(t, timer) => {
-                        cur.remove_current();
-                        break true;
-                    }
-                    Some(_) => cur.move_next(),
-                    None => break false,
-                }
-            }
+    #[inline]
+    fn try_with_inner<F, R>(&self, func: F) -> Option<R>
+    where
+        F: FnOnce(&mut BinaryHeap<TimerEntry>) -> R,
+    {
+        PREEMPT.scope(|| self.inner.try_borrow_mut().ok().map(|mut r| func(&mut r)))
+    }
+
+    #[inline]
+    fn push(&self, timer: Arsc<Timer>) {
+        self.with_inner(|queue| {
+            queue.push(TimerEntry(timer));
         })
     }
 }
 
-pub type CallbackArg = NonNull<task::Blocked>;
+#[derive(Debug)]
+pub enum Callback {
+    Task(task::Blocked),
+    Event(Weak<dyn Event>),
+}
 
-type CallbackFn = fn(Arsc<Timer>, Instant, CallbackArg);
+impl From<task::Blocked> for Callback {
+    fn from(task: task::Blocked) -> Self {
+        Self::Task(task)
+    }
+}
 
-#[derive(Debug)]
-pub struct Callback {
-    func: CallbackFn,
-    arg: CallbackArg,
-    fired: AtomicBool,
+impl From<Weak<dyn Event>> for Callback {
+    fn from(event: Weak<dyn Event>) -> Self {
+        Self::Event(event)
+    }
 }
 
 impl Callback {
-    pub fn new(func: CallbackFn, arg: CallbackArg) -> Self {
-        Callback {
-            fired: AtomicBool::new(false),
-            func,
-            arg,
+    fn call(self, timer: &Timer) {
+        timer.fired.store(true, Release);
+        match self {
+            Callback::Task(task) => SCHED.unblock(task, true),
+            Callback::Event(event) => {
+                if let Some(event) = event.upgrade() {
+                    event.notify(0, SIG_TIMER)
+                }
+            }
         }
     }
 
-    pub fn call(&self, timer: Arsc<Timer>, cur_time: Instant) {
-        (self.func)(timer, cur_time, self.arg);
-        self.fired.store(true, Release);
+    fn cancel(self, preempt: bool) {
+        match self {
+            Callback::Task(task) => SCHED.unblock(task, preempt),
+            Callback::Event(event) => {
+                if let Some(event) = event.upgrade() {
+                    event.cancel()
+                }
+            }
+        }
     }
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum Type {
-    Oneshot,
-    // Periodic,
-}
-
 #[derive(Debug)]
 pub struct Timer {
-    ty: Type,
-    callback: Callback,
-    duration: Duration,
+    callback: RwLock<Option<Callback>>,
     deadline: Instant,
-    cancel: AtomicBool,
+    fired: AtomicBool,
 }
 
 impl Timer {
-    pub fn activate(
-        ty: Type,
+    pub fn activate<C: Into<Callback>>(
         duration: Duration,
-        callback: Callback,
+        callback: C,
     ) -> sv_call::Result<Arsc<Self>> {
         let ret = Arsc::try_new(Timer {
-            ty,
-            callback,
-            duration,
+            callback: RwLock::new(Some(callback.into())),
             deadline: Instant::now() + duration,
-            cancel: AtomicBool::new(false),
+            fired: AtomicBool::new(false),
         })?;
         if duration < Duration::MAX {
             TIMER_QUEUE.push(Arsc::clone(&ret));
@@ -122,53 +146,121 @@ impl Timer {
         Ok(ret)
     }
 
-    #[inline]
-    pub fn ty(&self) -> Type {
-        self.ty
+    pub fn cancel(self: &Arsc<Self>, preempt: bool) -> bool {
+        match PREEMPT.scope(|| self.callback.write().take()) {
+            Some(callback) => {
+                callback.cancel(preempt);
+                true
+            }
+            None => false,
+        }
     }
 
-    #[inline]
-    pub fn duration(&self) -> Duration {
-        self.duration
+    fn fire(&self) {
+        if let Some(callback) = PREEMPT.scope(|| self.callback.write().take()) {
+            callback.call(self);
+        }
     }
 
-    pub fn cancel(self: &Arsc<Self>) -> bool {
-        let ret = self.cancel.swap(true, AcqRel);
-        TIMER_QUEUE.pop(self);
-        ret
+    pub fn is_fired(&self) -> bool {
+        self.fired.load(Acquire)
     }
+}
 
-    pub fn is_canceled(&self) -> bool {
-        self.cancel.load(Acquire)
+pub unsafe fn tick() {
+    loop {
+        let now = Instant::now();
+        let timer = TIMER_QUEUE.try_with_inner(|queue| loop {
+            match queue.peek() {
+                Some(TimerEntry(timer))
+                    if timer.callback.try_read().map_or(false, |r| r.is_none()) =>
+                {
+                    queue.pop();
+                }
+                Some(TimerEntry(timer)) if timer.deadline <= now => {
+                    break queue.pop();
+                }
+                _ => break None,
+            }
+        });
+        match timer {
+            Some(Some(TimerEntry(timer))) => timer.fire(),
+            _ => break,
+        }
     }
+}
 
-    pub fn is_fired(&self) -> bool {
-        self.callback.fired.load(Acquire)
+mod syscall {
+    use alloc::sync::{Arc, Weak};
+
+    use spin::Mutex;
+    use sv_call::*;
+
+    use super::Timer;
+    use crate::{
+        cpu::time,
+        sched::{task::hdl::DefaultFeature, Arsc, Event, EventData, SCHED},
+    };
+
+    #[derive(Debug, Default)]
+    struct TimerEvent {
+        event_data: EventData,
+        timer: Mutex<Option<Arsc<Timer>>>,
     }
 
-    pub fn callback_arg(&self) -> CallbackArg {
-        self.callback.arg
+    unsafe impl Send for TimerEvent {}
+    unsafe impl Sync for TimerEvent {}
+
+    impl Event for TimerEvent {
+        fn event_data(&self) -> &EventData {
+            &self.event_data
+        }
     }
-}
 
-pub unsafe fn tick() {
-    let now = Instant::now();
-    PREEMPT.scope(|| {
-        let queue = unsafe { &mut *TIMER_QUEUE.inner.get() };
-        let mut cur = queue.cursor_front_mut();
-        loop {
-            match cur.current() {
-                Some(t) if t.is_canceled() => {
-                    cur.remove_current();
+    impl Drop for TimerEvent {
+        fn drop(&mut self) {
+            match self.timer.get_mut().take() {
+                Some(timer) => {
+                    timer.cancel(false);
                 }
-                Some(t) if t.deadline <= now => {
-                    let timer = cur.remove_current().unwrap();
-                    if !timer.cancel() {
-                        timer.callback.call(Arsc::clone(&timer), now);
-                    }
-                }
-                _ => break,
+                None => self.cancel(),
             }
         }
-    })
+    }
+
+    unsafe impl DefaultFeature for TimerEvent {
+        fn default_features() -> sv_call::Feature {
+            Feature::SEND | Feature::SYNC | Feature::WAIT | Feature::WRITE
+        }
+    }
+
+    #[syscall]
+    fn timer_new() -> Result<Handle> {
+        let event = Arc::try_new(TimerEvent::default())?;
+        let e = Arc::downgrade(&event);
+        SCHED.with_current(|cur| cur.space().handles().insert_raw(event, Some(e)))
+    }
+
+    #[syscall]
+    fn timer_set(handle: Handle, duration_us: u64) -> Result {
+        SCHED.with_current(|cur| {
+            let event = cur.space().handles().get::<TimerEvent>(handle)?;
+
+            if !event.features().contains(Feature::WRITE) {
+                return Err(EPERM);
+            }
+
+            let mut timer = event.timer.lock();
+            if let Some(timer) = timer.take() {
+                timer.cancel(false);
+            }
+            if duration_us > 0 {
+                *timer = Some(Timer::activate(
+                    time::from_us(duration_us),
+                    Weak::clone(event.event()),
+                )?);
+            }
+            Ok(())
+        })
+    }
 }
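
Note: the hand-rolled sorted LinkedList becomes a BinaryHeap, dropping insertion from O(n) to O(log n), and cancellation no longer walks the queue: cancel() just takes the callback out of its RwLock, and tick() lazily pops entries whose callback is already gone. tick() also uses try_borrow_mut (via try_with_inner), presumably so a timer interrupt arriving while the queue is already borrowed backs off instead of panicking. BinaryHeap is a max-heap, so TimerEntry reverses the deadline comparison to make peek() yield the earliest deadline; the same trick in miniature, using the standard Reverse wrapper:

    use std::cmp::Reverse;
    use std::collections::BinaryHeap; // alloc::collections::BinaryHeap in the kernel

    let mut queue = BinaryHeap::new();
    for deadline in [30u64, 10, 20] {
        queue.push(Reverse(deadline)); // hypothetical deadlines, in ticks
    }
    assert_eq!(queue.pop(), Some(Reverse(10))); // earliest first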

+ 28 - 3
h2o/kernel/src/cpu/x86_64/apic.rs

@@ -2,7 +2,10 @@ pub mod ipi;
 pub mod timer;
 
 use alloc::collections::BTreeMap;
-use core::arch::asm;
+use core::{
+    arch::asm,
+    ops::{BitOr, BitOrAssign},
+};
 
 use archop::{msr, Azy};
 use modular_bitfield::prelude::*;
@@ -70,6 +73,28 @@ pub enum TriggerMode {
     Level = 1,
 }
 
+impl BitOr for TriggerMode {
+    type Output = Self;
+
+    #[inline]
+    fn bitor(self, rhs: TriggerMode) -> Self::Output {
+        if matches!((self as u64) | (rhs as u64), 0) {
+            TriggerMode::Edge
+        } else {
+            TriggerMode::Level
+        }
+    }
+}
+
+impl BitOrAssign for TriggerMode {
+    #[inline]
+    fn bitor_assign(&mut self, rhs: Self) {
+        if let (Self::Edge, Self::Level) = (*self, rhs) {
+            *self = Self::Level
+        }
+    }
+}
+
 #[derive(Clone, Copy)]
 #[bitfield]
 struct LocalEntry {
@@ -210,7 +235,7 @@ impl Lapic {
 
         // Get the LAPIC ID.
         let mut id = unsafe { Self::read_reg_32(&mut ty, msr::X2APICID) };
-        if let LapicType::X2 = &ty {
+        if let LapicType::X1(_) = &ty {
             id >>= 24;
         }
         LAPIC_ID.write().insert(unsafe { crate::cpu::id() }, id);
@@ -274,7 +299,7 @@ impl Lapic {
     ///
     /// The caller must ensure that IDT is initialized before LAPIC Timer's
     /// activation and that `div` is within the range [`timer::DIV`].
-    pub unsafe fn activate_timer(&mut self, mode: timer::TimerMode, div: u8, init_value: u64) {
+    pub unsafe fn activate_timer(&mut self, mode: timer::TimerMode, div: u8, init_value: u32) {
         timer::activate(self, mode, div, init_value);
     }
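
Note: the LapicType condition is a real bug fix: in xAPIC (X1) mode the APIC ID register holds the ID in bits 31:24, so the >> 24 belongs to the X1 branch, while x2APIC exposes the full 32-bit ID; the old code shifted in exactly the wrong case. The new BitOr/BitOrAssign impls encode "Level wins": the result is Edge only when both operands are Edge, letting callers merge trigger modes from several sources.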
 

+ 7 - 7
h2o/kernel/src/cpu/x86_64/apic/ipi.rs

@@ -206,15 +206,15 @@ pub unsafe fn start_cpus(aps: &[acpi::platform::Processor]) -> usize {
 /// This function must be called only by the scheduler of the current CPU and
 /// the caller must ensure that `cpu` is valid.
 pub unsafe fn task_migrate(cpu: usize) {
-    lapic(|lapic| {
-        match PREEMPT.scope(|| super::LAPIC_ID.read().get(&cpu).copied()) {
-            Some(id) => lapic.send_ipi(
+    match PREEMPT.scope(|| super::LAPIC_ID.read().get(&cpu).copied()) {
+        Some(id) => lapic(|lapic| {
+            lapic.send_ipi(
                 intr::def::ApicVec::IpiTaskMigrate as u8,
                 DelivMode::Fixed,
                 Shorthand::None,
                 id,
-            ),
-            None => log::warn!("CPU #{} not present", cpu),
-        };
-    });
+            )
+        }),
+        None => log::warn!("CPU #{} not present", cpu),
+    };
 }

+ 3 - 3
h2o/kernel/src/cpu/x86_64/apic/timer.rs

@@ -23,7 +23,7 @@ pub const DIV: Range<u8> = 0..8;
 ///
 /// The caller must ensure that IDT is initialized before LAPIC Timer's
 /// activation and that `div` is within the range [`DIV`].
-pub unsafe fn activate(lapic: &mut super::Lapic, mode: TimerMode, div: u8, init_value: u64) {
+pub unsafe fn activate(lapic: &mut super::Lapic, mode: TimerMode, div: u8, init_value: u32) {
     /// # Safety
     ///
     /// The caller must ensure that `div` is within the range [`DIV`].
@@ -47,9 +47,9 @@ pub unsafe fn activate(lapic: &mut super::Lapic, mode: TimerMode, div: u8, init_
         Lapic::write_reg_32(&mut lapic.ty, msr::X2APIC_DIV_CONF, encdiv.into());
         Lapic::write_reg_32(&mut lapic.ty, msr::X2APIC_LVT_TIMER, timer_val.into());
         if matches!(mode, TimerMode::TscDeadline) {
-            msr::write(msr::TSC_DEADLINE, init_value);
+            msr::write(msr::TSC_DEADLINE, init_value.into());
         } else {
-            Lapic::write_reg_64(&mut lapic.ty, msr::X2APIC_INIT_COUNT, init_value);
+            Lapic::write_reg_32(&mut lapic.ty, msr::X2APIC_INIT_COUNT, init_value);
         }
     }
 }
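
Note: the u64 to u32 narrowing matches the hardware: the LAPIC initial-count register is 32 bits wide, so write_reg_64 was never right for it, while IA32_TSC_DEADLINE is a full 64-bit MSR, hence the lossless init_value.into() widening on that path.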

+ 6 - 13
h2o/kernel/src/cpu/x86_64/intr.rs

@@ -50,13 +50,10 @@ impl Manager {
 
         let in_use = ALLOC_VEC.contains(&entry.vec());
 
-        let self_apic_id = *LAPIC_ID
-            .read()
-            .get(&self.cpu)
-            .ok_or(sv_call::Error::EINVAL)?;
+        let self_apic_id = *LAPIC_ID.read().get(&self.cpu).ok_or(sv_call::EINVAL)?;
         let apic_id = entry.dest_id();
         if in_use && self_apic_id != apic_id {
-            return Err(sv_call::Error::EEXIST);
+            return Err(sv_call::EEXIST);
         }
 
         let vec = in_use.then_some(entry.vec());
@@ -67,16 +64,12 @@ impl Manager {
                 map.try_insert_with(
                     vec..(vec + 1),
                     || Ok::<_, sv_call::Error>(((), ())),
-                    sv_call::Error::EEXIST,
+                    sv_call::EEXIST,
                 )?;
                 vec
             } else {
-                map.allocate_with(
-                    1,
-                    |_| Ok::<_, sv_call::Error>(((), ())),
-                    sv_call::Error::ENOMEM,
-                )?
-                .0
+                map.allocate_with(1, |_| Ok::<_, sv_call::Error>(((), ())), sv_call::ENOMEM)?
+                    .0
             };
 
             *self.slots[vec as usize].lock() = Some(handler);
@@ -144,7 +137,7 @@ unsafe fn exception(frame_ptr: *mut Frame, vec: def::ExVec) {
                     });
                 }
                 // Kill the fucking task.
-                SCHED.exit_current(sv_call::Error::EFAULT.into_retval())
+                SCHED.exit_current(sv_call::EFAULT.into_retval())
             }
             // unreachable!()
         }

+ 2 - 6
h2o/kernel/src/cpu/x86_64/mod.rs

@@ -31,9 +31,6 @@ pub unsafe fn set_id(bsp: bool) -> usize {
     while !bsp && CPU_COUNT.load(Ordering::SeqCst) == 0 {
         core::hint::spin_loop();
     }
-    if !bsp {
-        crate::cpu::time::delay(core::time::Duration::from_micros(archop::rand::get() % 100));
-    }
     id
 }
 
@@ -42,8 +39,7 @@ pub unsafe fn set_id(bsp: bool) -> usize {
 /// This function is only called after [`set_id`].
 #[inline]
 pub unsafe fn id() -> usize {
-    use archop::msr;
-    msr::read(msr::TSC_AUX) as usize
+    archop::msr::rdtscp().1 as usize
 }
 
 #[inline]
@@ -80,7 +76,7 @@ pub static KERNEL_GS: Lazy<KernelGs> = Lazy::new(|| KernelGs {
     tss_rsp0: UnsafeCell::new(unsafe { seg::ndt::TSS.rsp0() }),
     syscall_user_stack: null_mut(),
     syscall_stack: unsafe { syscall::init() }.expect("Memory allocation failed"),
-    kernel_fs: LAddr::from(unsafe { archop::msr::read(archop::msr::FS_BASE) } as usize),
+    kernel_fs: LAddr::from(unsafe { archop::reg::read_fs() } as usize),
 });
 
 impl KernelGs {
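
Note: id() now comes from RDTSCP, which returns IA32_TSC_AUX (seeded with the CPU id, as the replaced msr::read(msr::TSC_AUX) shows) together with the timestamp in one instruction. A sketch of the wrapper, assuming archop::msr::rdtscp resembles the core intrinsic (the + 9 - 0 msr.rs change in the file list may differ):

    /// Returns (tsc, aux); `.1` is IA32_TSC_AUX, which H2O seeds with the CPU id.
    pub fn rdtscp() -> (u64, u32) {
        let mut aux = 0u32;
        let tsc = unsafe { core::arch::x86_64::__rdtscp(&mut aux) };
        (tsc, aux)
    }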

+ 6 - 6
h2o/kernel/src/cpu/x86_64/seg.rs

@@ -106,17 +106,17 @@ impl From<u16> for SegSelector {
 ///
 /// # Safety
 ///
-/// The caller must ensure the value stored in [`archop::msr::FS_BASE`] is a
-/// valid physical address.
+/// The caller must ensure the value stored in FS base is a valid physical
+/// address.
 pub unsafe fn reload_pls() {
     extern "C" {
         static TDATA_START: u8;
         static TBSS_START: u8;
     }
-    use archop::msr;
+    use archop::reg;
     let pls_size = crate::kargs().pls_layout.map_or(0, |layout| layout.size());
 
-    let val = msr::read(msr::FS_BASE) as usize;
+    let val = reg::read_fs() as usize;
     if val != 0 {
         let ptr = PAddr::new(val).to_laddr(minfo::ID_OFFSET).cast::<usize>();
         let base = ptr.cast::<u8>().sub(pls_size);
@@ -125,7 +125,7 @@ pub unsafe fn reload_pls() {
         base.add(size).write_bytes(0, pls_size - size);
         ptr.write(ptr as usize);
 
-        msr::write(msr::FS_BASE, ptr as u64);
+        reg::write_fs(ptr as u64);
     }
 
     test_pls();
@@ -140,7 +140,7 @@ pub fn alloc_pls() -> sv_call::Result<NonNull<u8>> {
 
     let pls_layout = match crate::kargs().pls_layout {
         Some(layout) => layout,
-        None => return Err(sv_call::Error::ENOENT),
+        None => return Err(sv_call::ENOENT),
     };
 
     let base = Global

+ 3 - 6
h2o/kernel/src/cpu/x86_64/syscall.rs

@@ -2,7 +2,6 @@ use core::mem::size_of;
 
 use archop::{msr, reg};
 use paging::LAddr;
-use sv_call::SerdeReg;
 
 use super::seg::ndt::{INTR_CODE, USR_CODE_X86};
 use crate::sched::task::ctx::arch::Frame;
@@ -32,16 +31,14 @@ pub unsafe fn init() -> sv_call::Result<LAddr> {
 
 #[no_mangle]
 unsafe extern "C" fn hdl_syscall(frame: *const Frame) {
-    let (num, args) = (*frame).syscall_args();
+    let syscall = (*frame).syscall_args();
 
     archop::resume_intr(None);
-    let res = crate::syscall::handler(num, &args);
+    let res = crate::syscall::handle(syscall);
     archop::pause_intr();
 
     let _ = crate::sched::SCHED.with_current(|cur| {
-        cur.kstack_mut()
-            .task_frame_mut()
-            .set_syscall_retval(res.encode());
+        cur.kstack_mut().task_frame_mut().set_syscall_retval(res);
         Ok(())
     });
 }

+ 21 - 24
h2o/kernel/src/cpu/x86_64/tsc.rs

@@ -6,12 +6,29 @@ use crate::cpu::time::{
     Instant,
 };
 
-pub static TSC_CLOCK: Azy<Option<TscClock>> = Azy::new(TscClock::new);
+pub static TSC_CLOCK: Azy<TscClock> = Azy::new(|| {
+    if CpuId::new()
+        .get_advanced_power_mgmt_info()
+        .map_or(true, |info| !info.has_invariant_tsc())
+    {
+        log::warn!("The TSC is not invariant. Ticks will be unreliable.");
+    }
+
+    let khz = crate::cpu::time::chip::calibrate(|| {}, rdtsc, rdtsc, || {});
+    let initial = rdtsc();
+    let (mul, sft) = factor_from_freq(khz);
+    log::info!("CPU Timestamp frequency: {} KHz", khz);
+    TscClock {
+        initial,
+        mul: mul as u128,
+        sft: sft as u128,
+    }
+});
 
 pub struct TscClock {
-    initial: u64,
-    mul: u128,
-    sft: u128,
+    pub initial: u64,
+    pub mul: u128,
+    pub sft: u128,
 }
 
 impl ClockChip for TscClock {
@@ -21,23 +38,3 @@ impl ClockChip for TscClock {
         unsafe { Instant::from_raw(ns) }
     }
 }
-
-impl TscClock {
-    pub fn new() -> Option<TscClock> {
-        let cpuid = CpuId::new();
-        cpuid
-            .get_advanced_power_mgmt_info()?
-            .has_invariant_tsc()
-            .then(|| {
-                let khz = crate::cpu::time::chip::calibrate(|| {}, rdtsc, rdtsc, || {});
-                let initial = rdtsc();
-                let (mul, sft) = factor_from_freq(khz);
-                log::info!("CPU Timestamp frequency: {} KHz", khz);
-                TscClock {
-                    initial,
-                    mul: mul as u128,
-                    sft: sft as u128,
-                }
-            })
-    }
-}
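
Note: TscClock::new() returning Option is replaced by an infallible constructor: a non-invariant TSC now merely logs a warning instead of disabling the clock, which is what allows chip.rs (above) to drop the HPET fallback, and the scaling fields (initial, mul, sft) become pub, presumably for readers elsewhere in the kernel.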

+ 5 - 5
h2o/kernel/src/dev.rs

@@ -48,9 +48,9 @@ mod syscall {
     #[syscall]
     fn pio_acq(res: Handle, base: u16, size: u16) -> Result {
         SCHED.with_current(|cur| {
-            let res = cur.space().handles().get::<Arc<Resource<u16>>>(res)?;
+            let res = cur.space().handles().get::<Resource<u16>>(res)?;
             if !{ res.features() }.contains(Feature::READ | Feature::WRITE) {
-                return Err(Error::EPERM);
+                return Err(EPERM);
             }
             if res.magic_eq(pio_resource())
                 && res.range().start <= base
@@ -63,7 +63,7 @@ mod syscall {
                 unsafe { KERNEL_GS.update_tss_io_bitmap(cur.io_bitmap_mut().as_deref()) };
                 Ok(())
             } else {
-                Err(Error::EPERM)
+                Err(EPERM)
             }
         })
     }
@@ -71,7 +71,7 @@ mod syscall {
     #[syscall]
     fn pio_rel(res: Handle, base: u16, size: u16) -> Result {
         SCHED.with_current(|cur| {
-            let res = cur.space().handles().get::<Arc<Resource<u16>>>(res)?;
+            let res = cur.space().handles().get::<Resource<u16>>(res)?;
             if res.magic_eq(pio_resource())
                 && res.range().start <= base
                 && base + size <= res.range().end
@@ -84,7 +84,7 @@ mod syscall {
                 unsafe { KERNEL_GS.update_tss_io_bitmap(cur.io_bitmap_mut().as_deref()) };
                 Ok(())
             } else {
-                Err(Error::EPERM)
+                Err(EPERM)
             }
         })
     }

+ 2 - 2
h2o/kernel/src/dev/acpi.rs

@@ -30,10 +30,10 @@ static PLATFORM_INFO: Azy<acpi::PlatformInfo> =
 
 #[inline]
 pub fn tables() -> &'static acpi::AcpiTables<Handler> {
-    &*TABLES
+    &TABLES
 }
 
 #[inline]
 pub fn platform_info() -> &'static acpi::PlatformInfo {
-    &*PLATFORM_INFO
+    &PLATFORM_INFO
 }

+ 6 - 7
h2o/kernel/src/dev/res.rs

@@ -71,14 +71,13 @@ impl<T: Ord + Copy> Drop for Resource<T> {
     }
 }
 
-unsafe impl<T: Ord + Copy + Send + Sync + Any> DefaultFeature for Arc<Resource<T>> {
+unsafe impl<T: Ord + Copy + Send + Sync + Any> DefaultFeature for Resource<T> {
     fn default_features() -> Feature {
         Feature::SEND | Feature::SYNC | Feature::READ | Feature::WRITE
     }
 }
 
 mod syscall {
-    use alloc::sync::Arc;
     use core::{any::Any, ops::Add};
 
     use sv_call::*;
@@ -91,12 +90,12 @@ mod syscall {
         size: T,
     ) -> Result<Handle> {
         SCHED.with_current(|cur| {
-            let res = cur.space().handles().get::<Arc<Resource<T>>>(hdl)?;
+            let res = cur.space().handles().get::<Resource<T>>(hdl)?;
             if !res.features().contains(Feature::SYNC) {
-                return Err(Error::EPERM);
+                return Err(EPERM);
             }
-            let sub = res.allocate(base..(base + size)).ok_or(Error::ENOMEM)?;
-            cur.space().handles().insert(sub, None)
+            let sub = res.allocate(base..(base + size)).ok_or(ENOMEM)?;
+            cur.space().handles().insert_raw(sub, None)
         })
     }
 
@@ -106,7 +105,7 @@ mod syscall {
             res::RES_MEM => res_alloc_typed(hdl, base, size),
             res::RES_PIO => res_alloc_typed(hdl, u16::try_from(base)?, u16::try_from(size)?),
             res::RES_GSI => res_alloc_typed(hdl, u32::try_from(base)?, u32::try_from(size)?),
-            _ => Err(Error::ETYPE),
+            _ => Err(ETYPE),
         }
     }
 }

+ 8 - 8
h2o/kernel/src/dev/x86_64/ioapic.rs

@@ -271,7 +271,7 @@ impl Ioapics {
             ioapic_data.try_insert_with(
                 ioapic.gsi.clone(),
                 || Ok::<_, sv_call::Error>((ioapic, ())),
-                sv_call::Error::EEXIST,
+                sv_call::EEXIST,
             )?;
         }
 
@@ -303,7 +303,7 @@ impl Ioapics {
     /// The caller must ensure that the entry corresponding to `gsi` is not used
     /// by others.
     pub unsafe fn config_dest(&mut self, gsi: u32, vec: u8, apic_id: u32) -> sv_call::Result {
-        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::Error::EINVAL)?;
+        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::EINVAL)?;
 
         let mut entry = IoapicEntry::from(chip.read_ioredtbl(pin));
         entry.set_vec(vec);
@@ -328,7 +328,7 @@ impl Ioapics {
         trig_mode: TriggerMode,
         polarity: Polarity,
     ) -> sv_call::Result {
-        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::Error::EINVAL)?;
+        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::EINVAL)?;
 
         let (t, p) = if let Some(intr_ovr) = intr_ovr().iter().find(|i| i.gsi == gsi) {
             (intr_ovr.trigger_mode, intr_ovr.polarity)
@@ -338,7 +338,7 @@ impl Ioapics {
             (trig_mode, polarity)
         };
         if t != trig_mode || p != polarity {
-            return Err(sv_call::Error::EPERM);
+            return Err(sv_call::EPERM);
         }
 
         let mut entry = IoapicEntry::from(chip.read_ioredtbl(pin));
@@ -351,7 +351,7 @@ impl Ioapics {
     }
 
     pub fn get_entry(&mut self, gsi: u32) -> sv_call::Result<IoapicEntry> {
-        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::Error::EINVAL)?;
+        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::EINVAL)?;
         Ok(IoapicEntry::from(unsafe { chip.read_ioredtbl(pin) }))
     }
 
@@ -360,7 +360,7 @@ impl Ioapics {
     /// The caller must ensure that the entry corresponding to `gsi` is not used
     /// anymore.
     pub unsafe fn deconfig(&mut self, gsi: u32) -> sv_call::Result {
-        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::Error::EINVAL)?;
+        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::EINVAL)?;
 
         let entry = IoapicEntry::new().with_mask(true);
         chip.write_ioredtbl(pin, entry.into());
@@ -373,7 +373,7 @@ impl Ioapics {
     /// The caller must ensure that the entry corresponding to `gsi` is not used
     /// by others.
     pub unsafe fn mask(&mut self, gsi: u32, masked: bool) -> sv_call::Result {
-        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::Error::EINVAL)?;
+        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::EINVAL)?;
 
         let mut entry = IoapicEntry::from(chip.read_ioredtbl(pin));
         entry.set_mask(masked);
@@ -389,7 +389,7 @@ impl Ioapics {
     pub unsafe fn eoi(&mut self, gsi: u32) -> sv_call::Result {
         lapic(|lapic| lapic.eoi());
 
-        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::Error::EINVAL)?;
+        let (chip, pin) = self.chip_mut_pin(gsi).ok_or(sv_call::EINVAL)?;
 
         let entry = IoapicEntry::from(chip.read_ioredtbl(pin));
         if chip.version >= 0x20 {

+ 0 - 3
h2o/kernel/src/dev/x86_64/lpic.rs

@@ -1,7 +1,5 @@
 #![allow(dead_code)]
 
-use core::mem;
-
 use archop::io::{Io, Port};
 
 const MASTER_PORT: u16 = 0x20;
@@ -72,5 +70,4 @@ pub unsafe fn init(masked: bool) {
     } else {
         lpic.init();
     }
-    mem::forget(lpic);
 }

+ 1 - 2
h2o/kernel/src/kmain.rs

@@ -8,7 +8,6 @@
 #![feature(alloc_error_handler)]
 #![feature(allocator_api)]
 #![feature(assert_matches)]
-#![feature(bool_to_option)]
 #![feature(box_into_inner)]
 #![feature(box_syntax)]
 #![feature(coerce_unsized)]
@@ -16,6 +15,7 @@
 #![feature(core_intrinsics)]
 #![feature(downcast_unchecked)]
 #![feature(drain_filter)]
+#![feature(int_log)]
 #![feature(layout_for_ptr)]
 #![feature(linked_list_cursors)]
 #![feature(map_first_last)]
@@ -25,7 +25,6 @@
 #![feature(once_cell)]
 #![feature(ptr_metadata)]
 #![feature(receiver_trait)]
-#![feature(result_into_ok_or_err)]
 #![feature(result_option_inspect)]
 #![feature(slice_ptr_get)]
 #![feature(slice_ptr_len)]

+ 11 - 8
h2o/kernel/src/log.rs

@@ -1,10 +1,13 @@
 pub mod flags;
 mod serial;
 
-use core::{fmt::*, mem::MaybeUninit};
+use core::{
+    fmt::*,
+    mem::MaybeUninit,
+    sync::atomic::{AtomicBool, Ordering::*},
+};
 
-use archop::IntrMutex;
-use spin::RwLock;
+use spin::Mutex;
 
 pub use self::serial::COM_LOG;
 use crate::{cpu::time::Instant, sched::PREEMPT};
@@ -21,17 +24,17 @@ impl core::fmt::Display for OptionU32Display {
     }
 }
 
-pub static HAS_TIME: RwLock<bool> = RwLock::new(false);
+pub static HAS_TIME: AtomicBool = AtomicBool::new(false);
 
 struct Logger {
-    output: IntrMutex<serial::Output>,
+    output: Mutex<serial::Output>,
     level: log::Level,
 }
 
 impl Logger {
     pub fn new(level: log::Level) -> Logger {
         Logger {
-            output: IntrMutex::new(unsafe { serial::Output::new(COM_LOG) }),
+            output: Mutex::new(unsafe { serial::Output::new(COM_LOG) }),
             level,
         }
     }
@@ -51,7 +54,7 @@ impl log::Log for Logger {
         let _pree = PREEMPT.lock();
         let mut os = self.output.lock();
         let cur_time = HAS_TIME
-            .read()
+            .load(Acquire)
             .then(Instant::now)
             .unwrap_or(unsafe { Instant::from_raw(0) });
 
@@ -108,7 +111,7 @@ mod syscall {
             core::str::from_utf8(unsafe { core::slice::from_raw_parts(buffer.as_ptr(), len) })?;
         let _pree = PREEMPT.lock();
         let mut os = unsafe { LOGGER.assume_init_ref() }.output.lock();
-        writeln!(os, "{}", string).map_err(|_| Error::EFAULT)?;
+        writeln!(os, "{}", string).map_err(|_| EFAULT)?;
         Ok(())
     }
 }
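
Note: a lone boolean needs no RwLock; HAS_TIME becomes an AtomicBool whose store(true, Release) in chip.rs pairs with the load(Acquire) here, so a logger that observes the flag also observes the clock initialization that preceded the store. The output lock likewise shrinks from an IntrMutex to a plain spin::Mutex, since PREEMPT.lock() is already held around every use shown here.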

+ 1 - 1
h2o/kernel/src/mem.rs

@@ -23,7 +23,7 @@ pub static MMAP: Azy<PtrIter<pmm::boot::MemRange>> = Azy::new(|| {
 static ALL_AVAILABLE: AtomicUsize = AtomicUsize::new(0);
 
 static MEM_RESOURCE: Azy<Arc<Resource<usize>>> = Azy::new(|| {
-    let (all_available, addr_max) = pmm::init(&*MMAP, minfo::TRAMPOLINE_RANGE);
+    let (all_available, addr_max) = pmm::init(&MMAP, minfo::TRAMPOLINE_RANGE);
     log::info!(
         "Memory size: {:.3} GB ({:#x} Bytes)",
         (all_available as f64) / 1073741824.0,

+ 5 - 5
h2o/kernel/src/mem/arena.rs

@@ -103,7 +103,7 @@ impl<T> Arena<T> {
     /// The caller must ensure that `ptr` is previously allocated by this arena.
     pub unsafe fn deallocate(&self, ptr: NonNull<T>) -> sv_call::Result {
         if !self.check_ptr(ptr) {
-            return Err(sv_call::Error::EINVAL);
+            return Err(sv_call::EINVAL);
         }
 
         let mut next = self.head.load_acquire();
@@ -147,9 +147,9 @@ impl<T> Arena<T> {
             let index = addr.wrapping_sub(base).wrapping_div(self.off);
             Some(index)
                 .filter(|&index| self.check_index(index))
-                .ok_or(sv_call::Error::EINVAL)
+                .ok_or(sv_call::EINVAL)
         } else {
-            Err(sv_call::Error::EINVAL)
+            Err(sv_call::EINVAL)
         }
     }
 
@@ -159,9 +159,9 @@ impl<T> Arena<T> {
             let addr = index.wrapping_mul(self.off).wrapping_add(base);
             NonNull::new(addr as *mut T)
                 .filter(|&ptr| self.check_ptr(ptr))
-                .ok_or(sv_call::Error::EINVAL)
+                .ok_or(sv_call::EINVAL)
         } else {
-            Err(sv_call::Error::EINVAL)
+            Err(sv_call::EINVAL)
         }
     }
 

+ 4 - 2
h2o/kernel/src/mem/arena/atomic.rs

@@ -66,7 +66,9 @@ impl AtomicDoubleU64 {
 
     #[inline]
     pub fn load_acquire(&self) -> (u64, u64) {
-        self.compare_exchange_acqrel((0, 0), (0, 0))
-            .into_ok_or_err()
+        match self.compare_exchange_acqrel((0, 0), (0, 0)) {
+            Ok(res) => res,
+            Err(res) => res,
+        }
     }
 }
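
Note: Result::into_ok_or_err was removed from nightly along with its feature gate (dropped from kmain.rs above), so the unwrap-either-arm is spelled out as a match; for compare_exchange both the Ok and Err payloads carry the value that was actually observed, so the two arms are genuinely interchangeable here.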

+ 7 - 7
h2o/kernel/src/mem/space.rs

@@ -38,16 +38,16 @@ pub static KRL: Azy<Arc<Space>> =
 static mut CURRENT: Option<Arc<Space>> = None;
 
 fn paging_error(err: paging::Error) -> sv_call::Error {
-    use sv_call::Error;
+    use sv_call::*;
     match err {
-        paging::Error::OutOfMemory => Error::ENOMEM,
-        paging::Error::AddrMisaligned { .. } => Error::EALIGN,
-        paging::Error::RangeEmpty => Error::EBUFFER,
+        paging::Error::OutOfMemory => ENOMEM,
+        paging::Error::AddrMisaligned { .. } => EALIGN,
+        paging::Error::RangeEmpty => EBUFFER,
         paging::Error::EntryExistent(b) => {
             if b {
-                Error::EEXIST
+                EEXIST
             } else {
-                Error::ENOENT
+                ENOENT
             }
         }
     }
@@ -124,7 +124,7 @@ pub(crate) unsafe fn reprotect_unchecked(ptr: NonNull<[u8]>, flags: Flags) -> sv
 pub(crate) unsafe fn unmap(ptr: NonNull<u8>) -> sv_call::Result {
     let base = LAddr::from(ptr);
     let ret = PREEMPT.scope(|| KRL.root.children.lock().remove(&base));
-    ret.map_or(Err(sv_call::Error::ENOENT), |child| {
+    ret.map_or(Err(sv_call::ENOENT), |child| {
         let end = child.end(base);
         let _ = PREEMPT.scope(|| KRL.arch.unmaps(base..end));
         Ok(())

+ 3 - 3
h2o/kernel/src/mem/space/phys.rs

@@ -65,7 +65,7 @@ impl Phys {
     /// Returns error if the heap memory is exhausted or the size is zero.
     pub fn allocate(size: usize, zeroed: bool) -> Result<Self> {
         if size == 0 {
-            return Err(sv_call::Error::ENOMEM);
+            return Err(sv_call::ENOMEM);
         }
 
         let mut inner = Arsc::try_new_uninit()?;
@@ -98,7 +98,7 @@ impl Phys {
 
     pub fn create_sub(&self, offset: usize, len: usize, copy: bool) -> Result<Self> {
         if offset.contains_bit(PAGE_SHIFT) || len.contains_bit(PAGE_SHIFT) {
-            return Err(sv_call::Error::EALIGN);
+            return Err(sv_call::EALIGN);
         }
 
         let new_offset = self.offset.wrapping_add(offset);
@@ -120,7 +120,7 @@ impl Phys {
                 })
             }
         } else {
-            Err(sv_call::Error::ERANGE)
+            Err(sv_call::ERANGE)
         }
     }
 

+ 38 - 62
h2o/kernel/src/mem/space/virt.rs

@@ -7,7 +7,7 @@ use core::{alloc::Layout, mem, ops::Range};
 use bitop_ex::BitOpEx;
 use paging::{LAddr, PAddr, PAGE_SHIFT, PAGE_SIZE};
 use spin::Mutex;
-use sv_call::{mem::Flags, Error, Feature, Result};
+use sv_call::{error::*, mem::Flags, Feature, Result};
 
 use super::{paging_error, ty_to_range, Phys, Space};
 use crate::sched::{
@@ -84,13 +84,13 @@ impl Virt {
         let range = find_range(&children, &self.range, offset, layout)?;
         let base = range.start;
 
-        let child = Arc::new(Virt {
+        let child = Arc::try_new(Virt {
             ty: self.ty,
             range,
             space: Weak::clone(&self.space),
             parent: Arc::downgrade(self),
             children: Mutex::new(BTreeMap::new()),
-        });
+        })?;
         let ret = Arc::downgrade(&child);
         let _ = children.insert(base, Child::Virt(child));
         Ok(ret)
@@ -103,7 +103,7 @@ impl Virt {
             let children = self.children.lock();
 
             if { children.iter() }.any(|(&base, child)| !check_vdso(vdso, base, child.end(base))) {
-                return Err(Error::EACCES);
+                return Err(EACCES);
             }
         }
         if let Some(parent) = self.parent.upgrade() {
@@ -120,34 +120,36 @@ impl Virt {
         layout: Layout,
         flags: Flags,
     ) -> Result<LAddr> {
-        if phys == VDSO.1
+        let set_vdso = phys == VDSO.1;
+        if set_vdso
             && (offset.is_some()
                 || phys_offset != 0
                 || layout.size() != VDSO.1.len()
                 || layout.align() != PAGE_SIZE
                 || flags != VDSO.0)
         {
-            return Err(Error::EACCES);
+            return Err(EACCES);
         }
 
         let layout = check_layout(layout)?;
         if phys_offset.contains_bit(PAGE_SHIFT) {
-            return Err(Error::EALIGN);
+            return Err(EALIGN);
         }
         let phys_end = phys_offset.wrapping_add(layout.size());
         if !(phys_offset < phys_end && phys_end <= phys.len()) {
-            return Err(Error::ERANGE);
+            return Err(ERANGE);
         }
 
         let _pree = PREEMPT.lock();
         let mut children = self.children.lock();
-        let space = self.space.upgrade().ok_or(Error::EKILLED)?;
+        let space = self.space.upgrade().ok_or(EKILLED)?;
 
-        let set_vdso = phys == VDSO.1;
         if set_vdso {
-            check_set_vdso(&space.vdso, phys_offset, layout, flags)?;
+            if PREEMPT.scope(|| space.vdso.lock().is_some()) {
+                return Err(EACCES);
+            }
             if self as *const _ != Arc::as_ptr(&space.root) {
-                return Err(Error::EACCES);
+                return Err(EACCES);
             }
         }
         let virt = find_range(&children, &self.range, offset, layout)?;
@@ -172,29 +174,29 @@ impl Virt {
         let end = LAddr::from(base.val() + len);
 
         if !(self.range.start <= start && end <= self.range.end) {
-            return Err(Error::ERANGE);
+            return Err(ERANGE);
         }
 
         let _pree = PREEMPT.lock();
         let children = self.children.lock();
-        let space = self.space.upgrade().ok_or(Error::EKILLED)?;
+        let space = self.space.upgrade().ok_or(EKILLED)?;
 
-        let vdso = { *space.vdso.lock() };
+        let vdso = *space.vdso.lock();
         for (&base, child) in children
             .range(..end)
             .take_while(|(&base, child)| start <= child.end(base))
         {
             let child_end = child.end(base);
             if !(start <= base && child_end <= end) {
-                return Err(Error::ERANGE);
+                return Err(ERANGE);
             }
             if !check_vdso(vdso, base, child_end) {
-                return Err(Error::EACCES);
+                return Err(EACCES);
             }
             match child {
-                Child::Virt(_) => return Err(Error::EINVAL),
+                Child::Virt(_) => return Err(EINVAL),
                 Child::Phys(_, f, _) if flags.intersects(!*f) => {
-                    return Err(Error::EPERM);
+                    return Err(EPERM);
                 }
                 _ => {}
             }
@@ -215,27 +217,27 @@ impl Virt {
         let end = LAddr::from(base.val() + len);
 
         if !(self.range.start <= start && end <= self.range.end) {
-            return Err(Error::ERANGE);
+            return Err(ERANGE);
         }
 
         let _pree = PREEMPT.lock();
         let mut children = self.children.lock();
-        let space = self.space.upgrade().ok_or(Error::EKILLED)?;
+        let space = self.space.upgrade().ok_or(EKILLED)?;
 
-        let vdso = { *space.vdso.lock() };
+        let vdso = *space.vdso.lock();
         for (&base, child) in children
             .range(..end)
             .take_while(|(&base, child)| start <= child.end(base))
         {
             let child_end = child.end(base);
             if !(start <= base && child_end <= end) {
-                return Err(Error::ERANGE);
+                return Err(ERANGE);
             }
             if !check_vdso(vdso, base, child_end) {
-                return Err(Error::EACCES);
+                return Err(EACCES);
             }
             if matches!(child, Child::Virt(_) if !drop_child) {
-                return Err(Error::EPERM);
+                return Err(EPERM);
             }
         }
 
@@ -299,39 +301,14 @@ impl Ord for Virt {
 
 fn check_layout(layout: Layout) -> Result<Layout> {
     if layout.size() == 0 {
-        return Err(Error::ERANGE);
+        return Err(ERANGE);
     }
     if layout.align() < PAGE_SIZE {
-        return Err(Error::EALIGN);
+        return Err(EALIGN);
     }
     Ok(layout.pad_to_align())
 }
 
-fn check_set_vdso(
-    vdso: &Mutex<Option<LAddr>>,
-    phys_offset: usize,
-    layout: Layout,
-    flags: Flags,
-) -> Result {
-    if PREEMPT.scope(|| vdso.lock().is_some()) {
-        return Err(Error::EACCES);
-    }
-
-    if phys_offset != 0 {
-        return Err(Error::EACCES);
-    }
-
-    if layout.size() != VDSO.1.len() || layout.align() != PAGE_SIZE {
-        return Err(Error::EACCES);
-    }
-
-    if flags != VDSO.0 {
-        return Err(Error::EACCES);
-    }
-
-    Ok(())
-}
-
 fn check_vdso(vdso: Option<LAddr>, base: LAddr, end: LAddr) -> bool {
     let vdso_size = VDSO.1.len();
 
@@ -352,24 +329,20 @@ fn find_range(
 ) -> Result<Range<LAddr>> {
     let base = match offset {
         Some(offset) => {
-            let base = LAddr::from(
-                { range.start.val() }
-                    .checked_add(offset)
-                    .ok_or(Error::ERANGE)?,
-            );
-            let end = LAddr::from(base.val().checked_add(layout.size()).ok_or(Error::ERANGE)?);
+            let base = LAddr::from({ range.start.val() }.checked_add(offset).ok_or(ERANGE)?);
+            let end = LAddr::from(base.val().checked_add(layout.size()).ok_or(ERANGE)?);
             if base.val().contains_bit(PAGE_SHIFT) {
-                return Err(Error::EALIGN);
+                return Err(EALIGN);
             }
             if !(range.start <= base && end <= range.end) {
-                return Err(Error::ERANGE);
+                return Err(ERANGE);
             }
             if !check_alloc(map, base..end) {
-                return Err(Error::EEXIST);
+                return Err(EEXIST);
             }
             base
         }
-        None => find_alloc(map, range, layout).ok_or(Error::ENOMEM)?,
+        None => find_alloc(map, range, layout).ok_or(ENOMEM)?,
     };
 
     Ok(base..LAddr::from(base.val() + layout.size()))
@@ -382,6 +355,9 @@ fn check_alloc(map: &ChildMap, request: Range<LAddr>) -> bool {
 
 #[inline]
 fn find_alloc(map: &ChildMap, range: &Range<LAddr>, layout: Layout) -> Option<LAddr> {
+    #[cfg(debug_assertions)]
+    const ASLR_BIT: usize = 1;
+    #[cfg(not(debug_assertions))]
     const ASLR_BIT: usize = 35;
     let mask = (1 << ASLR_BIT) - 1;
     let (ret, cnt) = try_find_alloc(map, range, layout, rand() & mask);
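A minimal standalone sketch of the cfg-gated mask above, assuming only that `rand()` yields a uniformly random `usize`; the `aslr_offset` helper and the value fed to it in `main` are hypothetical. In release builds the 35-bit mask gives a 32 GiB randomization window; debug builds shrink it to 1 bit, presumably so layouts stay nearly deterministic under test.

    #[cfg(debug_assertions)]
    const ASLR_BIT: usize = 1;
    #[cfg(not(debug_assertions))]
    const ASLR_BIT: usize = 35;

    fn aslr_offset(rand: usize) -> usize {
        // 0x7_FFFF_FFFF when ASLR_BIT == 35, 0x1 when ASLR_BIT == 1.
        let mask = (1 << ASLR_BIT) - 1;
        rand & mask
    }

    fn main() {
        // Hypothetical fixed "random" value, only to make the sketch runnable.
        println!("{:#x}", aslr_offset(0xdead_beef_cafe));
    }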

+ 3 - 7
h2o/kernel/src/mem/space/x86_64/mod.rs

@@ -109,7 +109,7 @@ impl Space {
             id_off: minfo::ID_OFFSET,
         };
 
-        paging::maps(&mut *self.root_table.lock(), &map_info, &mut PageAlloc)
+        paging::maps(&mut self.root_table.lock(), &map_info, &mut PageAlloc)
     }
 
     pub(in crate::mem) fn reprotect(
@@ -125,18 +125,14 @@ impl Space {
             id_off: minfo::ID_OFFSET,
         };
 
-        paging::reprotect(
-            &mut *self.root_table.lock(),
-            &reprotect_info,
-            &mut PageAlloc,
-        )
+        paging::reprotect(&mut self.root_table.lock(), &reprotect_info, &mut PageAlloc)
     }
 
     #[allow(dead_code)]
     pub(in crate::mem) fn query(&self, virt: LAddr) -> Result<(PAddr, Flags), paging::Error> {
         self.canary.assert();
 
-        paging::query(&*self.root_table.lock(), virt, minfo::ID_OFFSET)
+        paging::query(&self.root_table.lock(), virt, minfo::ID_OFFSET)
             .map(|(phys, attr)| (phys, Self::pg_attr_to_flags(attr)))
     }
 

+ 22 - 22
h2o/kernel/src/mem/syscall.rs

@@ -20,7 +20,7 @@ use crate::{
 
 fn check_flags(flags: Flags) -> Result<Flags> {
     if !flags.contains(Flags::USER_ACCESS) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
     Ok(flags)
 }
@@ -60,7 +60,7 @@ fn phys_check(hdl: Handle, offset: usize, len: usize) -> Result<(Feature, space:
     hdl.check_null()?;
     let offset_end = offset.wrapping_add(len);
     if offset_end < offset {
-        return Err(Error::ERANGE);
+        return Err(ERANGE);
     }
     let (feat, phys) = SCHED.with_current(|cur| {
         cur.space()
@@ -69,7 +69,7 @@ fn phys_check(hdl: Handle, offset: usize, len: usize) -> Result<(Feature, space:
             .map(|obj| (obj.features(), space::Phys::clone(&obj)))
     })?;
     if offset_end > phys.len() {
-        return Err(Error::ERANGE);
+        return Err(ERANGE);
     }
     Ok((feat, phys))
 }
@@ -79,10 +79,10 @@ fn phys_read(hdl: Handle, offset: usize, len: usize, buffer: UserPtr<Out, u8>) -
     buffer.check_slice(len)?;
     let (feat, phys) = phys_check(hdl, offset, len)?;
     if phys == VDSO.1 {
-        return Err(Error::EACCES);
+        return Err(EACCES);
     }
     if !feat.contains(Feature::READ) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
     if len > 0 {
         unsafe {
@@ -99,10 +99,10 @@ fn phys_write(hdl: Handle, offset: usize, len: usize, buffer: UserPtr<In, u8>) -
     buffer.check_slice(len)?;
     let (feat, phys) = phys_check(hdl, offset, len)?;
     if phys == VDSO.1 {
-        return Err(Error::EACCES);
+        return Err(EACCES);
     }
     if !feat.contains(Feature::WRITE) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
     if len > 0 {
         unsafe {
@@ -117,10 +117,10 @@ fn phys_write(hdl: Handle, offset: usize, len: usize, buffer: UserPtr<In, u8>) -
 fn phys_sub(hdl: Handle, offset: usize, len: usize, copy: bool) -> Result<Handle> {
     let (feat, phys) = phys_check(hdl, offset, len)?;
     if phys == VDSO.1 {
-        return Err(Error::EACCES);
+        return Err(EACCES);
     }
     if !feat.contains(Feature::READ) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
 
     let sub = phys.create_sub(offset, len, copy)?;
@@ -140,7 +140,7 @@ fn space_new(root_virt: UserPtr<Out, Handle>) -> Result<Handle> {
     SCHED.with_current(|cur| {
         let space = TaskSpace::new(cur.tid().ty())?;
         let virt = Arc::downgrade(space.mem().root());
-        let ret = cur.space().handles().insert(space, None)?;
+        let ret = cur.space().handles().insert_raw(space, None)?;
         unsafe {
             let virt = cur.space().handles().insert_unchecked(
                 virt,
@@ -158,9 +158,9 @@ fn virt_alloc(hdl: Handle, offset: usize, size: usize, align: usize) -> Result<H
     hdl.check_null()?;
     SCHED.with_current(|cur| {
         let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(Error::EKILLED)?;
+        let virt = virt.upgrade().ok_or(EKILLED)?;
         let sub = virt.allocate(
-            (offset != usize::MAX).then(|| offset),
+            (offset != usize::MAX).then_some(offset),
             Layout::from_size_align(size, align)?,
         )?;
         cur.space().handles().insert(sub, None)
@@ -172,7 +172,7 @@ fn virt_info(hdl: Handle, size: UserPtr<Out, usize>) -> Result<*mut u8> {
     hdl.check_null()?;
     SCHED.with_current(|cur| {
         let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(Error::EKILLED)?;
+        let virt = virt.upgrade().ok_or(EKILLED)?;
         let base = virt.range().start;
         if !size.as_ptr().is_null() {
             unsafe { size.write(virt.len()) }?;
@@ -186,7 +186,7 @@ fn virt_drop(hdl: Handle) -> Result {
     hdl.check_null()?;
     SCHED.with_current(|cur| {
         let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(Error::EKILLED)?;
+        let virt = virt.upgrade().ok_or(EKILLED)?;
         virt.destroy()
     })
 }
@@ -198,11 +198,11 @@ fn virt_map(hdl: Handle, mi: UserPtr<In, VirtMapInfo>) -> Result<*mut u8> {
     let flags = check_flags(mi.flags)?;
     SCHED.with_current(|cur| {
         let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(Error::EKILLED)?;
+        let virt = virt.upgrade().ok_or(EKILLED)?;
         let phys = cur.space().handles().remove::<space::Phys>(mi.phys)?;
-        let offset = (mi.offset != usize::MAX).then(|| mi.offset);
+        let offset = (mi.offset != usize::MAX).then_some(mi.offset);
         if flags.intersects(!features_to_flags(phys.features())) {
-            return Err(Error::EPERM);
+            return Err(EPERM);
         }
 
         let addr = virt.map(
@@ -223,7 +223,7 @@ fn virt_reprot(hdl: Handle, base: UserPtr<In, u8>, len: usize, flags: Flags) ->
     let flags = check_flags(flags)?;
     SCHED.with_current(|cur| {
         let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(Error::EKILLED)?;
+        let virt = virt.upgrade().ok_or(EKILLED)?;
         virt.reprotect(LAddr::new(base.as_ptr()), len, flags)
     })
 }
@@ -234,7 +234,7 @@ fn virt_unmap(hdl: Handle, base: UserPtr<In, u8>, len: usize, drop_child: bool)
     base.check()?;
     SCHED.with_current(|cur| {
         let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(Error::EKILLED)?;
+        let virt = virt.upgrade().ok_or(EKILLED)?;
         virt.unmap(LAddr::new(base.as_ptr()), len, drop_child)
     })
 }
@@ -255,11 +255,11 @@ fn mem_info(info: UserPtr<Out, MemInfo>) -> Result {
 #[syscall]
 fn phys_acq(res: Handle, addr: usize, size: usize) -> Result<Handle> {
     if addr.contains_bit(paging::PAGE_MASK) || size.contains_bit(paging::PAGE_MASK) {
-        return Err(Error::EINVAL);
+        return Err(EINVAL);
     }
 
     SCHED.with_current(|cur| {
-        let res = cur.space().handles().get::<Arc<Resource<usize>>>(res)?;
+        let res = cur.space().handles().get::<Resource<usize>>(res)?;
         if res.magic_eq(super::mem_resource())
             && res.range().start <= addr
             && addr + size <= res.range().end
@@ -267,7 +267,7 @@ fn phys_acq(res: Handle, addr: usize, size: usize) -> Result<Handle> {
             let phys = space::Phys::new(paging::PAddr::new(addr), size)?;
             unsafe { cur.space().handles().insert(phys, None) }
         } else {
-            Err(Error::EPERM)
+            Err(EPERM)
         }
     })
 }
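The `phys_check` hunk above relies on the unsigned wrap-around idiom: compute `offset + len` with `wrapping_add` and treat a result smaller than `offset` as overflow. A self-contained model of both checks, with string errors standing in for the kernel's `ERANGE`:

    fn check_range(offset: usize, len: usize, phys_len: usize) -> Result<(), &'static str> {
        let offset_end = offset.wrapping_add(len);
        // Wrapped around: offset + len does not fit in usize.
        if offset_end < offset {
            return Err("ERANGE: offset + len overflows");
        }
        // Fits in usize, but runs past the end of the physical object.
        if offset_end > phys_len {
            return Err("ERANGE: range exceeds object length");
        }
        Ok(())
    }

    fn main() {
        assert!(check_range(usize::MAX, 2, 100).is_err());
        assert!(check_range(10, 20, 100).is_ok());
        assert!(check_range(90, 20, 100).is_err());
    }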

+ 5 - 1
h2o/kernel/src/sched.rs

@@ -5,7 +5,11 @@ pub mod wait;
 
 pub use self::imp::{deque, epoch};
 pub(crate) use self::{
-    imp::{task_migrate_handler, waiter::Blocker, PREEMPT, SCHED},
+    imp::{
+        task_migrate_handler,
+        waiter::{Blocker, Dispatcher},
+        PREEMPT, SCHED,
+    },
     ipc::{basic::BasicEvent, *},
 };
 

+ 122 - 34
h2o/kernel/src/sched/imp.rs

@@ -2,8 +2,14 @@ pub mod deque;
 pub mod epoch;
 pub mod waiter;
 
-use alloc::{boxed::Box, vec::Vec};
-use core::{assert_matches::assert_matches, cell::UnsafeCell, mem, ptr::NonNull, time::Duration};
+use alloc::vec::Vec;
+use core::{
+    assert_matches::assert_matches,
+    cell::UnsafeCell,
+    hint, mem,
+    sync::atomic::{AtomicU64, Ordering::*},
+    time::Duration,
+};
 
 use archop::{Azy, PreemptState, PreemptStateGuard};
 use canary::Canary;
@@ -12,16 +18,18 @@ use deque::{Injector, Steal, Worker};
 
 use super::{ipc::Arsc, task};
 use crate::cpu::{
-    time::{CallbackArg, Instant, Timer, TimerCallback, TimerType},
+    time::{Instant, Timer},
     Lazy,
 };
 
 pub(super) const MIN_TIME_GRAN: Duration = Duration::from_millis(30);
 const WAKE_TIME_GRAN: Duration = Duration::from_millis(1);
 
-static MIGRATION_QUEUE: Azy<Vec<Injector<task::Ready>>> = Azy::new(|| {
+static SCHED_INFO: Azy<Vec<SchedInfo>> = Azy::new(|| {
     let count = crate::cpu::count();
-    core::iter::repeat_with(Injector::new).take(count).collect()
+    core::iter::repeat_with(SchedInfo::default)
+        .take(count)
+        .collect()
 });
 
 #[thread_local]
@@ -35,6 +43,18 @@ pub static SCHED: Lazy<Scheduler> = Lazy::new(|| Scheduler {
 #[thread_local]
 pub static PREEMPT: PreemptState = PreemptState::new();
 
+#[derive(Default)]
+struct SchedInfo {
+    migration_queue: Injector<task::Ready>,
+    expected_runtime: AtomicU64,
+}
+
+impl SchedInfo {
+    fn expected_runtime(&self) -> u64 {
+        self.expected_runtime.load(Acquire)
+    }
+}
+
 pub struct Scheduler {
     canary: Canary<Scheduler>,
     cpu: usize,
@@ -55,12 +75,15 @@ impl Scheduler {
         if cpu == self.cpu {
             self.enqueue(task, PREEMPT.lock(), preempt);
         } else {
-            MIGRATION_QUEUE[cpu].push(task);
+            SCHED_INFO[cpu].migration_queue.push(task);
             unsafe { crate::cpu::arch::apic::ipi::task_migrate(cpu) };
         }
     }
 
     fn enqueue(&self, task: task::Ready, pree: PreemptStateGuard, preempt: bool) {
+        SCHED_INFO[self.cpu]
+            .expected_runtime
+            .fetch_add(task.time_slice.as_micros() as u64, Release);
         // SAFETY: We have `pree`, which means preemption is disabled.
         match unsafe { &*self.current.get() } {
             Some(ref cur) if preempt && Self::should_preempt(cur, &task) => {
@@ -90,7 +113,7 @@ impl Scheduler {
         PREEMPT.scope(|| unsafe {
             (*self.current.get())
                 .as_mut()
-                .ok_or(sv_call::Error::ESRCH)
+                .ok_or(sv_call::ESRCH)
                 .and_then(func)
         })
     }
@@ -117,14 +140,16 @@ impl Scheduler {
             unsafe { &*self.current.get() }.as_ref().unwrap().tid.raw(),
             PREEMPT.raw(),
         );
+
+        if let Some(current) = unsafe { &*self.current() } {
+            SCHED_INFO[self.cpu]
+                .expected_runtime
+                .fetch_sub(current.time_slice.as_micros() as u64, Release);
+        }
+
         self.schedule_impl(Instant::now(), pree, None, |task| {
             let blocked = task::Ready::block(task, block_desc);
-            let blocked = unsafe { NonNull::new_unchecked(Box::into_raw(box blocked)) };
-            let timer = Timer::activate(
-                TimerType::Oneshot,
-                duration,
-                TimerCallback::new(block_callback, blocked),
-            )?;
+            let timer = Timer::activate(duration, blocked)?;
             if let Some(wq) = wq {
                 wq.push(Arsc::clone(&timer));
             }
@@ -151,6 +176,13 @@ impl Scheduler {
             unsafe { &*self.current.get() }.as_ref().unwrap().tid.raw(),
             PREEMPT.raw(),
         );
+
+        if let Some(current) = unsafe { &*self.current() } {
+            SCHED_INFO[self.cpu]
+                .expected_runtime
+                .fetch_sub(current.time_slice.as_micros() as u64, Release);
+        }
+
         let _ = self.schedule_impl(Instant::now(), pree, None, |task| {
             task::Ready::exit(task, retval);
             Ok(())
@@ -172,7 +204,7 @@ impl Scheduler {
         if unsafe { self.update(cur_time) } {
             let ret = self.schedule(cur_time, pree);
             match ret {
-                Ok(()) | Err(sv_call::Error::ENOENT) => {}
+                Ok(()) | Err(sv_call::ENOENT) => {}
                 Err(err) => log::warn!("Scheduling failed: {:?}", err),
             }
         }
@@ -198,19 +230,29 @@ impl Scheduler {
         match ti.with_signal(|sig| sig.take()) {
             Some(task::Signal::Kill) => {
                 log::trace!("Killing task {:?}, P{}", cur.tid.raw(), PREEMPT.raw());
+
+                SCHED_INFO[self.cpu]
+                    .expected_runtime
+                    .fetch_sub(cur.time_slice.as_micros() as u64, Release);
+
                 let _ = self.schedule_impl(cur_time, pree, None, |task| {
-                    task::Ready::exit(task, sv_call::Error::EKILLED.into_retval());
+                    task::Ready::exit(task, sv_call::EKILLED.into_retval());
                     Ok(())
                 });
                 unreachable!("Dead task");
             }
             Some(task::Signal::Suspend(slot)) => {
                 log::trace!("Suspending task {:?}, P{}", cur.tid.raw(), PREEMPT.raw());
+
+                SCHED_INFO[self.cpu]
+                    .expected_runtime
+                    .fetch_sub(cur.time_slice.as_micros() as u64, Release);
+
                 let ret = self.schedule_impl(cur_time, pree, None, |task| {
                     *slot.lock() = Some(task::Ready::block(task, "task_ctl_suspend"));
                     Ok(())
                 });
-                assert_matches!(ret, Ok(()) | Err(sv_call::Error::ENOENT));
+                assert_matches!(ret, Ok(()) | Err(sv_call::ENOENT));
 
                 None
             }
@@ -230,8 +272,11 @@ impl Scheduler {
 
         match cur.running_state.start_time() {
             Some(start_time) => {
-                debug_assert!(cur_time > start_time);
-                let runtime_delta = cur_time - start_time;
+                // FIXME: Some platforms (e.g. QEMU) don't support an invariant TSC,
+                // so the assertion below can actually fail. For now it is commented
+                // out to avoid a kernel panic.
+                //
+                // debug_assert!(cur_time > start_time);
+                let runtime_delta = cur_time.saturating_duration_since(start_time);
                 cur.runtime += runtime_delta;
                 if cur.time_slice < runtime_delta && !sole {
                     cur.running_state = task::RunningState::NEED_RESCHED;
@@ -279,10 +324,10 @@ impl Scheduler {
             Some(next) => next,
             None => match self.run_queue.pop() {
                 Some(task) => task,
-                None => return Err(sv_call::Error::ENOENT),
+                None => return Err(sv_call::ENOENT),
             },
         };
-        log::trace!("Switching to {:?}, P{}", next.tid.raw(), PREEMPT.raw());
+        log::trace!("Switching to task {:?}, P{}", next.tid.raw(), PREEMPT.raw());
 
         next.running_state = task::RunningState::running(cur_time);
         next.cpu = self.cpu;
@@ -304,35 +349,78 @@ impl Scheduler {
         // We will enable preemption in `switch_ctx`.
         mem::forget(pree);
         unsafe { task::ctx::switch_ctx(old, new) };
-        ret.transpose()
-            .and_then(|res| res.ok_or(sv_call::Error::ESRCH))
+        ret.transpose().and_then(|res| res.ok_or(sv_call::ESRCH))
     }
 }
 
 fn select_cpu(
     affinity: &crate::cpu::CpuMask,
     cur_cpu: usize,
-    _last_cpu: Option<usize>,
+    last_cpu: Option<usize>,
 ) -> Option<usize> {
-    match affinity.get(cur_cpu) {
-        Some(slot) if *slot => Some(cur_cpu),
-        _ => affinity.iter_ones().next(),
+    let mut iter = affinity.iter_ones();
+    let mut ret = iter.next()?;
+
+    if ret == cur_cpu && SCHED_INFO[ret].expected_runtime() == 0 {
+        return Some(ret);
     }
-}
 
-fn block_callback(_: Arsc<Timer>, _: Instant, arg: CallbackArg) {
-    let blocked = unsafe { Box::from_raw(arg.as_ptr()) };
-    SCHED.unblock(Box::into_inner(blocked), true);
+    for b in iter {
+        let rb = SCHED_INFO[b].expected_runtime();
+        if b == cur_cpu && rb == 0 {
+            return Some(b);
+        }
+
+        let a = ret;
+
+        let wlast_cpu = match last_cpu {
+            Some(last_cpu) if a == last_cpu && b != last_cpu => 1,
+            Some(last_cpu) if a != last_cpu && b == last_cpu => -1,
+            _ => 0,
+        };
+
+        let wcur_cpu = if a == cur_cpu && b != cur_cpu {
+            1
+        } else if a != cur_cpu && b == cur_cpu {
+            -1
+        } else {
+            0
+        };
+
+        let wruntime = {
+            let ra = SCHED_INFO[a].expected_runtime();
+            let diff = ra.abs_diff(rb);
+            if diff <= 1 {
+                0
+            } else {
+                (diff + 1).ilog2() as i32 * if ra > rb { -1 } else { 1 }
+            }
+        };
+
+        let weight = wlast_cpu * 10 + wcur_cpu * 2 + wruntime * 20;
+
+        ret = if weight > 0 { a } else { b };
+    }
+
+    Some(ret)
 }
 
 /// # Safety
 ///
 /// This function must be called only in task-migrate IPI handlers.
 pub unsafe fn task_migrate_handler() {
-    loop {
-        match MIGRATION_QUEUE[SCHED.cpu].steal_batch(&SCHED.run_queue) {
-            Steal::Empty | Steal::Success(_) => break,
-            Steal::Retry => {}
+    crate::cpu::arch::apic::lapic(|lapic| lapic.eoi());
+
+    const MAX_TRIAL: usize = 50;
+    for _ in 0..MAX_TRIAL {
+        match SCHED_INFO[SCHED.cpu].migration_queue.steal() {
+            Steal::Empty => break,
+            Steal::Retry => hint::spin_loop(),
+            Steal::Success(task) => {
+                log::trace!("Migrating task {:?}, P{}", task.tid.raw(), PREEMPT.raw());
+                let pree = PREEMPT.lock();
+                SCHED.enqueue(task, pree, true);
+            }
         }
     }
 }
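The new `select_cpu` walks the affinity mask pairwise, keeping whichever candidate scores higher on three weighted criteria: last CPU (cache warmth), current CPU (no IPI needed), and the expected-runtime gap. A userland sketch of just the weighting, with `runtimes[i]` standing in for `SCHED_INFO[i].expected_runtime()` and the idle-CPU early returns omitted:

    fn pick(runtimes: &[u64], cur_cpu: usize, last_cpu: Option<usize>) -> usize {
        let mut iter = 0..runtimes.len();
        let mut ret = iter.next().unwrap(); // panics on an empty mask; fine for a sketch
        for b in iter {
            let a = ret;
            let (ra, rb) = (runtimes[a], runtimes[b]);
            let wlast = match last_cpu {
                Some(l) if a == l && b != l => 1,
                Some(l) if a != l && b == l => -1,
                _ => 0,
            };
            let wcur = if a == cur_cpu && b != cur_cpu {
                1
            } else if a != cur_cpu && b == cur_cpu {
                -1
            } else {
                0
            };
            // Log-scaled runtime gap; the busier candidate is penalized.
            let wrt = {
                let diff = ra.abs_diff(rb);
                if diff <= 1 {
                    0
                } else {
                    (diff + 1).ilog2() as i32 * if ra > rb { -1 } else { 1 }
                }
            };
            let weight = wlast * 10 + wcur * 2 + wrt * 20;
            ret = if weight > 0 { a } else { b };
        }
        ret
    }

    fn main() {
        // CPU 2 is idle (0 expected runtime), so it beats the warmer CPU 0.
        assert_eq!(pick(&[120, 90, 0], 0, Some(0)), 2);
    }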

+ 166 - 12
h2o/kernel/src/sched/imp/waiter.rs

@@ -1,13 +1,26 @@
-use alloc::sync::{Arc, Weak};
-use core::{fmt::Debug, time::Duration};
+use alloc::{
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use core::{
+    fmt::Debug,
+    sync::atomic::{AtomicUsize, Ordering::*},
+    time::Duration,
+};
 
+use archop::PreemptStateGuard;
+use crossbeam_queue::SegQueue;
 use spin::Mutex;
-use sv_call::Feature;
+use sv_call::{
+    call::Syscall,
+    ipc::{SIG_READ, SIG_WRITE},
+    Feature, Result, ENOSPC,
+};
 
 use super::PREEMPT;
 use crate::{
     cpu::arch::apic::TriggerMode,
-    sched::{task::hdl::DefaultFeature, wait::WaitObject, Event, Waiter, WaiterData},
+    sched::{task::hdl::DefaultFeature, wait::WaitObject, BasicEvent, Event, Waiter, WaiterData},
 };
 
 #[derive(Debug)]
@@ -32,11 +45,18 @@ impl Blocker {
         ret
     }
 
-    pub fn wait<T>(&self, guard: T, timeout: Duration) -> sv_call::Result {
-        if timeout.is_zero() || PREEMPT.scope(|| self.status.lock().1 != 0) {
+    pub fn wait(&self, pree: Option<PreemptStateGuard>, timeout: Duration) -> sv_call::Result {
+        let pree = match pree {
+            Some(pree) => pree,
+            None => PREEMPT.lock(),
+        };
+        let status = self.status.lock();
+        if timeout.is_zero() || status.1 != 0 {
             Ok(())
+        } else if self.event.strong_count() == 0 {
+            Err(sv_call::EPIPE)
         } else {
-            self.wo.wait(guard, timeout, "Blocker::wait")
+            self.wo.wait((status, pree), timeout, "Blocker::wait")
         }
     }
 
@@ -58,11 +78,11 @@ impl Blocker {
 
 impl Waiter for Blocker {
     #[inline]
-    fn waiter_data(&self) -> &WaiterData {
-        &self.waiter_data
+    fn waiter_data(&self) -> WaiterData {
+        self.waiter_data
     }
 
-    fn on_cancel(&self, signal: usize) {
+    fn on_cancel(&self, _: *const (), signal: usize) {
         PREEMPT.scope(|| *self.status.lock() = (false, signal));
         let num = if self.wake_all { usize::MAX } else { 1 };
         self.wo.notify(num, false);
@@ -75,8 +95,142 @@ impl Waiter for Blocker {
     }
 }
 
-unsafe impl DefaultFeature for Arc<Blocker> {
+unsafe impl DefaultFeature for Blocker {
+    #[inline]
     fn default_features() -> sv_call::Feature {
-        Feature::SEND | Feature::WAIT
+        Feature::SEND
+    }
+}
+
+#[derive(Debug)]
+struct Request {
+    key: usize,
+    event: Weak<dyn Event>,
+    waiter_data: WaiterData,
+    syscall: Option<Syscall>,
+}
+
+#[derive(Debug)]
+pub struct Dispatcher {
+    next_key: AtomicUsize,
+    event: Arc<BasicEvent>,
+
+    capacity: usize,
+    pending: Mutex<Vec<Request>>,
+    ready: SegQueue<(bool, Request)>,
+}
+
+impl Dispatcher {
+    pub fn new(capacity: usize) -> Result<Arc<Self>> {
+        Ok(Arc::try_new(Dispatcher {
+            next_key: AtomicUsize::new(1),
+            event: BasicEvent::new(0),
+
+            capacity,
+            pending: Mutex::new(Vec::new()),
+            ready: SegQueue::new(),
+        })?)
+    }
+
+    pub fn event(&self) -> Weak<dyn Event> {
+        Arc::downgrade(&self.event) as _
+    }
+
+    pub fn push(
+        self: &Arc<Self>,
+        event: &Arc<dyn Event>,
+        waiter_data: WaiterData,
+        syscall: Option<Syscall>,
+    ) -> Result<usize> {
+        let key = self.next_key.fetch_add(1, AcqRel);
+        let req = Request {
+            key,
+            event: Arc::downgrade(event),
+            waiter_data,
+            syscall,
+        };
+        PREEMPT.scope(|| {
+            let mut pending = self.pending.lock();
+            if pending.len() + self.ready.len() >= self.capacity {
+                return Err(ENOSPC);
+            }
+            pending.push(req);
+            Ok(())
+        })?;
+
+        event.wait(Arc::clone(self) as _);
+        Ok(key)
+    }
+
+    pub fn pop(self: &Arc<Self>) -> Option<(bool, usize, Option<Syscall>)> {
+        let (canceled, req) = self.ready.pop()?;
+        if let Some(event) = req.event.upgrade() {
+            event.unwait(&(Arc::clone(self) as _));
+        }
+        let res = if !canceled { req.syscall } else { None };
+        self.event.notify(0, SIG_WRITE);
+        Some((canceled, req.key, res))
+    }
+}
+
+impl Waiter for Dispatcher {
+    fn waiter_data(&self) -> WaiterData {
+        unimplemented!()
+    }
+
+    fn on_cancel(&self, event: *const (), signal: usize) {
+        let mut has_cancel = false;
+
+        PREEMPT.scope(|| {
+            let mut pending = self.pending.lock();
+            let iter = pending.drain_filter(|req| {
+                let (e, _) = req.event.as_ptr().to_raw_parts();
+                e == event && req.waiter_data.can_signal(signal, false)
+            });
+            iter.for_each(|req| {
+                self.ready.push((true, req));
+                has_cancel = true;
+            });
+        });
+
+        if has_cancel {
+            self.event.notify(0, SIG_READ)
+        }
+    }
+
+    fn on_notify(&self, _: usize) {
+        unimplemented!()
+    }
+
+    fn try_on_notify(&self, event: *const (), signal: usize, on_wait: bool) -> bool {
+        if self.ready.len() >= self.capacity {
+            return false;
+        }
+        let mut has_notify = false;
+
+        let empty = PREEMPT.scope(|| {
+            let mut pending = self.pending.lock();
+            let iter = pending.drain_filter(|req| {
+                let (e, _) = req.event.as_ptr().to_raw_parts();
+                e == event && req.waiter_data.can_signal(signal, on_wait)
+            });
+            iter.for_each(|req| {
+                self.ready.push((false, req));
+                has_notify = true;
+            });
+            pending.is_empty()
+        });
+
+        if has_notify {
+            self.event.notify(0, SIG_READ)
+        }
+        empty
+    }
+}
+
+unsafe impl DefaultFeature for Dispatcher {
+    #[inline]
+    fn default_features() -> Feature {
+        Feature::SEND | Feature::SYNC | Feature::READ | Feature::WRITE | Feature::WAIT
     }
 }
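A simplified, self-contained model of the `Dispatcher` life cycle above: requests wait in `pending` until their event fires, then migrate to `ready`, where `pop` consumes them. The kernel's `Event`/`Waiter` machinery, preemption guards, and stored syscalls are replaced here by a plain `Mutex` and an explicit `fire` call; the boolean in `ready` is the canceled flag that `pop` reports.

    use std::collections::VecDeque;
    use std::sync::Mutex;

    struct MiniDispatcher {
        capacity: usize,
        pending: Mutex<Vec<(usize, usize)>>,   // (key, wanted signal bits)
        ready: Mutex<VecDeque<(bool, usize)>>, // (canceled, key)
    }

    impl MiniDispatcher {
        fn push(&self, key: usize, signal: usize) -> Result<(), &'static str> {
            let mut pending = self.pending.lock().unwrap();
            if pending.len() + self.ready.lock().unwrap().len() >= self.capacity {
                return Err("ENOSPC");
            }
            pending.push((key, signal));
            Ok(())
        }

        // Models try_on_notify: move every request satisfied by `signal`.
        fn fire(&self, signal: usize) {
            let mut pending = self.pending.lock().unwrap();
            let mut ready = self.ready.lock().unwrap();
            pending.retain(|&(key, wanted)| {
                // Same test as WaiterData::can_signal: all wanted bits present.
                let hit = wanted & !signal == 0;
                if hit {
                    ready.push_back((false, key));
                }
                !hit
            });
        }

        fn pop(&self) -> Option<(bool, usize)> {
            self.ready.lock().unwrap().pop_front()
        }
    }

    fn main() {
        let d = MiniDispatcher {
            capacity: 4,
            pending: Mutex::new(Vec::new()),
            ready: Mutex::new(VecDeque::new()),
        };
        d.push(1, 0b01).unwrap();
        d.push(2, 0b10).unwrap();
        d.fire(0b01); // only request 1 is satisfied
        assert_eq!(d.pop(), Some((false, 1)));
        assert_eq!(d.pop(), None);
    }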

+ 126 - 67
h2o/kernel/src/sched/ipc.rs

@@ -2,15 +2,16 @@ mod arsc;
 pub mod basic;
 mod channel;
 
-use alloc::{sync::Arc, vec::Vec};
+use alloc::sync::Arc;
 use core::{
     fmt::Debug,
-    hint, mem,
+    hash::BuildHasherDefault,
+    hint,
     sync::atomic::{AtomicUsize, Ordering::SeqCst},
 };
 
-use spin::Mutex;
-pub use sv_call::ipc::{SIG_GENERIC, SIG_READ, SIG_WRITE};
+use collection_ex::{CHashMap, FnvHasher};
+pub use sv_call::ipc::{SIG_GENERIC, SIG_READ, SIG_TIMER, SIG_WRITE};
 
 pub use self::{
     arsc::Arsc,
@@ -19,22 +20,24 @@ pub use self::{
 use super::PREEMPT;
 use crate::cpu::arch::apic::TriggerMode;
 
+type BH = BuildHasherDefault<FnvHasher>;
+
 #[derive(Debug, Default)]
 pub struct EventData {
-    waiters: Mutex<Vec<Arc<dyn Waiter>>>,
+    waiters: CHashMap<usize, Arc<dyn Waiter>, BH>,
     signal: AtomicUsize,
 }
 
 impl EventData {
     pub fn new(init_signal: usize) -> Self {
         EventData {
-            waiters: Mutex::new(Vec::new()),
+            waiters: Default::default(),
             signal: AtomicUsize::new(init_signal),
         }
     }
 
     #[inline]
-    pub fn waiters(&self) -> &Mutex<Vec<Arc<dyn Waiter>>> {
+    pub fn waiters(&self) -> &CHashMap<usize, Arc<dyn Waiter>, BH> {
         &self.waiters
     }
 
@@ -53,32 +56,22 @@ pub trait Event: Debug + Send + Sync {
     }
 
     fn wait_impl(&self, waiter: Arc<dyn Waiter>) {
-        if waiter.waiter_data().trigger_mode == TriggerMode::Level {
-            let signal = self.event_data().signal().load(SeqCst);
-            if signal & waiter.waiter_data().signal != 0 {
-                waiter.on_notify(signal);
-                return;
-            }
+        let signal = self.event_data().signal().load(SeqCst);
+        if waiter.try_on_notify(self as *const _ as _, signal, true) {
+            return;
         }
-        PREEMPT.scope(|| self.event_data().waiters.lock().push(waiter));
+        let (key, _) = Arc::as_ptr(&waiter).to_raw_parts();
+        PREEMPT.scope(|| self.event_data().waiters.insert(key as _, waiter));
     }
 
     fn unwait(&self, waiter: &Arc<dyn Waiter>) -> (bool, usize) {
         let signal = self.event_data().signal().load(SeqCst);
         let ret = PREEMPT.scope(|| {
-            let mut waiters = self.event_data().waiters.lock();
-            let pos = waiters.iter().position(|w| {
-                let (this, _) = Arc::as_ptr(w).to_raw_parts();
-                let (other, _) = Arc::as_ptr(waiter).to_raw_parts();
-                this == other
-            });
-            match pos {
-                Some(pos) => {
-                    waiters.swap_remove(pos);
-                    true
-                }
-                None => false,
-            }
+            let (other, _) = Arc::as_ptr(waiter).to_raw_parts();
+            self.event_data()
+                .waiters
+                .remove(&(other as usize))
+                .is_some()
         });
         (ret, signal)
     }
@@ -86,9 +79,9 @@ pub trait Event: Debug + Send + Sync {
     fn cancel(&self) {
         let signal = self.event_data().signal.load(SeqCst);
 
-        let waiters = PREEMPT.scope(|| mem::take(&mut *self.event_data().waiters.lock()));
-        for waiter in waiters {
-            waiter.on_cancel(signal);
+        let waiters = PREEMPT.scope(|| self.event_data().waiters.take());
+        for (_, waiter) in waiters {
+            waiter.on_cancel(self as *const _ as _, signal);
         }
     }
 
@@ -98,8 +91,8 @@ pub trait Event: Debug + Send + Sync {
     }
 
     fn notify_impl(&self, clear: usize, set: usize) {
+        let mut prev = self.event_data().signal.load(SeqCst);
         let signal = loop {
-            let prev = self.event_data().signal.load(SeqCst);
             let new = (prev & !clear) | set;
             if prev == new {
                 return;
@@ -111,20 +104,21 @@ pub trait Event: Debug + Send + Sync {
             {
                 Ok(_) if prev & new == new => return,
                 Ok(_) => break new,
-                _ => hint::spin_loop(),
+                Err(signal) => {
+                    prev = signal;
+                    hint::spin_loop()
+                }
             }
         };
         PREEMPT.scope(|| {
-            let mut waiters = self.event_data().waiters.lock();
-            let waiters = waiters.drain_filter(|w| signal & w.waiter_data().signal != 0);
-            for waiter in waiters {
-                waiter.on_notify(signal);
-            }
+            self.event_data()
+                .waiters
+                .retain(|_, waiter| !waiter.try_on_notify(self as *const _ as _, signal, false))
         });
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone, Copy)]
 pub struct WaiterData {
     trigger_mode: TriggerMode,
     signal: usize,
@@ -145,75 +139,140 @@ impl WaiterData {
     pub fn signal(&self) -> usize {
         self.signal
     }
+
+    #[inline]
+    pub fn can_signal(&self, signal: usize, on_wait: bool) -> bool {
+        if on_wait && self.trigger_mode == TriggerMode::Edge {
+            false
+        } else {
+            self.signal & !signal == 0
+        }
+    }
 }
 
 pub trait Waiter: Debug + Send + Sync {
-    fn waiter_data(&self) -> &WaiterData;
+    fn waiter_data(&self) -> WaiterData;
 
-    fn on_cancel(&self, signal: usize);
+    fn on_cancel(&self, event: *const (), signal: usize);
 
     fn on_notify(&self, signal: usize);
+
+    #[inline]
+    fn try_on_notify(&self, _: *const (), signal: usize, on_wait: bool) -> bool {
+        let ret = self.waiter_data().can_signal(signal, on_wait);
+        if ret {
+            self.on_notify(signal);
+        }
+        ret
+    }
 }
 
 mod syscall {
-    use sv_call::*;
+    use sv_call::{call::Syscall, *};
 
     use super::*;
     use crate::{
-        cpu::time,
-        sched::{Blocker, SCHED},
+        cpu::{arch::apic::TriggerMode, time},
+        sched::{Blocker, Dispatcher, WaiterData, SCHED},
+        syscall::{In, Out, UserPtr},
     };
 
     #[syscall]
     fn obj_wait(hdl: Handle, timeout_us: u64, wake_all: bool, signal: usize) -> Result<usize> {
         let pree = PREEMPT.lock();
-        let cur = unsafe { (*SCHED.current()).as_ref().ok_or(Error::ESRCH) }?;
+        let cur = unsafe { (*SCHED.current()).as_ref().ok_or(ESRCH) }?;
 
         let obj = cur.space().handles().get_ref(hdl)?;
         if !obj.features().contains(Feature::WAIT) {
-            return Err(Error::EPERM);
+            return Err(EPERM);
         }
-        let event = obj.event().upgrade().ok_or(Error::EPIPE)?;
+        let event = obj.event().upgrade().ok_or(EPIPE)?;
 
         let blocker = Blocker::new(&event, wake_all, signal);
-        blocker.wait(pree, time::from_us(timeout_us))?;
+        blocker.wait(Some(pree), time::from_us(timeout_us))?;
 
         let (detach_ret, signal) = blocker.detach();
         if !detach_ret {
-            return Err(Error::ETIME);
+            return Err(ETIME);
         }
         Ok(signal)
     }
 
     #[syscall]
-    fn obj_await(hdl: Handle, wake_all: bool, signal: usize) -> Result<Handle> {
+    fn disp_new(capacity: usize) -> Result<Handle> {
+        let disp = Dispatcher::new(capacity)?;
+        let event = disp.event();
+        SCHED.with_current(|cur| cur.space().handles().insert_raw(disp, Some(event)))
+    }
+
+    #[syscall]
+    fn disp_push(
+        disp: Handle,
+        hdl: Handle,
+        level_triggered: bool,
+        signal: usize,
+        syscall: UserPtr<In, Syscall>,
+    ) -> Result<usize> {
+        hdl.check_null()?;
+        disp.check_null()?;
+        let syscall = (!syscall.as_ptr().is_null())
+            .then(|| {
+                let syscall = unsafe { syscall.read() }?;
+                if matches!(
+                    syscall.num as usize,
+                    SV_DISP_NEW | SV_DISP_PUSH | SV_DISP_POP
+                ) {
+                    return Err(EPERM);
+                }
+                Ok(syscall)
+            })
+            .transpose()?;
+
         SCHED.with_current(|cur| {
             let obj = cur.space().handles().get_ref(hdl)?;
+            let disp = cur.space().handles().get::<Dispatcher>(disp)?;
             if !obj.features().contains(Feature::WAIT) {
-                return Err(Error::EPERM);
+                return Err(EPERM);
             }
-            let event = obj.event().upgrade().ok_or(Error::EPIPE)?;
-
-            let blocker = Blocker::new(&event, wake_all, signal);
-            cur.space().handles().insert(blocker, None)
+            if !disp.features().contains(Feature::WRITE) {
+                return Err(EPERM);
+            }
+            let event = obj.event().upgrade().ok_or(EPIPE)?;
+
+            let waiter_data = WaiterData::new(
+                if level_triggered {
+                    TriggerMode::Level
+                } else {
+                    TriggerMode::Edge
+                },
+                signal,
+            );
+            disp.push(&event, waiter_data, syscall)
         })
     }
 
     #[syscall]
-    fn obj_awend(waiter: Handle, timeout_us: u64) -> Result<usize> {
-        let pree = PREEMPT.lock();
-        let cur = unsafe { (*SCHED.current()).as_ref().ok_or(Error::ESRCH) }?;
-
-        let blocker = cur.space().handles().get::<Arc<Blocker>>(waiter)?;
-        blocker.wait(pree, time::from_us(timeout_us))?;
-
-        let (detach_ret, signal) = Arc::clone(&blocker).detach();
-        SCHED.with_current(|cur| cur.space().handles().remove::<Arc<Blocker>>(waiter))?;
+    fn disp_pop(
+        disp: Handle,
+        canceled: UserPtr<Out, bool>,
+        result: UserPtr<Out, usize>,
+    ) -> Result<usize> {
+        disp.check_null()?;
+        let (c, key, r) = SCHED.with_current(|cur| {
+            let disp = cur.space().handles().get::<Dispatcher>(disp)?;
+            if !disp.features().contains(Feature::READ) {
+                return Err(EPERM);
+            }
+            disp.pop().ok_or(ENOENT)
+        })?;
 
-        if !detach_ret {
-            Err(Error::ETIME)
-        } else {
-            Ok(signal)
+        if !canceled.as_ptr().is_null() {
+            canceled.write(c)?;
+        }
+        let r = r.map_or(0, crate::syscall::handle);
+        if !result.as_ptr().is_null() {
+            result.write(r)?;
         }
+        Ok(key)
     }
 }
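The `notify_impl` rewrite above feeds the value returned by a failed `compare_exchange` back into the next iteration instead of issuing a fresh load on every retry. The same loop, modeled on a bare `AtomicUsize`; it returns the new signal only when some bit was actually raised:

    use std::hint;
    use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};

    fn notify(signal: &AtomicUsize, clear: usize, set: usize) -> Option<usize> {
        let mut prev = signal.load(SeqCst);
        loop {
            let new = (prev & !clear) | set;
            if prev == new {
                return None; // nothing changes
            }
            match signal.compare_exchange(prev, new, SeqCst, SeqCst) {
                Ok(_) if prev & new == new => return None, // bits only cleared
                Ok(_) => return Some(new),                 // some bit newly raised
                Err(observed) => {
                    prev = observed; // reuse the value the failed CAS handed back
                    hint::spin_loop()
                }
            }
        }
    }

    fn main() {
        let s = AtomicUsize::new(0b001);
        assert_eq!(notify(&s, 0, 0b010), Some(0b011));
        assert_eq!(notify(&s, 0b010, 0), None); // clearing alone notifies nobody
    }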

+ 67 - 64
h2o/kernel/src/sched/ipc/channel.rs

@@ -6,7 +6,7 @@ use alloc::{
 };
 use core::{
     mem,
-    sync::atomic::{AtomicUsize, Ordering::SeqCst},
+    sync::atomic::{AtomicU64, AtomicUsize, Ordering::SeqCst},
 };
 
 use bytes::Bytes;
@@ -95,8 +95,9 @@ pub struct Channel {
 
 impl Channel {
     pub fn new() -> (Self, Self) {
-        // TODO: Find a better way to acquire an unique id.
-        let peer_id = unsafe { archop::msr::rdtsc() };
+        static PEER_ID: AtomicU64 = AtomicU64::new(0);
+        let peer_id = PEER_ID.fetch_add(1, SeqCst);
+
         let q1 = Arc::new(ChannelSide::default());
         let q2 = Arc::new(ChannelSide::default());
         let c1 = Channel {
@@ -128,35 +129,34 @@ impl Channel {
     ///
     /// Returns error if the peer is closed or if the channel is full.
     pub fn send(&self, msg: &mut Packet) -> sv_call::Result {
-        match self.peer.upgrade() {
-            None => Err(sv_call::Error::EPIPE),
-            Some(peer) => {
-                let called = PREEMPT.scope(|| {
-                    let mut callers = peer.callers.lock();
-                    let called = callers.get_mut(&msg.id);
-                    if let Some(caller) = called {
-                        let _old = caller.cell.replace(mem::take(msg));
-                        caller.event.notify(0, SIG_READ);
-                        debug_assert!(_old.is_none());
-                        true
-                    } else {
-                        false
-                    }
-                });
-                if called {
-                    Ok(())
-                } else if peer.msgs.len() >= MAX_QUEUE_SIZE {
-                    Err(sv_call::Error::ENOSPC)
-                } else {
-                    peer.msgs.push(mem::take(msg));
-                    peer.event.notify(0, SIG_READ);
-                    Ok(())
-                }
+        let peer = self.peer.upgrade().ok_or(sv_call::EPIPE)?;
+        let called = PREEMPT.scope(|| {
+            let mut callers = peer.callers.lock();
+            let called = callers.get_mut(&msg.id);
+            if let Some(caller) = called {
+                let _old = caller.cell.replace(mem::take(msg));
+                caller.event.notify(0, SIG_READ);
+                debug_assert!(_old.is_none());
+                true
+            } else {
+                false
             }
+        });
+        if called {
+            Ok(())
+        } else if peer.msgs.len() >= MAX_QUEUE_SIZE {
+            Err(sv_call::ENOSPC)
+        } else {
+            peer.msgs.push(mem::take(msg));
+            peer.event.notify(0, SIG_READ);
+            Ok(())
         }
     }
 
-    fn get_packet(
+    /// # Safety
+    ///
+    /// `head` must contain a valid packet.
+    unsafe fn get_packet(
         head: &mut Option<Packet>,
         buffer_cap: &mut usize,
         handle_cap: &mut usize,
@@ -165,7 +165,7 @@ impl Channel {
         let buffer_size = packet.buffer().len();
         let handle_count = packet.object_count();
         let ret = if buffer_size > *buffer_cap || handle_count > *handle_cap {
-            Err(sv_call::Error::EBUFFER)
+            Err(sv_call::EBUFFER)
         } else {
             Ok(unsafe { head.take().unwrap_unchecked() })
         };
@@ -182,15 +182,17 @@ impl Channel {
         buffer_cap: &mut usize,
         handle_cap: &mut usize,
     ) -> sv_call::Result<Packet> {
-        if self.peer.strong_count() == 0 {
-            return Err(sv_call::Error::EPIPE);
-        }
         let _pree = PREEMPT.lock();
         let mut head = self.head.lock();
         if head.is_none() {
-            *head = Some(self.me.msgs.pop().ok_or(sv_call::Error::ENOENT)?);
+            let err = if self.peer.strong_count() > 0 {
+                sv_call::ENOENT
+            } else {
+                sv_call::EPIPE
+            };
+            *head = Some(self.me.msgs.pop().ok_or(err)?);
         }
-        Self::get_packet(&mut head, buffer_cap, handle_cap)
+        unsafe { Self::get_packet(&mut head, buffer_cap, handle_cap) }
     }
 
     #[inline]
@@ -206,33 +208,28 @@ impl Channel {
     }
 
     pub fn call_send(&self, msg: &mut Packet) -> sv_call::Result<usize> {
-        match self.peer.upgrade() {
-            None => Err(sv_call::Error::EPIPE),
-            Some(peer) => {
-                if peer.msgs.len() >= MAX_QUEUE_SIZE {
-                    Err(sv_call::Error::ENOSPC)
-                } else {
-                    let id = Self::next_msg_id(&self.me.msg_id);
-                    msg.id = id;
-                    self.me
-                        .callers
-                        .lock()
-                        .try_insert(id, Caller::default())
-                        .map_err(|_| sv_call::Error::EEXIST)?;
-                    peer.msgs.push(mem::take(msg));
-                    peer.event.notify(0, SIG_READ);
-                    Ok(id)
-                }
-            }
+        let peer = self.peer.upgrade().ok_or(sv_call::EPIPE)?;
+        if peer.msgs.len() >= MAX_QUEUE_SIZE {
+            Err(sv_call::ENOSPC)
+        } else {
+            let id = Self::next_msg_id(&self.me.msg_id);
+            msg.id = id;
+            PREEMPT.scope(|| {
+                { self.me.callers.lock() }
+                    .try_insert(id, Caller::default())
+                    .map_or(Err(sv_call::EEXIST), |_| Ok(()))
+            })?;
+            peer.msgs.push(mem::take(msg));
+            peer.event.notify(0, SIG_READ);
+            Ok(id)
         }
     }
 
     fn call_event(&self, id: usize) -> sv_call::Result<Arc<BasicEvent>> {
         PREEMPT.scope(|| {
-            let callers = self.me.callers.lock();
-            callers.get(&id).map_or(Err(sv_call::Error::ENOENT), |ent| {
-                Ok(Arc::clone(&ent.event))
-            })
+            { self.me.callers.lock() }
+                .get(&id)
+                .map_or(Err(sv_call::ENOENT), |ent| Ok(Arc::clone(&ent.event)))
         })
     }
 
@@ -242,20 +239,22 @@ impl Channel {
         buffer_cap: &mut usize,
         handle_cap: &mut usize,
     ) -> sv_call::Result<Packet> {
-        if self.peer.strong_count() == 0 {
-            return Err(sv_call::Error::EPIPE);
-        }
         let _pree = PREEMPT.lock();
         let mut callers = self.me.callers.lock();
         let mut caller = match callers.entry(id) {
-            alloc::collections::btree_map::Entry::Vacant(_) => return Err(sv_call::Error::ENOENT),
+            alloc::collections::btree_map::Entry::Vacant(_) => return Err(sv_call::ENOENT),
             alloc::collections::btree_map::Entry::Occupied(caller) => caller,
         };
         if caller.get().head.is_none() {
-            let packet = caller.get_mut().cell.take().ok_or(sv_call::Error::ENOENT)?;
+            let err = if self.peer.strong_count() > 0 {
+                sv_call::ENOENT
+            } else {
+                sv_call::EPIPE
+            };
+            let packet = caller.get_mut().cell.take().ok_or(err)?;
             caller.get_mut().head = Some(packet);
         }
-        Self::get_packet(&mut caller.get_mut().head, buffer_cap, handle_cap)
+        unsafe { Self::get_packet(&mut caller.get_mut().head, buffer_cap, handle_cap) }
             .inspect(|_| drop(caller.remove()))
     }
 }
@@ -269,7 +268,11 @@ unsafe impl DefaultFeature for Channel {
 impl Drop for Channel {
     fn drop(&mut self) {
         if let Some(peer) = self.peer.upgrade() {
-            peer.event.notify(0, usize::MAX);
+            peer.event.cancel();
+            let _pree = PREEMPT.lock();
+            for (_, caller) in peer.callers.lock().iter() {
+                caller.event.cancel();
+            }
         }
     }
-}
+}

+ 39 - 15
h2o/kernel/src/sched/ipc/channel/syscall.rs

@@ -7,8 +7,8 @@ use sv_call::{
 
 use super::*;
 use crate::{
-    cpu::time,
-    sched::SIG_READ,
+    cpu::{arch::apic::TriggerMode, time},
+    sched::{Dispatcher, WaiterData, SIG_READ},
     syscall::{In, InOut, Out, UserPtr},
 };
 
@@ -38,14 +38,14 @@ where
 
     let packet = unsafe { packet.read()? };
     if packet.buffer_size > MAX_BUFFER_SIZE || packet.handle_count >= MAX_HANDLE_COUNT {
-        return Err(Error::ENOMEM);
+        return Err(ENOMEM);
     }
     UserPtr::<In, Handle>::new(packet.handles).check_slice(packet.handle_count)?;
     UserPtr::<In, u8>::new(packet.buffer).check_slice(packet.buffer_size)?;
 
     let handles = unsafe { slice::from_raw_parts(packet.handles, packet.handle_count) };
     if handles.contains(&hdl) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
     let buffer = unsafe { slice::from_raw_parts(packet.buffer, packet.buffer_size) };
 
@@ -53,7 +53,7 @@ where
         let map = cur.space().handles();
         let channel = map.get::<Channel>(hdl)?;
         if !channel.features().contains(Feature::WRITE) {
-            return Err(Error::EPERM);
+            return Err(EPERM);
         }
         let objects = unsafe { map.send(handles, &channel) }?;
         let mut packet = Packet::new(packet.id, objects, buffer);
@@ -119,7 +119,7 @@ fn chan_recv(hdl: Handle, packet_ptr: UserPtr<InOut, RawPacket>) -> Result {
         let map = cur.space().handles();
         let channel = map.get::<Channel>(hdl)?;
         if !channel.features().contains(Feature::READ) {
-            return Err(Error::EPERM);
+            return Err(EPERM);
         }
 
         raw.buffer_size = raw.buffer_cap;
@@ -150,7 +150,7 @@ fn chan_crecv(
     let call_event = SCHED.with_current(|cur| {
         let channel = cur.space().handles().get::<Channel>(hdl)?;
         if !{ channel.features() }.contains(Feature::WAIT | Feature::READ) {
-            return Err(Error::EPERM);
+            return Err(EPERM);
         }
         Ok(channel.call_event(id)? as _)
     })?;
@@ -159,7 +159,7 @@ fn chan_crecv(
     } else {
         let pree = PREEMPT.lock();
         let blocker = crate::sched::Blocker::new(&call_event, true, SIG_READ);
-        blocker.wait(pree, time::from_us(timeout_us))?;
+        blocker.wait(Some(pree), time::from_us(timeout_us))?;
         Some(blocker)
     };
 
@@ -168,7 +168,7 @@ fn chan_crecv(
 
         let channel = map.get::<Channel>(hdl)?;
         if !channel.features().contains(Feature::READ) {
-            return Err(Error::EPERM);
+            return Err(EPERM);
         }
 
         raw.buffer_size = raw.buffer_cap;
@@ -179,7 +179,7 @@ fn chan_crecv(
 
     if let Some(blocker) = blocker {
         if !blocker.detach().0 {
-            return Err(Error::ETIME);
+            return Err(ETIME);
         }
     }
 
@@ -187,15 +187,39 @@ fn chan_crecv(
 }
 
 #[syscall]
-fn chan_acrecv(hdl: Handle, id: usize, wake_all: bool) -> Result<Handle> {
+fn chan_acrecv(
+    hdl: Handle,
+    id: usize,
+    disp: Handle,
+    syscall: UserPtr<In, Syscall>,
+) -> Result<usize> {
+    hdl.check_null()?;
+    disp.check_null()?;
+    let syscall = (!syscall.as_ptr().is_null())
+        .then(|| {
+            let syscall = unsafe { syscall.read() }?;
+            if matches!(
+                syscall.num as usize,
+                SV_DISP_NEW | SV_DISP_PUSH | SV_DISP_POP
+            ) {
+                return Err(EPERM);
+            }
+            Ok(syscall)
+        })
+        .transpose()?;
+
     SCHED.with_current(|cur| {
         let chan = cur.space().handles().get::<Channel>(hdl)?;
-        if !{ chan.features() }.contains(Feature::READ | Feature::WAIT) {
-            return Err(Error::EPERM);
+        let disp = cur.space().handles().get::<Dispatcher>(disp)?;
+        if !chan.features().contains(Feature::WAIT) {
+            return Err(EPERM);
+        }
+        if !disp.features().contains(Feature::WRITE) {
+            return Err(EPERM);
         }
         let event = chan.call_event(id)? as _;
 
-        let blocker = crate::sched::Blocker::new(&event, wake_all, SIG_READ);
-        cur.space().handles().insert(blocker, None)
+        let waiter_data = WaiterData::new(TriggerMode::Level, SIG_READ);
+        disp.push(&event, waiter_data, syscall)
     })
 }

+ 6 - 6
h2o/kernel/src/sched/task.rs

@@ -35,7 +35,7 @@ impl Type {
     #[inline]
     pub fn pass(this: Option<Self>, cur_ty: Type) -> sv_call::Result<Type> {
         match (this, cur_ty) {
-            (Some(Self::Kernel), Self::User) => Err(sv_call::Error::EPERM),
+            (Some(Self::Kernel), Self::User) => Err(sv_call::EPERM),
             (Some(ty), _) => Ok(ty),
             _ => Ok(cur_ty),
         }
@@ -66,7 +66,7 @@ fn exec_inner(
     name: Option<String>,
     ty: Option<Type>,
     affinity: Option<CpuMask>,
-    space: Arsc<Space>,
+    space: Arc<Space>,
     init_chan: sv_call::Handle,
     s: &Starter,
 ) -> sv_call::Result<Init> {
@@ -80,7 +80,7 @@ fn exec_inner(
         .build()
         .unwrap();
 
-    let tid = tid::allocate(ti).map_err(|_| sv_call::Error::EBUSY)?;
+    let tid = tid::allocate(ti).map_err(|_| sv_call::EBUSY)?;
 
     let entry = ctx::Entry {
         entry: s.entry,
@@ -98,7 +98,7 @@ fn exec_inner(
 #[inline]
 fn exec(
     name: Option<String>,
-    space: Arsc<Space>,
+    space: Arc<Space>,
     init_chan: sv_call::Handle,
     starter: &Starter,
 ) -> sv_call::Result<(Init, sv_call::Handle)> {
@@ -114,7 +114,7 @@ fn exec(
     })
 }
 
-fn create(name: Option<String>, space: Arsc<Space>) -> sv_call::Result<(Init, sv_call::Handle)> {
+fn create(name: Option<String>, space: Arc<Space>) -> sv_call::Result<(Init, sv_call::Handle)> {
     let cur = super::SCHED.with_current(|cur| Ok(cur.tid.clone()))?;
 
     let ty = cur.ty();
@@ -127,7 +127,7 @@ fn create(name: Option<String>, space: Arsc<Space>) -> sv_call::Result<(Init, sv
         .build()
         .unwrap();
 
-    let tid = tid::allocate(ti).map_err(|_| sv_call::Error::EBUSY)?;
+    let tid = tid::allocate(ti).map_err(|_| sv_call::EBUSY)?;
 
     let kstack = ctx::Kstack::new(None, ty);
     let ext_frame = ctx::ExtFrame::zeroed();

+ 21 - 3
h2o/kernel/src/sched/task/boot.rs

@@ -8,6 +8,7 @@ use targs::Targs;
 
 use super::{hdl::DefaultFeature, *};
 use crate::{
+    cpu::arch::tsc::TSC_CLOCK,
     mem::space::{Flags, Phys, Virt},
     sched::SCHED,
 };
@@ -49,6 +50,26 @@ fn flags_to_feat(flags: Flags) -> Feature {
 }
 
 pub fn setup() {
+    unsafe {
+        let constants = sv_call::Constants {
+            ticks_offset: TSC_CLOCK.initial,
+            ticks_multiplier: TSC_CLOCK.mul,
+            ticks_shift: TSC_CLOCK.sft,
+            has_builtin_rand: archop::rand::has_builtin(),
+            num_cpus: crate::cpu::count(),
+        };
+
+        #[allow(clippy::zero_prefixed_literal)]
+        let offset = include!(concat!(
+            env!("CARGO_MANIFEST_DIR"),
+            "/target/constant_offset.rs"
+        ));
+        let ptr = { VDSO.1.base().to_laddr(minfo::ID_OFFSET) }
+            .add(offset)
+            .cast::<sv_call::Constants>();
+        ptr.write(constants);
+    }
+
     let mut objects = hdl::List::new();
 
     // The sequence of kernel objects must match the one defined in
@@ -125,7 +146,4 @@ pub fn setup() {
     )
     .expect("Failed to initialize TINIT");
     SCHED.unblock(tinit, true);
-
-    // Get rid of EPIPE in TINIT.
-    mem::forget(me);
 }
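The `Constants` block written into the vDSO above (offset, multiplier, shift) points at the usual multiply-then-shift clock conversion, `elapsed = ((ticks - offset) * mul) >> sft`; that formula is an assumption here, since the vDSO side is not part of this diff. A sketch with a 128-bit intermediate to keep the multiply from overflowing:

    fn ticks_to_ns(ticks: u64, offset: u64, mul: u64, sft: u32) -> u64 {
        let delta = ticks.wrapping_sub(offset) as u128;
        ((delta * mul as u128) >> sft) as u64
    }

    fn main() {
        // Hypothetical 3 GHz TSC: mul/sft chosen so 3 ticks ~ 1 ns.
        let (mul, sft) = ((1u64 << 32) / 3, 32);
        assert_eq!(ticks_to_ns(3_000_000_000, 0, mul, sft), 999_999_999);
    }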

+ 1 - 1
h2o/kernel/src/sched/task/ctx.rs

@@ -121,7 +121,7 @@ impl Kstack {
         addr: u64,
     ) -> sv_call::Result {
         match self.pf_resume.take() {
-            None => Err(sv_call::Error::ENOENT),
+            None => Err(sv_call::ENOENT),
             Some(ret) => {
                 cur_frame.set_pf_resume(ret.into(), errc, addr);
                 Ok(())

+ 10 - 9
h2o/kernel/src/sched/task/ctx/x86_64.rs

@@ -2,6 +2,7 @@ use alloc::sync::Arc;
 use core::{alloc::Layout, mem::size_of};
 
 use paging::LAddr;
+use sv_call::call::Syscall;
 
 use super::Entry;
 use crate::{
@@ -49,7 +50,7 @@ impl Kframe {
     }
 }
 
-#[derive(Debug, Clone, Copy, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 #[repr(C)]
 pub struct Frame {
     gs_base: u64,
@@ -108,22 +109,22 @@ impl Frame {
     }
 
     #[inline]
-    pub fn syscall_args(&self) -> (usize, [usize; 5]) {
-        (
-            self.rax as usize,
-            [
+    pub fn syscall_args(&self) -> Syscall {
+        Syscall {
+            num: self.rax as usize,
+            args: [
                 self.rdi as usize,
                 self.rsi as usize,
                 self.rdx as usize,
                 self.r8 as usize,
                 self.r9 as usize,
             ],
-        )
+        }
     }
 
     #[inline]
-    pub fn set_syscall_retval(&mut self, retval: usize) {
-        self.rax = retval as u64;
+    pub fn set_syscall_retval(&mut self, res: usize) {
+        self.rax = res as u64;
     }
 
     #[inline]
@@ -167,7 +168,7 @@ impl Frame {
         if !archop::canonical(LAddr::from(gpr.fs_base))
             || !archop::canonical(LAddr::from(gpr.gs_base))
         {
-            return Err(sv_call::Error::EINVAL);
+            return Err(sv_call::EINVAL);
         }
         self.gs_base = gpr.gs_base;
         self.fs_base = gpr.fs_base;

+ 6 - 6
h2o/kernel/src/sched/task/elf.rs

@@ -21,12 +21,12 @@ fn map_addr(
         .start
         .val()
         .checked_sub(virt.range().start.val())
-        .ok_or(sv_call::Error::ERANGE)?;
+        .ok_or(sv_call::ERANGE)?;
     let len = addr
         .end
         .val()
         .checked_sub(addr.start.val())
-        .ok_or(sv_call::Error::ERANGE)?;
+        .ok_or(sv_call::ERANGE)?;
     let phys = match phys {
         Some(phys) => phys,
         None => Phys::allocate(len, true)?,
@@ -117,7 +117,7 @@ fn load_elf(space: &Arc<Space>, file: &Elf, image: &[u8]) -> sv_call::Result<(LA
                 (phdr.p_memsz as usize).round_up_bit(paging::PAGE_SHIFT),
             )?,
 
-            _ => return Err(sv_call::Error::ESPRT),
+            _ => return Err(sv_call::ESPRT),
         }
     }
     Ok((entry, stack_size))
@@ -125,18 +125,18 @@ fn load_elf(space: &Arc<Space>, file: &Elf, image: &[u8]) -> sv_call::Result<(LA
 
 pub fn from_elf(
     image: &[u8],
-    space: Arsc<super::Space>,
+    space: Arc<super::Space>,
     name: String,
     affinity: CpuMask,
     init_chan: hdl::Ref,
 ) -> sv_call::Result<Init> {
     let file = Elf::parse(image)
-        .map_err(|_| sv_call::Error::EINVAL)
+        .map_err(|_| sv_call::EINVAL)
         .and_then(|file| {
             if file.is_64 {
                 Ok(file)
             } else {
-                Err(sv_call::Error::EPERM)
+                Err(sv_call::EPERM)
             }
         })?;
 

+ 2 - 2
h2o/kernel/src/sched/task/excep.rs

@@ -45,7 +45,7 @@ pub fn dispatch_exception(frame: &mut Frame, vec: ExVec) -> bool {
 
     let blocker =
         crate::sched::Blocker::new(&(Arc::clone(excep_chan.event()) as _), false, SIG_READ);
-    if blocker.wait((), Duration::MAX).is_err() {
+    if blocker.wait(None, Duration::MAX).is_err() {
         return false;
     }
     if !blocker.detach().0 {
@@ -67,7 +67,7 @@ pub fn dispatch_exception(frame: &mut Frame, vec: ExVec) -> bool {
             Some(res.code == EXRES_CODE_RECOVERED)
         }
         Err(err) => match err {
-            sv_call::Error::EPIPE => None,
+            sv_call::EPIPE => None,
             _ => Some(false),
         },
     };

+ 29 - 10
h2o/kernel/src/sched/task/hdl.rs

@@ -1,6 +1,6 @@
 mod node;
 
-use alloc::sync::Weak;
+use alloc::sync::{Arc, Weak};
 use core::{any::Any, pin::Pin, ptr::NonNull};
 
 use archop::Azy;
@@ -17,10 +17,22 @@ struct Value {
     index: B18,
 }
 
-pub unsafe trait DefaultFeature: Any + Send {
+pub unsafe trait DefaultFeature: Any + Send + Sync {
     fn default_features() -> Feature;
 }
 
+unsafe impl<T: DefaultFeature + ?Sized> DefaultFeature for crate::sched::Arsc<T> {
+    fn default_features() -> Feature {
+        T::default_features()
+    }
+}
+
+unsafe impl<T: DefaultFeature + ?Sized> DefaultFeature for alloc::sync::Arc<T> {
+    fn default_features() -> Feature {
+        T::default_features()
+    }
+}
+
 #[derive(Debug)]
 pub struct HandleMap {
     list: Mutex<node::List>,
@@ -51,7 +63,7 @@ impl HandleMap {
         let value = Value::new()
             .with_gen(0)
             .with_index_checked(index)
-            .map_err(|_| sv_call::Error::ERANGE)?;
+            .map_err(|_| sv_call::ERANGE)?;
         Ok(sv_call::Handle::new(
             u32::from_ne_bytes(value.into_bytes()) ^ self.mix,
         ))
@@ -88,11 +100,20 @@ impl HandleMap {
         self.encode(link)
     }
 
+    #[inline]
+    pub fn insert_raw<T: DefaultFeature>(
+        &self,
+        obj: Arc<T>,
+        event: Option<Weak<dyn Event>>,
+    ) -> Result<sv_call::Handle> {
+        self.insert_ref(Ref::from_raw(obj, event)?)
+    }
+
     /// # Safety
     ///
     /// The caller must ensure that `T` is [`Send`] if `send` and [`Sync`] if
     /// `sync`.
-    pub unsafe fn insert_unchecked<T: 'static>(
+    pub unsafe fn insert_unchecked<T: Send + Sync + 'static>(
         &self,
         data: T,
         feat: Feature,
@@ -118,14 +139,14 @@ impl HandleMap {
         PREEMPT.scope(|| self.list.lock().remove(link))
     }
 
-    pub fn remove<T: Send + Any>(&self, handle: sv_call::Handle) -> Result<Ref<T>> {
+    pub fn remove<T: Send + Sync + Any>(&self, handle: sv_call::Handle) -> Result<Ref<T>> {
         self.decode(handle).and_then(|value| {
             // SAFETY: Dereference within the available range.
             let ptr = unsafe { value.as_ref() };
             if ptr.is::<T>() {
                 self.remove_ref(handle).map(|obj| obj.downcast().unwrap())
             } else {
-                Err(sv_call::Error::ETYPE)
+                Err(sv_call::ETYPE)
             }
         })
     }
@@ -137,10 +158,8 @@ impl HandleMap {
         PREEMPT.scope(|| {
             { self.list.lock() }.split(handles.iter().map(|&handle| self.decode(handle)), |value| {
                 match value.downcast_ref::<Channel>() {
-                    Ok(chan) if chan.peer_eq(src) => Err(sv_call::Error::EPERM),
-                    Err(_) if !value.features().contains(Feature::SEND) => {
-                        Err(sv_call::Error::EPERM)
-                    }
+                    Ok(chan) if chan.peer_eq(src) => Err(sv_call::EPERM),
+                    Err(_) if !value.features().contains(Feature::SEND) => Err(sv_call::EPERM),
                     _ => Ok(()),
                 }
             })

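The new `insert_raw` path above lets a caller hand an existing `Arc<T>` straight to the handle table, and the blanket impls forward `default_features` through `Arsc<T>`/`Arc<T>`. A minimal stand-alone sketch of that forwarding pattern; the `Feature` type here is a stand-in for the kernel's bitflags:

```rust
use std::sync::Arc;

// Stand-in for the kernel's `Feature` bitflags.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Feature(u32);

const READ: Feature = Feature(0b01);

trait DefaultFeature {
    fn default_features() -> Feature;
}

// Blanket forwarding: a handle holding `Arc<T>` grants the same default
// features as one holding `T` directly.
impl<T: DefaultFeature + ?Sized> DefaultFeature for Arc<T> {
    fn default_features() -> Feature {
        T::default_features()
    }
}

struct Space;

impl DefaultFeature for Space {
    fn default_features() -> Feature {
        READ
    }
}

fn main() {
    assert_eq!(<Arc<Space>>::default_features(), Space::default_features());
}
```
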
+ 40 - 15
h2o/kernel/src/sched/task/hdl/node.rs

@@ -1,4 +1,4 @@
-use alloc::sync::Weak;
+use alloc::sync::{Arc, Weak};
 use core::{
     any::Any,
     fmt,
@@ -15,7 +15,7 @@ use sv_call::{Feature, Result};
 use super::DefaultFeature;
 use crate::{
     mem::Arena,
-    sched::{Arsc, Event, PREEMPT},
+    sched::{Event, PREEMPT},
 };
 
 pub const MAX_HANDLE_COUNT: usize = 1 << 16;
@@ -23,13 +23,13 @@ pub const MAX_HANDLE_COUNT: usize = 1 << 16;
 pub(super) static HR_ARENA: Azy<Arena<Ref>> = Azy::new(|| Arena::new(MAX_HANDLE_COUNT));
 
 #[derive(Debug)]
-pub struct Ref<T: ?Sized = dyn Any> {
+pub struct Ref<T: ?Sized = dyn Any + Send + Sync> {
     _marker: PhantomPinned,
     next: Option<Ptr>,
     prev: Option<Ptr>,
     event: Weak<dyn Event>,
     feat: Feature,
-    obj: Arsc<T>,
+    obj: Arc<T>,
 }
 pub type Ptr = NonNull<Ref>;
 
@@ -50,9 +50,21 @@ impl<T: ?Sized> Ref<T> {
     where
         T: Sized,
     {
+        Self::from_raw_unchecked(Arc::try_new(data)?, feat, event)
+    }
+
+    /// # Safety
+    ///
+    /// The caller must ensure that `T` is [`Send`] if `send` and [`Sync`] if
+    /// `sync`.
+    pub unsafe fn from_raw_unchecked(
+        obj: Arc<T>,
+        feat: Feature,
+        event: Option<Weak<dyn Event>>,
+    ) -> sv_call::Result<Self> {
         let event = event.unwrap_or(Weak::<crate::sched::BasicEvent>::new() as _);
         if event.strong_count() == 0 && feat.contains(Feature::WAIT) {
-            return Err(sv_call::Error::EPERM);
+            return Err(sv_call::EPERM);
         }
         Ok(Ref {
             _marker: PhantomPinned,
@@ -60,7 +72,7 @@ impl<T: ?Sized> Ref<T> {
             prev: None,
             event,
             feat,
-            obj: Arsc::try_new(data)?,
+            obj,
         })
     }
 
@@ -72,11 +84,24 @@ impl<T: ?Sized> Ref<T> {
         unsafe { Self::try_new_unchecked(data, T::default_features(), event) }
     }
 
+    #[inline]
+    pub fn from_raw(obj: Arc<T>, event: Option<Weak<dyn Event>>) -> sv_call::Result<Self>
+    where
+        T: DefaultFeature,
+    {
+        unsafe { Self::from_raw_unchecked(obj, T::default_features(), event) }
+    }
+
+    #[inline]
+    pub fn into_raw(this: Self) -> Arc<T> {
+        this.obj
+    }
+
     /// # Safety
     ///
     /// The caller must ensure that `self` is owned by the current task if it's
     /// not [`Send`].
-    pub unsafe fn deref_unchecked(&self) -> &T {
+    pub unsafe fn deref_unchecked(&self) -> &Arc<T> {
         &self.obj
     }
 
@@ -95,13 +120,13 @@ impl<T: ?Sized> Ref<T> {
             self.feat = feat;
             Ok(())
         } else {
-            Err(sv_call::Error::EPERM)
+            Err(sv_call::EPERM)
         }
     }
 }
 
-impl<T: ?Sized + Send> Deref for Ref<T> {
-    type Target = T;
+impl<T: ?Sized + Send + Sync> Deref for Ref<T> {
+    type Target = Arc<T>;
 
     #[inline]
     fn deref(&self) -> &Self::Target {
@@ -120,11 +145,11 @@ impl Ref {
         if self.is::<T>() {
             Ok(unsafe { &*(self as *const Ref as *const Ref<T>) })
         } else {
-            Err(sv_call::Error::ETYPE)
+            Err(sv_call::ETYPE)
         }
     }
 
-    pub fn downcast<T: Any>(self) -> core::result::Result<Ref<T>, Self> {
+    pub fn downcast<T: Any + Send + Sync>(self) -> core::result::Result<Ref<T>, Self> {
         match self.obj.downcast() {
             Ok(obj) => Ok(Ref {
                 _marker: PhantomPinned,
@@ -158,7 +183,7 @@ impl Ref {
             prev: None,
             event: Weak::clone(&self.event),
             feat: self.feat,
-            obj: Arsc::clone(&self.obj),
+            obj: Arc::clone(&self.obj),
         }
     }
 
@@ -168,7 +193,7 @@ impl Ref {
             // SAFETY: The underlying object is `send` and `sync`.
             Ok(unsafe { self.clone_unchecked() })
         } else {
-            Err(sv_call::Error::EPERM)
+            Err(sv_call::EPERM)
         }
     }
 }
@@ -288,7 +313,7 @@ impl List {
                 }
                 // SAFETY: The pointer is allocated from the arena.
                 Some(cur) => unsafe { cur.as_ref().next },
-                None => break Err(sv_call::Error::ENOENT),
+                None => break Err(sv_call::ENOENT),
             }
         }
     }

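`Ref<T>` now stores an `Arc<T>` instead of an `Arsc<T>`, so an object can round-trip through the handle table without a re-allocation. A reduced model of the `from_raw`/`into_raw` pair under that assumption (the list links, event, and feature fields are omitted):

```rust
use std::sync::Arc;

// Reduced model of `hdl::node::Ref`: only the object pointer.
struct Ref<T: ?Sized> {
    obj: Arc<T>,
}

impl<T: ?Sized> Ref<T> {
    fn from_raw(obj: Arc<T>) -> Self {
        Ref { obj }
    }

    fn into_raw(this: Self) -> Arc<T> {
        this.obj
    }
}

fn main() {
    let space = Arc::new([0u8; 16]);
    let entry = Ref::from_raw(Arc::clone(&space));
    // `into_raw` gives back the very same allocation: no clone, no copy.
    assert!(Arc::ptr_eq(&space, &Ref::into_raw(entry)));
}
```
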
+ 3 - 5
h2o/kernel/src/sched/task/idle.rs

@@ -28,16 +28,14 @@ pub(super) static IDLE: Lazy<Tid> = Lazy::new(|| {
         .build()
         .unwrap();
 
-    let space = super::Space::new_current().expect("Failed to create space");
+    let space = super::Space::new_current();
     let stack = space::init_stack(space.mem(), DEFAULT_STACK_SIZE)
         .expect("Failed to initialize stack for IDLE");
 
     let entry = ctx::Entry {
         entry: LAddr::new(idle as *mut u8),
         stack,
-        args: [cpu as u64, unsafe {
-            archop::msr::read(archop::msr::FS_BASE)
-        }],
+        args: [cpu as u64, unsafe { archop::reg::read_fs() }],
     };
     let kstack = ctx::Kstack::new(Some(entry), Type::Kernel);
 
@@ -50,7 +48,7 @@ pub(super) static IDLE: Lazy<Tid> = Lazy::new(|| {
 });
 
 fn idle(cpu: usize, fs_base: u64) -> ! {
-    unsafe { archop::msr::write(archop::msr::FS_BASE, fs_base) };
+    unsafe { archop::reg::write_fs(fs_base) };
 
     log::debug!("IDLE #{}", cpu);
 

+ 5 - 18
h2o/kernel/src/sched/task/sm.rs

@@ -16,7 +16,7 @@ use crate::{
 };
 
 #[derive(Debug, Builder)]
-#[builder(no_std)]
+#[builder(no_std, pattern = "owned")]
 pub struct TaskInfo {
     from: Option<Tid>,
     #[builder(setter(skip))]
@@ -77,21 +77,13 @@ impl TaskInfo {
     pub fn excep_chan(&self) -> Arsc<Mutex<Option<Channel>>> {
         Arsc::clone(&self.excep_chan)
     }
-
-    #[inline]
-    pub fn with_excep_chan<F, R>(&self, func: F) -> R
-    where
-        F: FnOnce(&mut Option<Channel>) -> R,
-    {
-        PREEMPT.scope(|| func(&mut self.excep_chan.lock()))
-    }
 }
 
 #[derive(Debug)]
 pub struct Context {
     pub(in crate::sched) tid: Tid,
 
-    pub(in crate::sched) space: Arsc<Space>,
+    pub(in crate::sched) space: Arc<Space>,
     pub(in crate::sched) kstack: ctx::Kstack,
     pub(in crate::sched) ext_frame: ctx::ExtFrame,
     pub(in crate::sched) io_bitmap: Option<BitVec>,
@@ -107,7 +99,7 @@ impl Context {
     }
 
     #[inline]
-    pub fn space(&self) -> &Arsc<Space> {
+    pub fn space(&self) -> &Arc<Space> {
         &self.space
     }
 
@@ -210,12 +202,7 @@ impl IntoReady for Init {
 }
 
 impl Init {
-    pub fn new(
-        tid: Tid,
-        space: Arsc<Space>,
-        kstack: ctx::Kstack,
-        ext_frame: ctx::ExtFrame,
-    ) -> Self {
+    pub fn new(tid: Tid, space: Arc<Space>, kstack: ctx::Kstack, ext_frame: ctx::ExtFrame) -> Self {
         Init {
             ctx: Box::new(Context {
                 tid,
@@ -325,7 +312,7 @@ impl Blocked {
     }
 
     #[inline]
-    pub fn space(&self) -> &Arsc<Space> {
+    pub fn space(&self) -> &Arc<Space> {
         &self.ctx.space
     }
 

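`pattern = "owned"` switches the generated `TaskInfoBuilder` from `&mut self` setters to by-value setters, so the builder can be moved through a chain without a mutable binding. A sketch assuming the `derive_builder` crate (field names are illustrative):

```rust
use derive_builder::Builder;

#[derive(Debug, Builder)]
#[builder(pattern = "owned")]
struct TaskInfo {
    name: String,
    affinity: usize,
}

fn main() {
    // Each setter consumes the builder and returns it by value.
    let ti = TaskInfoBuilder::default()
        .name("init".to_owned())
        .affinity(1)
        .build()
        .unwrap();
    println!("{ti:?}");
}
```
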
+ 10 - 15
h2o/kernel/src/sched/task/space.rs

@@ -8,10 +8,7 @@ use super::{
 };
 use crate::{
     mem,
-    sched::{
-        wait::{Futex, FutexKey, FutexRef, Futexes},
-        Arsc,
-    },
+    sched::wait::{Futex, FutexKey, FutexRef, Futexes},
 };
 
 #[derive(Debug)]
@@ -25,23 +22,21 @@ unsafe impl Send for Space {}
 unsafe impl Sync for Space {}
 
 impl Space {
-    pub fn new(ty: super::Type) -> sv_call::Result<Arsc<Self>> {
+    pub fn new(ty: super::Type) -> sv_call::Result<Arc<Self>> {
         let mem = mem::space::Space::try_new(ty)?;
-        Arsc::try_new(Space {
+        Ok(Arc::try_new(Space {
             mem,
             handles: HandleMap::new(),
-            futexes: Futexes::new(Default::default()),
-        })
-        .map_err(sv_call::Error::from)
+            futexes: Default::default(),
+        })?)
     }
 
-    pub fn new_current() -> sv_call::Result<Arsc<Self>> {
-        Arsc::try_new(Space {
+    pub fn new_current() -> Arc<Self> {
+        Arc::new(Space {
             mem: mem::space::with_current(Arc::clone),
             handles: HandleMap::new(),
-            futexes: Futexes::new(Default::default()),
+            futexes: Default::default(),
         })
-        .map_err(sv_call::Error::from)
     }
 
     #[inline]
@@ -74,14 +69,14 @@ impl Space {
                 if obj.features().contains(need_feature) {
                     Ok(Tid::clone(&obj))
                 } else {
-                    Err(sv_call::Error::EPERM)
+                    Err(sv_call::EPERM)
                 }
             })
         })
     }
 }
 
-unsafe impl DefaultFeature for Arsc<Space> {
+unsafe impl DefaultFeature for Space {
     fn default_features() -> Feature {
         Feature::READ | Feature::WRITE
     }

+ 32 - 30
h2o/kernel/src/sched/task/syscall.rs

@@ -5,7 +5,10 @@ use paging::LAddr;
 use spin::Mutex;
 use sv_call::*;
 
-use super::{hdl::DefaultFeature, Blocked, RunningState, Signal, Space, Tid};
+use super::{
+    hdl::{DefaultFeature, Ref},
+    Blocked, RunningState, Signal, Space, Tid,
+};
 use crate::{
     cpu::time::Instant,
     sched::{imp::MIN_TIME_GRAN, Arsc, PREEMPT, SCHED},
@@ -48,7 +51,7 @@ unsafe impl DefaultFeature for SuspendToken {
 fn task_exit(retval: usize) -> Result {
     SCHED.exit_current(retval);
     #[allow(unreachable_code)]
-    Err(Error::EKILLED)
+    Err(EKILLED)
 }
 
 #[syscall]
@@ -75,7 +78,7 @@ fn get_name(ptr: UserPtr<In, u8>, len: usize) -> Result<Option<String>> {
             ptr.read_slice(buf.as_mut_ptr(), len)?;
             buf.set_len(len);
         }
-        Ok(Some(String::from_utf8(buf).map_err(|_| Error::EINVAL)?))
+        Ok(Some(String::from_utf8(buf).map_err(|_| EINVAL)?))
     } else {
         Ok(None)
     }
@@ -84,7 +87,6 @@ fn get_name(ptr: UserPtr<In, u8>, len: usize) -> Result<Option<String>> {
 #[syscall]
 fn task_exec(ci: UserPtr<In, task::ExecInfo>) -> Result<Handle> {
     let ci = unsafe { ci.read()? };
-    ci.init_chan.check_null()?;
 
     let name = get_name(UserPtr::<In, _>::new(ci.name as *mut u8), ci.name_len)?;
 
@@ -96,10 +98,10 @@ fn task_exec(ci: UserPtr<In, task::ExecInfo>) -> Result<Handle> {
             Some(handles.remove::<crate::sched::ipc::Channel>(ci.init_chan)?)
         };
         if ci.space == Handle::NULL {
-            Ok((init_chan, Arsc::clone(cur.space())))
+            Ok((init_chan, Arc::clone(cur.space())))
         } else {
-            let space = handles.remove::<Arsc<Space>>(ci.space)?;
-            Ok((init_chan, Arsc::clone(&space)))
+            let space = handles.remove::<Space>(ci.space)?;
+            Ok((init_chan, Ref::into_raw(space)))
         }
     })?;
 
@@ -133,18 +135,18 @@ fn task_new(
     let name = get_name(name, name_len)?;
 
     let new_space = if space == Handle::NULL {
-        SCHED.with_current(|cur| Ok(Arsc::clone(cur.space())))?
+        SCHED.with_current(|cur| Ok(Arc::clone(cur.space())))?
     } else {
         SCHED.with_current(|cur| {
             cur.space()
                 .handles()
-                .remove::<Arsc<Space>>(space)
-                .map(|space| Arsc::clone(&space))
+                .remove::<Space>(space)
+                .map(Ref::into_raw)
         })?
     };
     let mut sus_slot = Arsc::try_new_uninit()?;
 
-    let (task, hdl) = super::create(name, Arsc::clone(&new_space))?;
+    let (task, hdl) = super::create(name, Arc::clone(&new_space))?;
 
     let task = super::Ready::block(
         super::IntoReady::into_ready(task, unsafe { crate::cpu::id() }, MIN_TIME_GRAN),
@@ -173,8 +175,8 @@ fn task_join(hdl: Handle, retval: UserPtr<Out, usize>) -> Result {
 
     SCHED.with_current(|cur| {
         let handles = cur.space().handles();
-        let val = { handles.get::<Tid>(hdl) }
-            .and_then(|tid| tid.ret_cell().lock().ok_or(Error::ENOENT))?;
+        let val =
+            { handles.get::<Tid>(hdl) }.and_then(|tid| tid.ret_cell().lock().ok_or(ENOENT))?;
 
         drop(handles.remove::<Tid>(hdl));
         unsafe { retval.write(val) }
@@ -185,7 +187,7 @@ fn task_join(hdl: Handle, retval: UserPtr<Out, usize>) -> Result {
 fn task_ctl(hdl: Handle, op: u32, data: UserPtr<InOut, Handle>) -> Result {
     hdl.check_null()?;
 
-    let cur = SCHED.with_current(|cur| Ok(Arsc::clone(cur.space())))?;
+    let cur = SCHED.with_current(|cur| Ok(Arc::clone(cur.space())))?;
 
     match op {
         task::TASK_CTL_KILL => {
@@ -206,7 +208,7 @@ fn task_ctl(hdl: Handle, op: u32, data: UserPtr<InOut, Handle>) -> Result {
 
             st.tid.with_signal(|sig| {
                 if sig == &Some(Signal::Kill) {
-                    Err(Error::EPERM)
+                    Err(EPERM)
                 } else {
                     *sig = Some(st.signal());
                     Ok(())
@@ -218,7 +220,7 @@ fn task_ctl(hdl: Handle, op: u32, data: UserPtr<InOut, Handle>) -> Result {
 
             Ok(())
         }
-        _ => Err(Error::EINVAL),
+        _ => Err(EINVAL),
     }
 }
 
@@ -230,12 +232,12 @@ fn read_regs(
     len: usize,
 ) -> Result<()> {
     if !feat.contains(Feature::READ) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
     match addr {
         task::TASK_DBGADDR_GPR => {
             if len < task::ctx::GPR_SIZE {
-                Err(Error::EBUFFER)
+                Err(EBUFFER)
             } else {
                 unsafe { data.cast().write(task.kstack().task_frame().debug_get()) }
             }
@@ -243,12 +245,12 @@ fn read_regs(
         task::TASK_DBGADDR_FPU => {
             let size = archop::fpu::frame_size();
             if len < size {
-                Err(Error::EBUFFER)
+                Err(EBUFFER)
             } else {
                 unsafe { data.write_slice(&task.ext_frame()[..size]) }
             }
         }
-        _ => Err(Error::EINVAL),
+        _ => Err(EINVAL),
     }
 }
 
@@ -260,12 +262,12 @@ fn write_regs(
     len: usize,
 ) -> Result<()> {
     if !feat.contains(Feature::WRITE) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
     match addr {
         task::TASK_DBGADDR_GPR => {
             if len < sv_call::task::ctx::GPR_SIZE {
-                Err(Error::EBUFFER)
+                Err(EBUFFER)
             } else {
                 let gpr = unsafe { data.cast().read()? };
                 unsafe { task.kstack_mut().task_frame_mut().debug_set(&gpr) }
@@ -274,19 +276,19 @@ fn write_regs(
         task::TASK_DBGADDR_FPU => {
             let size = archop::fpu::frame_size();
             if len < size {
-                Err(Error::EBUFFER)
+                Err(EBUFFER)
             } else {
                 let ptr = task.ext_frame_mut().as_mut_ptr();
                 unsafe { data.read_slice(ptr, size) }
             }
         }
-        _ => Err(Error::EINVAL),
+        _ => Err(EINVAL),
     }
 }
 
 fn create_excep_chan(task: &Blocked, feat: Feature) -> Result<crate::sched::ipc::Channel> {
     if !feat.contains(Feature::READ) {
-        return Err(Error::EPERM);
+        return Err(EPERM);
     }
     let slot = task.tid().excep_chan();
     let chan = match slot.lock() {
@@ -295,7 +297,7 @@ fn create_excep_chan(task: &Blocked, feat: Feature) -> Result<crate::sched::ipc:
             *g = Some(krl);
             usr
         }
-        _ => return Err(Error::EEXIST),
+        _ => return Err(EEXIST),
     };
     Ok(chan)
 }
@@ -325,7 +327,7 @@ fn task_debug(hdl: Handle, op: u32, addr: usize, data: UserPtr<InOut, u8>, len:
         task::TASK_DBG_READ_MEM => unsafe {
             crate::mem::space::with(task.space().mem(), |_| {
                 if !feat.contains(Feature::READ) {
-                    return Err(Error::EPERM);
+                    return Err(EPERM);
                 }
                 let slice = slice::from_raw_parts(addr as *mut u8, len);
                 data.out().write_slice(slice)
@@ -334,14 +336,14 @@ fn task_debug(hdl: Handle, op: u32, addr: usize, data: UserPtr<InOut, u8>, len:
         task::TASK_DBG_WRITE_MEM => unsafe {
             crate::mem::space::with(task.space().mem(), |_| {
                 if !feat.contains(Feature::WRITE) {
-                    return Err(Error::EPERM);
+                    return Err(EPERM);
                 }
                 data.r#in().read_slice(addr as *mut u8, len)
             })
         },
         task::TASK_DBG_EXCEP_HDL => {
             if len < core::mem::size_of::<Handle>() {
-                Err(Error::EBUFFER)
+                Err(EBUFFER)
             } else {
                 let hdl = SCHED.with_current(|cur| {
                     create_excep_chan(&task, feat).and_then(|chan| {
@@ -353,7 +355,7 @@ fn task_debug(hdl: Handle, op: u32, addr: usize, data: UserPtr<InOut, u8>, len:
                 unsafe { data.out().cast::<Handle>().write(hdl) }
             }
         }
-        _ => Err(Error::EINVAL),
+        _ => Err(EINVAL),
     };
 
     PREEMPT.scope(|| *slot.lock() = Some(task));

+ 2 - 2
h2o/kernel/src/sched/task/tid.rs

@@ -10,7 +10,7 @@ use crate::sched::{Arsc, PREEMPT};
 pub const NR_TASKS: usize = 65536;
 
 type BH = BuildHasherDefault<FnvHasher>;
-static TI_MAP: Azy<CHashMap<u32, Arsc<TaskInfo>, BH>> = Azy::new(|| CHashMap::new(BH::default()));
+static TI_MAP: Azy<CHashMap<u32, Arsc<TaskInfo>, BH>> = Azy::new(Default::default);
 static TID_ALLOC: Azy<spin::Mutex<IdAllocator>> =
     Azy::new(|| spin::Mutex::new(IdAllocator::new(0..=(NR_TASKS as u64 - 1))));
 
@@ -69,7 +69,7 @@ pub fn allocate(ti: TaskInfo) -> sv_call::Result<Tid> {
             debug_assert!(old.is_none());
             Ok(Tid { raw, ti })
         }
-        None => Err(sv_call::Error::ENOSPC),
+        None => Err(sv_call::ENOSPC),
     }
 }
 

+ 2 - 5
h2o/kernel/src/sched/wait.rs

@@ -1,6 +1,5 @@
 mod futex;
 
-use alloc::boxed::Box;
 use core::time::Duration;
 
 use crossbeam_queue::SegQueue;
@@ -37,7 +36,7 @@ impl WaitObject {
             if !timer.is_fired() {
                 Ok(())
             } else {
-                Err(sv_call::Error::ETIME)
+                Err(sv_call::ETIME)
             }
         })
     }
@@ -48,9 +47,7 @@ impl WaitObject {
         let mut cnt = 0;
         while cnt < num {
             match self.wait_queue.pop() {
-                Some(timer) if !timer.cancel() => {
-                    let blocked = unsafe { Box::from_raw(timer.callback_arg().as_ptr()) };
-                    SCHED.unblock(Box::into_inner(blocked), preempt);
+                Some(timer) if timer.cancel(preempt) => {
                     cnt += 1;
                 }
                 Some(_) => {}

+ 30 - 17
h2o/kernel/src/sched/wait/futex.rs

@@ -1,4 +1,4 @@
-use core::{hash::BuildHasherDefault, intrinsics, time::Duration};
+use core::{fmt, hash::BuildHasherDefault, intrinsics, time::Duration};
 
 use collection_ex::{CHashMap, FnvHasher};
 use sv_call::*;
@@ -15,6 +15,15 @@ pub struct Futex {
     wo: WaitObject,
 }
 
+impl fmt::Debug for Futex {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Futex")
+            .field("key", &self.key)
+            .field("wo_len", &self.wo.wait_queue.len())
+            .finish()
+    }
+}
+
 impl Futex {
     #[inline]
     pub fn new(key: FutexKey) -> Self {
@@ -28,18 +37,21 @@ impl Futex {
         self.wo.wait_queue.is_empty()
     }
 
-    fn wait<T>(&self, guard: T, val: u64, timeout: Duration) -> Result {
-        let ptr = self.key.as_ptr();
-        if unsafe { intrinsics::atomic_load(ptr) } == val {
-            self.wo.wait(guard, timeout, "Futex::wait")
+    fn wait<T>(this: FutexRef<'_>, guard: T, val: u64, timeout: Duration) -> Result {
+        let ptr = this.key.as_ptr();
+        if unsafe { intrinsics::atomic_load_seqcst(ptr) } == val {
+            unsafe {
+                let wo = &*(&this.wo as *const WaitObject);
+                wo.wait((this, guard), timeout, "Futex::wait")
+            }
         } else {
-            Err(Error::EINVAL)
+            Err(EINVAL)
         }
     }
 
     #[inline]
     fn wake(&self, num: usize) -> Result<usize> {
-        Ok(self.wo.notify(num, true))
+        Ok(self.wo.notify(num, false))
     }
 
     fn requeue(&self, other: &Self, num: usize) -> Result<usize> {
@@ -60,6 +72,7 @@ impl Futex {
 mod syscall {
     use sv_call::*;
 
+    use super::Futex;
     use crate::{
         cpu::time,
         sched::{PREEMPT, SCHED},
@@ -72,15 +85,12 @@ mod syscall {
 
         let pree = PREEMPT.lock();
         let futex = unsafe { (*SCHED.current()).as_ref().unwrap().space.futex(ptr) };
-        let ret = futex.wait(pree, expected, time::from_us(timeout_us));
-
-        if futex.wo.wait_queue.is_empty() {
-            drop(futex);
-            SCHED.with_current(|cur| {
-                unsafe { cur.space.try_drop_futex(ptr) };
-                Ok(())
-            })?;
-        }
+        let ret = Futex::wait(futex, pree, expected, time::from_us(timeout_us));
+
+        SCHED.with_current(|cur| {
+            unsafe { cur.space.try_drop_futex(ptr) };
+            Ok(())
+        })?;
 
         ret
     }
@@ -88,7 +98,10 @@ mod syscall {
     #[syscall]
     fn futex_wake(ptr: UserPtr<In, u64>, num: usize) -> Result<usize> {
         let _ = unsafe { ptr.read() }?;
-        SCHED.with_current(|cur| unsafe { cur.space.futex(ptr) }.wake(num))
+        SCHED.with_current(|cur| {
+            let futex = unsafe { cur.space.futex(ptr) };
+            futex.wake(num)
+        })
     }
 
     #[syscall]

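The reworked `Futex::wait` keeps the `FutexRef` guard alive across the sleep and uses a sequentially consistent load for the value check. The core protocol, sketched against `std` atomics:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Sleep only if the futex word still holds the expected value; otherwise
// the waker already ran and the caller must re-check its condition.
fn futex_wait_check(word: &AtomicU64, expected: u64) -> Result<(), &'static str> {
    // SeqCst mirrors the kernel's `atomic_load_seqcst`: the load cannot be
    // reordered after the enqueue onto the wait queue.
    if word.load(Ordering::SeqCst) == expected {
        Ok(()) // the real code enqueues a timer and blocks here
    } else {
        Err("EINVAL")
    }
}

fn main() {
    let word = AtomicU64::new(1);
    assert!(futex_wait_check(&word, 1).is_ok());
    word.store(2, Ordering::SeqCst);
    assert_eq!(futex_wait_check(&word, 1), Err("EINVAL"));
}
```
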
+ 27 - 9
h2o/kernel/src/syscall.rs

@@ -2,15 +2,32 @@
 //!
 //! ## Adding a syscall (`fn cast_init(k: *mut K) -> *const L`)
 //!
-//! Just create a private submodule `syscall` in a file and write the processing
-//! code:
+//! 1. Add a new JSON file under the `syscall` directory of the kernel source
+//! root, using the following content as a prototype, or append the entry to
+//! an existing file:
+//!
+//! ```json
+//! {
+//!     "name": "sv_cast_init",
+//!     "returns": "*const L",
+//!     "args": [
+//!         {
+//!             "name": "k",
+//!             "ty": "*mut K"
+//!         }
+//!     ]
+//! }
+//! ```
+//!
+//! 2. Create a private submodule `syscall` in a source file and write the
+//! processing code:
 //!
 //! ```rust,no_run
 //! mod syscall {
 //!       use sv_call::*;
 //!       #[syscall]
-//!       fn cast_init(k: *mut K) -> *const L {
-//!             init(k);
+//!       fn cast_init(k: *mut K) -> Result<*const L> {
+//!             // init(k);
 //!             Ok(k.cast())
 //!       }
 //! }
@@ -20,7 +37,7 @@
 
 mod user_ptr;
 
-use sv_call::*;
+use sv_call::{call::Syscall, *};
 
 pub use self::user_ptr::*;
 
@@ -28,10 +45,11 @@ type SyscallWrapper = unsafe extern "C" fn(usize, usize, usize, usize, usize) ->
 static SYSCALL_TABLE: &[SyscallWrapper] =
     &include!(concat!(env!("CARGO_MANIFEST_DIR"), "/target/wrapper.rs"));
 
-pub fn handler(num: usize, args: &[usize; 5]) -> usize {
-    match SYSCALL_TABLE.get(num).copied() {
+pub fn handle(syscall: Syscall) -> usize {
+    let args = syscall.args;
+    match SYSCALL_TABLE.get(syscall.num).copied() {
         Some(handler) => unsafe { handler(args[0], args[1], args[2], args[3], args[4]) },
-        _ => Error::EINVAL.into_retval(),
+        _ => ESPRT.into_retval(),
     }
 }
 
@@ -55,6 +73,6 @@ mod syscall {
 
     #[syscall]
     fn int_get(hdl: Handle) -> Result<u64> {
-        SCHED.with_current(|cur| cur.space().handles().get::<u64>(hdl).map(|obj| **obj))
+        SCHED.with_current(|cur| cur.space().handles().get::<u64>(hdl).map(|obj| ***obj))
     }
 }

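`handle` now receives a packed `Syscall` instead of a bare number plus argument slice; the dispatch itself stays a plain table lookup. A self-contained model of that scheme:

```rust
// Model of the kernel's dispatch: the syscall number indexes a static
// table of ABI-erased wrappers; unknown numbers yield an error retval.
#[derive(Debug, Clone, Copy, Default)]
struct Syscall {
    num: usize,
    args: [usize; 5],
}

type SyscallWrapper = fn(usize, usize, usize, usize, usize) -> usize;

fn sv_add(a: usize, b: usize, _: usize, _: usize, _: usize) -> usize {
    a + b
}

static SYSCALL_TABLE: &[SyscallWrapper] = &[sv_add];

fn handle(syscall: Syscall) -> usize {
    let args = syscall.args;
    match SYSCALL_TABLE.get(syscall.num).copied() {
        Some(handler) => handler(args[0], args[1], args[2], args[3], args[4]),
        // The kernel returns `ESPRT.into_retval()` here.
        None => usize::MAX,
    }
}

fn main() {
    let call = Syscall { num: 0, args: [2, 3, 0, 0, 0] };
    assert_eq!(handle(call), 5);
}
```
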
+ 4 - 4
h2o/kernel/src/syscall/user_ptr.rs

@@ -52,7 +52,7 @@ impl<T: Type, D> UserPtr<T, D> {
     pub fn check_slice(&self, len: usize) -> Result<()> {
         check_ptr(
             self.data.cast(),
-            mem::size_of::<T>() * len,
+            mem::size_of::<D>() * len,
             mem::align_of::<D>(),
         )
     }
@@ -204,9 +204,9 @@ fn check_ptr(ptr: *mut u8, size: usize, align: usize) -> Result<()> {
     let is_aligned = (ptr as usize) & (align - 1) == 0;
     // TODO: Decide whether to check the validity of pointers of empty slices.
     if !is_in_range && size > 0 {
-        Err(sv_call::Error::EPERM)
+        Err(sv_call::EPERM)
     } else if !is_aligned {
-        Err(sv_call::Error::EALIGN)
+        Err(sv_call::EALIGN)
     } else {
         Ok(())
     }
@@ -240,7 +240,7 @@ impl CheckedCopyRet {
                 self.addr_p1 - 1,
                 self.errc
             );
-            Err(sv_call::Error::EPERM)
+            Err(sv_call::EPERM)
         } else {
             Ok(())
         }

+ 7 - 3
h2o/kernel/syscall/channel.json

@@ -79,7 +79,7 @@
     },
     {
         "name": "sv_chan_acrecv",
-        "returns": "Handle",
+        "returns": "usize",
         "args": [
             {
                 "name": "hdl",
@@ -90,8 +90,12 @@
                 "ty": "usize"
             },
             {
-                "name": "wake_all",
-                "ty": "bool"
+                "name": "disp",
+                "ty": "Handle"
+            },
+            {
+                "name": "syscall",
+                "ty": "*const Syscall"
             }
         ]
     }

+ 56 - 0
h2o/kernel/syscall/dispatcher.json

@@ -0,0 +1,56 @@
+[
+    {
+        "name": "sv_disp_new",
+        "returns": "Handle",
+        "args": [
+            {
+                "name": "capacity",
+                "ty": "usize"
+            }
+        ]
+    },
+    {
+        "name": "sv_disp_push",
+        "returns": "usize",
+        "args": [
+            {
+                "name": "disp",
+                "ty": "Handle"
+            },
+            {
+                "name": "hdl",
+                "ty": "Handle"
+            },
+            {
+                "name": "level_triggered",
+                "ty": "bool"
+            },
+            {
+                "name": "signal",
+                "ty": "usize"
+            },
+            {
+                "name": "syscall",
+                "ty": "*const Syscall"
+            }
+        ]
+    },
+    {
+        "name": "sv_disp_pop",
+        "returns": "usize",
+        "args": [
+            {
+                "name": "disp",
+                "ty": "Handle"
+            },
+            {
+                "name": "canceled",
+                "ty": "*mut bool"
+            },
+            {
+                "name": "result",
+                "ty": "*mut usize"
+            }
+        ]
+    }
+]

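Per the prototypes, `sv_disp_push` registers a `(handle, signal, packed syscall)` entry with the dispatcher and returns a key, and `sv_disp_pop` later yields a completed entry's cancellation flag and result. A toy model of that queue protocol; names follow the JSON, but the implementation here is invented for illustration (a real dispatcher parks each entry until its signal fires):

```rust
use std::collections::VecDeque;

// Invented model: entries complete immediately instead of waiting on a
// signal, so only the push/pop bookkeeping is shown.
#[derive(Default)]
struct Dispatcher {
    next_key: usize,
    done: VecDeque<(usize, bool, usize)>, // (key, canceled, result)
}

impl Dispatcher {
    // Counterpart of `sv_disp_push`: returns the key identifying the wait.
    fn push(&mut self, result_when_done: usize) -> usize {
        self.next_key += 1;
        self.done.push_back((self.next_key, false, result_when_done));
        self.next_key
    }

    // Counterpart of `sv_disp_pop`: yields (key, canceled, result).
    fn pop(&mut self) -> Option<(usize, bool, usize)> {
        self.done.pop_front()
    }
}

fn main() {
    let mut disp = Dispatcher::default();
    let key = disp.push(0);
    assert_eq!(disp.pop(), Some((key, false, 0)));
}
```
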
+ 0 - 32
h2o/kernel/syscall/object.json

@@ -54,37 +54,5 @@
                 "ty": "usize"
             }
         ]
-    },
-    {
-        "name": "sv_obj_await",
-        "returns": "Handle",
-        "args": [
-            {
-                "name": "hdl",
-                "ty": "Handle"
-            },
-            {
-                "name": "wake_all",
-                "ty": "bool"
-            },
-            {
-                "name": "signal",
-                "ty": "usize"
-            }
-        ]
-    },
-    {
-        "name": "sv_obj_awend",
-        "returns": "usize",
-        "args": [
-            {
-                "name": "waiter",
-                "ty": "Handle"
-            },
-            {
-                "name": "timeout_us",
-                "ty": "u64"
-            }
-        ]
     }
 ]

+ 7 - 0
h2o/kernel/syscall/task.json

@@ -118,5 +118,12 @@
                 "ty": "usize"
             }
         ]
+    },
+    {
+        "name": "sv_cpu_num",
+        "returns": "usize",
+        "vdso_specific": true,
+        "vdso_only": true,
+        "args": []
     }
 ]

+ 27 - 0
h2o/kernel/syscall/time.json

@@ -2,11 +2,38 @@
     {
         "name": "sv_time_get",
         "returns": "()",
+        "vdso_specific": true,
         "args": [
             {
                 "name": "ptr",
                 "ty": "*mut ()"
             }
         ]
+    },
+    {
+        "name": "sv_random",
+        "returns": "u64",
+        "vdso_specific": true,
+        "vdso_only": true,
+        "args": []
+    },
+    {
+        "name": "sv_timer_new",
+        "returns": "Handle",
+        "args": []
+    },
+    {
+        "name": "sv_timer_set",
+        "returns": "()",
+        "args": [
+            {
+                "name": "handle",
+                "ty": "Handle"
+            },
+            {
+                "name": "duration_us",
+                "ty": "u64"
+            }
+        ]
     }
 ]

+ 1 - 0
h2o/libs/archop/src/x86_64/lock.rs

@@ -43,6 +43,7 @@ impl PreemptState {
     }
 
     #[inline]
+    #[track_caller]
     pub fn scope<F, R>(&self, func: F) -> R
     where
         F: FnOnce() -> R,

+ 9 - 0
h2o/libs/archop/src/x86_64/msr.rs

@@ -279,3 +279,12 @@ pub fn rdtsc() -> u64 {
         ((edx as u64) << 32) | (eax as u64)
     }
 }
+
+#[inline]
+pub fn rdtscp() -> (u64, u32) {
+    unsafe {
+        let (eax, edx, ecx): (u32, u32, u32);
+        asm!("rdtscp", out("eax")eax, out("edx")edx, out("ecx")ecx, options(nostack));
+        (((edx as u64) << 32) | (eax as u64), ecx)
+    }
+}

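Unlike `rdtsc`, `rdtscp` also returns `IA32_TSC_AUX` in `ecx`; operating systems commonly preload that MSR with the CPU number, so the reader learns which core produced the timestamp. A user-mode sketch (x86_64 only; assumes the OS initialized `IA32_TSC_AUX`):

```rust
#[cfg(target_arch = "x86_64")]
fn rdtscp() -> (u64, u32) {
    let (eax, edx, ecx): (u32, u32, u32);
    unsafe {
        // edx:eax = TSC, ecx = IA32_TSC_AUX (commonly the CPU number).
        core::arch::asm!(
            "rdtscp",
            out("eax") eax, out("edx") edx, out("ecx") ecx,
            options(nostack),
        );
    }
    (((edx as u64) << 32) | (eax as u64), ecx)
}

#[cfg(target_arch = "x86_64")]
fn main() {
    let (tsc, aux) = rdtscp();
    println!("tsc = {tsc}, tsc_aux = {aux}");
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```
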
+ 5 - 0
h2o/libs/archop/src/x86_64/rand.rs

@@ -33,3 +33,8 @@ pub fn get() -> u64 {
     let ret = crate::msr::rdtsc();
     ret.wrapping_mul(0xc345c6b72fd16123)
 }
+
+#[inline]
+pub fn has_builtin() -> bool {
+    *RAND_AVAILABLE
+}

+ 36 - 0
h2o/libs/archop/src/x86_64/reg.rs

@@ -166,3 +166,39 @@ pub mod rflags {
     pub const ID: u64 = 1 << 21;
     pub const USER_ACCESS: u64 = CF | PF | AF | ZF | SF | TF | DF | OF | NT | AC | ID;
 }
+
+/// # Safety
+///
+/// The caller is responsible for the validity of the architecture context.
+#[inline]
+pub unsafe fn read_fs() -> u64 {
+    let mut ret;
+    core::arch::asm!("rdfsbase {}", out(reg) ret, options(nostack));
+    ret
+}
+
+/// # Safety
+///
+/// The caller is responsible for the validity of the architecture context.
+#[inline]
+pub unsafe fn write_fs(value: u64) {
+    core::arch::asm!("wrfsbase {}", in(reg) value, options(nostack));
+}
+
+/// # Safety
+///
+/// The caller is responsible for the validity of the architecture context.
+#[inline]
+pub unsafe fn read_gs() -> u64 {
+    let mut ret;
+    core::arch::asm!("rdgsbase {}", out(reg) ret, options(nostack));
+    ret
+}
+
+/// # Safety
+///
+/// The caller is responsible for the validity of the architecture context.
+#[inline]
+pub unsafe fn write_gs(value: u64) {
+    core::arch::asm!("wrgsbase {}", in(reg) value, options(nostack));
+}

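These wrappers replace the `FS_BASE` MSR accesses used before (see the `idle.rs` hunk above): `rdfsbase`/`wrfsbase` are cheaper than `rdmsr`/`wrmsr` and, once `CR4.FSGSBASE` is set, usable outside ring 0. A round-trip sketch; note this faults with `#UD` on a CPU or kernel without FSGSBASE enabled (Linux exposes it to user space since 5.9 on supporting hardware):

```rust
#[cfg(target_arch = "x86_64")]
unsafe fn fs_base_roundtrip() -> (u64, u64) {
    let old: u64;
    core::arch::asm!("rdfsbase {}", out(reg) old, options(nostack));
    core::arch::asm!("wrfsbase {}", in(reg) old, options(nostack));
    let new: u64;
    core::arch::asm!("rdfsbase {}", out(reg) new, options(nostack));
    (old, new)
}

#[cfg(target_arch = "x86_64")]
fn main() {
    let (old, new) = unsafe { fs_base_roundtrip() };
    assert_eq!(old, new);
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```
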
+ 21 - 13
h2o/libs/bitop_ex/src/lib.rs

@@ -1,7 +1,7 @@
 #![no_std]
 #![feature(core_intrinsics)]
 
-use core::{intrinsics as ci, ops::*};
+use core::{intrinsics as ci, num::Wrapping, ops::*};
 
 use num_traits::{Num, NumCast};
 
@@ -16,24 +16,29 @@ pub trait BitOpEx:
     + Shl<Output = Self>
     + Shr<Output = Self>
     + Not<Output = Self>
+where
+    Wrapping<Self>: Add<Output = Wrapping<Self>>
+        + Sub<Output = Wrapping<Self>>
+        + Mul<Output = Wrapping<Self>>
+        + Div<Output = Wrapping<Self>>
+        + Rem<Output = Wrapping<Self>>,
 {
     const BIT_SIZE: usize = core::mem::size_of::<Self>() * 8;
 
     #[inline]
     #[must_use]
     fn round_up_bit(&self, bit: Self) -> Self {
-        let val = Self::one() << bit;
-        ci::wrapping_add(
-            ci::wrapping_sub(*self, Self::one()) | ci::wrapping_sub(val, Self::one()),
-            Self::one(),
-        )
+        let val = Wrapping(Self::one() << bit);
+        let this = Wrapping(*self);
+        let one = Wrapping(Self::one());
+        (Wrapping((this - one).0 | (val - one).0) + one).0
     }
 
     #[inline]
     #[must_use]
     fn round_down_bit(&self, bit: Self) -> Self {
         let val = Self::one() << bit;
-        *self & !ci::wrapping_sub(val, Self::one())
+        *self & !(Wrapping(val) - Wrapping(Self::one())).0
     }
 
     #[inline]
@@ -51,10 +56,7 @@ pub trait BitOpEx:
     #[inline]
     #[must_use]
     fn msb(&self) -> Self {
-        ci::wrapping_sub(
-            Self::from(Self::BIT_SIZE).unwrap(),
-            ci::ctlz(*self) + Self::one(),
-        )
+        (Wrapping(Self::from(Self::BIT_SIZE).unwrap()) - Wrapping(ci::ctlz(*self) + Self::one())).0
     }
 
     #[inline]
@@ -76,7 +78,8 @@ pub trait BitOpEx:
     }
 }
 
-impl<T> BitOpEx for T where
+impl<T> BitOpEx for T
+where
     T: Sized
         + Num
         + NumCast
@@ -86,6 +89,11 @@ impl<T> BitOpEx for T where
         + BitXor<Output = Self>
         + Shl<Output = Self>
         + Shr<Output = Self>
-        + Not<Output = Self>
+        + Not<Output = Self>,
+    Wrapping<Self>: Add<Output = Wrapping<Self>>
+        + Sub<Output = Wrapping<Self>>
+        + Mul<Output = Wrapping<Self>>
+        + Div<Output = Wrapping<Self>>
+        + Rem<Output = Wrapping<Self>>,
 {
 }

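The switch from `intrinsics::wrapping_*` to `core::num::Wrapping` keeps the same bit trick: `round_up_bit` computes `((x - 1) | (2^bit - 1)) + 1` with wrapping arithmetic, so `x == 0` maps to `0` instead of panicking in debug builds. Spelled out for `u64`:

```rust
use std::num::Wrapping;

// Round `x` up to the next multiple of `1 << bit`.
fn round_up_bit(x: u64, bit: u64) -> u64 {
    let val = Wrapping(1u64 << bit);
    let one = Wrapping(1u64);
    let this = Wrapping(x);
    (Wrapping((this - one).0 | (val - one).0) + one).0
}

fn main() {
    assert_eq!(round_up_bit(0, 12), 0); // 0 - 1 wraps, | 0xfff, + 1 wraps back
    assert_eq!(round_up_bit(1, 12), 4096);
    assert_eq!(round_up_bit(4096, 12), 4096);
    assert_eq!(round_up_bit(4097, 12), 8192);
}
```
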
+ 3 - 2
h2o/libs/canary/src/lib.rs

@@ -8,7 +8,8 @@ use core::{
     marker::PhantomData,
 };
 
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, Copy, PartialOrd, Ord, Hash, PartialEq, Eq)]
 pub struct Canary<T> {
     id: TypeId,
     _marker: PhantomData<T>,
@@ -51,7 +52,7 @@ impl<T: 'static> Default for Canary<T> {
 impl<T: 'static> core::fmt::Debug for Canary<T> {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         if self.check() {
-            write!(f, "{}", core::any::type_name::<T>())
+            write!(f, "{}", type_name::<T>())
         } else {
             write!(f, "<Invalid type>")
         }

+ 82 - 4
h2o/libs/collection_ex/src/chash_map.rs

@@ -78,12 +78,12 @@ unsafe impl<K: Sync + Send, V: Sync + Send, S> Sync for CHashMap<K, V, S> {}
 impl<K, V, S: Default> Default for CHashMap<K, V, S> {
     #[inline]
     fn default() -> Self {
-        Self::new(S::default())
+        Self::with_hasher(S::default())
     }
 }
 
 impl<K, V, S> CHashMap<K, V, S> {
-    pub fn new(hasher: S) -> Self {
+    pub fn with_hasher(hasher: S) -> Self {
         CHashMap {
             inner: RwLock::new(inner::Buckets::with_capacity(hasher, MIN_CAPACITY)),
             len: AtomicUsize::new(0),
@@ -99,12 +99,24 @@ impl<K, V, S> CHashMap<K, V, S> {
     }
 }
 
+impl<K, V, S: Clone> CHashMap<K, V, S> {
+    pub fn take(&self) -> CHashMap<K, V, S> {
+        let mut buckets = self.inner.write();
+
+        let mut ret = Self::with_hasher(buckets.hasher().clone());
+        *ret.len.get_mut() = self.len.swap(0, SeqCst);
+        mem::swap(ret.inner.get_mut(), &mut *buckets);
+
+        ret
+    }
+}
+
 impl<K, V, S: BuildHasher + Default> CHashMap<K, V, S> {
-    fn grow(&self, new_len: usize)
+    fn grow(&self, old_len: usize)
     where
         K: Hash,
     {
-        let len = new_len * GROW_FACTOR;
+        let len = old_len * GROW_FACTOR;
         let mut buckets = self.inner.write();
         if buckets.len() < len {
             let new = inner::Buckets::with_capacity(S::default(), len);
@@ -268,6 +280,33 @@ impl<K, V, S: BuildHasher + Default> CHashMap<K, V, S> {
     {
         self.remove_entry(key).map(|ret| ret.1)
     }
+
+    pub fn retain_mut<F>(&self, predicate: F)
+    where
+        F: Fn(&K, &mut V) -> bool,
+    {
+        let buckets = self.inner.read();
+        for ent in buckets.as_inner() {
+            let mut ent = ent.write();
+
+            let remain = match *ent {
+                inner::Entry::Data((ref key, ref mut value)) => predicate(key, value),
+                _ => true,
+            };
+            if !remain {
+                *ent = inner::Entry::Removed;
+                self.len.fetch_sub(1, SeqCst);
+            }
+        }
+    }
+
+    #[inline]
+    pub fn retain<F>(&self, predicate: F)
+    where
+        F: Fn(&K, &V) -> bool,
+    {
+        self.retain_mut(|key, value| predicate(key, value))
+    }
 }
 
 impl<K, V, S: BuildHasher + Default> fmt::Debug for CHashMap<K, V, S> {
@@ -275,3 +314,42 @@ impl<K, V, S: BuildHasher + Default> fmt::Debug for CHashMap<K, V, S> {
         f.debug_list().entry(&"..").finish()
     }
 }
+
+impl<K: Clone, V: Clone, S: Clone> Clone for CHashMap<K, V, S> {
+    fn clone(&self) -> Self {
+        Self {
+            inner: RwLock::new(self.inner.read().clone()),
+            len: self.len.load(SeqCst).into(),
+        }
+    }
+}
+
+pub struct IntoIter<K, V> {
+    inner: alloc::vec::IntoIter<RwLock<inner::Entry<(K, V)>>>,
+}
+
+impl<K, V> Iterator for IntoIter<K, V> {
+    type Item = (K, V);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        for ent in self.inner.by_ref() {
+            if let inner::Entry::Data((key, value)) = ent.into_inner() {
+                return Some((key, value));
+            }
+        }
+        None
+    }
+}
+
+impl<K, V, S> IntoIterator for CHashMap<K, V, S> {
+    type Item = (K, V);
+
+    type IntoIter = IntoIter<K, V>;
+
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        IntoIter {
+            inner: self.inner.into_inner().into_inner().into_iter(),
+        }
+    }
+}

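`take` swaps the whole bucket array out under the write lock and leaves an empty map behind, while `retain` filters entries in place, each bucket under its own lock. The observable behaviour matches the `std` equivalents, minus the `&mut self` requirement:

```rust
use std::collections::HashMap;

fn main() {
    let mut map: HashMap<u32, u32> = (0..8).map(|i| (i, i * i)).collect();

    // `CHashMap::retain(&self, ...)` behaves like this.
    map.retain(|k, _| k % 2 == 0);
    assert_eq!(map.len(), 4);

    // `CHashMap::take(&self)` behaves like `mem::take`: the original is
    // left empty and the contents move into the returned map.
    let taken = std::mem::take(&mut map);
    assert!(map.is_empty());
    assert_eq!(taken.len(), 4);
}
```
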
+ 29 - 0
h2o/libs/collection_ex/src/chash_map/inner.rs

@@ -7,6 +7,7 @@ use core::{
 
 use spin::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 
+#[derive(Debug, Clone, Copy)]
 pub enum Entry<T> {
     Empty,
     Data(T),
@@ -84,9 +85,21 @@ impl<K, V, S> Buckets<K, V, S> {
         Buckets { hasher, data }
     }
 
+    pub fn as_inner(&self) -> &[RwLock<Entry<(K, V)>>] {
+        &self.data
+    }
+
     pub fn len(&self) -> usize {
         self.data.len()
     }
+
+    pub fn hasher(&self) -> &S {
+        &self.hasher
+    }
+
+    pub fn into_inner(self) -> Vec<RwLock<Entry<(K, V)>>> {
+        self.data
+    }
 }
 
 impl<K, V, S: BuildHasher> Buckets<K, V, S> {
@@ -175,3 +188,19 @@ impl<K, V, S: BuildHasher> Buckets<K, V, S> {
         }
     }
 }
+
+impl<K: Clone, V: Clone, S: Clone> Clone for Buckets<K, V, S> {
+    fn clone(&self) -> Self {
+        Buckets {
+            // Since we copy plainly without rehashing etc., it is important that we keep the same
+            // hash function.
+            hasher: self.hasher.clone(),
+            // Lock and clone every bucket individually.
+            data: self
+                .data
+                .iter()
+                .map(|x| RwLock::new(x.read().clone()))
+                .collect(),
+        }
+    }
+}

+ 1 - 0
h2o/libs/heap/Cargo.toml

@@ -8,6 +8,7 @@ version = "0.1.0"
 [features]
 default = ["global"]
 global = []
+tcache = ["global"]
 
 [dependencies]
 # Local crates

+ 105 - 36
h2o/libs/heap/src/alloc.rs

@@ -43,6 +43,7 @@ pub struct Allocator {
 }
 
 impl Allocator {
+    #[inline]
     pub const fn new(alloc_pages: crate::AllocPages, dealloc_pages: crate::DeallocPages) -> Self {
         Allocator {
             pool: Mutex::new(pool::Pool::new()),
@@ -50,14 +51,28 @@ impl Allocator {
         }
     }
 
+    #[inline]
     pub const fn new_null() -> Self {
         Self::new(null_alloc_pages, null_dealloc_pages)
     }
 
+    #[inline]
     pub fn stat(&self) -> crate::stat::Stat {
         self.pool.lock().stat()
     }
 
+    #[inline]
+    #[allow(dead_code)]
+    pub fn pool(&self) -> &Mutex<pool::Pool> {
+        &self.pool
+    }
+
+    #[inline]
+    #[allow(dead_code)]
+    pub fn pager(&self) -> &Mutex<Pager> {
+        &self.pager
+    }
+
     /// # Safety
     ///
     /// This function resets the allocation and deallocation provider for the
@@ -94,34 +109,73 @@ unsafe impl GlobalAlloc for Allocator {
 
         // The size is not too big
         if size <= page::MAX_OBJ_SIZE {
-            let mut pool = self.pool.lock();
-
-            // The first allocation (assuming something available)
-            match pool.allocate(layout) {
-                // Whoosh! Returning
-                Ok(x) => *x,
-
-                Err(e) => match e {
-                    // Oops! The pool is full
-                    Error::NeedExt => {
-                        let page = {
-                            let mut pager = self.pager.lock();
-                            // Allocate a new page
-                            pager.alloc_pages(1)
-                        };
-
-                        if let Some(page) = page {
-                            pool.extend(layout, page.cast()).unwrap();
-                            // The second allocation
-                            pool.allocate(layout).map_or(null_mut(), |x| *x)
-                        } else {
-                            // A-o! Out of memory
-                            null_mut()
+            #[cfg(feature = "tcache")]
+            {
+                // The first allocation (assuming something available)
+                match crate::TCACHE.allocate(layout, &self.pool) {
+                    // Whoosh! Returning
+                    Ok(x) => *x,
+
+                    Err(e) => match e {
+                        // Oops! The pool is full
+                        Error::NeedExt => {
+                            let mut pool = self.pool.lock();
+
+                            let page = {
+                                let mut pager = self.pager.lock();
+                                // Allocate a new page
+                                pager.alloc_pages(1)
+                            };
+
+                            if let Some(page) = page {
+                                pool.extend(layout, page.cast()).unwrap();
+                                drop(pool);
+
+                                // The second allocation
+                                crate::TCACHE
+                                    .allocate(layout, &self.pool)
+                                    .map_or(null_mut(), |x| *x)
+                            } else {
+                                // A-o! Out of memory
+                                null_mut()
+                            }
                         }
-                    }
-                    // A-o! There's a bug
-                    _ => null_mut(),
-                },
+                        // A-o! There's a bug
+                        _ => null_mut(),
+                    },
+                }
+            }
+            #[cfg(not(feature = "tcache"))]
+            {
+                let mut pool = self.pool.lock();
+
+                // The first allocation (assuming something available)
+                match pool.allocate(layout) {
+                    // Whoosh! Returning
+                    Ok(x) => *x,
+
+                    Err(e) => match e {
+                        // Oops! The pool is full
+                        Error::NeedExt => {
+                            let page = {
+                                let mut pager = self.pager.lock();
+                                // Allocate a new page
+                                pager.alloc_pages(1)
+                            };
+
+                            if let Some(page) = page {
+                                pool.extend(layout, page.cast()).unwrap();
+                                // The second allocation
+                                pool.allocate(layout).map_or(null_mut(), |x| *x)
+                            } else {
+                                // A-o! Out of memory
+                                null_mut()
+                            }
+                        }
+                        // A-o! There's a bug
+                        _ => null_mut(),
+                    },
+                }
             }
         } else {
             // The size is too big, call the pager directly
@@ -139,13 +193,28 @@ unsafe impl GlobalAlloc for Allocator {
 
         // The size is not too big
         if size <= page::MAX_OBJ_SIZE {
-            let mut pool = self.pool.lock();
-
-            // Deallocate it
-            if let Some(page) = pool.deallocate(LAddr::new(ptr), layout).unwrap_or(None) {
-                // A page is totally empty, drop it
-                let mut pager = self.pager.lock();
-                pager.dealloc_pages(NonNull::slice_from_raw_parts(page, 1));
+            #[cfg(feature = "tcache")]
+            {
+                // Deallocate it
+                if let Some(page) = crate::TCACHE
+                    .deallocate(LAddr::new(ptr), layout, &self.pool)
+                    .unwrap_or(None)
+                {
+                    // A page is totally empty, drop it
+                    let mut pager = self.pager.lock();
+                    pager.dealloc_pages(NonNull::slice_from_raw_parts(page, 1));
+                }
+            }
+            #[cfg(not(feature = "tcache"))]
+            {
+                let mut pool = self.pool.lock();
+
+                // Deallocate it
+                if let Some(page) = pool.deallocate(LAddr::new(ptr), layout).unwrap_or(None) {
+                    // A page is totally empty, drop it
+                    let mut pager = self.pager.lock();
+                    pager.dealloc_pages(NonNull::slice_from_raw_parts(page, 1));
+                }
             }
         } else {
             // The size is too big, call the pager directly
@@ -160,14 +229,14 @@ unsafe impl GlobalAlloc for Allocator {
 unsafe impl AllocTrait for Allocator {
     fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
         let size = layout.size();
-        let ptr = unsafe { (self as &dyn GlobalAlloc).alloc(layout) };
+        let ptr = unsafe { GlobalAlloc::alloc(self, layout) };
         NonNull::new(ptr)
             .map(|ptr| NonNull::slice_from_raw_parts(ptr, size))
             .ok_or(AllocError)
     }
 
     unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
-        (self as &dyn GlobalAlloc).dealloc(ptr.as_ptr(), layout)
+        GlobalAlloc::dealloc(self, ptr.as_ptr(), layout)
     }
 }
 

+ 11 - 4
h2o/libs/heap/src/lib.rs

@@ -45,10 +45,10 @@
 #![feature(allocator_api)]
 #![feature(lang_items)]
 #![feature(nonnull_slice_from_raw_parts)]
-#![feature(result_into_ok_or_err)]
 #![feature(result_option_inspect)]
 #![feature(slice_ptr_get)]
 #![feature(slice_ptr_len)]
+#![feature(thread_local)]
 
 mod alloc;
 mod page;
@@ -56,21 +56,28 @@ mod stat;
 
 mod pool;
 mod slab;
+mod tcache;
 
+#[cfg(not(feature = "tcache"))]
+pub use self::alloc::Allocator;
 pub use self::{
-    alloc::Allocator,
     page::{AllocPages, DeallocPages, Page, MAX_OBJ_SIZE, OBJ_SIZES},
     pool::unwrap_layout,
     stat::Stat,
+    tcache::ThreadCache,
 };
 
+#[cfg(feature = "tcache")]
+#[thread_local]
+static mut TCACHE: tcache::ThreadCache = tcache::ThreadCache::new();
+
 cfg_if::cfg_if! { if #[cfg(feature = "global")] {
 
 #[global_allocator]
-static GLOBAL_ALLOC: Allocator = Allocator::new_null();
+static GLOBAL_ALLOC: alloc::Allocator = alloc::Allocator::new_null();
 
 /// Set the functions for allocating and deallocating pages.
-pub unsafe fn set_alloc(alloc_pages: AllocPages, dealloc_pages: DeallocPages) {
+pub unsafe fn set_alloc(alloc_pages: page::AllocPages, dealloc_pages: page::DeallocPages) {
     GLOBAL_ALLOC.set_alloc(alloc_pages, dealloc_pages)
 }
 

+ 19 - 3
h2o/libs/heap/src/page.rs

@@ -37,18 +37,34 @@ pub type DeallocPages = unsafe fn(pages: NonNull<[Page]>);
 /// They're divided into 3 classes. The constants in each class are made of
 /// arithmetic and geometric series.
 pub const OBJ_SIZES: &[usize] = &[
-    16, 24, // \ - Class 1
+    16, 24, // \ - Small
     32, 48, // /
-    64, 80, 96, 112, // \ - Class 2
+    64, 80, 96, 112, // \ - Medium
     128, 160, 192, 224, // |
     256, 320, 384, 448, // |
     512, 640, 768, 896, // /
-    1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, // \ - Class 3
+    1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, // \ - Large
 ];
 // The commented-out row below is useless because its `max_count` is 1: the
 // whole block is taken up by headers, leaving no free memory for allocations.
 // 2048, 2304, 2560, 2816, 3072, 3328, 3584, 3840, // /
 
+pub enum Classes {
+    Small = 0,
+    Medium = 4,
+    Large = 20,
+}
+
+impl Classes {
+    pub const fn from_index(index: usize) -> Self {
+        match index {
+            20.. => Classes::Large,
+            4.. => Classes::Medium,
+            _ => Classes::Small,
+        }
+    }
+}
+
 /// The number of the items of [`OBJ_SIZES`].
 pub const NR_OBJ_SIZES: usize = OBJ_SIZES.len();
 

+ 4 - 1
h2o/libs/heap/src/pool.rs

@@ -99,7 +99,10 @@ pub fn unwrap_layout(layout: Layout) -> Result<usize, Error> {
     }
 
     let size = layout.pad_to_align().size();
-    let idx = OBJ_SIZES.binary_search(&size).into_ok_or_err();
+    let idx = match OBJ_SIZES.binary_search(&size) {
+        Ok(idx) => idx,
+        Err(idx) => idx,
+    };
 
     if !(0..NR_OBJ_SIZES).contains(&idx) {
         Err(Error::InvLayout(layout))

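The removed `into_ok_or_err` came from the since-dropped `result_into_ok_or_err` feature; the open-coded `match` is equivalent here because both variants carry a usable index: `Ok(i)` when the padded size hits a bucket exactly, and `Err(i)` with the insertion point, i.e. the first bucket large enough to hold the request. For example:

```rust
fn main() {
    let obj_sizes = [16usize, 24, 32, 48, 64];

    // Exact hit: index of the matching bucket.
    assert_eq!(obj_sizes.binary_search(&32), Ok(2));

    // Miss: insertion point, which is the first bucket that fits.
    assert_eq!(obj_sizes.binary_search(&40), Err(3)); // rounds up to 48
}
```
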
+ 142 - 0
h2o/libs/heap/src/tcache.rs

@@ -0,0 +1,142 @@
+use core::{alloc::Layout, mem, ptr::NonNull};
+
+use array_macro::array;
+use paging::LAddr;
+use spin::Mutex;
+
+use crate::{
+    alloc::Error,
+    page::{self, Classes, NR_OBJ_SIZES},
+    pool, unwrap_layout, OBJ_SIZES,
+};
+
+enum TcSlabSize {
+    Small = 32,
+    Medium = 8,
+    Large = 4,
+}
+
+pub struct ThreadCache {
+    slabs: [TcSlab; NR_OBJ_SIZES],
+}
+
+impl ThreadCache {
+    pub const fn new() -> Self {
+        ThreadCache {
+            slabs: array![i => match Classes::from_index(i) {
+                Classes::Small => TcSlab::new(TcSlabSize::Small, OBJ_SIZES[i]),
+                Classes::Medium => TcSlab::new(TcSlabSize::Medium, OBJ_SIZES[i]),
+                Classes::Large => TcSlab::new(TcSlabSize::Large, OBJ_SIZES[i]),
+            }; NR_OBJ_SIZES],
+        }
+    }
+
+    pub fn allocate(&mut self, layout: Layout, pool: &Mutex<pool::Pool>) -> Result<LAddr, Error> {
+        let index = unwrap_layout(layout)?;
+        self.slabs[index].pop(pool)
+    }
+
+    pub fn deallocate(
+        &mut self,
+        addr: LAddr,
+        layout: Layout,
+        pool: &Mutex<pool::Pool>,
+    ) -> Result<Option<NonNull<page::Page>>, Error> {
+        let index = unwrap_layout(layout)?;
+        self.slabs[index].push(addr, pool)
+    }
+}
+
+pub struct TcSlab {
+    memory: Option<NonNull<LAddr>>,
+    count: usize,
+    size: usize,
+    obj_size: usize,
+}
+
+impl TcSlab {
+    const fn new(size: TcSlabSize, obj_size: usize) -> Self {
+        TcSlab {
+            memory: None,
+            count: 0,
+            size: size as usize,
+            obj_size,
+        }
+    }
+
+    fn memory(&mut self, pool: &Mutex<pool::Pool>) -> Result<NonNull<LAddr>, Error> {
+        match self.memory {
+            Some(memory) => Ok(memory),
+            None => {
+                let mut pool = pool.lock();
+                let layout =
+                    Layout::from_size_align(self.size * self.obj_size, mem::align_of::<LAddr>())
+                        .unwrap();
+                let memory = pool
+                    .allocate(layout)
+                    .ok()
+                    .and_then(|addr| addr.as_non_null())
+                    .ok_or(Error::Internal(
+                        "Memory exhausted when allocating space for thread cache",
+                    ))?
+                    .cast();
+                self.memory = Some(memory);
+                Ok(memory)
+            }
+        }
+    }
+
+    fn pop(&mut self, pool: &Mutex<pool::Pool>) -> Result<LAddr, Error> {
+        let memory = self.memory(pool)?;
+
+        if self.count == 0 {
+            let mut pool = pool.lock();
+            let layout = Layout::from_size_align(self.obj_size, mem::align_of::<LAddr>()).unwrap();
+            while self.count < self.size {
+                let addr = match pool.allocate(layout) {
+                    Ok(addr) => addr,
+                    Err(_) => break,
+                };
+
+                unsafe { memory.as_ptr().add(self.count).write(addr) };
+                self.count += 1;
+            }
+        }
+
+        if self.count > 0 {
+            self.count -= 1;
+            let addr = unsafe { memory.as_ptr().add(self.count).read() };
+            Ok(addr)
+        } else {
+            Err(Error::NeedExt)
+        }
+    }
+
+    fn push(
+        &mut self,
+        addr: LAddr,
+        pool: &Mutex<pool::Pool>,
+    ) -> Result<Option<NonNull<page::Page>>, Error> {
+        let memory = self.memory(pool)?;
+
+        let mut page = None;
+        if self.count == self.size {
+            let mut pool = pool.lock();
+            let layout = Layout::from_size_align(self.obj_size, mem::align_of::<LAddr>()).unwrap();
+
+            self.count -= 1;
+
+            let addr = unsafe { memory.as_ptr().add(self.count).read() };
+
+            page = pool.deallocate(addr, layout).unwrap_or(None);
+        }
+
+        if self.count < self.size {
+            unsafe { memory.as_ptr().add(self.count).write(addr) };
+            self.count += 1;
+            Ok(page)
+        } else {
+            Err(Error::Internal("No more space for deallocating the object"))
+        }
+    }
+}

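The thread cache is a per-thread magazine in front of the shared pool: `pop` refills a fixed-size batch of objects under a single pool lock acquisition, and `push` returns objects locally, spilling one back only when the magazine is full. A compressed model of that policy, with plain integers standing in for object addresses:

```rust
// Toy magazine: `Vec<usize>` stands in for both the cached addresses and
// the shared pool's free objects.
struct Magazine {
    slots: Vec<usize>,
    cap: usize,
}

impl Magazine {
    fn new(cap: usize) -> Self {
        Magazine { slots: Vec::with_capacity(cap), cap }
    }

    fn pop(&mut self, pool: &mut Vec<usize>) -> Option<usize> {
        if self.slots.is_empty() {
            // Batch refill: one "lock acquisition" buys up to `cap` objects.
            while self.slots.len() < self.cap {
                match pool.pop() {
                    Some(addr) => self.slots.push(addr),
                    None => break,
                }
            }
        }
        self.slots.pop()
    }

    fn push(&mut self, addr: usize, pool: &mut Vec<usize>) {
        if self.slots.len() == self.cap {
            // Full: spill the most recently cached object back to the pool.
            pool.push(self.slots.pop().unwrap());
        }
        self.slots.push(addr);
    }
}

fn main() {
    let mut pool: Vec<usize> = (0..64).map(|i| 0x1000 + i * 16).collect();
    let mut mag = Magazine::new(8);
    let a = mag.pop(&mut pool).unwrap();
    mag.push(a, &mut pool);
    assert_eq!(mag.pop(&mut pool), Some(a));
}
```
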
+ 14 - 0
h2o/libs/paging/src/addr.rs

@@ -127,6 +127,20 @@ impl From<u64> for LAddr {
     }
 }
 
+impl From<*const u8> for LAddr {
+    #[inline]
+    fn from(val: *const u8) -> Self {
+        LAddr(val as _)
+    }
+}
+
+impl From<*mut u8> for LAddr {
+    #[inline]
+    fn from(val: *mut u8) -> Self {
+        LAddr(val as _)
+    }
+}
+
 impl<T: ?Sized> From<NonNull<T>> for LAddr {
     #[inline]
     fn from(ptr: NonNull<T>) -> Self {

+ 1 - 1
h2o/libs/pmm/src/buddy.rs

@@ -120,7 +120,7 @@ type PfList = LinkedList<PFAdapter>;
 /// The page frame type for allocation. See [the module level doc](./index.html)
 /// for more.
 #[repr(C)]
-#[derive(Debug, Clone, Copy, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum PfType {
     /// Representing the low (below 4GB) physical memory area.
     Low,

+ 1 - 0
h2o/libs/syscall/Cargo.toml

@@ -6,6 +6,7 @@ version = "0.1.0"
 
 [features]
 call = []
+vdso = []
 default = ["stub"]
 stub = []
 

+ 14 - 15
h2o/libs/syscall/build.rs

@@ -1,20 +1,19 @@
-use std::error::Error;
-
-fn main() -> Result<(), Box<dyn Error>> {
-    #[cfg(feature = "call")]
+fn main() {
+    #[cfg(all(feature = "call", feature = "vdso"))]
     {
-        let config = cbindgen::Config::from_file("cbindgen.toml")?;
-        println!("cargo:rerun-if-changed=cbindgen.toml");
+        std::thread::spawn(|| {
+            let config = cbindgen::Config::from_file("cbindgen.toml").unwrap();
+            println!("cargo:rerun-if-changed=cbindgen.toml");
 
-        let src_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        let bindings = cbindgen::Builder::new()
-            .with_config(config)
-            .with_crate(".")
-            .generate()?;
+            let src_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+            let bindings = cbindgen::Builder::new()
+                .with_config(config)
+                .with_crate(".")
+                .generate()
+                .unwrap();
 
-        let c_target_path = src_dir.join("target/svc.h");
-        bindings.write_to_file(c_target_path);
+            let c_target_path = src_dir.join("../../../target/sysroot/usr/include/h2o.h");
+            bindings.write_to_file(c_target_path);
+        });
     }
-
-    Ok(())
 }

+ 1 - 1
h2o/libs/syscall/cbindgen.toml

@@ -144,7 +144,7 @@ crates = ["sv-call"]
 # `expand = ["euclid"]` shorthand is used.
 #
 # default: false
-all_features = true
+all_features = false
 
 # When `all_features` is disabled and this is also disabled, use the
 # `--no-default-features` option when expanding.

+ 0 - 8
h2o/libs/syscall/rxx.rs.in

@@ -1,8 +0,0 @@
-#[panic_handler]
-#[linkage = "weak"]
-#[no_mangle]
-pub extern "C" fn rust_begin_unwind(_: &core::panic::PanicInfo) -> ! {
-    loop {
-        unsafe { core::arch::asm!("pause; ud2") }
-    }
-}

+ 66 - 0
h2o/libs/syscall/src/call.rs

@@ -6,6 +6,13 @@ pub(crate) mod hdl;
 mod raw;
 pub(crate) mod reg;
 
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+#[repr(C)]
+pub struct Syscall {
+    pub num: usize,
+    pub args: [usize; 5],
+}
+
 #[cfg(all(not(feature = "stub"), feature = "call"))]
 use crate::{
     c_ty::*,
@@ -16,5 +23,64 @@ use crate::{
     Feature, Handle, SerdeReg,
 };
 
+#[cfg(feature = "vdso")]
+#[no_mangle]
+pub unsafe extern "C" fn sv_time_get(ptr: *mut ()) -> crate::c_ty::Status {
+    let ticks = {
+        let (eax, edx): (u32, u32);
+        core::arch::asm!("rdtsc", out("eax")eax, out("edx")edx);
+        ((edx as u64) << 32) | (eax as u64)
+    };
+
+    let c = crate::constants();
+
+    let val = ticks - c.ticks_offset;
+    let ns = (val as u128 * c.ticks_multiplier) >> c.ticks_shift;
+
+    ptr.cast::<u128>().write(ns);
+
+    Status::from_res(Ok(()))
+}
+
+#[cfg(feature = "vdso")]
+#[no_mangle]
+pub extern "C" fn sv_random() -> crate::c_ty::StatusOrValue {
+    let c = crate::constants();
+    if c.has_builtin_rand {
+        for _ in 0..10 {
+            let ret;
+            let flags: u64;
+            unsafe {
+                core::arch::asm!(
+                      "rdrand {}",
+                      "pushfq",
+                      "pop {}",
+                      out(reg) ret,
+                      out(reg) flags
+                );
+                if flags & 1 != 0 {
+                    return crate::c_ty::StatusOrValue::from_res(Ok(ret));
+                }
+            }
+        }
+    }
+
+    // Fall back to time-based randomization.
+    let ticks = unsafe {
+        let (eax, edx): (u32, u32);
+        core::arch::asm!("rdtsc", out("eax")eax, out("edx")edx);
+        ((edx as u64) << 32) | (eax as u64)
+    };
+    let ret = ticks.wrapping_mul(0xb7123c2fd16c6345);
+    crate::c_ty::StatusOrValue::from_res(Ok(ret))
+}
+
+#[cfg(feature = "vdso")]
+#[no_mangle]
+#[inline(never)]
+pub extern "C" fn sv_cpu_num() -> crate::c_ty::StatusOrValue {
+    crate::c_ty::StatusOrValue::from_res(Ok(crate::constants().num_cpus as u64))
+}
+
 #[cfg(all(not(feature = "stub"), feature = "call"))]
 include!(concat!(env!("CARGO_MANIFEST_DIR"), "/target/call.rs"));
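
The sv_time_get fast path converts TSC ticks to nanoseconds with a multiply-and-shift rather than a division; the kernel presumably precomputes the pair from the calibrated tick rate so that ns ≈ ticks * multiplier >> shift. A sketch of that derivation (hypothetical helper; freq_hz stands for the calibrated TSC frequency):

    /// Choose (multiplier, shift) so multiplier / 2^shift ≈ 1e9 / freq_hz.
    fn ticks_to_ns_params(freq_hz: u64) -> (u128, u32) {
        let shift = 32;
        let multiplier = (1_000_000_000u128 << shift) / freq_hz as u128;
        (multiplier, shift)
    }

    // e.g. a 3 GHz TSC: multiplier = 2^32 / 3 = 0x5555_5555, i.e. one
    // tick contributes a third of a nanosecond.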

+ 1 - 1
h2o/libs/syscall/src/call/hdl.rs

@@ -17,7 +17,7 @@ impl Handle {
         if self.raw != 0 {
             Ok(*self)
         } else {
-            Err(crate::Error::EINVAL)
+            Err(crate::EINVAL)
         }
     }
 

+ 15 - 0
h2o/libs/syscall/src/call/raw.rs

@@ -25,3 +25,18 @@ pub unsafe fn syscall(
     );
     ret
 }
+
+#[inline]
+pub fn pack_syscall(
+    num: usize,
+    arg1: usize,
+    arg2: usize,
+    arg3: usize,
+    arg4: usize,
+    arg5: usize,
+) -> crate::Syscall {
+    crate::Syscall {
+        num,
+        args: [arg1, arg2, arg3, arg4, arg5],
+    }
+}
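
pack_syscall just bundles a register image into the Syscall struct introduced in call.rs, so a call can be queued (e.g. for the new dispatcher) instead of being trapped immediately; a usage sketch with an illustrative syscall number:

    // `num` would come from the generated table in target/num.rs.
    let call = pack_syscall(7, 1, 2, 3, 4, 5);
    assert_eq!((call.num, call.args), (7, [1, 2, 3, 4, 5]));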

+ 49 - 51
h2o/libs/syscall/src/error.rs

@@ -45,14 +45,14 @@ impl Error {
         if ERRC_RANGE.contains(&index) {
             ERRC_DESC[index as usize]
         } else {
-            CUSTOM_DESC[(index - Self::CUSTOM_OFFSET) as usize]
+            CUSTOM_DESC[(index - CUSTOM_OFFSET) as usize]
         }
     }
 
     pub fn desc_by_index(errnum: i32) -> Option<&'static str> {
         let index = -errnum as usize;
         { ERRC_DESC.get(index) }
-            .or_else(|| CUSTOM_DESC.get(index - Self::CUSTOM_OFFSET as usize))
+            .or_else(|| CUSTOM_DESC.get(index - CUSTOM_OFFSET as usize))
             .copied()
     }
 
@@ -80,28 +80,28 @@ impl Debug for Error {
 impl From<core::alloc::AllocError> for Error {
     #[inline]
     fn from(_: core::alloc::AllocError) -> Self {
-        Error::ENOMEM
+        ENOMEM
     }
 }
 
 impl From<core::alloc::LayoutError> for Error {
     #[inline]
     fn from(_: core::alloc::LayoutError) -> Self {
-        Error::ENOMEM
+        ENOMEM
     }
 }
 
 impl From<core::num::TryFromIntError> for Error {
     #[inline]
     fn from(_: core::num::TryFromIntError) -> Self {
-        Error::EINVAL
+        EINVAL
     }
 }
 
 impl From<core::str::Utf8Error> for Error {
     #[inline]
     fn from(_: core::str::Utf8Error) -> Self {
-        Error::EINVAL
+        EINVAL
     }
 }
 
@@ -112,51 +112,49 @@ macro_rules! declare_error {
     };
 }
 
-impl Error {
-    declare_error!(OK, 0, "Success");
-    declare_error!(EPERM, 1, "Operation not permitted");
-    declare_error!(ENOENT, 2, "No such file or directory");
-    declare_error!(ESRCH, 3, "No such process");
-    declare_error!(EINTR, 4, "Interrupted system call");
-    declare_error!(EIO, 5, "I/O error");
-    declare_error!(ENXIO, 6, "No such device or address");
-    declare_error!(E2BIG, 7, "Argument list too long");
-    declare_error!(ENOEXEC, 8, "Executable format error");
-    declare_error!(EBADF, 9, "Bad file number");
-    declare_error!(ECHILD, 10, "No child processes");
-    declare_error!(EAGAIN, 11, "Try again");
-    declare_error!(ENOMEM, 12, "Out of memory");
-    declare_error!(EACCES, 13, "Permission denied");
-    declare_error!(EFAULT, 14, "Bad address");
-    declare_error!(ENOTBLK, 15, "Block device required");
-    declare_error!(EBUSY, 16, "Device or resource busy");
-    declare_error!(EEXIST, 17, "File exists");
-    declare_error!(EXDEV, 18, "Cross-device link");
-    declare_error!(ENODEV, 19, "No such device");
-    declare_error!(ENOTDIR, 20, "Not a directory");
-    declare_error!(EISDIR, 21, "Is a directory");
-    declare_error!(EINVAL, 22, "Invalid argument");
-    declare_error!(ENFILE, 23, "File table overflow");
-    declare_error!(EMFILE, 24, "Too many open files");
-    declare_error!(ENOTTY, 25, "Not a typewriter");
-    declare_error!(ETXTBSY, 26, "Text file busy");
-    declare_error!(EFBIG, 27, "File too large");
-    declare_error!(ENOSPC, 28, "No space left on device");
-    declare_error!(ESPIPE, 29, "Illegal seek");
-    declare_error!(EROFS, 30, "Read-only file system");
-    declare_error!(EMLINK, 31, "Too many links");
-    declare_error!(EPIPE, 32, "Broken pipe");
-    declare_error!(EDOM, 33, "Math argument out of domain of func");
-    declare_error!(ERANGE, 34, "Range not available");
-
-    const CUSTOM_OFFSET: i32 = CUSTOM_RANGE.start;
-    declare_error!(EKILLED, 1001, "Object already killed");
-    declare_error!(EBUFFER, 1002, "Buffer range exceeded");
-    declare_error!(ETIME, 1003, "Timed out");
-    declare_error!(EALIGN, 1004, "Pointer unaligned");
-    declare_error!(ETYPE, 1005, "Object type mismatch");
-    declare_error!(ESPRT, 1006, "Function not supported");
-}
+declare_error!(OK, 0, "Success");
+declare_error!(EPERM, 1, "Operation not permitted");
+declare_error!(ENOENT, 2, "No such file or directory");
+declare_error!(ESRCH, 3, "No such process");
+declare_error!(EINTR, 4, "Interrupted system call");
+declare_error!(EIO, 5, "I/O error");
+declare_error!(ENXIO, 6, "No such device or address");
+declare_error!(E2BIG, 7, "Argument list too long");
+declare_error!(ENOEXEC, 8, "Executable format error");
+declare_error!(EBADF, 9, "Bad file number");
+declare_error!(ECHILD, 10, "No child processes");
+declare_error!(EAGAIN, 11, "Try again");
+declare_error!(ENOMEM, 12, "Out of memory");
+declare_error!(EACCES, 13, "Permission denied");
+declare_error!(EFAULT, 14, "Bad address");
+declare_error!(ENOTBLK, 15, "Block device required");
+declare_error!(EBUSY, 16, "Device or resource busy");
+declare_error!(EEXIST, 17, "File exists");
+declare_error!(EXDEV, 18, "Cross-device link");
+declare_error!(ENODEV, 19, "No such device");
+declare_error!(ENOTDIR, 20, "Not a directory");
+declare_error!(EISDIR, 21, "Is a directory");
+declare_error!(EINVAL, 22, "Invalid argument");
+declare_error!(ENFILE, 23, "File table overflow");
+declare_error!(EMFILE, 24, "Too many open files");
+declare_error!(ENOTTY, 25, "Not a typewriter");
+declare_error!(ETXTBSY, 26, "Text file busy");
+declare_error!(EFBIG, 27, "File too large");
+declare_error!(ENOSPC, 28, "No space left on device");
+declare_error!(ESPIPE, 29, "Illegal seek");
+declare_error!(EROFS, 30, "Read-only file system");
+declare_error!(EMLINK, 31, "Too many links");
+declare_error!(EPIPE, 32, "Broken pipe");
+declare_error!(EDOM, 33, "Math argument out of domain of func");
+declare_error!(ERANGE, 34, "Range not available");
+
+const CUSTOM_OFFSET: i32 = CUSTOM_RANGE.start;
+declare_error!(EKILLED, 1001, "Object already killed");
+declare_error!(EBUFFER, 1002, "Buffer range exceeded");
+declare_error!(ETIME, 1003, "Timed out");
+declare_error!(EALIGN, 1004, "Pointer unaligned");
+declare_error!(ETYPE, 1005, "Object type mismatch");
+declare_error!(ESPRT, 1006, "Function not supported");
 
 const ERRC_DESC: &[&str] = &[
     "OK",

+ 4 - 4
h2o/libs/syscall/src/error/c_ty.rs

@@ -1,4 +1,4 @@
-use crate::{Error, Handle, Result, SerdeReg};
+use crate::{Error, Handle, Result, SerdeReg, OK};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 #[repr(transparent)]
@@ -10,14 +10,14 @@ impl SerdeReg for Status {
     }
 
     fn decode(val: usize) -> Self {
-        Self(Error::try_decode(val).unwrap_or(Error::OK))
+        Self(Error::try_decode(val).unwrap_or(OK))
     }
 }
 
 impl Status {
     #[inline]
     pub fn into_res(self) -> Result {
-        if self.0 == Error::OK {
+        if self.0 == OK {
             Ok(())
         } else {
             Err(self.0)
@@ -32,7 +32,7 @@ impl Status {
 
 impl Default for Status {
     fn default() -> Self {
-        Status(Error::OK)
+        Status(OK)
     }
 }
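
Status round-trips an Error through the C ABI, and into_res recovers the Result on the Rust side; a usage sketch using only the methods shown:

    assert_eq!(Status::from_res(Ok(())).into_res(), Ok(()));
    assert_eq!(Status::default().into_res(), Ok(())); // default is OK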
 

+ 4 - 3
h2o/libs/syscall/src/ipc.rs

@@ -17,6 +17,7 @@ pub const MAX_BUFFER_SIZE: usize = crate::mem::PAGE_SIZE;
 pub const CUSTOM_MSG_ID_START: usize = 0;
 pub const CUSTOM_MSG_ID_END: usize = 12;
 
-pub const SIG_GENERIC: usize = 0b001;
-pub const SIG_READ: usize = 0b010;
-pub const SIG_WRITE: usize = 0b100;
+pub const SIG_GENERIC: usize = 0b0001;
+pub const SIG_READ: usize = 0b0010;
+pub const SIG_WRITE: usize = 0b0100;
+pub const SIG_TIMER: usize = 0b1000;
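
The signal constants are one-hot bits, so a waiter can watch several conditions through a single mask; with the new SIG_TIMER bit, e.g.:

    // Wake on readability or timer expiry:
    let mask = SIG_READ | SIG_TIMER;
    let pending = SIG_TIMER; // illustrative value reported by the kernel
    assert_ne!(pending & mask, 0);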

+ 64 - 3
h2o/libs/syscall/src/lib.rs

@@ -1,11 +1,12 @@
 #![no_std]
 #![warn(clippy::missing_panics_doc)]
 #![feature(allocator_api)]
+#![feature(asm_const)]
 #![feature(lang_items)]
 #![feature(linkage)]
 
 pub mod call;
-mod error;
+pub mod error;
 pub mod feat;
 pub mod ipc;
 pub mod mem;
@@ -21,9 +22,69 @@ pub use self::call::*;
 #[cfg(feature = "stub")]
 pub use self::stub::*;
 pub use self::{
-    call::{hdl::Handle, reg::*},
+    call::{hdl::Handle, reg::*, Syscall},
     error::*,
     feat::*,
 };
 
-include!(concat!(env!("CARGO_MANIFEST_DIR"), "/target/rxx.rs"));
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+pub struct Constants {
+    pub ticks_offset: u64,
+    pub ticks_multiplier: u128,
+    pub ticks_shift: u128,
+    pub has_builtin_rand: bool,
+    pub num_cpus: usize,
+}
+
+impl Constants {
+    pub const fn new() -> Constants {
+        Constants {
+            ticks_offset: 0,
+            ticks_multiplier: 1,
+            ticks_shift: 0,
+            has_builtin_rand: false,
+            num_cpus: 1,
+        }
+    }
+}
+
+#[cfg(feature = "vdso")]
+pub const CONSTANTS_SIZE: usize = core::mem::size_of::<Constants>();
+#[cfg(feature = "vdso")]
+core::arch::global_asm!("
+    .section .rodata
+    .global CONSTANTS
+    .type CONSTANTS, object
+CONSTANTS:
+    .fill {CONSTANTS_SIZE}, 1, 0xcc", 
+    CONSTANTS_SIZE = const CONSTANTS_SIZE
+);
+
+#[cfg(feature = "vdso")]
+fn constants() -> Constants {
+    let mut addr: *const Constants;
+
+    unsafe {
+        core::arch::asm!(
+            "lea {}, [rip + CONSTANTS]",
+            out(reg) addr
+        );
+        core::ptr::read(addr)
+    }
+}
+
+#[cfg(all(not(feature = "call"), feature = "vdso"))]
+compile_error!("The VDSO feature is only supported with call feature");
+
+#[cfg(feature = "vdso")]
+#[panic_handler]
+#[linkage = "weak"]
+#[no_mangle]
+pub extern "C" fn rust_begin_unwind(_: &core::panic::PanicInfo) -> ! {
+    loop {
+        unsafe { core::arch::asm!("pause; ud2") }
+    }
+}
+
+include!(concat!(env!("CARGO_MANIFEST_DIR"), "/target/num.rs"));
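
The CONSTANTS object is a fixed-size, 0xcc-filled hole in the vDSO's .rodata (which the linker-script change below now keeps as a :load segment); the kernel presumably resolves the symbol in the vDSO image at load time and overwrites the filler with real values before mapping the image read-only. A hypothetical sketch of that patch step (symbol-offset lookup elided):

    /// `image` is a writable staging copy of the vDSO; `offset` is the
    /// resolved offset of the CONSTANTS symbol within it.
    unsafe fn patch_constants(image: *mut u8, offset: usize, c: &Constants) {
        image.add(offset).cast::<Constants>().write_unaligned(*c);
    }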

+ 0 - 4
h2o/libs/syscall/src/mem.rs

@@ -31,13 +31,9 @@ pub struct MemInfo {
     pub current_used: usize,
 }
 
-cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] {
-
 pub const PAGE_SHIFT: usize = 12;
 pub const PAGE_SIZE: usize = 4096;
 
-} }
-
 #[repr(C)]
 pub struct VirtMapInfo {
     pub offset: usize,

+ 1 - 3
h2o/libs/syscall/src/stub.rs

@@ -1,12 +1,10 @@
-#[cfg(feature = "stub")]
 use crate::{
     c_ty::*,
     ipc::RawPacket,
     mem::{Flags, MemInfo, VirtMapInfo},
     res::IntrConfig,
     task::ExecInfo,
-    Feature, Handle,
+    Feature, Handle, Syscall,
 };
 
-#[cfg(feature = "stub")]
 include!(concat!(env!("CARGO_MANIFEST_DIR"), "/target/stub.rs"));

+ 3 - 0
h2o/libs/syscall/syscall.ld

@@ -1,9 +1,12 @@
+ENTRY(sv_task_exit)
+
 SECTIONS
 {
     . = SIZEOF_HEADERS;
     .note.gnu.build-id  : { *(.note.gnu.build-id) } :note
 
     .text       : { *(.text*) }     :load
+    .rodata     : { *(.rodata*) }   :load
     .dynamic    : { *(.dynamic*) }  :load :dynamic
 
     /DISCARD/ : { *(.comment*) }

+ 1 - 1
h2o/libs/targs/src/lib.rs

@@ -1,6 +1,6 @@
 #![no_std]
 
-#[derive(Debug, Copy, Clone, PartialEq)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
 #[repr(usize)]
 pub enum HandleIndex {
     MemRes = 0,

+ 1 - 1
h2o/tinit/.cargo/x86_64-h2o-tinit.json

@@ -1,6 +1,6 @@
 {
     "llvm-target": "x86_64-h2o-tinit",
-    "data-layout": "e-m:e-i64:64-f80:128-n8:16:32:64-S128",
+    "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
     "arch": "x86_64",
     "target-endian": "little",
     "target-pointer-width": "64",

+ 1 - 1
h2o/tinit/build.rs

@@ -7,7 +7,7 @@ fn asm_build(input: &str, output: &str, flags: &[&str]) -> Result<(), Box<dyn Er
 
     println!("cargo:rerun-if-changed={}", input);
     let mut cmd = Command::new("nasm");
-    cmd.args(&[input, "-o", output])
+    cmd.args([input, "-o", output])
         .args(flags)
         .status()?
         .exit_ok()?;

+ 4 - 9
h2o/tinit/src/load.rs

@@ -2,7 +2,7 @@ use core::{alloc::Layout, ptr::NonNull};
 
 use bootfs::parse::Directory;
 use solvent::prelude::{Error as SError, Flags, Phys, Virt, PAGE_LAYOUT, PAGE_SIZE};
-use sv_call::task::DEFAULT_STACK_SIZE;
+use sv_call::{task::DEFAULT_STACK_SIZE, ENOENT};
 
 const STACK_PROTECTOR_SIZE: usize = PAGE_SIZE;
 const STACK_PROTECTOR_LAYOUT: Layout = PAGE_LAYOUT;
@@ -27,17 +27,12 @@ fn load_segs(
 ) -> Result<elfload::LoadedElf, Error> {
     let phys = match elfload::get_interp(phys) {
         Ok(Some(mut interp)) => {
-            use SError as SvError;
-
             let last = interp.pop();
             assert_eq!(last, Some(0), "Not a valid c string");
 
-            let data = bootfs
-                .find(&interp, b'/')
-                .ok_or(SvError::ENOENT)
-                .inspect_err(|_| {
-                    log::error!("Failed to find the interpreter for the executable")
-                })?;
+            let data = bootfs.find(&interp, b'/').ok_or(ENOENT).inspect_err(|_| {
+                log::error!("Failed to find the interpreter for the executable")
+            })?;
 
             crate::sub_phys(data, bootfs, bootfs_phys)?
         }

+ 2 - 0
h2o/tinit/src/test.rs

@@ -3,9 +3,11 @@ use solvent::prelude::Virt;
 mod ipc;
 mod mem;
 mod task;
+mod time;
 
 pub unsafe fn test_syscall(virt: &Virt) {
     let stack = task::test(virt);
     ipc::test(virt, stack);
     mem::test(virt);
+    time::test();
 }
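
The new time module presumably smoke-tests sv_time_get end to end; a hypothetical shape for it (now_ns standing in for whatever wrapper solvent exposes):

    // tinit/src/test/time.rs, sketched: monotonicity check.
    pub fn test() {
        let a = now_ns();
        let b = now_ns();
        assert!(b >= a, "time went backwards");
    }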

Some files were not shown because too many files changed in this diff