41 commits 9fb6ab324b ... 854bb8e336

Author SHA1 Message Date
  Js2xxx 854bb8e336 Integrate CI (#4) 1 year ago
  徐启航 fa64147c3a Fix `xtask` dir not found 1 year ago
  徐启航 6557a25490 Fix `xtask` dir not found 1 year ago
  徐启航 470aa6a6c0 Update README.md 1 year ago
  徐启航 39e4736a27 MSI allocation (unfinished) 1 year ago
  徐启航 29a3b4a859 Tweak CPU selection in creating interrupts 1 year ago
  徐启航 a931d2694a Replace `intr_wait` with `intr_query` 1 year ago
  徐启航 20a0a8ab14 Tweak some code 1 year ago
  徐启航 6991a96b63 Tweak some code 1 year ago
  徐启航 b0742d49d6 Driver framework infrastructures (unfinished) 1 year ago
  徐启航 ae8954cfe5 Optimize some code & Bugfix: MemDir and RpcNode 1 year ago
  徐启航 464798fc87 Move back xtask because of dir dependency 1 year ago
  徐启航 dc8c71c664 Move xtask to tools 1 year ago
  徐启航 b6b975cd39 Bugfix: DSO liftime with Phys 1 year ago
  徐启航 4802be1cd2 Adapt to new rust version 1 year ago
  徐启航 750a527a4b Optimize some code 1 year ago
  徐启航 7e371cc4d8 Optimize the executor's behavior 1 year ago
  徐启航 6148077e90 Bugfix: LAPIC IPI without delay 1 year ago
  徐启航 9d197d1505 Update to new rust version 1 year ago
  徐启航 9a865b6861 Fix exe poller bug & Optimize some code 1 year ago
  徐启航 af23471c97 Optimize process builder code 1 year ago
  徐启航 b8f3aced7e Fix DidntWait bug & improve futures receiving code 1 year ago
  徐启航 cdf73eef10 Add std-local feature for h2o_fs & Optim some code 1 year ago
  徐启航 7d85ee9b4d Make solvent-fs independent on async runtime 1 year ago
  徐启航 0904a54542 Move io_task to public 1 year ago
  徐启航 d0adcddb1d Refactor the async exe based on `async-executor` 1 year ago
  徐启航 e2b3b1570d Adjust crate features 1 year ago
  徐启航 42348af160 Reduce thread usage in LocalPool executor 1 year ago
  徐启航 6e093652da Reduce some code 1 year ago
  徐启航 e41c4107b4 Modify std & runtime features in h2o_rpc 1 year ago
  徐启航 d14b77c6cf Cancel `SerdePacket` impl for endpoints 1 year ago
  徐启航 632251f352 Adapted to new rust version 1 year ago
  徐启航 d5c5a2e951 Fix memory incons bugs & Optimize some code 1 year ago
  徐启航 81aaf94f97 Adapt to new rust version & fix `Ref` layout bugs 1 year ago
  徐启航 23cfa7de31 Fix kernel PF & async dispatcher bugs 1 year ago
  徐启航 2eaac5eef1 Lock free the task handle map 1 year ago
  徐启航 0c46521d16 Adapt to new rust version 1 year ago
  徐启航 fbc43e3da1 Optimize TID allocation 1 year ago
  徐启航 63217e91ec Add local executor && fix RPC client & server bugs 1 year ago
  徐启航 1bc69e3bd6 Replace extensible phys with COW 1 year ago
  徐启航 6dad208129 Make kernel phys trait-based 1 year ago
100 changed files with 3540 additions and 1875 deletions
1. +61 -0  .github/workflows/ci.yml
2. +5 -1  Cargo.toml
3. +24 -2  README.md
4. +24 -2  README.zh-cn.md
5. BIN  h2o/assets/Oceanic.500.bmp
6. +1 -1  h2o/boot/Cargo.toml
7. +1 -3  h2o/boot/src/file.rs
8. +0 -1  h2o/boot/src/main.rs
9. +3 -2  h2o/kernel/Cargo.toml
10. +4 -4  h2o/kernel/build.rs
11. +1 -1  h2o/kernel/src/cpu.rs
12. +10 -0  h2o/kernel/src/cpu/intr.rs
13. +38 -57  h2o/kernel/src/cpu/intr/imp.rs
14. +2 -5  h2o/kernel/src/cpu/time.rs
15. +1 -1  h2o/kernel/src/cpu/time/chip.rs
16. +6 -4  h2o/kernel/src/cpu/x86_64/apic.rs
17. +8 -16  h2o/kernel/src/cpu/x86_64/apic/ipi.rs
18. +138 -51  h2o/kernel/src/cpu/x86_64/intr.rs
19. +1 -1  h2o/kernel/src/cpu/x86_64/mod.rs
20. +6 -8  h2o/kernel/src/dev.rs
21. +1 -0  h2o/kernel/src/dev/res.rs
22. +4 -3  h2o/kernel/src/dev/x86_64/hpet.rs
23. +4 -4  h2o/kernel/src/dev/x86_64/ioapic.rs
24. +7 -11  h2o/kernel/src/kmain.rs
25. +2 -2  h2o/kernel/src/logger.rs
26. +1 -1  h2o/kernel/src/logger/flags.rs
27. +7 -5  h2o/kernel/src/logger/serial.rs
28. +1 -0  h2o/kernel/src/mem/arena.rs
29. +25 -15  h2o/kernel/src/mem/space.rs
30. +71 -144  h2o/kernel/src/mem/space/phys.rs
31. +49 -51  h2o/kernel/src/mem/space/phys/contiguous.rs
32. +428 -408  h2o/kernel/src/mem/space/phys/extensible.rs
33. +18 -25  h2o/kernel/src/mem/space/virt.rs
34. +26 -7  h2o/kernel/src/mem/space/x86_64/mod.rs
35. +51 -46  h2o/kernel/src/mem/syscall.rs
36. +1 -1  h2o/kernel/src/rxx.rs
37. +13 -19  h2o/kernel/src/sched/imp.rs
38. +1 -4  h2o/kernel/src/sched/imp/waiter.rs
39. +6 -6  h2o/kernel/src/sched/ipc.rs
40. +27 -30  h2o/kernel/src/sched/ipc/channel.rs
41. +10 -5  h2o/kernel/src/sched/ipc/channel/syscall.rs
42. +7 -4  h2o/kernel/src/sched/task.rs
43. +23 -34  h2o/kernel/src/sched/task/boot.rs
44. +24 -14  h2o/kernel/src/sched/task/ctx.rs
45. +3 -3  h2o/kernel/src/sched/task/ctx/x86_64.rs
46. +4 -4  h2o/kernel/src/sched/task/elf.rs
47. +2 -2  h2o/kernel/src/sched/task/excep.rs
48. +121 -78  h2o/kernel/src/sched/task/hdl.rs
49. +4 -303  h2o/kernel/src/sched/task/hdl/node.rs
50. +6 -14  h2o/kernel/src/sched/task/idle.rs
51. +7 -5  h2o/kernel/src/sched/task/sm.rs
52. +8 -8  h2o/kernel/src/sched/task/space.rs
53. +7 -8  h2o/kernel/src/sched/task/syscall.rs
54. +32 -20  h2o/kernel/src/sched/task/tid.rs
55. +2 -4  h2o/kernel/src/sched/wait/futex.rs
56. +1 -15  h2o/kernel/syscall/interrupt.json
57. +1 -1  h2o/libs/collection_ex/Cargo.toml
58. +79 -16  h2o/libs/collection_ex/src/chash_map.rs
59. +1 -1  h2o/libs/collection_ex/src/id_alloc.rs
60. +5 -10  h2o/libs/collection_ex/src/range_map.rs
61. +1 -1  h2o/libs/heap/Cargo.toml
62. +2 -0  h2o/libs/heap/src/lib.rs
63. +1 -1  h2o/libs/heap/src/page.rs
64. +6 -1  h2o/libs/minfo/src/lib.rs
65. +4 -4  h2o/libs/paging/src/addr.rs
66. +1 -3  h2o/libs/syscall/src/lib.rs
67. +3 -3  h2o/tinit/build.rs
68. +6 -1  h2o/tinit/src/test/ipc.rs
69. +1 -1  scripts/genimg.sh
70. +3 -1  scripts/install.sh
71. +15 -0  src/bin/devm/Cargo.toml
72. +30 -0  src/bin/devm/src/device.rs
73. +61 -0  src/bin/devm/src/main.rs
74. +16 -0  src/bin/drvhost/Cargo.toml
75. +38 -0  src/bin/drvhost/src/ffi.rs
76. +90 -0  src/bin/drvhost/src/instance.rs
77. +23 -0  src/bin/drvhost/src/main.rs
78. +1 -1  src/bin/progm/Cargo.toml
79. +2 -2  src/bin/progm/src/boot.rs
80. +33 -1  src/bin/progm/src/main.rs
81. +18 -0  src/drv/.cargo/config.toml
82. +18 -0  src/drv/pc/Cargo.toml
83. +11 -0  src/drv/pc/src/lib.rs
84. +3 -2  src/lib/bootfs/Cargo.toml
85. +3 -3  src/lib/bootfs/src/gen.rs
86. +5 -8  src/lib/dbglog/src/lib.rs
87. +2 -1  src/lib/h2o_async/Cargo.toml
88. +25 -41  src/lib/h2o_async/src/dev.rs
89. +30 -14  src/lib/h2o_async/src/disp.rs
90. +272 -189  src/lib/h2o_async/src/exe.rs
91. +1 -2  src/lib/h2o_async/src/exe/enter.rs
92. +15 -15  src/lib/h2o_async/src/exe/park.rs
93. +17 -8  src/lib/h2o_async/src/ipc.rs
94. +23 -16  src/lib/h2o_async/src/ipc/channel.rs
95. +16 -14  src/lib/h2o_async/src/lib.rs
96. +22 -25  src/lib/h2o_async/src/mem.rs
97. +1 -0  src/lib/h2o_async/src/sync.rs
98. +1272 -0  src/lib/h2o_async/src/sync/channel.rs
99. +6 -26  src/lib/h2o_async/src/time.rs
100. +19 -0  src/lib/h2o_ddk/Cargo.toml

+ 61 - 0
.github/workflows/ci.yml

@@ -0,0 +1,61 @@
+name: CI
+
+on:
+  push:
+    branches: ["master"]
+  pull_request:
+    branches: ["master"]
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Install nasm and qemu-utils
+        run: sudo apt-get install nasm qemu-utils
+
+      - name: Cache LLVM
+        id: cache-llvm
+        uses: actions/cache@v3
+        with:
+          path: llvm
+          key: llvm-14.0
+
+      - name: Install LLVM
+        uses: KyleMayes/install-llvm-action@v1
+        with:
+          version: "14.0"
+          directory: llvm
+          cached: ${{ steps.cache-llvm.outputs.cache-hit }}
+
+      - name: Cache cargo crates
+        id: cache-cargo
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cargo/.crates.toml
+            ~/.cargo/.crates2.json
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+          key: cargo-crates
+
+      - name: Configure sccache
+        uses: visvirial/sccache-action@v1
+
+      - name: Build
+        run: cargo xtask dist --release img
+
+  fmt:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Check format
+        run: cargo fmt --all --check

+ 5 - 1
Cargo.toml

@@ -5,6 +5,7 @@ members = [
   "h2o/libs/*",
   "h2o/tinit",
   "src/bin/*",
+  "src/drv/*",
   "src/lib/*",
   "src/lib/libc/ldso",
   "src/lib/h2o_std/core",
@@ -14,4 +15,7 @@ members = [
   "xtask",
 ]
 
-exclude = ["src/bin/.cargo"]
+exclude = [
+  "src/bin/.cargo",
+  "src/drv/.cargo",
+]

+ 24 - 2
README.md

virtual machines or bare metal should be taken into account by the user.
Currently, the project only supports the x86_64 architecture, and it will probably
support aarch64 in the future. 
 
+# Features
+
+- **Pure microkernel architecture**: Only necessary functions such as memory management, task scheduling and inter-process communication are owned by the kernel, leaving everything else to userspace tasks.
+- **Fully asynchronous kernel objects**: Every system call supports a **proactor**-style async API based on async dispatcher objects, which is theoretically more convenient and more resource-friendly than Linux's `io_uring`.
+- **Fully asynchronous drivers**: Drivers are represented by dynamic libraries, loaded by driver hosts and controlled by the device manager. The DDK library enables drivers to run as async tasks on driver hosts, and multiple drivers can run on a single driver host.
+- **Type-based task management**: Every task's state is represented by Rust's type system instead of a single state field, and its structure doesn't need to rely on reference-counted pointers. Running tasks are owned by the scheduler, while blocked ones are owned by futex references or suspend tokens, thus decreasing code complexity.
+- **Isolated and local VFS**: (Inspired by Fuchsia) Every process has its own VFS, and it is the task's choice whether to share the VFS with its child tasks.
+
+# Road map
+
+## Current working
+
+- [ ] Complete the DDK library.
+- [ ] Implement basic drivers (PCI, ACPI, etc.).
+
+## Further to-dos (may change)
+
+- Implement storage drivers.
+- Implement some storage FSes (FAT, etc.).
+- Complete the device manager and the program manager implementation.
+- Merge into Rust std.
+
 # Source tree
 
- `debug` - contains the decompiled assembly files, debug symbols, object file information, and the serial log files of the virtual machines.
@@ -28,8 +50,8 @@ support aarch64 in the future.
    ```sh
    # Select the nightly channel for rust
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
-   bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
-   sudo apt install build-essential qemu-system-x86
+   sudo apt install build-essential qemu-system-x86 llvm-14 nasm
+   export LLVM_PATH="/usr/lib/llvm-14"
    ```
 
 2. Add the following target:
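Editor's note on the "fully asynchronous kernel objects" bullet added above: a minimal sketch of the proactor idea follows. `Dispatcher` and its `read` method are hypothetical stand-ins invented for illustration, not Oceanic's actual API, and the executor is assumed from the `futures` crate.

```rust
// Sketch only, not Oceanic's real API. In a reactor (epoll-style) design,
// a task is woken when an object becomes *ready* and then performs the I/O
// itself; in a proactor, the whole operation, buffer included, is submitted
// up front, and the future resolves with the *completed* result.
use futures::executor::block_on; // assumption: `futures` crate available

struct Dispatcher; // hypothetical stand-in for an async dispatcher object

impl Dispatcher {
    // Submit a read: ownership of the buffer travels with the request and
    // comes back only once the data is already in place.
    async fn read(&self, mut buf: Vec<u8>) -> Vec<u8> {
        // A real kernel would fill `buf` asynchronously and signal the
        // dispatcher; this simulation just completes immediately.
        buf.extend_from_slice(b"data");
        buf
    }
}

fn main() {
    let disp = Dispatcher;
    let data = block_on(disp.read(Vec::new()));
    assert_eq!(data, b"data");
}
```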

+ 24 - 2
README.zh-cn.md

@@ -6,6 +6,28 @@
 
 当前这个项目只支持x86_64架构。未来可能会支持aarch64。
 
+# 特色
+
+- **纯·微内核架构**:内核中只保留内存管理、任务调度、进程间通信等必要功能,其他的都在用户空间实现。
+- **全异步内核对象**:所有系统调用均在异步派发器内核对象的支持下实现proactor的异步API,理论上比Linux的`io-uring`更加方便,占用更少资源。
+- **全异步的驱动程序**:驱动程序以动态库为单位,在驱动宿主上运行,由设备管理进程控制。DDK使驱动程序能以异步任务的方式运行,也支持多个驱动程序在同一个宿主上运行。
+- **以类型系统为基础的任务调度**:任务的状态用Rust的类型系统表示,而不是一个状态变量,并且不需要依赖引用计数指针。正在运行的任务由调度器拥有,而正在等待(堵塞)的任务则由futex引用或者挂起令牌拥有。这样可以减小代码复杂度。
+- **非全局的隔离虚拟文件系统**:(借鉴自Fuchsia)每一个进程有自己的VFS,也可以自由选择是否将自己的VFS共享给自己的子进程。
+
+# 路线图
+
+## 正在实现的
+
+- [ ] 完成DDK。
+- [ ] 实现基础的驱动(PCI,ACPI等)。
+
+## 之后要做的(可能会改)
+
+- 实现存储设备的驱动。
+- 实现一些文件系统(FAT等)。
+- 完成设备管理进程和程序管理进程的实现。
+- 进入Rust标准库。
+
 # 代码结构
 
 - `debug` - 存储反汇编文件、调试符号表、二进制文件信息和虚拟机的串口记录文件。
@@ -23,8 +45,8 @@
    ```sh
    # 配置 Rust 时需要选择 nightly 通道
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
-   bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
-   sudo apt install build-essential qemu-system-x86
+   sudo apt install build-essential qemu-system-x86 llvm-14 nasm
+   export LLVM_PATH="/usr/lib/llvm-14"
    ```
 
 2. 添加下列目标:

BIN
h2o/assets/Oceanic.500.bmp


+ 1 - 1
h2o/boot/Cargo.toml

@@ -16,5 +16,5 @@ goblin = {version = "0.5", default-features = false, features = ["elf32", "elf64
 log = "0.4"
 raw-cpuid = "9.0"
 static_assertions = "1.1"
-tinybmp = "0.3"
+tinybmp = "0.4"
 uefi = {version = "0.11", features = ["logger", "alloc"]}

+ 1 - 3
h2o/boot/src/file.rs

@@ -87,9 +87,7 @@ pub fn load(syst: &SystemTable<Boot>, filename: &str) -> *mut [u8] {
                 .expect_success("Failed to read kernel file");
             assert!(
                 asize == ksize,
-                "Failed to read whole kernel file: read {:#x}, required {:#x}",
-                asize,
-                ksize
+                "Failed to read whole kernel file: read {asize:#x}, required {ksize:#x}"
             );
         }
         _ => panic!("Kernel file should be a regular file"),

+ 0 - 1
h2o/boot/src/main.rs

@@ -8,7 +8,6 @@
 
 #![no_std]
 #![no_main]
-#![feature(abi_efiapi)]
 #![feature(alloc_error_handler)]
 #![feature(box_syntax)]
 #![feature(nonnull_slice_from_raw_parts)]

+ 3 - 2
h2o/kernel/Cargo.toml

@@ -27,7 +27,7 @@ targs = {path = "../libs/targs"}
 acpi = "4.1"
 array-macro = "2.1"
 bitflags = "1.3"
-bitvec = {version = "0.22", default-features = false, features = ["atomic"]}
+bitvec = {version = "1.0", default-features = false, features = ["atomic"]}
 bytes = {version = "1.1", default-features = false}
 cfg-if = "1.0"
 crossbeam-epoch = {version = "0.9", default-features = false, features = ["alloc"]}
@@ -35,11 +35,12 @@ crossbeam-queue = {version = "0.3", default-features = false, features = ["alloc
 crossbeam-utils = {version = "0.8", default-features = false}
 cty = "0.2"
 derive_builder = {version = "0.10", default-features = false}
+enum_dispatch = "0.3"
 goblin = {version = "0.5", default-features = false, features = ["elf32", "elf64", "endian_fd"]}
 log = "0.4"
+memoffset = "0.6"
 modular-bitfield = "0.11"
 paste = "1.0"
 raw-cpuid = "10"
 spin = {version = "0.9", features = ["use_ticket_mutex"]}
 static_assertions = "1.1"
-memoffset = "0.6"

+ 4 - 4
h2o/kernel/build.rs

@@ -5,7 +5,7 @@ use std::{env, error::Error, path::Path};
 fn asm_build(input: &str, output: &str, flags: &[&str]) -> Result<(), Box<dyn Error>> {
     use std::process::Command;
 
-    println!("cargo:rerun-if-changed={}", input);
+    println!("cargo:rerun-if-changed={input}");
     let mut cmd = Command::new("nasm");
     cmd.args([input, "-o", output])
         .args(flags)
@@ -19,7 +19,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     let target_dir = env::var("OUT_DIR")?;
     {
         let tram_src = "src/cpu/x86_64/apic/tram.asm";
-        let tram_dst = format!("{}/tram", target_dir);
+        let tram_dst = format!("{target_dir}/tram");
         asm_build(tram_src, &tram_dst, &[])?;
     }
 
@@ -28,10 +28,10 @@ fn main() -> Result<(), Box<dyn Error>> {
         dst_name += ".o";
 
         let src_path = file.path();
-        let dst_path = format!("{}/{}", target_dir, dst_name);
+        let dst_path = format!("{target_dir}/{dst_name}");
 
         asm_build(src_path.to_str().unwrap(), &dst_path, &["-f", "elf64"])?;
-        println!("cargo:rustc-link-arg={}", dst_path);
+        println!("cargo:rustc-link-arg={dst_path}");
         println!("cargo:rerun-if-changed={}", src_path.to_str().unwrap());
     }
 

+ 1 - 1
h2o/kernel/src/cpu.rs

@@ -16,7 +16,7 @@ cfg_if::cfg_if! {
 
 pub fn all_mask() -> CpuMask {
     let mut arr = bitarr![0; MAX_CPU];
-    arr[0..count()].set_all(true);
+    arr[0..count()].fill(true);
     arr
 }
 

+ 10 - 0
h2o/kernel/src/cpu/intr.rs

@@ -1,6 +1,7 @@
 mod imp;
 
 use alloc::sync::Arc;
+use core::ops::Range;
 
 use archop::Azy;
 
@@ -26,6 +27,15 @@ pub enum IsaIrq {
     Ide1 = 15,
 }
 
+#[derive(Debug, Clone)]
+pub struct Msi {
+    pub target_address: u32,
+    pub target_data: u32,
+
+    pub vecs: Range<u8>,
+    pub cpu: usize,
+}
+
 pub type IntrHandler = fn(*mut u8);
 
 static GSI_RES: Azy<Arc<Resource<u32>>> = Azy::new(|| {

+ 38 - 57
h2o/kernel/src/cpu/intr/imp.rs

@@ -1,19 +1,22 @@
 use alloc::sync::Arc;
 
-use spin::Mutex;
+use crossbeam_queue::ArrayQueue;
 use sv_call::Feature;
 
-use super::arch::MANAGER;
+use super::arch::Manager;
 use crate::{
     cpu::time::Instant,
     dev::Resource,
-    sched::{task::hdl::DefaultFeature, Event, EventData, PREEMPT, SIG_GENERIC},
+    sched::{task::hdl::DefaultFeature, Event, EventData, SIG_GENERIC},
 };
 
+const MAX_TIMES: usize = 100;
+
 #[derive(Debug)]
 pub struct Interrupt {
     gsi: u32,
-    last_time: Mutex<Option<Instant>>,
+    cpu: usize,
+    last_time: ArrayQueue<Instant>,
     level_triggered: bool,
     event_data: EventData,
 }
@@ -25,31 +28,37 @@ impl Event for Interrupt {
 
     fn wait(&self, waiter: Arc<dyn crate::sched::Waiter>) {
         if self.level_triggered {
-            MANAGER.mask(self.gsi, false).unwrap();
+            Manager::mask(self.gsi, false).unwrap();
         }
         self.wait_impl(waiter);
     }
 
     fn notify(&self, clear: usize, set: usize) -> usize {
-        PREEMPT.scope(|| *self.last_time.lock() = Some(Instant::now()));
+        self.last_time.force_push(Instant::now());
 
         let signal = self.notify_impl(clear, set);
 
         if self.level_triggered {
-            MANAGER.mask(self.gsi, true).unwrap();
+            Manager::mask(self.gsi, true).unwrap();
         }
-        MANAGER.eoi(self.gsi).unwrap();
+        Manager::eoi(self.gsi).unwrap();
         signal
     }
 }
 
 impl Interrupt {
     #[inline]
-    pub fn new(res: &Resource<u32>, gsi: u32, level_triggered: bool) -> sv_call::Result<Arc<Self>> {
+    pub fn new(
+        res: &Resource<u32>,
+        gsi: u32,
+        cpu: usize,
+        level_triggered: bool,
+    ) -> sv_call::Result<Arc<Self>> {
         if res.magic_eq(super::gsi_resource()) && res.range().contains(&gsi) {
             Ok(Arc::try_new(Interrupt {
                 gsi,
-                last_time: Mutex::new(None),
+                cpu,
+                last_time: ArrayQueue::new(MAX_TIMES),
                 level_triggered,
                 event_data: EventData::new(0),
             })?)
@@ -60,7 +69,7 @@ impl Interrupt {
 
     #[inline]
     pub fn last_time(&self) -> Option<Instant> {
-        PREEMPT.scope(|| *self.last_time.lock())
+        self.last_time.pop()
     }
 
     #[inline]
@@ -69,6 +78,13 @@ impl Interrupt {
     }
 }
 
+impl Drop for Interrupt {
+    fn drop(&mut self) {
+        self.cancel();
+        let _ = Manager::deregister(self.gsi, self.cpu);
+    }
+}
+
 unsafe impl DefaultFeature for Interrupt {
     fn default_features() -> Feature {
         Feature::SEND | Feature::WAIT
@@ -87,11 +103,7 @@ mod syscall {
 
     use super::*;
     use crate::{
-        cpu::{
-            arch::apic::{Polarity, TriggerMode},
-            intr::arch::MANAGER,
-            time,
-        },
+        cpu::arch::apic::{Polarity, TriggerMode},
         sched::SCHED,
         syscall::{Out, UserPtr},
     };
@@ -110,62 +122,31 @@ mod syscall {
             Polarity::Low
         };
 
+        let cpu = Manager::select_cpu();
+
         let intr = SCHED.with_current(|cur| {
             let handles = cur.space().handles();
             let res = handles.get::<Resource<u32>>(res)?;
-            Interrupt::new(&res, gsi, level_triggered)
+            Interrupt::new(&res, gsi, cpu, level_triggered)
         })?;
 
-        MANAGER.config(gsi, trig_mode, polarity)?;
-        MANAGER.register(
-            gsi,
-            Some((handler, (&*intr as *const Interrupt) as *mut u8)),
-        )?;
-        MANAGER.mask(gsi, false)?;
+        Manager::config(gsi, trig_mode, polarity)?;
+        Manager::register(gsi, cpu, (handler, (&*intr as *const Interrupt) as *mut u8))?;
+        Manager::mask(gsi, false)?;
 
         let event = Arc::downgrade(&intr) as _;
         SCHED.with_current(|cur| unsafe { cur.space().handles().insert_raw(intr, Some(event)) })
     }
 
     #[syscall]
-    fn intr_wait(hdl: Handle, timeout_us: u64, last_time: UserPtr<Out, u128>) -> Result {
+    fn intr_query(hdl: Handle, last_time: UserPtr<Out, u128>) -> Result {
         hdl.check_null()?;
         last_time.check()?;
 
-        let pree = PREEMPT.lock();
-        let intr = unsafe { (*SCHED.current()).as_ref().ok_or(ESRCH)? }
-            .space()
-            .handles()
-            .get::<Interrupt>(hdl)?;
-        if !intr.features().contains(Feature::WAIT) {
-            return Err(EPERM);
-        }
-
-        if timeout_us > 0 {
-            let blocker = crate::sched::Blocker::new(
-                &(Arc::clone(&intr) as _),
-                intr.level_triggered,
-                false,
-                SIG_GENERIC,
-            );
-            blocker.wait(Some(pree), time::from_us(timeout_us))?;
-            if !blocker.detach().0 {
-                return Err(ETIME);
-            }
-        }
-
-        unsafe { last_time.write(intr.last_time().unwrap().raw()) }?;
-        Ok(())
-    }
-
-    #[syscall]
-    fn intr_drop(hdl: Handle) -> Result {
-        hdl.check_null()?;
         SCHED.with_current(|cur| {
-            let intr = cur.space().handles().remove::<Interrupt>(hdl)?;
-            intr.cancel();
-            MANAGER.register(intr.gsi, None)?;
-            Ok(())
+            let intr = cur.space().handles().get::<Interrupt>(hdl)?;
+            let data = intr.last_time().ok_or(ENOENT)?;
+            last_time.write(unsafe { data.raw() })
         })
     }
 }
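Editor's note: the hunk above swaps the single `Mutex<Option<Instant>>` timestamp for a bounded, lock-free `ArrayQueue` of capacity `MAX_TIMES`, so `notify` can record interrupt times without locking and `last_time` (via `intr_query`) drains them one at a time. A standalone demonstration of the two `crossbeam_queue::ArrayQueue` operations the kernel relies on:

```rust
use crossbeam_queue::ArrayQueue;

fn main() {
    // Bounded, lock-free queue; the kernel uses capacity MAX_TIMES = 100.
    let q: ArrayQueue<u64> = ArrayQueue::new(2);

    // `force_push` never fails: when the queue is full, it evicts the
    // oldest element and returns it, so only recent entries are retained.
    let _ = q.force_push(1);
    let _ = q.force_push(2);
    let evicted = q.force_push(3);
    assert_eq!(evicted, Some(1));

    // `pop` consumes entries in FIFO order, like `Interrupt::last_time`.
    assert_eq!(q.pop(), Some(2));
    assert_eq!(q.pop(), Some(3));
    assert_eq!(q.pop(), None); // empty again
}
```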

+ 2 - 5
h2o/kernel/src/cpu/time.rs

@@ -114,7 +114,7 @@ impl core::fmt::Display for Instant {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         let ns = unsafe { self.raw() };
         let s = ns as f64 / 1_000_000_000.0;
-        write!(f, "{:.6}", s)
+        write!(f, "{s:.6}")
     }
 }
 
@@ -126,10 +126,7 @@ mod syscall {
     #[syscall]
     pub(super) fn time_get(ptr: UserPtr<Out, u128>) -> Result {
         #[cfg(target_arch = "x86_64")]
-        unsafe {
-            let raw = super::Instant::now().raw();
-            ptr.write(raw)?
-        };
+        ptr.write(unsafe { super::Instant::now().raw() })?;
         Ok(())
     }
 }

+ 1 - 1
h2o/kernel/src/cpu/time/chip.rs

@@ -7,7 +7,7 @@ use crate::{cpu::arch::tsc::TSC_CLOCK, dev::hpet::HPET_CLOCK};
 
 pub static CLOCK: Azy<&'static dyn ClockChip> = Azy::new(|| {
     let ret: &crate::cpu::arch::tsc::TscClock = &TSC_CLOCK;
-    crate::log::HAS_TIME.store(true, Release);
+    crate::logger::HAS_TIME.store(true, Release);
     ret as _
 });
 

+ 6 - 4
h2o/kernel/src/cpu/x86_64/apic.rs

@@ -14,17 +14,19 @@ use raw_cpuid::CpuId;
 use spin::RwLock;
 
 use super::intr::def::ApicVec;
-use crate::mem::space::{self, Flags, Phys};
+use crate::mem::space::{self, Flags, PhysTrait};
 
 pub static LAPIC_ID: RwLock<BTreeMap<usize, u32>> = RwLock::new(BTreeMap::new());
 static LAPIC_BASE: Azy<usize> = Azy::new(|| {
-    let phys = Phys::new(PAddr::new(0xFEE00000), PAGE_SIZE).expect("Failed to acquire LAPIC base");
+    let phys = space::new_phys(PAddr::new(minfo::LAPIC_BASE), PAGE_SIZE)
+        .expect("Failed to acquire LAPIC base");
+    let layout = space::page_aligned(phys.len());
     space::KRL
         .map(
             None,
-            Phys::clone(&phys),
+            phys,
             0,
-            space::page_aligned(phys.len()),
+            layout,
             Flags::READABLE | Flags::WRITABLE | Flags::UNCACHED,
         )
         .expect("Failed to allocate memory")

+ 8 - 16
h2o/kernel/src/cpu/x86_64/apic/ipi.rs

@@ -127,7 +127,7 @@ impl TramHeader {
         elapsed < limit
     }
 
-    pub unsafe fn reset_subheader(&self) {
+    pub unsafe fn allocate_subheader(&self) {
         let stack = crate::mem::alloc_system_stack()
             .expect("System memory allocation failed")
             .as_ptr() as u64;
@@ -148,6 +148,7 @@ pub unsafe fn start_cpus(aps: &[acpi::platform::Processor]) -> usize {
     static TRAM_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/tram"));
 
     let base_phys = PAddr::new(minfo::TRAMPOLINE_RANGE.start);
+    let base_vec = (*base_phys >> 3) as u8;
     let base = base_phys.to_laddr(minfo::ID_OFFSET);
 
     let ptr = *base;
@@ -165,30 +166,21 @@ pub unsafe fn start_cpus(aps: &[acpi::platform::Processor]) -> usize {
 
     let mut cnt = aps.len();
 
-    for acpi::platform::Processor {
+    for &acpi::platform::Processor {
         local_apic_id: id, ..
     } in aps
     {
-        header.reset_subheader();
+        delay(Duration::from_millis(5));
+        header.allocate_subheader();
 
         lapic(|lapic| {
-            lapic.send_ipi(0, DelivMode::Init, ipi::Shorthand::None, *id);
+            lapic.send_ipi(0, DelivMode::Init, ipi::Shorthand::None, id);
             delay(Duration::from_millis(50));
 
-            lapic.send_ipi(
-                (*base_phys >> 3) as u8,
-                DelivMode::StartUp,
-                ipi::Shorthand::None,
-                *id,
-            );
+            lapic.send_ipi(base_vec, DelivMode::StartUp, ipi::Shorthand::None, id);
 
             if !header.test_booted() {
-                lapic.send_ipi(
-                    (*base_phys >> 3) as u8,
-                    DelivMode::StartUp,
-                    ipi::Shorthand::None,
-                    *id,
-                );
+                lapic.send_ipi(base_vec, DelivMode::StartUp, ipi::Shorthand::None, id);
 
                 if !header.test_booted() {
                     log::warn!("CPU with LAPIC ID {} failed to boot", id);

+ 138 - 51
h2o/kernel/src/cpu/x86_64/intr.rs

@@ -1,5 +1,12 @@
 pub(super) mod def;
 
+use alloc::vec::Vec;
+use core::{
+    iter,
+    sync::atomic::{AtomicUsize, Ordering},
+};
+
+use archop::Azy;
 use array_macro::array;
 use collection_ex::RangeMap;
 use spin::Mutex;
@@ -7,35 +14,47 @@ use spin::Mutex;
 pub use self::def::{ExVec, ALLOC_VEC};
 use super::apic::{Polarity, TriggerMode, LAPIC_ID};
 use crate::{
-    cpu::{arch::seg::ndt::USR_CODE_X64, intr::IntrHandler, time::Instant, Lazy},
+    cpu::{
+        arch::seg::ndt::USR_CODE_X64,
+        intr::{IntrHandler, Msi},
+        time::Instant,
+    },
     dev::ioapic,
+    mem::space::PageFaultErrCode,
     sched::{
         task::{self, ctx::arch::Frame},
         PREEMPT, SCHED,
     },
 };
 
-#[thread_local]
-pub static MANAGER: Lazy<Manager> = Lazy::new(|| Manager::new(unsafe { crate::cpu::id() }));
+static MANAGER: Azy<Vec<Manager>> = Azy::new(|| {
+    iter::repeat_with(Default::default)
+        .take(crate::cpu::count())
+        .collect()
+});
 
 pub struct Manager {
-    cpu: usize,
     map: Mutex<RangeMap<u8, ()>>,
     slots: [Mutex<Option<(IntrHandler, *mut u8)>>; u8::MAX as usize + 1],
+    count: AtomicUsize,
 }
 
+unsafe impl Sync for Manager {}
+unsafe impl Send for Manager {}
+
 impl Manager {
-    pub fn new(cpu: usize) -> Self {
+    pub fn new() -> Self {
         Manager {
-            cpu,
             map: Mutex::new(RangeMap::new(ALLOC_VEC)),
             slots: array![_ => Mutex::new(None); 256],
+            count: AtomicUsize::new(0),
         }
     }
 
-    pub fn invoke(&self, vec: u8) {
+    pub fn invoke(vec: u8) {
         PREEMPT.scope(|| {
-            if let Some((handler, arg)) = *self.slots[vec as usize].lock() {
+            let manager = &MANAGER[unsafe { crate::cpu::id() }];
+            if let Some((handler, arg)) = *manager.slots[vec as usize].lock() {
                 handler(arg);
             } else {
                 log::trace!("Unhandled interrupt #{:?}", vec);
@@ -43,57 +62,126 @@ impl Manager {
         })
     }
 
-    pub fn register(&self, gsi: u32, handler: Option<(IntrHandler, *mut u8)>) -> sv_call::Result {
+    #[inline]
+    pub fn config(gsi: u32, trig_mode: TriggerMode, polarity: Polarity) -> sv_call::Result {
+        PREEMPT.scope(|| unsafe { ioapic::chip().lock().config(gsi, trig_mode, polarity) })
+    }
+
+    #[inline]
+    pub fn mask(gsi: u32, masked: bool) -> sv_call::Result {
+        PREEMPT.scope(|| unsafe { ioapic::chip().lock().mask(gsi, masked) })
+    }
+
+    #[inline]
+    pub fn eoi(gsi: u32) -> sv_call::Result {
+        PREEMPT.scope(|| unsafe { ioapic::chip().lock().eoi(gsi) })
+    }
+
+    pub fn select_cpu() -> usize {
+        MANAGER
+            .iter()
+            .enumerate()
+            .fold((usize::MAX, usize::MAX), |(acc, iacc), (index, manager)| {
+                let value = manager.count.load(Ordering::Acquire);
+                if value < acc {
+                    (value, index)
+                } else {
+                    (acc, iacc)
+                }
+            })
+            .1
+    }
+
+    pub fn register(gsi: u32, cpu: usize, handler: (IntrHandler, *mut u8)) -> sv_call::Result {
         let _pree = PREEMPT.lock();
         let mut ioapic = ioapic::chip().lock();
         let entry = ioapic.get_entry(gsi)?;
 
-        let in_use = ALLOC_VEC.contains(&entry.vec());
-
-        let self_apic_id = *LAPIC_ID.read().get(&self.cpu).ok_or(sv_call::EINVAL)?;
-        let apic_id = entry.dest_id();
-        if in_use && self_apic_id != apic_id {
+        if ALLOC_VEC.contains(&entry.vec()) {
             return Err(sv_call::EEXIST);
         }
 
-        let vec = in_use.then_some(entry.vec());
-
-        if let Some(handler) = handler {
-            let mut map = self.map.lock();
-            let vec = if let Some(vec) = vec {
-                map.try_insert_with(
-                    vec..(vec + 1),
-                    || Ok::<_, sv_call::Error>(((), ())),
-                    sv_call::EEXIST,
-                )?;
-                vec
-            } else {
-                map.allocate_with(1, |_| Ok::<_, sv_call::Error>(((), ())), sv_call::ENOMEM)?
-                    .0
-            };
-
-            *self.slots[vec as usize].lock() = Some(handler);
-            unsafe { ioapic.config_dest(gsi, vec, self_apic_id) }?;
-        } else if let Some(vec) = vec {
-            *self.slots[vec as usize].lock() = None;
-            unsafe { ioapic.deconfig(gsi) }?;
+        let apic_id = *LAPIC_ID.read().get(&cpu).ok_or(sv_call::EINVAL)?;
+        let manager = MANAGER.get(cpu).ok_or(sv_call::ENODEV)?;
+
+        let vec = manager.map.lock().allocate_with(
+            1,
+            |_| {
+                manager.count.fetch_add(1, Ordering::SeqCst);
+                Ok(())
+            },
+            sv_call::ENOMEM,
+        )?;
+
+        *manager.slots[vec as usize].lock() = Some(handler);
+        unsafe { ioapic.config_dest(gsi, vec, apic_id) }?;
+
+        Ok(())
+    }
+
+    pub fn deregister(gsi: u32, cpu: usize) -> sv_call::Result {
+        let _pree = PREEMPT.lock();
+        let mut ioapic = ioapic::chip().lock();
+        let entry = ioapic.get_entry(gsi)?;
+
+        let vec = entry.vec();
+
+        if !ALLOC_VEC.contains(&vec) {
+            return Err(sv_call::ENOENT);
+        }
+        let manager = MANAGER.get(cpu).ok_or(sv_call::ENODEV)?;
+
+        *manager.slots[vec as usize].lock() = None;
+        unsafe { ioapic.deconfig(gsi) }?;
+
+        {
+            let mut lock = manager.map.lock();
+            manager.count.fetch_sub(1, Ordering::SeqCst);
+            lock.remove(vec);
         }
+
         Ok(())
     }
 
-    #[inline]
-    pub fn config(&self, gsi: u32, trig_mode: TriggerMode, polarity: Polarity) -> sv_call::Result {
-        PREEMPT.scope(|| unsafe { ioapic::chip().lock().config(gsi, trig_mode, polarity) })
+    pub fn allocate_msi(num_vec: u8, cpu: usize) -> sv_call::Result<Msi> {
+        const MAX_NUM_VEC: u8 = 32;
+        let num_vec = num_vec
+            .checked_next_power_of_two()
+            .filter(|&size| size <= MAX_NUM_VEC)
+            .ok_or(sv_call::EINVAL)?;
+
+        let manager = MANAGER.get(cpu).ok_or(sv_call::ENODEV)?;
+        let apic_id = *LAPIC_ID.read().get(&cpu).ok_or(sv_call::EINVAL)?;
+
+        let start = PREEMPT.scope(|| {
+            manager.map.lock().allocate_with(
+                num_vec,
+                |_| {
+                    manager.count.fetch_add(num_vec as usize, Ordering::SeqCst);
+                    Ok(())
+                },
+                sv_call::ENOMEM,
+            )
+        })?;
+
+        Ok(Msi {
+            target_address: minfo::LAPIC_BASE as u32 | (apic_id << 12),
+            target_data: start as u32,
+            vecs: start..(start + num_vec),
+            cpu,
+        })
     }
 
-    #[inline]
-    pub fn mask(&self, gsi: u32, masked: bool) -> sv_call::Result {
-        PREEMPT.scope(|| unsafe { ioapic::chip().lock().mask(gsi, masked) })
+    pub fn deallocate_msi(msi: Msi) -> sv_call::Result {
+        let manager = MANAGER.get(msi.cpu).ok_or(sv_call::ENODEV)?;
+        PREEMPT.scope(|| manager.map.lock().remove(msi.vecs.start));
+        Ok(())
     }
+}
 
-    #[inline]
-    pub fn eoi(&self, gsi: u32) -> sv_call::Result {
-        PREEMPT.scope(|| unsafe { ioapic::chip().lock().eoi(gsi) })
+impl Default for Manager {
+    fn default() -> Self {
+        Self::new()
     }
 }
 
@@ -103,7 +191,7 @@ impl Manager {
 #[no_mangle]
 unsafe extern "C" fn common_interrupt(frame: *mut Frame) {
     let vec = unsafe { &*frame }.errc_vec as u8;
-    MANAGER.invoke(vec);
+    Manager::invoke(vec);
     super::apic::lapic(|lapic| lapic.eoi());
     crate::sched::SCHED.tick(Instant::now());
 }
@@ -113,9 +201,8 @@ unsafe fn exception(frame_ptr: *mut Frame, vec: def::ExVec) {
     use def::ExVec::*;
 
     let frame = &mut *frame_ptr;
-    match vec {
-        PageFault if crate::mem::space::page_fault(&mut *frame_ptr, frame.errc_vec) => return,
-        _ => {}
+    if vec == PageFault && crate::mem::space::page_fault(&mut *frame_ptr, frame.errc_vec) {
+        return;
     }
 
     match SCHED.with_current(|cur| Ok(cur.tid().ty())) {
@@ -131,7 +218,7 @@ unsafe fn exception(frame_ptr: *mut Frame, vec: def::ExVec) {
                     log::error!("{:?}", vec);
 
                     frame.dump(if vec == PageFault {
-                        Frame::ERRC_PF
+                        PageFaultErrCode::FMT
                     } else {
                         Frame::ERRC
                     });
@@ -148,7 +235,7 @@ unsafe fn exception(frame_ptr: *mut Frame, vec: def::ExVec) {
     log::error!("{:?} in the kernel", vec);
 
     frame.dump(if vec == PageFault {
-        Frame::ERRC_PF
+        PageFaultErrCode::FMT
     } else {
         Frame::ERRC
     });
@@ -158,5 +245,5 @@ unsafe fn exception(frame_ptr: *mut Frame, vec: def::ExVec) {
 
 #[inline]
 pub(super) fn init() {
-    Lazy::force(&MANAGER);
+    Azy::force(&MANAGER);
 }
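Editor's note: `Manager::select_cpu` above picks the CPU whose vector map currently holds the fewest allocations by folding over the per-CPU counters. The same selection logic in isolation, with the atomic counters replaced by plain integers to keep the example runnable:

```rust
// Least-loaded selection, mirroring `Manager::select_cpu` above.
fn select_cpu(counts: &[usize]) -> usize {
    counts
        .iter()
        .enumerate()
        .fold((usize::MAX, usize::MAX), |(min, imin), (index, &value)| {
            if value < min {
                (value, index) // new minimum: remember its index
            } else {
                (min, imin)
            }
        })
        .1
}

fn main() {
    // CPU 2 currently services the fewest interrupt vectors, so it wins.
    assert_eq!(select_cpu(&[4, 7, 1, 3]), 2);
    // Ties keep the earlier index, matching the strict `<` comparison.
    assert_eq!(select_cpu(&[2, 2]), 0);
}
```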

+ 1 - 1
h2o/kernel/src/cpu/x86_64/mod.rs

@@ -120,7 +120,7 @@ impl KernelGs {
         if let Some(bitmap) = bitmap {
             (*ptr).copy_from_bitslice(bitmap);
         } else {
-            let ptr = (*ptr).as_mut_raw_slice();
+            let ptr = (*ptr).as_raw_mut_slice();
             ptr.fill(usize::MAX);
         }
     }

+ 6 - 8
h2o/kernel/src/dev.rs

@@ -18,7 +18,7 @@ pub use crate::{cpu::intr::gsi_resource, mem::mem_resource};
 static PIO_RESOURCE: Azy<Arc<Resource<u16>>> = Azy::new(|| {
     let ret = Resource::new_root(archop::rand::get(), 0..u16::MAX);
     core::mem::forget(
-        ret.allocate(crate::log::COM_LOG..(crate::log::COM_LOG + 1))
+        ret.allocate(crate::logger::COM_LOG..(crate::logger::COM_LOG + 1))
             .expect("Failed to reserve debug port"),
     );
     ret
@@ -56,10 +56,9 @@ mod syscall {
                 && res.range().start <= base
                 && base + size <= res.range().end
             {
+                drop(res);
                 let io_bitmap = cur.io_bitmap_mut().get_or_insert_with(|| bitvec![1; 65536]);
-                for item in io_bitmap.iter_mut().skip(base as usize).take(size as usize) {
-                    item.set(false);
-                }
+                io_bitmap[(base as usize)..(base as usize + size as usize)].fill(false);
                 unsafe { KERNEL_GS.update_tss_io_bitmap(cur.io_bitmap_mut().as_deref()) };
                 Ok(())
             } else {
@@ -76,11 +75,10 @@ mod syscall {
                 && res.range().start <= base
                 && base + size <= res.range().end
             {
+                drop(res);
                 if let Some(io_bitmap) = cur.io_bitmap_mut() {
-                    for item in io_bitmap.iter_mut().skip(base as usize).take(size as usize) {
-                        item.set(true);
-                    }
-                };
+                    io_bitmap[(base as usize)..(base as usize + size as usize)].fill(true);
+                }
                 unsafe { KERNEL_GS.update_tss_io_bitmap(cur.io_bitmap_mut().as_deref()) };
                 Ok(())
             } else {

+ 1 - 0
h2o/kernel/src/dev/res.rs

@@ -95,6 +95,7 @@ mod syscall {
                 return Err(EPERM);
             }
             let sub = res.allocate(base..(base + size)).ok_or(ENOMEM)?;
+            drop(res);
             cur.space().handles().insert_raw(sub, None)
         })
     }

+ 4 - 3
h2o/kernel/src/dev/x86_64/hpet.rs

@@ -1,3 +1,4 @@
+use alloc::sync::Arc;
 use core::{mem, ptr::addr_of};
 
 use archop::Azy;
@@ -10,7 +11,7 @@ use crate::{
         chip::{factor_from_freq, CalibrationClock, ClockChip},
         Instant,
     },
-    mem::space::{self, Flags, Phys},
+    mem::space::{self, Flags, PhysTrait},
     sched::Arsc,
 };
 
@@ -59,12 +60,12 @@ unsafe impl Sync for Hpet {}
 
 impl Hpet {
     unsafe fn new(data: acpi::HpetInfo) -> Result<Self, &'static str> {
-        let phys = Phys::new(PAddr::new(data.base_address), PAGE_SIZE)
+        let phys = space::new_phys(PAddr::new(data.base_address), PAGE_SIZE)
             .map_err(|_| "Failed to acquire memory for HPET")?;
         let addr = space::KRL
             .map(
                 None,
-                Phys::clone(&phys),
+                Arc::clone(&phys),
                 0,
                 space::page_aligned(phys.len()),
                 Flags::READABLE | Flags::WRITABLE | Flags::UNCACHED,

+ 4 - 4
h2o/kernel/src/dev/x86_64/ioapic.rs

@@ -1,4 +1,4 @@
-use alloc::vec::Vec;
+use alloc::{sync::Arc, vec::Vec};
 use core::ops::Range;
 
 use acpi::platform::interrupt::{
@@ -13,7 +13,7 @@ use spin::Mutex;
 
 use crate::{
     cpu::arch::apic::{lapic, DelivMode, Polarity, TriggerMode},
-    mem::space::{self, Flags, Phys},
+    mem::space::{self, Flags, PhysTrait},
 };
 
 const LEGACY_IRQ: Range<u32> = 0..16;
@@ -173,12 +173,12 @@ impl Ioapic {
             address: paddr,
             global_system_interrupt_base: gsi_base,
         } = node;
-        let phys = Phys::new(PAddr::new(*paddr as usize), PAGE_SIZE)
+        let phys = space::new_phys(PAddr::new(*paddr as usize), PAGE_SIZE)
             .expect("Failed to acquire memory for I/O APIC");
         let addr = space::KRL
             .map(
                 None,
-                Phys::clone(&phys),
+                Arc::clone(&phys),
                 0,
                 space::page_aligned(phys.len()),
                 Flags::READABLE | Flags::WRITABLE | Flags::UNCACHED,

+ 7 - 11
h2o/kernel/src/kmain.rs

@@ -14,7 +14,6 @@
 #![feature(core_intrinsics)]
 #![feature(downcast_unchecked)]
 #![feature(drain_filter)]
-#![feature(int_log)]
 #![feature(layout_for_ptr)]
 #![feature(linked_list_cursors)]
 #![feature(map_try_insert)]
@@ -30,21 +29,18 @@
 #![feature(thread_local)]
 #![feature(trace_macros)]
 #![feature(unsize)]
-#![feature(unzip_option)]
 #![feature(vec_into_raw_parts)]
 
 pub mod cpu;
 pub mod dev;
-mod log;
-pub mod mem;
+mod logger;
+mod mem;
 mod rxx;
 pub mod sched;
 mod syscall;
 
 use core::mem::MaybeUninit;
 
-use ::log as l;
-
 extern crate alloc;
 
 static mut KARGS: MaybeUninit<minfo::KernelArgs> = MaybeUninit::uninit();
@@ -64,8 +60,8 @@ pub extern "C" fn kmain() {
     }
 
     // SAFETY: Everything is uninitialized.
-    unsafe { self::log::init(l::Level::Debug) };
-    l::info!("Starting the kernel");
+    unsafe { logger::init(log::Level::Debug) };
+    log::info!("Starting the kernel");
 
     mem::init();
     sched::task::init_early();
@@ -77,19 +73,19 @@ pub extern "C" fn kmain() {
     sched::init();
 
     // Test end
-    l::trace!("Reaching end of kernel");
+    log::trace!("Reaching end of kernel");
 }
 
 pub fn kmain_ap() {
     unsafe { cpu::set_id(false) };
     cpu::arch::seg::test_pls();
-    l::trace!("Starting the kernel");
+    log::trace!("Starting the kernel");
 
     unsafe { mem::space::init() };
     unsafe { cpu::arch::init_ap() };
 
     sched::init();
 
-    l::trace!("Finished");
+    log::trace!("Finished");
     unsafe { archop::halt_loop(Some(true)) };
 }

+ 2 - 2
h2o/kernel/src/log.rs → h2o/kernel/src/logger.rs

@@ -17,7 +17,7 @@ struct OptionU32Display(Option<u32>);
 impl core::fmt::Display for OptionU32Display {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
         if let Some(val) = self.0 {
-            write!(f, "{}", val)
+            write!(f, "{val}")
         } else {
             write!(f, "<NULL>")
         }
@@ -111,7 +111,7 @@ mod syscall {
             core::str::from_utf8(unsafe { core::slice::from_raw_parts(buffer.as_ptr(), len) })?;
         let _pree = PREEMPT.lock();
         let mut os = unsafe { LOGGER.assume_init_ref() }.output.lock();
-        writeln!(os, "{}", string).map_err(|_| EFAULT)?;
+        writeln!(os, "{string}").map_err(|_| EFAULT)?;
         Ok(())
     }
 }

+ 1 - 1
h2o/kernel/src/log/flags.rs → h2o/kernel/src/logger/flags.rs

@@ -43,7 +43,7 @@ impl Display for Flags {
                 } else {
                     out.make_ascii_lowercase();
                 }
-                write!(f, "{} ", out)?;
+                write!(f, "{out} ")?;
             }
             Ok(())
         }

+ 7 - 5
h2o/kernel/src/log/serial.rs → h2o/kernel/src/logger/serial.rs

@@ -45,18 +45,20 @@ impl Output {
 
     /// Output a character byte to the serial port for logging.
     unsafe fn out_char(&mut self, c: u8) {
-        while self.buf_full() {
+        self.flush();
+        self.0.write(c);
+    }
+
+    pub fn flush(&self) {
+        while unsafe { self.buf_full() } {
             hint::spin_loop();
         }
-        self.0.write(c);
     }
 }
 
 impl fmt::Write for Output {
     fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> {
-        for b in s.bytes() {
-            unsafe { self.out_char(b) };
-        }
+        s.bytes().for_each(|b| unsafe { self.out_char(b) });
         Ok(())
     }
 }

+ 1 - 0
h2o/kernel/src/mem/arena.rs

@@ -32,6 +32,7 @@ pub struct Arena<T> {
 unsafe impl<T: Send> Send for Arena<T> {}
 unsafe impl<T: Send> Sync for Arena<T> {}
 
+#[allow(dead_code)]
 impl<T> Arena<T> {
     pub fn new(max_count: usize) -> Self {
         let (layout, off) = Layout::new::<T>()

+ 25 - 15
h2o/kernel/src/mem/space.rs

@@ -22,7 +22,8 @@ use core::{
 };
 
 use archop::Azy;
-use paging::LAddr;
+use bitop_ex::BitOpEx;
+use paging::{LAddr, PAGE_SHIFT};
 use spin::Mutex;
 pub use sv_call::mem::Flags;
 use sv_call::mem::PhysOptions;
@@ -94,6 +95,17 @@ impl Space {
     pub fn root(&self) -> &Arc<Virt> {
         &self.root
     }
+
+    pub fn assert_mapped(&self, base: LAddr, len: usize) {
+        PREEMPT.scope(|| {
+            for offset in (0..len).step_by(paging::PAGE_SIZE) {
+                let addr = LAddr::from(base.val() + offset);
+                if let Err(err) = self.arch.query(addr) {
+                    panic!("Assert failed: address {addr:?} mapping error: {err:?}");
+                }
+            }
+        })
+    }
 }
 
 impl Deref for Space {
@@ -105,8 +117,8 @@ impl Deref for Space {
 }
 
 pub(crate) fn allocate(size: usize, flags: Flags, zeroed: bool) -> sv_call::Result<NonNull<[u8]>> {
-    let phys = Phys::allocate(
-        size,
+    let phys = allocate_phys(
+        size.round_up_bit(PAGE_SHIFT),
         if zeroed {
             PhysOptions::ZEROED
         } else {
@@ -124,19 +136,17 @@ pub(crate) fn allocate(size: usize, flags: Flags, zeroed: bool) -> sv_call::Resu
         })
 }
 
-pub(crate) unsafe fn reprotect_unchecked(ptr: NonNull<[u8]>, flags: Flags) -> sv_call::Result {
-    let base = LAddr::from(ptr);
-    let end = LAddr::from(base.val() + ptr.len());
-    KRL.arch.reprotect(base..end, flags).map_err(paging_error)
-}
-
 pub(crate) unsafe fn unmap(ptr: NonNull<u8>) -> sv_call::Result {
     let base = LAddr::from(ptr);
-    let ret = PREEMPT.scope(|| KRL.root.children.lock().remove(&base));
-    ret.map_or(Err(sv_call::ENOENT), |child| {
-        let end = child.end(base);
-        let _ = PREEMPT.scope(|| KRL.arch.unmaps(base..end));
-        Ok(())
+    PREEMPT.scope(|| {
+        let mut children = KRL.root.children.lock();
+        let ret = children.remove(&base);
+
+        ret.map_or(Err(sv_call::ENOENT), |child| {
+            let end = child.end(base);
+            let _ = KRL.arch.unmaps(base..end);
+            Ok(())
+        })
     })
 }
 
@@ -145,7 +155,7 @@ pub fn init_stack(virt: &Arc<Virt>, size: usize) -> sv_call::Result<LAddr> {
     let virt = virt.allocate(None, unsafe {
         Layout::from_size_align_unchecked(paging::PAGE_SIZE * 2 + size, paging::PAGE_SIZE)
     })?;
-    let phys = Phys::allocate(size, Default::default(), false)?;
+    let phys = allocate_phys(size, Default::default(), false)?;
     let ret = virt.upgrade().unwrap().map(
         Some(paging::PAGE_SIZE),
         phys,
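Editor's note: the `allocate` hunk above now rounds the requested size up to a page boundary at the call site via `size.round_up_bit(PAGE_SHIFT)`. A self-contained sketch of that rounding, assuming `round_up_bit(b)` rounds up to a multiple of `1 << b` (the real helper lives in the `bitop_ex` crate):

```rust
// Round `x` up to the next multiple of 1 << bit (assumed semantics of
// bitop_ex's `round_up_bit`, matching its pairing with PAGE_SHIFT here).
fn round_up_bit(x: usize, bit: u32) -> usize {
    let mask = (1usize << bit) - 1;
    x.checked_add(mask).expect("size overflow") & !mask
}

fn main() {
    const PAGE_SHIFT: u32 = 12; // 4 KiB pages

    assert_eq!(round_up_bit(1, PAGE_SHIFT), 4096);
    assert_eq!(round_up_bit(4096, PAGE_SHIFT), 4096);
    assert_eq!(round_up_bit(4097, PAGE_SHIFT), 8192);
}
```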

+ 71 - 144
h2o/kernel/src/mem/space/phys.rs

@@ -1,178 +1,85 @@
 mod contiguous;
 mod extensible;
 
-use alloc::sync::Weak;
+use alloc::{
+    sync::{Arc, Weak},
+    vec::Vec,
+};
 
+use enum_dispatch::enum_dispatch;
 use paging::PAddr;
 use sv_call::{mem::PhysOptions, Feature, Result, EPERM};
 
 use crate::{
-    sched::{task::hdl::DefaultFeature, BasicEvent, Event},
+    sched::{task::hdl::DefaultFeature, Event},
     syscall::{In, Out, UserPtr},
 };
 
 type Cont = self::contiguous::Phys;
-type PinnedCont = self::contiguous::PinnedPhys;
 
-use self::extensible::*;
+type Ext = self::extensible::Phys;
 
-#[derive(Debug, Clone, PartialEq)]
+/// # Note
+///
+/// The task handle map doesn't support dynamically sized objects, and the vtable of
+/// `PhysTrait` is very large (containing lots of function pointers), so we use
+/// enum dispatch instead.
+#[enum_dispatch(PhysTrait)]
+#[derive(Debug, PartialEq)]
 pub enum Phys {
-    Contiguous(Cont),
-    Static(Static),
-    Dynamic(Dynamic),
+    Cont,
+    Ext,
 }
 
-#[derive(Debug)]
-pub enum PinnedPhys {
-    Contiguous(PinnedCont),
-    Static(PinnedStatic),
-    Dynamic(PinnedDynamic),
-}
+#[allow(clippy::len_without_is_empty)]
+#[enum_dispatch]
+pub trait PhysTrait {
+    fn event(&self) -> Weak<dyn Event>;
 
-impl Phys {
-    #[inline]
-    pub fn new(base: PAddr, size: usize) -> Result<Self> {
-        Ok(Phys::Contiguous(Cont::new(base, size)?))
-    }
+    fn len(&self) -> usize;
 
-    /// # Errors
-    ///
-    /// Returns error if the heap memory is exhausted or the size is zero.
-    pub fn allocate(size: usize, options: PhysOptions, contiguous: bool) -> Result<Self> {
-        let resizable = options.contains(PhysOptions::RESIZABLE);
-        Ok(if contiguous {
-            if resizable {
-                return Err(EPERM);
-            }
-            Phys::Contiguous(Cont::allocate(size, options.contains(PhysOptions::ZEROED))?)
-        } else {
-            let zeroed = options.contains(PhysOptions::ZEROED);
-            if resizable {
-                Phys::Dynamic(Dynamic::allocate(size, zeroed)?)
-            } else {
-                Phys::Static(Static::allocate(size, zeroed)?)
-            }
-        })
-    }
+    fn pin(&self, offset: usize, len: usize, write: bool) -> Result<Vec<(PAddr, usize)>>;
 
-    pub fn event(&self) -> Weak<dyn Event> {
-        match self {
-            Phys::Dynamic(d) => d.event(),
-            _ => Weak::<BasicEvent>::new() as _,
-        }
-    }
+    fn unpin(&self, offset: usize, len: usize);
 
-    #[inline]
-    #[allow(clippy::len_without_is_empty)]
-    pub fn len(&self) -> usize {
-        match self {
-            Phys::Contiguous(cont) => cont.len(),
-            Phys::Static(s) => s.len(),
-            Phys::Dynamic(d) => d.len(),
-        }
-    }
+    fn create_sub(&self, offset: usize, len: usize, copy: bool) -> Result<Arc<Phys>>;
 
-    #[inline]
-    pub fn pin(this: Self) -> Result<PinnedPhys> {
-        match this {
-            Phys::Contiguous(cont) => Ok(PinnedPhys::Contiguous(Cont::pin(cont))),
-            Phys::Static(ext) => Ok(PinnedPhys::Static(Static::pin(ext))),
-            Phys::Dynamic(ext) => Ok(PinnedPhys::Dynamic(Dynamic::pin(ext)?)),
-        }
-    }
+    fn base(&self) -> PAddr;
 
-    #[inline]
-    pub fn create_sub(&self, offset: usize, len: usize, copy: bool) -> Result<Self> {
-        match self {
-            Phys::Contiguous(cont) => cont.create_sub(offset, len, copy).map(Phys::Contiguous),
-            Phys::Static(ext) => ext.create_sub(offset, len, copy).map(Phys::Static),
-            Phys::Dynamic(_) => Err(EPERM),
-        }
-    }
+    fn resize(&self, new_len: usize, zeroed: bool) -> Result;
 
-    #[inline]
-    pub fn base(&self) -> PAddr {
-        match self {
-            Phys::Contiguous(cont) => cont.base(),
-            _ => unimplemented!("Extensible phys have multiple bases"),
-        }
-    }
+    fn read(&self, offset: usize, len: usize, buffer: UserPtr<Out>) -> Result<usize>;
 
-    #[inline]
-    pub fn resize(&self, new_len: usize, zeroed: bool) -> Result {
-        match self {
-            Phys::Dynamic(d) => d.resize(new_len, zeroed),
-            _ => Err(EPERM),
-        }
-    }
+    fn write(&self, offset: usize, len: usize, buffer: UserPtr<In>) -> Result<usize>;
 
-    #[inline]
-    pub fn read(&self, offset: usize, len: usize, buffer: UserPtr<Out>) -> Result<usize> {
-        match self {
-            Phys::Contiguous(cont) => cont.read(offset, len, buffer),
-            Phys::Static(s) => s.read(offset, len, buffer),
-            Phys::Dynamic(d) => d.read(offset, len, buffer),
-        }
-    }
-
-    #[inline]
-    pub fn write(&self, offset: usize, len: usize, buffer: UserPtr<In>) -> Result<usize> {
-        match self {
-            Phys::Contiguous(cont) => cont.write(offset, len, buffer),
-            Phys::Static(s) => s.write(offset, len, buffer),
-            Phys::Dynamic(d) => d.write(offset, len, buffer),
-        }
-    }
-
-    #[inline]
-    pub fn read_vectored(&self, offset: usize, bufs: &[(UserPtr<Out>, usize)]) -> Result<usize> {
-        match self {
-            Phys::Contiguous(cont) => cont.read_vectored(offset, bufs),
-            Phys::Static(s) => s.read_vectored(offset, bufs),
-            Phys::Dynamic(d) => d.read_vectored(offset, bufs),
-        }
-    }
-
-    #[inline]
-    pub fn write_vectored(&self, offset: usize, bufs: &[(UserPtr<In>, usize)]) -> Result<usize> {
-        match self {
-            Phys::Contiguous(cont) => cont.write_vectored(offset, bufs),
-            Phys::Static(s) => s.write_vectored(offset, bufs),
-            Phys::Dynamic(d) => d.write_vectored(offset, bufs),
+    fn read_vectored(
+        &self,
+        mut offset: usize,
+        bufs: &[(UserPtr<Out>, usize)],
+    ) -> sv_call::Result<usize> {
+        let mut read_len = 0;
+        for (buffer, len) in bufs.iter().copied() {
+            let actual = self.read(offset, len, buffer)?;
+            read_len += actual;
+            offset += actual;
+            if actual < len {
+                break;
+            }
         }
+        Ok(read_len)
     }
-}
 
-impl PinnedPhys {
-    #[inline]
-    pub fn map_iter(&self, offset: usize, len: usize) -> impl Iterator<Item = (PAddr, usize)> + '_ {
-        enum OneOf<A, B, C> {
-            A(A),
-            B(B),
-            C(C),
-        }
-        impl<A, B, C, T> Iterator for OneOf<A, B, C>
-        where
-            A: Iterator<Item = T>,
-            B: Iterator<Item = T>,
-            C: Iterator<Item = T>,
-        {
-            type Item = T;
-            fn next(&mut self) -> Option<Self::Item> {
-                match self {
-                    OneOf::A(a) => a.next(),
-                    OneOf::B(b) => b.next(),
-                    OneOf::C(c) => c.next(),
-                }
+    fn write_vectored(&self, mut offset: usize, bufs: &[(UserPtr<In>, usize)]) -> Result<usize> {
+        let mut written_len = 0;
+        for (buffer, len) in bufs.iter().copied() {
+            let actual = self.write(offset, len, buffer)?;
+            written_len += actual;
+            offset += actual;
+            if actual < len {
+                break;
             }
         }
-
-        match self {
-            PinnedPhys::Contiguous(cont) => OneOf::A(cont.map_iter(offset, len)),
-            PinnedPhys::Static(s) => OneOf::B(s.map_iter(offset, len)),
-            PinnedPhys::Dynamic(d) => OneOf::C(d.map_iter(offset, len)),
-        }
+        Ok(written_len)
     }
 }
 
@@ -186,3 +93,23 @@ unsafe impl DefaultFeature for Phys {
             | Feature::WAIT
     }
 }
+
+#[inline]
+pub fn new_phys(base: PAddr, size: usize) -> Result<Arc<Phys>> {
+    Ok(Arc::try_new(Phys::from(Cont::new(base, size)?))?)
+}
+
+/// # Errors
+///
+/// Returns error if the heap memory is exhausted or the size is zero.
+pub fn allocate_phys(size: usize, options: PhysOptions, contiguous: bool) -> Result<Arc<Phys>> {
+    let resizable = options.contains(PhysOptions::RESIZABLE);
+    Ok(Arc::try_new(if contiguous {
+        if resizable {
+            return Err(EPERM);
+        }
+        Phys::from(Cont::allocate(size, options.contains(PhysOptions::ZEROED))?)
+    } else {
+        Phys::from(Ext::new(size))
+    })?)
+}
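Editor's note: the `# Note` in the hunk above explains why `Phys` dispatches through `enum_dispatch` rather than a wide `dyn PhysTrait` vtable. A minimal, self-contained sketch of that pattern, using toy stand-in types rather than the kernel's real ones:

```rust
// The `enum_dispatch` macro generates `From` impls plus a match-based
// forwarding impl of the trait for the enum, so callers get static
// dispatch on a fixed-size type instead of a fat pointer and vtable.
use enum_dispatch::enum_dispatch;

#[enum_dispatch]
trait Backing {
    fn len(&self) -> usize;
}

struct Cont { len: usize }
struct Ext { len: usize }

impl Backing for Cont {
    fn len(&self) -> usize { self.len }
}
impl Backing for Ext {
    fn len(&self) -> usize { self.len }
}

// Variant names double as the wrapped types, mirroring `Phys::{Cont, Ext}`.
#[enum_dispatch(Backing)]
enum Phys {
    Cont,
    Ext,
}

fn main() {
    // `From` is generated, so construction mirrors `Phys::from(Cont::new(..))`.
    let p = Phys::from(Cont { len: 4096 });
    assert_eq!(p.len(), 4096); // dispatched by `match`, no vtable involved
}
```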

+ 49 - 51
h2o/kernel/src/mem/space/phys/contiguous.rs

@@ -1,4 +1,8 @@
-use alloc::alloc::Global;
+use alloc::{
+    alloc::Global,
+    sync::{Arc, Weak},
+    vec::Vec,
+};
 use core::{
     alloc::{Allocator, Layout},
     slice,
@@ -6,11 +10,12 @@ use core::{
 
 use bitop_ex::BitOpEx;
 use paging::{LAddr, PAddr, PAGE_SHIFT, PAGE_SIZE};
-use sv_call::Result;
+use sv_call::{Result, EPERM};
 
+use super::PhysTrait;
 use crate::{
-    sched::Arsc,
-    syscall::{In, InPtrType, Out, OutPtrType, UserPtr},
+    sched::{Arsc, BasicEvent, Event},
+    syscall::{In, Out, UserPtr},
 };
 
 #[derive(Debug)]
@@ -48,8 +53,6 @@ pub struct Phys {
     inner: Arsc<PhysInner>,
 }
 
-pub type PinnedPhys = Phys;
-
 impl From<Arsc<PhysInner>> for Phys {
     fn from(inner: Arsc<PhysInner>) -> Self {
         Phys {
@@ -97,22 +100,37 @@ impl Phys {
         .map(Self::from)
     }
 
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.len
+    fn raw(&self) -> *mut u8 {
+        unsafe { self.inner.base.to_laddr(minfo::ID_OFFSET).add(self.offset) }
     }
+}
 
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
+impl PartialEq for Phys {
+    fn eq(&self, other: &Self) -> bool {
+        self.offset == other.offset
+            && self.len == other.len
+            && Arsc::ptr_eq(&self.inner, &other.inner)
     }
+}
 
-    #[inline]
-    pub fn pin(this: Self) -> PinnedPhys {
-        this
+impl PhysTrait for Phys {
+    fn event(&self) -> Weak<dyn Event> {
+        Weak::<BasicEvent>::new()
+    }
+
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    fn pin(&self, offset: usize, len: usize, _: bool) -> Result<Vec<(PAddr, usize)>> {
+        let base = PAddr::new(*self.inner.base + self.offset + offset);
+        let len = self.len.saturating_sub(offset).min(len);
+        Ok((len > 0).then_some((base, len)).into_iter().collect())
     }
 
-    pub fn create_sub(&self, offset: usize, len: usize, copy: bool) -> Result<Self> {
+    fn unpin(&self, _: usize, _: usize) {}
+
+    fn create_sub(&self, offset: usize, len: usize, copy: bool) -> Result<Arc<super::Phys>> {
         if offset.contains_bit(PAGE_SHIFT) || len.contains_bit(PAGE_SHIFT) {
             return Err(sv_call::EALIGN);
         }
@@ -120,42 +138,38 @@ impl Phys {
         let new_offset = self.offset.wrapping_add(offset);
         let end = new_offset.wrapping_add(len);
         if self.offset <= new_offset && new_offset < end && end <= self.offset + self.len {
-            if copy {
+            let mut ret = Arc::try_new_uninit()?;
+            let phys = if copy {
                 let child = Self::allocate(len, true)?;
                 let dst = child.raw();
                 unsafe {
                     let src = self.raw().add(offset);
                     dst.copy_from_nonoverlapping(src, len);
                 }
-                Ok(child)
+                child
             } else {
-                Ok(Phys {
+                Phys {
                     offset: new_offset,
                     len,
                     inner: Arsc::clone(&self.inner),
-                })
-            }
+                }
+            };
+            Arc::get_mut(&mut ret).unwrap().write(phys.into());
+            Ok(unsafe { ret.assume_init() })
         } else {
             Err(sv_call::ERANGE)
         }
     }
 
-    pub fn base(&self) -> PAddr {
+    fn base(&self) -> PAddr {
         PAddr::new(*self.inner.base + self.offset)
     }
 
-    #[inline]
-    pub fn map_iter(&self, offset: usize, len: usize) -> impl Iterator<Item = (PAddr, usize)> {
-        let base = PAddr::new(*self.inner.base + self.offset + offset);
-        let len = self.len.saturating_sub(offset).min(len);
-        (len > 0).then_some((base, len)).into_iter()
-    }
-
-    fn raw(&self) -> *mut u8 {
-        unsafe { self.inner.base.to_laddr(minfo::ID_OFFSET).add(self.offset) }
+    fn resize(&self, _: usize, _: bool) -> Result {
+        Err(EPERM)
     }
 
-    pub fn read(&self, offset: usize, len: usize, buffer: UserPtr<Out>) -> Result<usize> {
+    fn read(&self, offset: usize, len: usize, buffer: UserPtr<Out>) -> Result<usize> {
         let offset = self.len.min(offset);
         let len = self.len.saturating_sub(offset).min(len);
         unsafe {
@@ -166,7 +180,7 @@ impl Phys {
         Ok(len)
     }
 
-    pub fn write(&self, offset: usize, len: usize, buffer: UserPtr<In>) -> Result<usize> {
+    fn write(&self, offset: usize, len: usize, buffer: UserPtr<In>) -> Result<usize> {
         let offset = self.len.min(offset);
         let len = self.len.saturating_sub(offset).min(len);
         unsafe {
@@ -176,11 +190,7 @@ impl Phys {
         Ok(len)
     }
 
-    pub fn read_vectored<T: OutPtrType>(
-        &self,
-        mut offset: usize,
-        bufs: &[(UserPtr<T>, usize)],
-    ) -> sv_call::Result<usize> {
+    fn read_vectored(&self, mut offset: usize, bufs: &[(UserPtr<Out>, usize)]) -> Result<usize> {
         let mut read_len = 0;
         for buf in bufs {
             let actual_offset = self.len.min(offset);
@@ -201,11 +211,7 @@ impl Phys {
         Ok(read_len)
     }
 
-    pub fn write_vectored<T: InPtrType>(
-        &self,
-        mut offset: usize,
-        bufs: &[(UserPtr<T>, usize)],
-    ) -> sv_call::Result<usize> {
+    fn write_vectored(&self, mut offset: usize, bufs: &[(UserPtr<In>, usize)]) -> Result<usize> {
         let mut written_len = 0;
         for buf in bufs {
             let actual_offset = self.len.min(offset);
@@ -225,11 +231,3 @@ impl Phys {
         Ok(written_len)
     }
 }
-
-impl PartialEq for Phys {
-    fn eq(&self, other: &Self) -> bool {
-        self.offset == other.offset
-            && self.len == other.len
-            && Arsc::ptr_eq(&self.inner, &other.inner)
-    }
-}
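
Under `PhysTrait`, the contiguous variant degenerates nicely: `pin` yields at most one `(PAddr, usize)` extent clipped to the object's length, and `unpin` is a no-op because the backing range is owned for the object's whole lifetime. A hypothetical mapper loop built on that contract:

    fn map_extents(phys: &dyn PhysTrait, len: usize) -> Result<()> {
        let extents = phys.pin(0, len, /* write */ false)?;
        for (base, chunk) in extents {
            // ... install `base..base + chunk` into the page tables ...
            let _ = (base, chunk);
        }
        phys.unpin(0, len); // no-op here; meaningful for the extensible variant
        Ok(())
    }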

+ 428 - 408
h2o/kernel/src/mem/space/phys/extensible.rs

@@ -1,528 +1,548 @@
 use alloc::{
     alloc::Global,
-    collections::BTreeMap,
+    collections::{btree_map::Entry, BTreeMap},
     sync::{Arc, Weak},
+    vec::Vec,
 };
 use core::{
-    alloc::{AllocError, Allocator, Layout},
-    mem, slice,
+    alloc::Allocator,
+    mem,
+    ptr::NonNull,
+    slice,
+    sync::atomic::{AtomicUsize, Ordering::SeqCst},
 };
 
+use archop::Azy;
 use bitop_ex::BitOpEx;
-use paging::{LAddr, PAddr, PAGE_SHIFT, PAGE_SIZE};
-use spin::RwLock;
+use paging::{LAddr, PAddr, PAGE_LAYOUT, PAGE_SHIFT, PAGE_SIZE};
+use spin::Mutex;
 use sv_call::{
     ipc::{SIG_READ, SIG_WRITE},
-    EAGAIN,
+    EAGAIN, EBUSY, EFAULT, ENOMEM, EPERM, ERANGE,
 };
 
+use super::PhysTrait;
 use crate::{
     sched::{Arsc, BasicEvent, Event, PREEMPT},
-    syscall::{In, InPtrType, Out, OutPtrType, UserPtr},
+    syscall::{In, Out, UserPtr},
 };
 
+static ZERO_PAGE: Azy<Page> = Azy::new(|| Page::allocate().unwrap());
+
 #[derive(Debug)]
-struct Block {
-    from_allocator: bool,
+struct Page {
     base: PAddr,
-    len: usize,
-    capacity: usize,
+    ptr: NonNull<u8>,
 }
 
-impl Block {
-    unsafe fn new_manual(from_allocator: bool, base: PAddr, len: usize, capacity: usize) -> Block {
-        Block {
-            from_allocator,
+unsafe impl Send for Page {}
+unsafe impl Sync for Page {}
+
+impl Page {
+    fn allocate() -> Option<Page> {
+        let ptr = Global.allocate_zeroed(PAGE_LAYOUT).ok()?;
+        let base = LAddr::from(ptr).to_paddr(minfo::ID_OFFSET);
+        Some(Page {
             base,
-            len,
-            capacity,
-        }
+            ptr: ptr.as_non_null_ptr(),
+        })
     }
 
-    fn allocate(len: usize, zeroed: bool) -> Result<Block, AllocError> {
-        let capacity = len.round_up_bit(PAGE_SHIFT);
-        let layout = unsafe { Layout::from_size_align_unchecked(capacity, PAGE_SIZE) };
-        let memory = if zeroed {
-            Global.allocate_zeroed(layout)
-        } else {
-            Global.allocate(layout)
-        }?;
-        Ok(unsafe {
-            Block::new_manual(
-                true,
-                LAddr::from(memory).to_paddr(minfo::ID_OFFSET),
-                len,
-                capacity,
-            )
-        })
+    fn copy_from(&mut self, addr: PAddr) {
+        let src = addr.to_laddr(minfo::ID_OFFSET);
+        unsafe {
+            let ptr = self.ptr.as_ptr();
+            ptr.copy_from_nonoverlapping(*src, PAGE_SIZE)
+        }
     }
 }
 
-impl Drop for Block {
+impl Drop for Page {
     fn drop(&mut self) {
-        if self.from_allocator {
-            let ptr = unsafe { self.base.to_laddr(minfo::ID_OFFSET).as_non_null_unchecked() };
-            let layout = unsafe { Layout::from_size_align_unchecked(self.capacity, PAGE_SIZE) }
-                .pad_to_align();
-            unsafe { Global.deallocate(ptr, layout) };
-        }
+        unsafe { Global.deallocate(self.ptr, PAGE_LAYOUT) }
     }
 }
 
-#[derive(Debug)]
-struct PhysInner {
-    map: BTreeMap<usize, Block>,
-    len: usize,
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum Error {
+    Alloc,
+    WouldBlock,
+    OutOfRange(usize),
+    Pinned,
+    MaxPinCount,
+    Other(sv_call::Error),
 }
 
-impl PhysInner {
-    fn range(&self, offset: usize, len: usize) -> impl Iterator<Item = (PAddr, usize)> + '_ {
-        let end = offset + len;
-        let first = self.map.range(..offset).next_back();
-        let first = first.and_then(|(&base, block)| {
-            let offset = offset - base;
-            let len = block.len.saturating_sub(offset).min(len);
-            (len > 0).then_some((PAddr::new(*block.base + offset), len))
-        });
-        let next = self
-            .map
-            .range(offset..end)
-            .filter_map(move |(&base, block)| {
-                let len = block.len.min(end.saturating_sub(base));
-                (len > 0).then_some((block.base, len))
-            });
-        first.into_iter().chain(next)
-    }
-
-    fn iter(&self) -> impl Iterator<Item = (PAddr, usize)> + '_ {
-        self.map.values().map(|block| (block.base, block.len))
+impl From<Error> for sv_call::Error {
+    fn from(value: Error) -> Self {
+        match value {
+            Error::Alloc => ENOMEM,
+            Error::WouldBlock => EAGAIN,
+            Error::OutOfRange(_) => ERANGE,
+            Error::Pinned => EBUSY,
+            Error::MaxPinCount => EFAULT,
+            Error::Other(err) => err,
+        }
     }
+}
 
-    fn allocate(len: usize, zeroed: bool) -> Result<Self, AllocError> {
-        let mut map = BTreeMap::new();
+#[derive(Debug)]
+enum Commit {
+    Insert(Page),
+    Ref(PAddr),
+}
 
-        let mut acc = len;
-        let mut offset = 0;
-        while acc > PAGE_SIZE {
-            let part = 1 << (usize::BITS - acc.leading_zeros() - 1);
+#[derive(Debug)]
+enum PageState {
+    ShouldCopy,
+    ShouldMove,
+}
 
-            let new = Block::allocate(part, zeroed)?;
-            map.insert(offset, new);
+#[derive(Debug)]
+struct PageNode {
+    state: PageState,
+    page: Option<Page>,
+    pin_count: usize,
+}
 
-            offset += part;
-            acc -= part;
+impl PageNode {
+    fn new(page: Page) -> Self {
+        PageNode {
+            state: PageState::ShouldCopy,
+            page: Some(page),
+            pin_count: 0,
         }
-        if acc > 0 {
-            let new = Block::allocate(acc, zeroed)?;
-            map.insert(offset, new);
-        }
-        Ok(PhysInner { map, len })
     }
 
-    fn truncate(&mut self, new_len: usize) {
-        self.map.split_off(&new_len);
-        if let Some(mut ent) = self.map.last_entry() {
-            if *ent.key() < new_len && ent.get().len + ent.key() > new_len {
-                ent.get_mut().len = new_len - ent.key();
+    fn get_from_branch(&mut self, write: bool) -> Result<(Commit, bool), Error> {
+        match self.state {
+            PageState::ShouldCopy => {
+                if write {
+                    let mut page = Page::allocate().ok_or(Error::Alloc)?;
+                    let src = self.page.as_ref().expect("the page has been moved");
+                    page.copy_from(src.base);
+                    self.state = PageState::ShouldMove;
+                    Ok((Commit::Insert(page), false))
+                } else {
+                    let page = self.page.as_ref().expect("the page has been moved");
+                    Ok((Commit::Ref(page.base), false))
+                }
+            }
+            PageState::ShouldMove => {
+                let page = self.page.take().expect("the page has been moved");
+                Ok((Commit::Insert(page), true))
             }
         }
-        self.len = new_len;
     }
 
-    fn extend(&mut self, new_len: usize, zeroed: bool) -> Result<(), AllocError> {
-        let start = self.len;
-        let mut len = new_len.saturating_sub(start);
-        if let Some(mut last) = self.map.last_entry() {
-            let delta = (last.get().capacity - last.get().len).min(len);
-            len -= delta;
-            last.get_mut().len += delta;
-            self.len += delta;
+    fn get_from_leaf(&mut self, write: bool) -> Result<PAddr, Error> {
+        if let Some(ref page) = self.page {
+            return Ok(page.base);
         }
-        if len > 0 {
-            let new = Block::allocate(len, zeroed)?;
-            self.map.insert(self.len, new);
-            self.len += len;
-        }
-        Ok(())
-    }
 
-    fn resize(&mut self, new_len: usize, zeroed: bool) -> Result<(), AllocError> {
-        if self.len < new_len {
-            self.extend(new_len, zeroed)
+        Ok(if write {
+            let page = Page::allocate().ok_or(Error::Alloc)?;
+            let base = page.base;
+            self.page = Some(page);
+            base
         } else {
-            self.truncate(new_len);
-            Ok(())
-        }
+            ZERO_PAGE.base
+        })
     }
 }
 
-#[derive(Debug, Clone)]
-pub struct Static {
-    offset: usize,
-    len: usize,
-    inner: Arsc<PhysInner>,
-}
-
-impl Static {
-    pub fn allocate(len: usize, zeroed: bool) -> Result<Self, AllocError> {
-        PhysInner::allocate(len, zeroed).and_then(|inner| {
-            Ok(Static {
-                offset: 0,
-                len: inner.len,
-                inner: Arsc::try_new(inner)?,
-            })
-        })
-    }
+#[derive(Debug)]
+struct PageList {
+    branch: bool,
 
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.len
-    }
+    parent: Option<Arsc<Phys>>,
+    parent_start: usize,
+    parent_end: usize,
 
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
+    pages: BTreeMap<usize, PageNode>,
+    count: usize,
+    pin_count: usize,
+}
 
-    #[inline]
-    pub fn pin(this: Self) -> PinnedStatic {
-        PinnedStatic(this)
-    }
+#[derive(Debug)]
+pub struct Phys {
+    event: Arc<BasicEvent>,
+    len: AtomicUsize,
+    list: Mutex<PageList>,
+}
 
-    pub fn create_sub(&self, offset: usize, len: usize, copy: bool) -> sv_call::Result<Self> {
-        if offset.contains_bit(PAGE_SHIFT) || len.contains_bit(PAGE_SHIFT) {
-            return Err(sv_call::EALIGN);
+impl PageList {
+    fn commit_impl(&mut self, index: usize, write: bool) -> Result<Commit, Error> {
+        if index >= self.count {
+            return Err(Error::OutOfRange(index));
         }
-        let cloned = Arsc::clone(&self.inner);
-
-        let new_offset = self.offset.wrapping_add(offset);
-        let end = new_offset.wrapping_add(len);
-        if self.offset <= new_offset && new_offset < end && end <= self.offset + self.len() {
-            if copy {
-                let child = Self::allocate(len, false)?;
-
-                let (dst, _) = child.inner.iter().next().expect("Inconsistent map");
-                let mut dst = *dst.to_laddr(minfo::ID_OFFSET);
-
-                for (src, sl) in self.inner.range(new_offset, len) {
-                    let src = *src.to_laddr(minfo::ID_OFFSET);
-                    unsafe {
-                        dst.copy_from_nonoverlapping(src, sl);
-                        dst = dst.add(sl);
-                    }
-                }
 
-                Ok(child)
-            } else {
-                Ok(Static {
-                    offset: new_offset,
-                    len,
-                    inner: cloned,
+        let ent = match self.pages.entry(index) {
+            Entry::Vacant(ent) => ent,
+            Entry::Occupied(mut ent) => {
+                return Ok(if self.branch {
+                    let (ret, should_remove) = ent.get_mut().get_from_branch(write)?;
+                    if should_remove {
+                        ent.remove();
+                    }
+                    ret
+                } else {
+                    Commit::Ref(ent.get_mut().get_from_leaf(write)?)
                 })
             }
-        } else {
-            Err(sv_call::ERANGE)
+        };
+
+        if let Some(parent) = self.parent.clone() {
+            let mut list = parent.list.try_lock().ok_or(Error::WouldBlock)?;
+            let parent_index = self.parent_start + index;
+            if parent_index < self.parent_end {
+                return match list.commit_impl(parent_index, write) {
+                    Ok(Commit::Ref(base)) => Ok(Commit::Ref(base)),
+                    Ok(Commit::Insert(page)) => {
+                        let base = page.base;
+                        ent.insert(PageNode::new(page));
+                        Ok(Commit::Ref(base))
+                    }
+                    Err(err) => Err(err),
+                };
+            }
         }
+
+        if !write {
+            return Ok(Commit::Ref(ZERO_PAGE.base));
+        }
+
+        let page = Page::allocate().ok_or(Error::Alloc)?;
+        Ok(if self.branch {
+            Commit::Insert(page)
+        } else {
+            let base = page.base;
+            ent.insert(PageNode::new(page));
+
+            Commit::Ref(base)
+        })
     }
 
-    pub fn read(&self, offset: usize, len: usize, buffer: UserPtr<Out>) -> sv_call::Result<usize> {
-        let mut buffer = buffer;
-        let offset = self.len.min(offset);
-        let len = self.len.saturating_sub(offset).min(len);
+    fn commit(&mut self, index: usize, write: bool) -> Result<PAddr, Error> {
+        assert!(!self.branch);
+        match self.commit_impl(index, write) {
+            Ok(Commit::Ref(base)) => Ok(base),
+            Ok(Commit::Insert(_)) => unreachable!(),
+            Err(err) => Err(err),
+        }
+    }
 
-        for (base, len) in self.inner.range(self.offset + offset, len) {
-            let src = *base.to_laddr(minfo::ID_OFFSET);
-            unsafe {
-                let src = slice::from_raw_parts(src, len);
-                buffer.write_slice(src)?;
-                buffer = UserPtr::new(buffer.as_ptr().add(len));
+    fn decommit(&mut self, index: usize) -> Result<(), Error> {
+        if let Entry::Occupied(mut ent) = self.pages.entry(index) {
+            if ent.get().pin_count > 0 {
+                return Err(Error::Pinned);
+            }
+            if self.parent.is_some() {
+                // Avoid getting an unowned copy from the parent again.
+                ent.get_mut().page = None;
+            } else {
+                ent.remove();
             }
         }
-        Ok(len)
+        Ok(())
     }
 
-    pub fn write(&self, offset: usize, len: usize, buffer: UserPtr<In>) -> sv_call::Result<usize> {
-        let mut buffer = buffer;
-        let offset = self.len.min(offset);
-        let len = self.len.saturating_sub(offset).min(len);
+    fn create_sub(&mut self, offset: usize, len: usize) -> Result<Phys, Error> {
+        if self.pin_count > 0 {
+            return Err(Error::Pinned);
+        }
+        let start = offset >> PAGE_SHIFT;
+        let end = (offset + len).div_ceil_bit(PAGE_SHIFT);
 
-        for (base, len) in self.inner.range(self.offset + offset, len) {
-            let dst = *base.to_laddr(minfo::ID_OFFSET);
+        let branch = {
+            let mut branch = Arsc::try_new_uninit().map_err(|_| Error::Alloc)?;
             unsafe {
-                buffer.read_slice(dst, len)?;
-                buffer = UserPtr::new(buffer.as_ptr().add(len));
+                let uninit = Arsc::get_mut(&mut branch).unwrap();
+                uninit.write(Phys {
+                    event: BasicEvent::new(0),
+                    len: AtomicUsize::new(0),
+                    list: Mutex::new(PageList {
+                        branch: true,
+                        parent: self.parent.clone(),
+                        parent_start: self.parent_start,
+                        parent_end: self.parent_end,
+                        pages: mem::take(&mut self.pages),
+                        count: self.count,
+                        pin_count: self.pin_count,
+                    }),
+                });
+                Arsc::assume_init(branch)
             }
+        };
+
+        let sub = Phys {
+            event: BasicEvent::new(0),
+            len: AtomicUsize::new(len),
+            list: Mutex::new(PageList {
+                branch: false,
+                parent: Some(branch.clone()),
+                parent_start: start,
+                parent_end: end,
+                pages: BTreeMap::new(),
+                count: end - start,
+                pin_count: 0,
+            }),
+        };
+
+        self.parent = Some(branch);
+        self.parent_start = 0;
+        self.parent_end = self.count;
+
+        Ok(sub)
+    }
+
+    fn pin_impl(&mut self, index: usize, write: bool) -> Result<(), Error> {
+        assert!(index < self.count, "Out of range");
+        if let Some(node) = self.pages.get_mut(&index) {
+            if node.pin_count >= isize::MAX as usize || self.pin_count >= isize::MAX as usize {
+                return Err(Error::MaxPinCount);
+            }
+            node.pin_count += 1;
+            self.pin_count += 1;
+        } else if write {
+            let parent = self.parent.clone().expect("Uncommitted page");
+            let parent_index = self.parent_start + index;
+            assert!(parent_index < self.parent_end, "Out of range");
+            let mut list = parent.list.try_lock().ok_or(Error::WouldBlock)?;
+            list.pin_impl(parent_index, write)?
         }
-        Ok(len)
+        Ok(())
     }
 
-    pub fn read_vectored<T: OutPtrType>(
-        &self,
-        mut offset: usize,
-        bufs: &[(UserPtr<T>, usize)],
-    ) -> sv_call::Result<usize> {
-        let mut read_len = 0;
-
-        for buf in bufs {
-            let actual_offset = self.len.min(offset);
-            let len = self.len.saturating_sub(actual_offset).min(buf.1);
-
-            let mut buffer = buf.0.out();
-            for (base, len) in self.inner.range(self.offset + actual_offset, len) {
-                let src = *base.to_laddr(minfo::ID_OFFSET);
-                unsafe {
-                    let src = slice::from_raw_parts(src, len);
-                    buffer.write_slice(src)?;
-                    buffer = UserPtr::new(buffer.as_ptr().add(len));
+    fn pin(&mut self, start: usize, end: usize, write: bool) -> Result<Vec<(PAddr, usize)>, Error> {
+        let bases = (start..end)
+            .map(|index| self.commit(index, write).map(|base| (base, PAGE_SIZE)))
+            .collect::<Result<Vec<_>, _>>()?;
+        for index in start..end {
+            if let Err(err) = self.pin_impl(index, write) {
+                for index in start..index {
+                    self.unpin_impl(index);
                 }
-            }
-            read_len += len;
-            offset += len;
-            if len < buf.1 {
-                break;
+                return Err(err);
             }
         }
-
-        Ok(read_len)
+        Ok(bases)
     }
 
-    pub fn write_vectored<T: InPtrType>(
-        &self,
-        mut offset: usize,
-        bufs: &[(UserPtr<T>, usize)],
-    ) -> sv_call::Result<usize> {
-        let mut written_len = 0;
+    fn unpin_impl(&mut self, index: usize) {
+        assert!(index < self.count, "Out of range");
+        if let Some(node) = self.pages.get_mut(&index) {
+            node.pin_count = node.pin_count.saturating_sub(1);
+            self.pin_count = self.pin_count.saturating_sub(1);
+        }
+    }
 
-        for buf in bufs {
-            let actual_offset = self.len.min(offset);
-            let len = self.len.saturating_sub(actual_offset).min(buf.1);
+    fn unpin(&mut self, start: usize, end: usize) {
+        for index in start..end {
+            self.unpin_impl(index)
+        }
+    }
 
-            let mut buffer = buf.0.r#in();
-            for (base, len) in self.inner.range(self.offset + actual_offset, len) {
-                let dst = *base.to_laddr(minfo::ID_OFFSET);
-                unsafe {
-                    buffer.read_slice(dst, len)?;
-                    buffer = UserPtr::new(buffer.as_ptr().add(len));
-                }
-            }
-            written_len += len;
-            offset += len;
-            if len < buf.1 {
-                break;
+    fn resize(&mut self, new_count: usize) -> Result<(), Error> {
+        if self.pin_count > 0 {
+            return Err(Error::Pinned);
+        }
+        if new_count < self.count {
+            for index in new_count..self.count {
+                let _ = self.decommit(index);
             }
         }
+        self.count = new_count;
 
-        Ok(written_len)
+        Ok(())
     }
 }
 
-impl PartialEq for Static {
-    fn eq(&self, other: &Self) -> bool {
-        self.offset == other.offset
-            && self.len == other.len
-            && Arsc::ptr_eq(&self.inner, &other.inner)
+impl Phys {
+    pub fn new(len: usize) -> Self {
+        Phys {
+            event: BasicEvent::new(0),
+            len: AtomicUsize::new(len),
+            list: Mutex::new(PageList {
+                branch: false,
+                parent: None,
+                parent_start: 0,
+                parent_end: 0,
+                pages: BTreeMap::new(),
+                count: len.div_ceil_bit(PAGE_SHIFT),
+                pin_count: 0,
+            }),
+        }
     }
-}
 
-#[derive(Debug)]
-pub struct PinnedStatic(Static);
+    pub fn read(&self, pos: usize, len: usize, buffer: UserPtr<Out>) -> Result<usize, Error> {
+        let self_len = self.len.load(SeqCst);
+        let pos = pos.min(self_len);
+        let len = (self_len - pos).min(len);
 
-impl PinnedStatic {
-    #[inline]
-    pub fn map_iter(&self, offset: usize, len: usize) -> impl Iterator<Item = (PAddr, usize)> + '_ {
-        self.0.inner.range(self.0.offset + offset, len)
+        let mut list = self.list.try_lock().ok_or(Error::WouldBlock)?;
+        let mut read_len = 0;
+
+        let start = pos >> PAGE_SHIFT;
+        let end = (pos + len).div_ceil_bit(PAGE_SHIFT);
+        let mut pos_in_page = pos - (start << PAGE_SHIFT);
+        for base in (start..end).map(|index| list.commit(index, false)) {
+            match base {
+                Ok(base) => unsafe {
+                    let src = base.to_laddr(minfo::ID_OFFSET);
+                    let src = LAddr::from(src.val() + pos_in_page);
+                    let len = (len - read_len).min(PAGE_SIZE - pos_in_page);
+
+                    let buffer = UserPtr::<Out>::new(buffer.as_ptr().add(read_len));
+                    let src = slice::from_raw_parts(*src, len);
+                    buffer.write_slice(src).map_err(Error::Other)?;
+
+                    read_len += len;
+                    pos_in_page = 0;
+                },
+                Err(err) => log::warn!("read error: {err:?}"),
+            }
+        }
+        Ok(read_len)
     }
-}
 
-#[derive(Debug, Clone)]
-pub struct Dynamic {
-    inner: Arsc<RwLock<PhysInner>>,
-    event: Arc<BasicEvent>,
-}
+    pub fn write(&self, pos: usize, len: usize, buffer: UserPtr<In>) -> Result<usize, Error> {
+        let self_len = self.len.load(SeqCst);
+        let pos = pos.min(self_len);
+        let len = (self_len - pos).min(len);
 
-impl Dynamic {
-    pub fn allocate(len: usize, zeroed: bool) -> Result<Self, AllocError> {
-        PhysInner::allocate(len, zeroed).and_then(|inner| {
-            Ok(Dynamic {
-                inner: Arsc::try_new(RwLock::new(inner))?,
-                event: BasicEvent::new(0),
-            })
-        })
-    }
+        let mut list = self.list.try_lock().ok_or(Error::WouldBlock)?;
+        let mut written_len = 0;
 
-    pub fn event(&self) -> Weak<dyn Event> {
-        Arc::downgrade(&self.event) as _
+        let start = pos >> PAGE_SHIFT;
+        let end = (pos + len).div_ceil_bit(PAGE_SHIFT);
+        let mut pos_in_page = pos - (start << PAGE_SHIFT);
+        for base in (start..end).map(|index| list.commit(index, true)) {
+            match base {
+                Ok(base) => unsafe {
+                    let src = base.to_laddr(minfo::ID_OFFSET);
+                    let src = LAddr::from(src.val() + pos_in_page);
+                    let len = (len - written_len).min(PAGE_SIZE - pos_in_page);
+
+                    let buffer = UserPtr::<In>::new(buffer.as_ptr().add(written_len));
+                    buffer.read_slice(*src, len).map_err(Error::Other)?;
+
+                    written_len += len;
+                    pos_in_page = 0;
+                },
+                Err(err) => log::warn!("read error: {err:?}"),
+            }
+        }
+        Ok(written_len)
     }
 
-    #[inline]
-    pub fn len(&self) -> usize {
-        // FIXME: For now, just let this slip.
-        unsafe { (*self.inner.as_mut_ptr()).len }
+    // pub fn commit(&self, start: usize, end: usize, write: bool) -> Result<(), Error> {
+    //     let mut list = self.list.try_lock().ok_or(Error::WouldBlock)?;
+    //     (start..end).try_for_each(|index| list.commit(index, write).map(drop))
+    // }
+
+    // pub fn decommit(&self, start: usize, end: usize) -> Result<(), Error> {
+    //     let mut list = self.list.try_lock().ok_or(Error::WouldBlock)?;
+    //     (start..end).for_each(|index| {
+    //         if let Err(err) = list.decommit(index) {
+    //             log::warn!("decommit error: {err:?}")
+    //         }
+    //     });
+    //     Ok(())
+    // }
+
+    pub fn create_sub(&self, offset: usize, len: usize) -> Result<Phys, Error> {
+        self.list
+            .try_lock()
+            .ok_or(Error::WouldBlock)?
+            .create_sub(offset, len)
+    }
+
+    pub fn resize(&self, new_len: usize) -> Result<(), Error> {
+        let new_count = new_len.div_ceil_bit(PAGE_SHIFT);
+        self.list
+            .try_lock()
+            .ok_or(Error::WouldBlock)?
+            .resize(new_count)?;
+        self.len.store(new_len, SeqCst);
+        Ok(())
     }
+}
 
+impl PhysTrait for Phys {
     #[inline]
-    pub fn pin(this: Self) -> sv_call::Result<PinnedDynamic> {
-        mem::forget(this.inner.try_read().ok_or(EAGAIN)?);
-        Ok(PinnedDynamic(this))
+    fn event(&self) -> Weak<dyn Event> {
+        Arc::downgrade(&self.event) as _
     }
 
-    fn notify_read(&self) {
-        if self.inner.reader_count() > 0 {
-            self.event.notify(0, SIG_READ);
-        } else {
-            self.event.notify(0, SIG_READ | SIG_WRITE);
-        }
+    #[inline]
+    fn len(&self) -> usize {
+        self.len.load(SeqCst)
     }
 
-    fn notify_write(&self) {
+    #[inline]
+    fn pin(&self, offset: usize, len: usize, write: bool) -> sv_call::Result<Vec<(PAddr, usize)>> {
+        let start = offset >> PAGE_SHIFT;
+        let end = (offset + len).div_ceil_bit(PAGE_SHIFT);
+        let ret = PREEMPT.scope(|| self.list.lock().pin(start, end, write))?;
         self.event.notify(0, SIG_READ | SIG_WRITE);
+        Ok(ret)
     }
 
-    pub fn resize(&self, new_len: usize, zeroed: bool) -> sv_call::Result {
-        PREEMPT.scope(|| {
-            let mut this = self.inner.try_write().ok_or(EAGAIN)?;
-            this.resize(new_len, zeroed)?;
-            Ok::<_, sv_call::Error>(())
-        })?;
-        self.notify_write();
-        Ok(())
-    }
-
-    pub fn read(&self, offset: usize, len: usize, buffer: UserPtr<Out>) -> sv_call::Result<usize> {
-        let mut buffer = buffer;
-        let len = PREEMPT.scope(|| {
-            let this = self.inner.try_read().ok_or(EAGAIN)?;
-
-            let offset = this.len.min(offset);
-            let len = this.len.saturating_sub(offset).min(len);
-
-            for (base, len) in this.range(offset, len) {
-                let src = *base.to_laddr(minfo::ID_OFFSET);
-                unsafe {
-                    let src = slice::from_raw_parts(src, len);
-                    buffer.write_slice(src)?;
-                    buffer = UserPtr::new(buffer.as_ptr().add(len));
-                }
-            }
-            Ok::<_, sv_call::Error>(len)
-        })?;
-        self.notify_read();
-        Ok(len)
-    }
-
-    pub fn write(&self, offset: usize, len: usize, buffer: UserPtr<In>) -> sv_call::Result<usize> {
-        let mut buffer = buffer;
-        let len = PREEMPT.scope(|| {
-            let this = self.inner.try_write().ok_or(EAGAIN)?;
-
-            let offset = this.len.min(offset);
-            let len = this.len.saturating_sub(offset).min(len);
-
-            for (base, len) in this.range(offset, len) {
-                let dst = *base.to_laddr(minfo::ID_OFFSET);
-                unsafe {
-                    buffer.read_slice(dst, len)?;
-                    buffer = UserPtr::new(buffer.as_ptr().add(len));
-                }
-            }
-            Ok::<_, sv_call::Error>(len)
-        })?;
-        self.notify_write();
-        Ok(len)
+    #[inline]
+    fn unpin(&self, offset: usize, len: usize) {
+        let start = offset >> PAGE_SHIFT;
+        let end = (offset + len).div_ceil_bit(PAGE_SHIFT);
+        PREEMPT.scope(|| self.list.lock().unpin(start, end));
+        self.event.notify(0, SIG_READ | SIG_WRITE);
     }
 
-    pub fn read_vectored<T: OutPtrType>(
+    fn create_sub(
         &self,
-        mut offset: usize,
-        bufs: &[(UserPtr<T>, usize)],
-    ) -> sv_call::Result<usize> {
-        let mut read_len = 0;
-        PREEMPT.scope(|| {
-            let this = self.inner.try_read().ok_or(EAGAIN)?;
-            let self_len = this.len;
-            for buf in bufs {
-                let actual_offset = self_len.min(offset);
-                let len = self_len.saturating_sub(actual_offset).min(buf.1);
-
-                let mut buffer = buf.0.out();
-                for (base, len) in this.range(actual_offset, len) {
-                    let src = *base.to_laddr(minfo::ID_OFFSET);
-                    unsafe {
-                        let src = slice::from_raw_parts(src, len);
-                        buffer.write_slice(src)?;
-                        buffer = UserPtr::new(buffer.as_ptr().add(len));
-                    }
-                }
-                read_len += len;
-                offset += len;
-                if len < buf.1 {
-                    break;
-                }
-            }
-            Ok::<_, sv_call::Error>(())
-        })?;
-        self.notify_read();
-        Ok(read_len)
+        offset: usize,
+        len: usize,
+        copy: bool,
+    ) -> sv_call::Result<Arc<super::Phys>> {
+        if !copy {
+            return Err(EPERM);
+        }
+        let mut ret = Arc::try_new_uninit()?;
+        let sub = Arc::get_mut(&mut ret).unwrap();
+        let value = self.create_sub(offset, len)?;
+        self.event.notify(0, SIG_READ | SIG_WRITE);
+        sub.write(value.into());
+        Ok(unsafe { ret.assume_init() })
     }
 
-    pub fn write_vectored<T: InPtrType>(
-        &self,
-        mut offset: usize,
-        bufs: &[(UserPtr<T>, usize)],
-    ) -> sv_call::Result<usize> {
-        let mut written_len = 0;
-        PREEMPT.scope(|| {
-            let this = self.inner.try_write().ok_or(EAGAIN)?;
-            let self_len = this.len;
-            for buf in bufs {
-                let actual_offset = self_len.min(offset);
-                let len = self_len.saturating_sub(actual_offset).min(buf.1);
-
-                let mut buffer = buf.0.r#in();
-                for (base, len) in this.range(actual_offset, len) {
-                    let dst = *base.to_laddr(minfo::ID_OFFSET);
-                    unsafe {
-                        buffer.read_slice(dst, len)?;
-                        buffer = UserPtr::new(buffer.as_ptr().add(len));
-                    }
-                }
-                written_len += len;
-                offset += len;
-                if len < buf.1 {
-                    break;
-                }
-            }
-            Ok::<_, sv_call::Error>(())
-        })?;
-        self.notify_write();
-        Ok(written_len)
+    #[inline]
+    fn base(&self) -> PAddr {
+        unimplemented!("Extensible phys have multiple bases")
     }
-}
 
-impl PartialEq for Dynamic {
     #[inline]
-    fn eq(&self, other: &Self) -> bool {
-        Arsc::ptr_eq(&self.inner, &other.inner)
+    fn resize(&self, new_len: usize, _: bool) -> sv_call::Result {
+        self.resize(new_len)?;
+        self.event.notify(0, SIG_READ | SIG_WRITE);
+        Ok(())
     }
-}
-
-#[derive(Debug)]
-pub struct PinnedDynamic(Dynamic);
 
-impl PinnedDynamic {
-    pub fn map_iter(&self, offset: usize, len: usize) -> impl Iterator<Item = (PAddr, usize)> + '_ {
-        assert!(self.0.inner.writer_count() == 0 && self.0.inner.reader_count() > 0);
+    #[inline]
+    fn read(&self, offset: usize, len: usize, buffer: UserPtr<Out>) -> sv_call::Result<usize> {
+        let ret = self.read(offset, len, buffer)?;
+        self.event.notify(0, SIG_READ | SIG_WRITE);
+        Ok(ret)
+    }
 
-        unsafe {
-            let ptr = self.0.inner.as_mut_ptr();
-            (*ptr).range(offset, len)
-        }
+    #[inline]
+    fn write(&self, offset: usize, len: usize, buffer: UserPtr<In>) -> sv_call::Result<usize> {
+        let ret = self.write(offset, len, buffer)?;
+        self.event.notify(0, SIG_READ | SIG_WRITE);
+        Ok(ret)
     }
 }
 
-impl Drop for PinnedDynamic {
-    fn drop(&mut self) {
-        assert!(self.0.inner.reader_count() > 0);
-        unsafe { self.0.inner.force_read_decrement() }
+impl PartialEq for Phys {
+    fn eq(&self, other: &Self) -> bool {
+        Arc::ptr_eq(&self.event, &other.event)
     }
 }
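
The page list implements copy-on-write: reads of uncommitted pages resolve to the shared `ZERO_PAGE`, the first write commits a private zeroed page, and pages inherited through a `create_sub` branch are copied (`ShouldCopy`) or handed over (`ShouldMove`) on their first write access. A rough sketch of the observable behavior (hypothetical helper; user buffers supplied by the caller):

    fn cow_demo(phys: &Phys, src: UserPtr<In>, dst: UserPtr<Out>) -> Result<usize, Error> {
        // The first write commits a private zeroed page (commit with
        // write = true) and copies the user data in; before it, reads of
        // this page would have been served from the shared ZERO_PAGE.
        phys.write(0, PAGE_SIZE, src)?;
        // This read now hits the committed page instead of ZERO_PAGE.
        phys.read(0, PAGE_SIZE, dst)
    }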

+ 18 - 25
h2o/kernel/src/mem/space/virt.rs

@@ -9,24 +9,27 @@ use paging::{LAddr, PAGE_SHIFT, PAGE_SIZE};
 use spin::Mutex;
 use sv_call::{error::*, mem::Flags, Feature, Result};
 
-use super::{paging_error, ty_to_range, Phys, PinnedPhys, Space};
-use crate::sched::{
-    task,
-    task::{hdl::DefaultFeature, VDSO},
-    PREEMPT,
+use super::{paging_error, ty_to_range, Phys, Space};
+use crate::{
+    mem::space::PhysTrait,
+    sched::{
+        task,
+        task::{hdl::DefaultFeature, VDSO},
+        PREEMPT,
+    },
 };
 
 #[derive(Debug)]
 pub(super) enum Child {
     Virt(Arc<Virt>),
-    Phys(PinnedPhys, Flags, usize),
+    Phys(Arc<Phys>, Flags, usize, usize),
 }
 
 impl Child {
     fn len(&self) -> usize {
         match self {
             Child::Virt(virt) => virt.len(),
-            Child::Phys(_, _, len) => *len,
+            Child::Phys(.., len) => *len,
         }
     }
 
@@ -97,17 +100,8 @@ impl Virt {
     }
 
     pub fn destroy(&self) -> Result {
-        if let Some(space) = self.space.upgrade() {
-            let _pree = PREEMPT.lock();
-            let vdso = *space.vdso.lock();
-            let children = self.children.lock();
-
-            if { children.iter() }.any(|(&base, child)| !check_vdso(vdso, base, child.end(base))) {
-                return Err(EACCES);
-            }
-        }
         if let Some(parent) = self.parent.upgrade() {
-            let _ = PREEMPT.scope(|| parent.children.lock().remove(&self.range.start));
+            let _ = parent.unmap(self.range.start, self.len(), true);
         }
         Ok(())
     }
@@ -115,7 +109,7 @@ impl Virt {
     pub fn map(
         &self,
         offset: Option<usize>,
-        phys: Phys,
+        phys: Arc<Phys>,
         phys_offset: usize,
         layout: Layout,
         flags: Flags,
@@ -136,12 +130,10 @@ impl Virt {
             return Err(EALIGN);
         }
         let phys_end = phys_offset.wrapping_add(layout.size());
-        if !(phys_offset < phys_end && phys_end <= phys.len()) {
+        if !(phys_offset < phys_end && phys_end <= phys.len().round_up_bit(PAGE_SHIFT)) {
             return Err(ERANGE);
         }
 
-        let phys = Phys::pin(phys)?;
-
         let _pree = PREEMPT.lock();
         let mut children = self.children.lock();
         let space = self.space.upgrade().ok_or(EKILLED)?;
@@ -159,7 +151,7 @@ impl Virt {
 
         {
             let mut end = base;
-            let phys = phys.map_iter(phys_offset, virt.end.val() - base.val());
+            let phys = phys.pin(phys_offset, layout.size(), flags.contains(Flags::WRITABLE))?;
             for (phys_base, len) in phys {
                 let next = LAddr::from(end.val() + len);
                 let virt = end..next;
@@ -174,7 +166,7 @@ impl Virt {
             assert!(end == virt.end);
         }
 
-        let _ = children.insert(base, Child::Phys(phys, flags, layout.size()));
+        let _ = children.insert(base, Child::Phys(phys, flags, phys_offset, layout.size()));
 
         if set_vdso {
             *space.vdso.lock() = Some(base);
@@ -208,7 +200,7 @@ impl Virt {
             }
             match child {
                 Child::Virt(_) => return Err(EINVAL),
-                Child::Phys(_, f, _) if flags.intersects(!*f) => {
+                Child::Phys(_, f, ..) if flags.intersects(!*f) => {
                     return Err(EPERM);
                 }
                 _ => {}
@@ -262,7 +254,8 @@ impl Virt {
         let mut ret = Ok(None);
         for (base, child) in mid {
             let end = child.end(base);
-            if let Child::Phys(..) = child {
+            if let Child::Phys(phys, _, offset, len) = child {
+                phys.unpin(offset, len);
                 let r = space.arch.unmaps(base..end);
                 ret = ret.and(r.map_err(paging_error));
             }
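
`map` now pins exactly the mapped window and `Child::Phys` records the pinned `(offset, len)` pair, so `unmap` can release the same pages before tearing down the hardware mappings. Schematically (hypothetical caller; signatures as in the hunks above):

    fn map_then_unmap(virt: &Virt, phys: Arc<Phys>, layout: Layout, flags: Flags) -> Result<()> {
        // Pins [0, layout.size()) of `phys` and stores the range in Child::Phys.
        let base = virt.map(None, phys, 0, layout, flags)?;
        // Unpins the recorded range, then unmaps the virtual window.
        virt.unmap(base, layout.size(), false)?;
        Ok(())
    }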

+ 26 - 7
h2o/kernel/src/mem/space/x86_64/mod.rs

@@ -8,7 +8,8 @@ use core::{alloc::Allocator, ops::Range};
 
 use archop::Azy;
 use canary::Canary;
-use paging::{LAddr, PAddr, Table};
+use minfo::KERNEL_ALLOCABLE_RANGE;
+use paging::{Attr, LAddr, Level, PAddr, Table};
 use spin::Mutex;
 
 use super::Flags;
@@ -23,6 +24,29 @@ static KERNEL_ROOT: Azy<(Box<Table>, u64)> = Azy::new(|| {
     let init_table = unsafe { core::slice::from_raw_parts(cr3_laddr.cast(), paging::NR_ENTRIES) };
     table.copy_from_slice(init_table);
 
+    // Allocate the top-level table for `KERNEL_ALLOCABLE_RANGE`.
+    //
+    // The `KERNEL_ROOT` is initialized before the creation of the first memory
+    // space (a.k.a. `KRL`), so before it handles any allocation for `Virt`s,
+    // `KERNEL_ROOT` doesn't have any page tables of the range for allocation.
+    // Hence, without pre-allocating the stub, newly created memory spaces
+    // would not inherit (or share) the page tables of that range, leading to
+    // memory inconsistency.
+    //
+    // As long as the static kernel objects are not deallocated, this chunk of
+    // memory will not be recycled and the inconsistency can never arise; since
+    // they are static, that condition always holds.
+    //
+    // See `KRL` and `self::Space::new` for more information.
+    {
+        let allocable_page = Box::leak(Box::new(Table::zeroed()));
+        let addr = LAddr::from(allocable_page as *mut _).to_paddr(minfo::ID_OFFSET);
+        let ent = paging::Entry::new(addr, Attr::KERNEL_RW, Level::Pt);
+
+        let index = Level::P4.addr_idx(KERNEL_ALLOCABLE_RANGE.start.into(), false);
+        table[index] = ent;
+    }
+
     (table, cr3)
 });
 
@@ -199,12 +223,7 @@ bitflags::bitflags! {
 }
 
 impl ErrCode {
-    const FMT: &'static str = "P WR US RSVD ID PK SS - - - - - - - - SGX";
-
-    #[inline]
-    pub fn display(errc: u64) -> crate::log::flags::Flags {
-        crate::log::flags::Flags::new(errc, Self::FMT)
-    }
+    pub const FMT: &'static str = "P WR US RSVD ID PK SS - - - - - - - - SGX";
 }
 
 pub unsafe fn page_fault(frame: &mut Frame, errc: u64) -> bool {
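
The pre-allocated entry matters because each new space only copies the top-level table: copying the P4 entry shares the lower-level table itself, so kernel mappings installed later through any space stay visible in all of them. A sketch of that argument (hypothetical helper, assuming `Table` exposes its entries as a slice, as the `copy_from_slice` above suggests):

    fn new_space_root(kernel_root: &Table) -> Box<Table> {
        let mut root = Box::new(Table::zeroed());
        // Copies the P4 *entries*; the table pre-allocated for
        // KERNEL_ALLOCABLE_RANGE is shared rather than duplicated, so every
        // space observes kernel Virt allocations made after this point.
        root.copy_from_slice(kernel_root);
        root
    }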

+ 51 - 46
h2o/kernel/src/mem/syscall.rs

@@ -14,8 +14,12 @@ use sv_call::{
 use super::space;
 use crate::{
     dev::Resource,
+    mem::space::PhysTrait,
     sched::{
-        task::{hdl::DefaultFeature, Space as TaskSpace, VDSO},
+        task::{
+            hdl::{DefaultFeature, Ref},
+            Space as TaskSpace, VDSO,
+        },
         PREEMPT, SCHED,
     },
     syscall::{In, InOut, Out, PtrType, UserPtr},
@@ -44,10 +48,10 @@ fn features_to_flags(feat: Feature) -> Flags {
 
 #[syscall]
 fn phys_alloc(size: usize, options: PhysOptions) -> Result<Handle> {
-    let phys = PREEMPT.scope(|| space::Phys::allocate(size, options, false))?;
+    let phys = PREEMPT.scope(|| space::allocate_phys(size, options, false))?;
     SCHED.with_current(|cur| {
         let event = phys.event();
-        cur.space().handles().insert(phys, Some(event))
+        cur.space().handles().insert_raw(phys, Some(event))
     })
 }
 
@@ -62,7 +66,7 @@ fn phys_size(hdl: Handle) -> Result<usize> {
     })
 }
 
-fn phys_check(hdl: Handle, offset: usize, len: usize) -> Result<(Feature, space::Phys)> {
+fn phys_check(hdl: Handle, offset: usize, len: usize) -> Result<(Feature, Arc<space::Phys>)> {
     hdl.check_null()?;
     let offset_end = offset.wrapping_add(len);
     if offset_end < offset {
@@ -72,7 +76,7 @@ fn phys_check(hdl: Handle, offset: usize, len: usize) -> Result<(Feature, space:
         cur.space()
             .handles()
             .get::<space::Phys>(hdl)
-            .map(|obj| (obj.features(), space::Phys::clone(&obj)))
+            .map(|obj| (obj.features(), Arc::clone(&obj)))
     })?;
     if phys == VDSO.1 {
         return Err(EACCES);
@@ -119,14 +123,14 @@ fn check_physv<T: PtrType>(
     hdl: Handle,
     bufs: UserPtr<In, IoVec>,
     count: usize,
-) -> Result<(Feature, space::Phys, Vec<(UserPtr<T>, usize)>)> {
+) -> Result<(Feature, Arc<space::Phys>, Vec<(UserPtr<T>, usize)>)> {
     hdl.check_null()?;
     bufs.check_slice(count)?;
     let (feat, phys) = SCHED.with_current(|cur| {
         cur.space()
             .handles()
             .get::<space::Phys>(hdl)
-            .map(|obj| (obj.features(), space::Phys::clone(&obj)))
+            .map(|obj| (obj.features(), Arc::clone(&obj)))
     })?;
     let bufs = {
         let mut vec = Vec::<(UserPtr<T>, usize)>::with_capacity(count);
@@ -175,9 +179,9 @@ fn phys_sub(hdl: Handle, offset: usize, len: usize, copy: bool) -> Result<Handle
         let handles = cur.space().handles();
         let event = sub.event();
         if copy {
-            handles.insert(sub, Some(event))
+            handles.insert_raw(sub, Some(event))
         } else {
-            unsafe { handles.insert_unchecked(sub, feat, Some(event)) }
+            unsafe { handles.insert_raw_unchecked(sub, feat, Some(event)) }
         }
     })
 }
@@ -191,7 +195,7 @@ fn phys_resize(hdl: Handle, new_len: usize, zeroed: bool) -> Result {
         cur.space()
             .handles()
             .get::<space::Phys>(hdl)
-            .map(|obj| (obj.features(), space::Phys::clone(&obj)))
+            .map(|obj| (obj.features(), Arc::clone(&obj)))
     })?;
     if phys == VDSO.1 {
         return Err(EACCES);
@@ -206,17 +210,17 @@ fn phys_resize(hdl: Handle, new_len: usize, zeroed: bool) -> Result {
 fn space_new(root_virt: UserPtr<Out, Handle>) -> Result<Handle> {
     root_virt.check()?;
     SCHED.with_current(|cur| {
-        let space = TaskSpace::new(cur.tid().ty())?;
+        let space = TaskSpace::new()?;
         let virt = Arc::downgrade(space.mem().root());
         let ret = cur.space().handles().insert_raw(space, None)?;
-        unsafe {
-            let virt = cur.space().handles().insert_unchecked(
+        let virt = unsafe {
+            cur.space().handles().insert_unchecked(
                 virt,
                 Weak::<space::Virt>::default_features() | Feature::SEND,
                 None,
-            )?;
-            root_virt.write(virt)?;
-        }
+            )?
+        };
+        root_virt.write(virt)?;
         Ok(ret)
     })
 }
@@ -225,8 +229,9 @@ fn space_new(root_virt: UserPtr<Out, Handle>) -> Result<Handle> {
 fn virt_alloc(hdl: Handle, offset: usize, size: usize, align: usize) -> Result<Handle> {
     hdl.check_null()?;
     SCHED.with_current(|cur| {
-        let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(EKILLED)?;
+        let virt_obj = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
+        let virt = virt_obj.upgrade().ok_or(EKILLED)?;
+        drop(virt_obj);
         let sub = virt.allocate(
             (offset != usize::MAX).then_some(offset),
             Layout::from_size_align(size, align)?,
@@ -243,7 +248,7 @@ fn virt_info(hdl: Handle, size: UserPtr<Out, usize>) -> Result<*mut u8> {
         let virt = virt.upgrade().ok_or(EKILLED)?;
         let base = virt.range().start;
         if !size.as_ptr().is_null() {
-            unsafe { size.write(virt.len()) }?;
+            size.write(virt.len())?;
         }
         Ok(*base)
     })
@@ -265,8 +270,9 @@ fn virt_map(hdl: Handle, mi_ptr: UserPtr<InOut, VirtMapInfo>) -> Result<*mut u8>
     let mi = unsafe { mi_ptr.read() }?;
     let flags = check_flags(mi.flags)?;
     SCHED.with_current(|cur| {
-        let virt = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
-        let virt = virt.upgrade().ok_or(EKILLED)?;
+        let virt_obj = cur.space().handles().get::<Weak<space::Virt>>(hdl)?;
+        let virt = virt_obj.upgrade().ok_or(EKILLED)?;
+        drop(virt_obj);
         let phys = cur.space().handles().remove::<space::Phys>(mi.phys)?;
         let offset = (mi.offset != usize::MAX).then_some(mi.offset);
         if flags.intersects(!features_to_flags(phys.features())) {
@@ -275,17 +281,11 @@ fn virt_map(hdl: Handle, mi_ptr: UserPtr<InOut, VirtMapInfo>) -> Result<*mut u8>
 
         let size = if mi.len == 0 { phys.len() } else { mi.len };
         let layout = Layout::from_size_align(size, mi.align)?;
-        let addr = virt.map(
-            offset,
-            space::Phys::clone(&phys),
-            mi.phys_offset,
-            layout,
-            flags,
-        )?;
-        unsafe {
-            let len = UserPtr::<Out, _>::new(ptr::addr_of_mut!((*mi_ptr.as_ptr()).len));
-            len.write(size)?;
-        }
+        let addr = virt.map(offset, Ref::into_raw(phys), mi.phys_offset, layout, flags)?;
+
+        let len = UserPtr::<Out, _>::new(unsafe { ptr::addr_of_mut!((*mi_ptr.as_ptr()).len) });
+        len.write(size)?;
+
         Ok(*addr)
     })
 }
@@ -318,12 +318,10 @@ fn mem_info(info: UserPtr<Out, MemInfo>) -> Result {
     info.check()?;
     let all_available = super::ALL_AVAILABLE.load(core::sync::atomic::Ordering::Relaxed);
     let current_used = super::heap::current_used();
-    unsafe {
-        info.write(MemInfo {
-            all_available,
-            current_used,
-        })
-    }
+    info.write(MemInfo {
+        all_available,
+        current_used,
+    })
 }
 
 #[syscall]
@@ -334,14 +332,21 @@ fn phys_acq(res: Handle, addr: usize, size: usize) -> Result<Handle> {
 
     SCHED.with_current(|cur| {
         let res = cur.space().handles().get::<Resource<usize>>(res)?;
-        if res.magic_eq(super::mem_resource())
-            && res.range().start <= addr
-            && addr + size <= res.range().end
-        {
-            let phys = space::Phys::new(paging::PAddr::new(addr), size)?;
-            unsafe { cur.space().handles().insert(phys, None) }
-        } else {
-            Err(EPERM)
+        if !res.magic_eq(super::mem_resource()) {
+            return Err(EPERM);
+        }
+
+        if addr == 0 {
+            drop(res);
+            let phys = space::allocate_phys(size, PhysOptions::ZEROED, true)?;
+            return unsafe { cur.space().handles().insert_raw(phys, None) };
+        }
+
+        if !(res.range().start <= addr && addr + size <= res.range().end) {
+            return Err(EPERM);
         }
+        drop(res);
+        let phys = space::new_phys(paging::PAddr::new(addr), size)?;
+        unsafe { cur.space().handles().insert_raw(phys, None) }
     })
 }
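
The rewrite adds an anonymous path: `addr == 0` now requests fresh contiguous zeroed memory, while any other address wraps a fixed range once the resource bounds check passes. The dispatch reduces to (sketch mirroring the syscall body above):

    fn acquire(addr: usize, size: usize) -> Result<Arc<space::Phys>> {
        if addr == 0 {
            // Anonymous: contiguous, zeroed, non-resizable.
            space::allocate_phys(size, PhysOptions::ZEROED, true)
        } else {
            // Fixed range; only reached once the memory resource covers it.
            space::new_phys(paging::PAddr::new(addr), size)
        }
    }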

+ 1 - 1
h2o/kernel/src/rxx.rs

@@ -1,7 +1,7 @@
 #[panic_handler]
 fn panic_handler(info: &core::panic::PanicInfo) -> ! {
     log::error!("CPU #{} {}", unsafe { crate::cpu::id() }, info);
-    unsafe { archop::halt_loop(Some(false)) }
+    unsafe { archop::halt_loop(Some(true)) }
 }
 
 /// The function indicating memory runs out.

+ 13 - 19
h2o/kernel/src/sched/imp.rs

@@ -233,15 +233,7 @@ impl Scheduler {
                 PREEMPT.raw()
             );
 
-            SCHED_INFO[self.cpu]
-                .expected_runtime
-                .fetch_sub(cur.time_slice.as_micros() as u64, Release);
-
-            let _ = self.schedule_impl(cur_time, pree, None, |task| {
-                task::Ready::exit(task, sv_call::EKILLED.into_retval());
-                Ok(())
-            });
-            unreachable!("Dead task");
+            self.kill(cur, cur_time, pree);
         }
 
         let ti = &*cur.tid;
@@ -254,16 +246,7 @@ impl Scheduler {
             Some(task::Signal::Kill) => {
                 log::trace!("Killing task {:?}, P{}", cur.tid.raw(), PREEMPT.raw());
 
-                SCHED_INFO[self.cpu]
-                    .expected_runtime
-                    .fetch_sub(cur.time_slice.as_micros() as u64, Release);
-                cur.space().try_stop(&cur.tid);
-
-                let _ = self.schedule_impl(cur_time, pree, None, |task| {
-                    task::Ready::exit(task, sv_call::EKILLED.into_retval());
-                    Ok(())
-                });
-                unreachable!("Dead task");
+                self.kill(cur, cur_time, pree)
             }
             Some(task::Signal::Suspend(slot)) => {
                 log::trace!("Suspending task {:?}, P{}", cur.tid.raw(), PREEMPT.raw());
@@ -284,6 +267,17 @@ impl Scheduler {
         }
     }
 
+    fn kill(&self, cur: &task::Ready, cur_time: Instant, pree: PreemptStateGuard) -> ! {
+        SCHED_INFO[self.cpu]
+            .expected_runtime
+            .fetch_sub(cur.time_slice.as_micros() as u64, Release);
+        let _ = self.schedule_impl(cur_time, pree, None, |task| {
+            task::Ready::exit(task, sv_call::EKILLED.into_retval());
+            Ok(())
+        });
+        unreachable!("Dead task");
+    }
+
     unsafe fn update(&self, cur_time: Instant) -> bool {
         self.canary.assert();
 

+ 1 - 4
h2o/kernel/src/sched/imp/waiter.rs

@@ -191,9 +191,6 @@ impl Dispatcher {
             signal,
             request,
         } = self.ready.pop()?;
-        if let Some(event) = request.event.upgrade() {
-            event.unwait(&(Arc::clone(self) as _));
-        }
         let res = if !canceled { request.syscall } else { None };
         self.event.notify(0, SIG_WRITE);
         *key = request.key;
@@ -218,7 +215,7 @@ impl Waiter for Dispatcher {
             });
             iter.for_each(|request| {
                 self.ready.push(Ready {
-                    canceled: false,
+                    canceled: true,
                     signal,
                     request,
                 });

+ 6 - 6
h2o/kernel/src/sched/ipc.rs

@@ -226,6 +226,7 @@ mod syscall {
             return Err(EPERM);
         }
         let event = obj.event().upgrade().ok_or(EPIPE)?;
+        drop(obj);
 
         let blocker = Blocker::new(&event, level_triggered, wake_all, signal);
         blocker.wait(Some(pree), time::from_us(timeout_us))?;
@@ -274,6 +275,7 @@ mod syscall {
                 return Err(EPERM);
             }
             let event = obj.event().upgrade().ok_or(EPIPE)?;
+            drop(obj);
 
             let waiter_data = WaiterData::new(
                 if level_triggered {
@@ -294,9 +296,10 @@ mod syscall {
         result: UserPtr<Out, usize>,
     ) -> Result<usize> {
         disp.check_null()?;
+
         let mut key = 0;
         let mut signal = 0;
-        let (c, r) = SCHED.with_current(|cur| {
+        let (canceled, r) = SCHED.with_current(|cur| {
             let disp = cur.space().handles().get::<Dispatcher>(disp)?;
             if !disp.features().contains(Feature::READ) {
                 return Err(EPERM);
@@ -305,12 +308,9 @@ mod syscall {
         })?;
 
         if !signal_slot.as_ptr().is_null() {
-            if c {
-                signal_slot.write(0)?;
-            } else {
-                signal_slot.write(signal)?;
-            }
+            signal_slot.write(if canceled { 0 } else { signal })?;
         }
+
         let r = r.map_or(0, crate::syscall::handle);
         if !result.as_ptr().is_null() {
             result.write(r)?;

+ 27 - 30
h2o/kernel/src/sched/ipc/channel.rs

@@ -1,6 +1,9 @@
 mod syscall;
 
-use alloc::sync::{Arc, Weak};
+use alloc::{
+    sync::{Arc, Weak},
+    vec::Vec,
+};
 use core::{
     mem,
     sync::atomic::{AtomicU64, Ordering::SeqCst},
@@ -22,7 +25,7 @@ const MAX_QUEUE_SIZE: usize = 2048;
 #[derive(Debug, Default)]
 pub struct Packet {
     id: usize,
-    objects: hdl::List,
+    objects: Vec<hdl::Ref>,
     buffer: Bytes,
 }
 
@@ -30,7 +33,7 @@ unsafe impl Send for Packet {}
 unsafe impl Sync for Packet {}
 
 impl Packet {
-    pub fn new(id: usize, objects: hdl::List, data: &[u8]) -> Self {
+    pub fn new(id: usize, objects: Vec<hdl::Ref>, data: &[u8]) -> Self {
         let buffer = Bytes::copy_from_slice(data);
         Packet {
             id,
@@ -125,47 +128,41 @@ impl Channel {
         }
     }
 
-    /// # Safety
+    /// # Errors
     ///
-    /// `head` must contains a valid packet.
-    unsafe fn get_packet(
-        head: &mut Option<Packet>,
+    /// Returns error if the peer is closed.
+    pub fn receive(
+        &self,
         buffer_cap: &mut usize,
         handle_cap: &mut usize,
     ) -> sv_call::Result<Packet> {
-        let packet = unsafe { head.as_mut().unwrap_unchecked() };
+        let _pree = PREEMPT.lock();
+        let mut head = self.head.lock();
+
+        let packet = match head.take() {
+            Some(packet) => packet,
+            None => {
+                let err = if self.peer.strong_count() > 0 {
+                    sv_call::ENOENT
+                } else {
+                    sv_call::EPIPE
+                };
+                self.me.msgs.pop().ok_or(err)?
+            }
+        };
+
         let buffer_size = packet.buffer().len();
         let handle_count = packet.object_count();
         let ret = if buffer_size > *buffer_cap || handle_count > *handle_cap {
+            *head = Some(packet);
             Err(sv_call::EBUFFER)
         } else {
-            Ok(unsafe { head.take().unwrap_unchecked() })
+            Ok(packet)
         };
         *buffer_cap = buffer_size;
         *handle_cap = handle_count;
         ret
     }
-
-    /// # Errors
-    ///
-    /// Returns error if the peer is closed.
-    pub fn receive(
-        &self,
-        buffer_cap: &mut usize,
-        handle_cap: &mut usize,
-    ) -> sv_call::Result<Packet> {
-        let _pree = PREEMPT.lock();
-        let mut head = self.head.lock();
-        if head.is_none() {
-            let err = if self.peer.strong_count() > 0 {
-                sv_call::ENOENT
-            } else {
-                sv_call::EPIPE
-            };
-            *head = Some(self.me.msgs.pop().ok_or(err)?);
-        }
-        unsafe { Self::get_packet(&mut head, buffer_cap, handle_cap) }
-    }
 }
 
 unsafe impl DefaultFeature for Channel {
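
Since an oversized packet is now parked back in `head` and the two capacity parameters are overwritten with its real sizes, a caller can grow its buffers and retry without losing the message. A hypothetical retry loop against `EBUFFER`:

    fn recv_grow(ch: &Channel) -> sv_call::Result<Packet> {
        let (mut buf_cap, mut hdl_cap) = (0, 0);
        loop {
            match ch.receive(&mut buf_cap, &mut hdl_cap) {
                // The packet stays queued; buf_cap/hdl_cap now hold the
                // sizes it needs, so reallocate the buffers and try again.
                Err(err) if err == sv_call::EBUFFER => continue,
                other => return other,
            }
        }
    }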

+ 10 - 5
h2o/kernel/src/sched/ipc/channel/syscall.rs

@@ -50,11 +50,14 @@ where
 
     SCHED.with_current(|cur| {
         let map = cur.space().handles();
-        let channel = map.get::<Channel>(hdl)?;
-        if !channel.features().contains(Feature::WRITE) {
+        let obj = map.get::<Channel>(hdl)?;
+        if !obj.features().contains(Feature::WRITE) {
             return Err(EPERM);
         }
-        let objects = unsafe { map.send(handles, &channel) }?;
+        let channel = Arc::clone(&obj);
+        drop(obj);
+
+        let objects = map.send(handles, &channel)?;
         let mut packet = Packet::new(packet.id, objects, buffer);
         send(&channel, &mut packet)
     })
@@ -74,7 +77,7 @@ fn receive_handles<E: ?Sized + Event>(
     res: Result<Packet>,
     map: &crate::sched::task::hdl::HandleMap,
     raw: &mut RawPacket,
-    event: &Arc<E>,
+    event: Arc<E>,
 ) -> Result<Packet> {
     match res {
         Ok(mut packet) => {
@@ -124,7 +127,9 @@ fn chan_recv(hdl: Handle, packet_ptr: UserPtr<InOut, RawPacket>) -> Result {
         raw.buffer_size = raw.buffer_cap;
         raw.handle_count = raw.handle_cap;
         let res = channel.receive(&mut raw.buffer_size, &mut raw.handle_count);
-        receive_handles(res, map, &mut raw, (**channel).event())
+        let event = (**channel).event().clone();
+        drop(channel);
+        receive_handles(res, map, &mut raw, event)
     });
 
     write_raw_with_rest_of_packet(packet_ptr.out(), raw, res)
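
The `Arc::clone(&obj); drop(obj)` and `drop(channel)` dance above detaches the object from the handle-map guard before the map is touched again. A minimal sketch of the pattern with std types (illustrative only, not the kernel's locking primitives):

    use std::sync::{Arc, Mutex};

    fn main() {
        let map = Mutex::new(vec![Arc::new(42u32)]);

        // Clone the Arc out of the guard, then drop the guard...
        let value = {
            let guard = map.lock().unwrap();
            Arc::clone(&guard[0])
        };

        // ...so the same map can be locked again while `value` stays alive.
        map.lock().unwrap().push(Arc::clone(&value));
        assert_eq!(*value, 42);
    }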

+ 7 - 4
h2o/kernel/src/sched/task.rs

@@ -50,7 +50,6 @@ pub(super) fn init() {
 
 #[inline]
 pub fn init_early() {
-    hdl::init();
     tid::init();
 }
 
@@ -103,7 +102,7 @@ fn exec(
     init_chan: sv_call::Handle,
     starter: &Starter,
 ) -> sv_call::Result<(Init, sv_call::Handle)> {
-    let cur = super::SCHED.with_current(|cur| Ok(cur.tid.clone()))?;
+    let cur = super::SCHED.with_current(|cur| Ok(cur.tid().clone()))?;
     let init = exec_inner(cur, name, None, None, space, init_chan, starter)?;
     super::SCHED.with_current(|cur| {
         let event = Arc::downgrade(&init.tid().event) as _;
@@ -115,8 +114,12 @@ fn exec(
     })
 }
 
-fn create(name: Option<String>, space: Arc<Space>, init_chan: sv_call::Handle) -> sv_call::Result<(Init, sv_call::Handle)> {
-    let cur = super::SCHED.with_current(|cur| Ok(cur.tid.clone()))?;
+fn create(
+    name: Option<String>,
+    space: Arc<Space>,
+    init_chan: sv_call::Handle,
+) -> sv_call::Result<(Init, sv_call::Handle)> {
+    let cur = super::SCHED.with_current(|cur| Ok(cur.tid().clone()))?;
 
     let ty = cur.ty();
     let ti = TaskInfo::builder()

+ 23 - 34
h2o/kernel/src/sched/task/boot.rs

@@ -1,4 +1,4 @@
-use alloc::sync::Weak;
+use alloc::{sync::Weak, vec::Vec};
 use core::mem;
 
 use archop::Azy;
@@ -9,14 +9,14 @@ use targs::Targs;
 use super::{hdl::DefaultFeature, *};
 use crate::{
     cpu::arch::tsc::TSC_CLOCK,
-    mem::space::{Flags, Phys, Virt},
+    mem::space::{self, Flags, Phys, PhysTrait, Virt},
     sched::SCHED,
 };
 
 static VDSO_DATA: &[u8] = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/target/vdso"));
-pub static VDSO: Azy<(Flags, Phys)> = Azy::new(|| {
+pub static VDSO: Azy<(Flags, Arc<Phys>)> = Azy::new(|| {
     let flags = Flags::READABLE | Flags::EXECUTABLE | Flags::USER_ACCESS;
-    let vdso_mem = Phys::allocate(
+    let vdso_mem = space::allocate_phys(
         VDSO_DATA.len().round_up_bit(paging::PAGE_SHIFT),
         Default::default(),
         true,
@@ -28,10 +28,10 @@ pub static VDSO: Azy<(Flags, Phys)> = Azy::new(|| {
     }
     (flags, vdso_mem)
 });
-pub static BOOTFS: Azy<(Flags, Phys)> = Azy::new(|| {
+pub static BOOTFS: Azy<(Flags, Arc<Phys>)> = Azy::new(|| {
     (
         Flags::READABLE | Flags::EXECUTABLE | Flags::USER_ACCESS,
-        Phys::new(
+        space::new_phys(
             crate::kargs().bootfs_phys,
             crate::kargs().bootfs_len.round_up_bit(paging::PAGE_SHIFT),
         )
@@ -74,44 +74,34 @@ pub fn setup() {
         ptr.write(constants);
     }
 
-    let mut objects = hdl::List::new();
+    let mut objects = Vec::<hdl::Ref>::new();
 
     // The sequence of kernel objects must match the one defined in
     // `targs::HandleIndex`.
-    {
-        let mem_res = Arc::clone(crate::dev::mem_resource());
-        let res = objects
-            .insert(hdl::Ref::try_new(mem_res, None).expect("Failed to create memory resource"));
-        res.expect("Failed to insert memory resource");
-    }
-    {
-        let pio_res = Arc::clone(crate::dev::pio_resource());
-        let res = objects
-            .insert(hdl::Ref::try_new(pio_res, None).expect("Failed to create port I/O resource"));
-        res.expect("Failed to insert port I/O resource");
-    }
-    {
-        let gsi_res = Arc::clone(crate::dev::gsi_resource());
-        let res = objects
-            .insert(hdl::Ref::try_new(gsi_res, None).expect("Failed to create GSI resource"));
-        res.expect("Failed to insert GSI resource");
-    }
+
+    let mem_res = Arc::clone(crate::dev::mem_resource());
+    objects.push(hdl::Ref::from_raw(mem_res, None).expect("Failed to create memory resource"));
+
+    let pio_res = Arc::clone(crate::dev::pio_resource());
+    objects.push(hdl::Ref::from_raw(pio_res, None).expect("Failed to create port I/O resource"));
+
+    let gsi_res = Arc::clone(crate::dev::gsi_resource());
+    objects.push(hdl::Ref::from_raw(gsi_res, None).expect("Failed to create GSI resource"));
+
     unsafe {
-        let res = objects.insert(
-            hdl::Ref::try_new_unchecked(Phys::clone(&VDSO.1), flags_to_feat(VDSO.0), None)
+        objects.push(
+            hdl::Ref::from_raw_unchecked(Arc::clone(&VDSO.1), flags_to_feat(VDSO.0), None)
                 .expect("Failed to create VDSO reference"),
         );
-        res.expect("Failed to insert VDSO");
 
-        let res = objects.insert(
-            hdl::Ref::try_new_unchecked(Phys::clone(&BOOTFS.1), flags_to_feat(BOOTFS.0), None)
+        objects.push(
+            hdl::Ref::from_raw_unchecked(Arc::clone(&BOOTFS.1), flags_to_feat(BOOTFS.0), None)
                 .expect("Failed to create boot FS reference"),
         );
-        res.expect("Failed to insert boot FS");
     }
-    let space = super::Space::new(Type::User).expect("Failed to create space");
+    let space = super::Space::new().expect("Failed to create space");
     unsafe {
-        let res = objects.insert(
+        objects.push(
             hdl::Ref::try_new_unchecked(
                 Arc::downgrade(space.mem().root()),
                 Weak::<Virt>::default_features() | Feature::SEND,
@@ -119,7 +109,6 @@ pub fn setup() {
             )
             .expect("Failed to create root virt"),
         );
-        res.expect("Failed to insert root virt");
     }
 
     let buf = {

+ 24 - 14
h2o/kernel/src/sched/task/ctx.rs

@@ -5,9 +5,8 @@ cfg_if::cfg_if! {
     }
 }
 
-use alloc::boxed::Box;
+use alloc::{boxed::Box, sync::Arc};
 use core::{
-    alloc::Layout,
     fmt::Debug,
     num::NonZeroU64,
     ops::{Deref, DerefMut},
@@ -56,6 +55,7 @@ impl KstackData {
 
 pub struct Kstack {
     ptr: NonNull<KstackData>,
+    virt: Arc<space::Virt>,
     kframe_ptr: *mut u8,
     pf_resume: Option<NonZeroU64>,
 }
@@ -64,18 +64,27 @@ unsafe impl Send for Kstack {}
 
 impl Kstack {
     pub fn new(entry: Option<Entry>, ty: super::Type) -> Self {
-        let ptr = space::allocate(
-            Layout::new::<KstackData>().size(),
-            Flags::READABLE | Flags::WRITABLE,
-            false,
-        )
-        .expect("Failed to allocate kernel stack");
-        unsafe {
-            let pad = NonNull::slice_from_raw_parts(ptr.as_non_null_ptr(), PAGE_SIZE);
-            space::reprotect_unchecked(pad, Flags::READABLE).expect("Failed to set padding");
-        }
+        let virt = space::KRL
+            .allocate(None, space::page_aligned(KSTACK_SIZE + PAGE_SIZE))
+            .expect("Failed to allocate space for task's kernel stack")
+            .upgrade()
+            .expect("Kernel root virt unexpectedly dropped it!");
+
+        let phys = space::allocate_phys(KSTACK_SIZE, Default::default(), false)
+            .expect("Failed to allocate memory for kernel stack");
+
+        let addr = virt
+            .map(
+                Some(PAGE_SIZE),
+                phys,
+                0,
+                space::page_aligned(KSTACK_SIZE),
+                Flags::READABLE | Flags::WRITABLE,
+            )
+            .expect("Failed to map kernel stack");
+
+        let mut kstack = unsafe { addr.as_non_null_unchecked().cast::<KstackData>() };
 
-        let mut kstack = ptr.cast::<KstackData>();
         let kframe_ptr = unsafe {
             let this = kstack.as_mut();
             let frame = this.task_frame_mut();
@@ -92,6 +101,7 @@ impl Kstack {
         };
         Kstack {
             ptr: kstack,
+            virt,
             kframe_ptr,
             pf_resume: None,
         }
@@ -142,7 +152,7 @@ impl Deref for Kstack {
 impl Drop for Kstack {
     #[inline]
     fn drop(&mut self) {
-        let _ = unsafe { space::unmap(self.ptr.cast()) };
+        let _ = self.virt.destroy();
     }
 }
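
As the new `Kstack::new` shows, the virtual reservation is one page larger than the stack and the mapping starts at offset `PAGE_SIZE`, so the lowest page stays unmapped and a stack overflow faults instead of silently corrupting adjacent memory. The layout arithmetic, with illustrative constants (the kernel's actual sizes may differ):

    const PAGE_SIZE: usize = 4096;
    const KSTACK_SIZE: usize = 16 * PAGE_SIZE; // illustrative, not the kernel's value

    fn main() {
        // Reserve KSTACK_SIZE + PAGE_SIZE of virtual space...
        let reserved = KSTACK_SIZE + PAGE_SIZE;
        // ...but map only from offset PAGE_SIZE onward, leaving an unmapped
        // guard page at the bottom of the reservation.
        let map_offset = PAGE_SIZE;
        assert_eq!(reserved - map_offset, KSTACK_SIZE);
        println!("guard: [0, {map_offset}), stack: [{map_offset}, {reserved})");
    }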
 

+ 3 - 3
h2o/kernel/src/sched/task/ctx/x86_64.rs

@@ -16,6 +16,7 @@ use crate::{
             KERNEL_GS,
         },
     },
+    mem::space::PageFaultErrCode,
     sched::{task, PREEMPT},
 };
 
@@ -209,18 +210,17 @@ impl Frame {
         "CF - PF - AF - ZF SF TF IF DF OF IOPLL IOPLH NT - RF VM AC VIF VIP ID";
 
     pub const ERRC: &'static str = "EXT IDT TI";
-    pub const ERRC_PF: &'static str = "P WR US RSVD ID PK SS - - - - - - - - SGX";
 
     pub fn dump(&self, errc_format: &'static str) {
         use log::info;
 
-        use crate::log::flags::Flags;
+        use crate::logger::flags::Flags;
 
         info!("Frame dump on CPU #{}", unsafe { crate::cpu::id() });
 
         if self.errc_vec != 0u64.wrapping_sub(1) && !errc_format.is_empty() {
             info!("> Error Code = {}", Flags::new(self.errc_vec, errc_format));
-            if errc_format == Self::ERRC_PF {
+            if errc_format == PageFaultErrCode::FMT {
                 info!("> cr2 (PF addr) = {:#018x}", unsafe {
                     archop::reg::cr2::read()
                 });

+ 4 - 4
h2o/kernel/src/sched/task/elf.rs

@@ -15,7 +15,7 @@ use crate::{
 fn map_addr(
     virt: &Arc<Virt>,
     addr: Range<LAddr>,
-    phys: Option<Phys>,
+    phys: Option<Arc<Phys>>,
     flags: Flags,
 ) -> sv_call::Result {
     let offset = addr
@@ -30,7 +30,7 @@ fn map_addr(
         .ok_or(sv_call::ERANGE)?;
     let phys = match phys {
         Some(phys) => phys,
-        None => Phys::allocate(len, PhysOptions::ZEROED, false)?,
+        None => space::allocate_phys(len, PhysOptions::ZEROED, false)?,
     };
     virt.map(Some(offset), phys, 0, space::page_aligned(len), flags)?;
     Ok(())
@@ -77,7 +77,7 @@ fn load_prog(
     if fend > 0 {
         let virt = LAddr::from(vstart)..LAddr::from(vend);
         log::trace!("Mapping {:?}", virt);
-        let phys = Phys::new(phys, fend)?;
+        let phys = space::new_phys(phys, fend)?;
         map_addr(space, virt, Some(phys), flags)?;
     }
 
@@ -167,7 +167,7 @@ pub fn from_elf(
         arg: 0,
     };
 
-    let tid = crate::sched::SCHED.with_current(|cur| Ok(cur.tid.clone()))?;
+    let tid = crate::sched::SCHED.with_current(|cur| Ok(cur.tid().clone()))?;
 
     let ret = super::exec_inner(
         tid,

+ 2 - 2
h2o/kernel/src/sched/task/excep.rs

@@ -9,7 +9,7 @@ use archop::reg::cr2;
 use bytes::Buf;
 use sv_call::task::excep::{Exception, ExceptionResult, EXRES_CODE_RECOVERED};
 
-use super::{ctx::x86_64::Frame, hdl};
+use super::ctx::x86_64::Frame;
 use crate::{
     cpu::intr::arch::ExVec,
     sched::{ipc::Packet, PREEMPT, SCHED, SIG_READ},
@@ -37,7 +37,7 @@ pub fn dispatch_exception(frame: &mut Frame, vec: ExVec) -> bool {
         })
     };
 
-    let mut excep = Packet::new(0, hdl::List::default(), &data);
+    let mut excep = Packet::new(0, Default::default(), &data);
     if excep_chan.send(&mut excep).is_err() {
         PREEMPT.scope(|| *slot.lock() = Some(excep_chan));
         return false;

+ 121 - 78
h2o/kernel/src/sched/task/hdl.rs

@@ -1,21 +1,25 @@
 mod node;
 
-use alloc::sync::{Arc, Weak};
-use core::{any::Any, pin::Pin, ptr::NonNull};
+use alloc::{
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use core::{
+    any::Any,
+    hash::BuildHasherDefault,
+    mem,
+    ops::Deref,
+    ptr::NonNull,
+    sync::atomic::{AtomicU32, Ordering::SeqCst},
+};
 
-use archop::Azy;
-use modular_bitfield::prelude::*;
-use spin::Mutex;
-use sv_call::{Feature, Result};
+use collection_ex::{CHashMap, FnvHasher};
+use sv_call::{Feature, Result, EINVAL, ETYPE};
 
-pub use self::node::{List, Ptr, Ref, MAX_HANDLE_COUNT};
+pub use self::node::{Ref, MAX_HANDLE_COUNT};
 use crate::sched::{ipc::Channel, Event, PREEMPT};
 
-#[bitfield]
-struct Value {
-    gen: B14,
-    index: B18,
-}
+type BH = BuildHasherDefault<FnvHasher>;
 
 pub unsafe trait DefaultFeature: Any + Send + Sync {
     fn default_features() -> Feature;
@@ -33,71 +37,86 @@ unsafe impl<T: DefaultFeature + ?Sized> DefaultFeature for alloc::sync::Arc<T> {
     }
 }
 
+pub struct RefGuard<'a, T: ?Sized + 'a> {
+    _guard: collection_ex::CHashMapReadGuard<'a, u32, Ref, BH>,
+    _handle: sv_call::Handle,
+    object: NonNull<Ref<T>>,
+}
+
+impl<'a, T: ?Sized + 'a> Deref for RefGuard<'a, T> {
+    type Target = Ref<T>;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        unsafe { self.object.as_ref() }
+    }
+}
+
 #[derive(Debug)]
 pub struct HandleMap {
-    list: Mutex<node::List>,
+    list: CHashMap<u32, Ref, BH>,
     mix: u32,
+    next_id: AtomicU32,
 }
 
 impl HandleMap {
     #[inline]
     pub fn new() -> Self {
         HandleMap {
-            list: Mutex::new(List::new()),
+            list: CHashMap::default(),
             mix: archop::rand::get() as u32,
+            next_id: AtomicU32::new(1),
         }
     }
 
-    fn decode(&self, handle: sv_call::Handle) -> Result<Ptr> {
-        let value = handle.raw() ^ self.mix;
-        let value = Value::from_bytes(value.to_ne_bytes());
-        let _ = value.gen();
-        usize::try_from(value.index())
-            .map_err(Into::into)
-            .and_then(node::decode)
-    }
-
-    fn encode(&self, value: Ptr) -> Result<sv_call::Handle> {
-        let index =
-            node::encode(value).and_then(|index| u32::try_from(index).map_err(Into::into))?;
-        let value = Value::new()
-            .with_gen(0)
-            .with_index_checked(index)
-            .map_err(|_| sv_call::ERANGE)?;
-        Ok(sv_call::Handle::new(
-            u32::from_ne_bytes(value.into_bytes()) ^ self.mix,
-        ))
+    fn decode(&self, handle: sv_call::Handle) -> u32 {
+        handle.raw() ^ self.mix
     }
 
     #[inline]
-    pub fn get_ref(&self, handle: sv_call::Handle) -> Result<Pin<&Ref>> {
-        self.decode(handle)
-            // SAFETY: Dereference within the available range and the 
-            // reference is at a fixed address.
-            .map(|ptr| unsafe { Pin::new_unchecked(ptr.as_ref()) })
+    pub fn get_ref(&self, handle: sv_call::Handle) -> Result<RefGuard<'_, dyn Any + Send + Sync>> {
+        let key = self.decode(handle);
+        self.list.get(&key).ok_or(EINVAL).map(|guard| {
+            let object = NonNull::from(&guard as &Ref);
+            RefGuard {
+                _guard: guard,
+                _handle: handle,
+                object,
+            }
+        })
     }
 
     #[inline]
-    pub fn get<T: Send + Any>(&self, handle: sv_call::Handle) -> Result<Pin<&Ref<T>>> {
-        self.decode(handle)
-            // SAFETY: Dereference within the available range.
-            .and_then(|ptr| unsafe { ptr.as_ref().downcast_ref::<T>() })
-            // SAFETY: The reference is at a fixed address.
-            .map(|obj| unsafe { Pin::new_unchecked(obj) })
+    pub fn get<T: Send + Any>(&self, handle: sv_call::Handle) -> Result<RefGuard<'_, T>> {
+        let key = self.decode(handle);
+        self.list.get(&key).ok_or(EINVAL).and_then(|guard| {
+            if guard.is::<T>() {
+                let object = NonNull::from(guard.downcast_ref::<T>().unwrap());
+                Ok(RefGuard {
+                    _guard: guard,
+                    _handle: handle,
+                    object,
+                })
+            } else {
+                Err(ETYPE)
+            }
+        })
     }
 
     #[inline]
     pub fn clone_ref(&self, handle: sv_call::Handle) -> Result<sv_call::Handle> {
-        let old_ptr = self.decode(handle)?;
-        let new = unsafe { old_ptr.as_ref() }.try_clone()?;
+        let old = self.get_ref(handle)?;
+        let new = old.try_clone()?;
+        drop(old);
         self.insert_ref(new)
     }
 
     #[inline]
     pub fn insert_ref(&self, value: Ref) -> Result<sv_call::Handle> {
-        // SAFETY: The safety condition is guaranteed by the caller.
-        let link = PREEMPT.scope(|| self.list.lock().insert(value))?;
-        self.encode(link)
+        let key = self.next_id.fetch_add(1, SeqCst);
+        let old = PREEMPT.scope(|| self.list.insert(key, value));
+        assert!(old.is_none());
+        Ok(sv_call::Handle::new(key ^ self.mix))
     }
 
     #[inline]
@@ -124,6 +143,21 @@ impl HandleMap {
         self.insert_ref(value)
     }
 
+    /// # Safety
+    ///
+    /// The caller must ensure that the object truly is [`Send`] and/or
+    /// [`Sync`] as claimed by the corresponding feature flags in `feat`.
+    pub unsafe fn insert_raw_unchecked<T: Send + Sync + 'static>(
+        &self,
+        data: Arc<T>,
+        feat: Feature,
+        event: Option<Weak<dyn Event>>,
+    ) -> Result<sv_call::Handle> {
+        // SAFETY: The safety condition is guaranteed by the caller.
+        let value = unsafe { Ref::from_raw_unchecked(data, feat, event) }?;
+        self.insert_ref(value)
+    }
+
     #[inline]
     pub fn insert<T: DefaultFeature>(
         &self,
@@ -135,43 +169,57 @@ impl HandleMap {
 
     #[inline]
     pub fn remove_ref(&self, handle: sv_call::Handle) -> Result<Ref> {
-        let link = self.decode(handle)?;
-        PREEMPT.scope(|| self.list.lock().remove(link))
+        let key = self.decode(handle);
+        PREEMPT.scope(|| self.list.remove(&key).ok_or(EINVAL))
     }
 
     pub fn remove<T: Send + Sync + Any>(&self, handle: sv_call::Handle) -> Result<Ref<T>> {
-        self.decode(handle).and_then(|value| {
-            // SAFETY: Dereference within the available range.
-            let ptr = unsafe { value.as_ref() };
-            if ptr.is::<T>() {
-                self.remove_ref(handle).map(|obj| obj.downcast().unwrap())
-            } else {
-                Err(sv_call::ETYPE)
-            }
-        })
+        let key = self.decode(handle);
+        let res = self
+            .list
+            .try_remove(&key, |obj| if obj.is::<T>() { Ok(()) } else { Err(ETYPE) });
+        res.map_err(|err| err.unwrap_or(EINVAL))
+            .map(|obj| obj.downcast().unwrap())
     }
 
-    pub fn send(&self, handles: &[sv_call::Handle], src: &Channel) -> Result<List> {
-        if handles.is_empty() {
-            return Ok(List::new());
-        }
-        PREEMPT.scope(|| {
-            { self.list.lock() }.split(handles.iter().map(|&handle| self.decode(handle)), |value| {
-                match value.downcast_ref::<Channel>() {
+    fn merge(&self, objects: Vec<Ref>) -> impl Iterator<Item = Result<sv_call::Handle>> + '_ {
+        objects.into_iter().map(|obj| self.insert_ref(obj))
+    }
+
+    fn split(&self, handles: &[sv_call::Handle], src: &Channel) -> Result<Vec<Ref>> {
+        let mut result = Vec::with_capacity(handles.len());
+        for handle in handles.iter().copied() {
+            let key = self.decode(handle);
+            let res = self
+                .list
+                .try_remove(&key, |value| match value.downcast_ref::<Channel>() {
                     Ok(chan) if chan.peer_eq(src) => Err(sv_call::EPERM),
                     Err(_) if !value.features().contains(Feature::SEND) => Err(sv_call::EPERM),
                     _ => Ok(()),
+                });
+            match res.map_err(|err| err.unwrap_or(EINVAL)) {
+                Ok(obj) => result.push(obj),
+                Err(err) => {
+                    self.merge(result).for_each(drop);
+                    return Err(err);
                 }
-            })
-        })
+            }
+        }
+        Ok(result)
+    }
+
+    pub fn send(&self, handles: &[sv_call::Handle], src: &Channel) -> Result<Vec<Ref>> {
+        if handles.is_empty() {
+            return Ok(Vec::new());
+        }
+        PREEMPT.scope(|| self.split(handles, src))
     }
 
     #[inline]
-    pub fn receive(&self, other: &mut List, handles: &mut [sv_call::Handle]) {
+    pub fn receive(&self, other: &mut Vec<Ref>, handles: &mut [sv_call::Handle]) {
         PREEMPT.scope(|| {
-            let mut list = self.list.lock();
-            for (hdl, obj) in handles.iter_mut().zip(list.merge(other)) {
-                *hdl = self.encode(NonNull::from(obj)).unwrap();
+            for (hdl, obj) in handles.iter_mut().zip(self.merge(mem::take(other))) {
+                *hdl = obj.unwrap();
             }
         })
     }
@@ -184,11 +232,6 @@ impl Default for HandleMap {
     }
 }
 
-#[inline]
-pub(super) fn init() {
-    Azy::force(&node::HR_ARENA);
-}
-
 mod syscall {
     use sv_call::*;
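
The new `HandleMap` drops the linked-list arena for a concurrent hash map: a handle is now just a monotonically increasing key XORed with a per-map random `mix`. A self-contained sketch of the encode/decode round trip (a plain `HashMap` standing in for `CHashMap`):

    use std::collections::HashMap;
    use std::sync::atomic::{AtomicU32, Ordering::SeqCst};

    struct Handles<T> {
        map: HashMap<u32, T>,
        mix: u32,        // per-map random value, as in `HandleMap::new`
        next: AtomicU32, // monotonically increasing key, starting at 1
    }

    impl<T> Handles<T> {
        fn new(mix: u32) -> Self {
            Self { map: HashMap::new(), mix, next: AtomicU32::new(1) }
        }

        fn insert(&mut self, value: T) -> u32 {
            let key = self.next.fetch_add(1, SeqCst);
            assert!(self.map.insert(key, value).is_none()); // keys never repeat
            key ^ self.mix // the raw handle handed out
        }

        fn get(&self, handle: u32) -> Option<&T> {
            self.map.get(&(handle ^ self.mix)) // decoding is the same XOR
        }
    }

    fn main() {
        let mut handles = Handles::new(0xdead_beef);
        let h = handles.insert("channel");
        assert_eq!(handles.get(h), Some(&"channel"));
        assert!(handles.get(h ^ 1).is_none()); // a perturbed handle misses
    }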
 

+ 4 - 303
h2o/kernel/src/sched/task/hdl/node.rs

@@ -1,37 +1,24 @@
 use alloc::sync::{Arc, Weak};
 use core::{
     any::Any,
-    fmt,
-    iter::FusedIterator,
-    marker::{PhantomData, PhantomPinned, Unsize},
-    mem,
+    marker::Unsize,
     ops::{CoerceUnsized, Deref},
-    ptr::NonNull,
 };
 
-use archop::Azy;
 use sv_call::{Feature, Result};
 
 use super::DefaultFeature;
-use crate::{
-    mem::Arena,
-    sched::{Event, PREEMPT},
-};
+use crate::sched::Event;
 
 pub const MAX_HANDLE_COUNT: usize = 1 << 16;
 
-pub(super) static HR_ARENA: Azy<Arena<Ref>> = Azy::new(|| Arena::new(MAX_HANDLE_COUNT));
-
 #[derive(Debug)]
+#[repr(C)]
 pub struct Ref<T: ?Sized = dyn Any + Send + Sync> {
-    _marker: PhantomPinned,
-    next: Option<Ptr>,
-    prev: Option<Ptr>,
     event: Weak<dyn Event>,
     feat: Feature,
     obj: Arc<T>,
 }
-pub type Ptr = NonNull<Ref>;
 
 unsafe impl<T: ?Sized> Send for Ref<T> {}
 
@@ -66,14 +53,7 @@ impl<T: ?Sized> Ref<T> {
             return Err(sv_call::EPERM);
         }
         let event = event.unwrap_or(Weak::<crate::sched::BasicEvent>::new() as _);
-        Ok(Ref {
-            _marker: PhantomPinned,
-            next: None,
-            prev: None,
-            event,
-            feat,
-            obj,
-        })
+        Ok(Ref { event, feat, obj })
     }
 
     #[inline]
@@ -129,9 +109,6 @@ impl<T: ?Sized> Ref<T> {
         T: Sized,
     {
         Arc::try_unwrap(this.obj).map_err(|obj| Ref {
-            _marker: PhantomPinned,
-            next: this.next,
-            prev: this.prev,
             event: this.event,
             feat: this.feat,
             obj,
@@ -166,17 +143,11 @@ impl Ref {
     pub fn downcast<T: Any + Send + Sync>(self) -> core::result::Result<Ref<T>, Self> {
         match self.obj.downcast() {
             Ok(obj) => Ok(Ref {
-                _marker: PhantomPinned,
-                next: None,
-                prev: None,
                 event: self.event,
                 feat: self.feat,
                 obj,
             }),
             Err(obj) => Err(Ref {
-                _marker: PhantomPinned,
-                next: None,
-                prev: None,
                 event: self.event,
                 feat: self.feat,
                 obj,
@@ -192,9 +163,6 @@ impl Ref {
     #[must_use = "Don't make useless clonings"]
     unsafe fn clone_unchecked(&self) -> Ref {
         Ref {
-            _marker: PhantomPinned,
-            next: None,
-            prev: None,
             event: Weak::clone(&self.event),
             feat: self.feat,
             obj: Arc::clone(&self.obj),
@@ -211,270 +179,3 @@ impl Ref {
         }
     }
 }
-
-pub struct List {
-    head: Option<Ptr>,
-    tail: Option<Ptr>,
-    len: usize,
-    _marker: PhantomData<Ref>,
-}
-
-unsafe impl Send for List {}
-
-impl List {
-    #[inline]
-    pub fn new() -> Self {
-        List {
-            head: None,
-            tail: None,
-            len: 0,
-            _marker: PhantomData,
-        }
-    }
-
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.len
-    }
-
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-}
-
-impl Default for List {
-    #[inline]
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl List {
-    /// # Safety
-    ///
-    /// The caller must ensure that the pointers belongs to the list and `start`
-    /// is some predecessor of `end` or equals `end`.
-    unsafe fn splice_nodes(&mut self, mut start: Ptr, mut end: Ptr) {
-        // These two are ours now, and we can create &mut s.
-        let (start, end) = unsafe { (start.as_mut(), end.as_mut()) };
-
-        // Not creating new mutable (unique!) references overlapping `element`.
-        match start.prev {
-            Some(mut prev) => unsafe { prev.as_mut().next = end.next },
-            // These nodes start with the head.
-            None => self.head = end.next,
-        }
-
-        match end.next {
-            Some(mut next) => unsafe { next.as_mut().prev = start.prev },
-            // These nodes end with the tail.
-            None => self.tail = start.prev,
-        }
-
-        start.prev = None;
-        end.next = None;
-    }
-
-    /// # Safety
-    ///
-    /// The caller must ensure that `link` doesn't belong to another list.
-    unsafe fn insert_node(&mut self, mut link: Ptr) {
-        // This one is ours now, and we can create a &mut.
-        let value = unsafe { link.as_mut() };
-        value.next = None;
-        value.prev = self.tail;
-
-        match self.tail {
-            // SAFETY: If tail is not null, then tail is allocated from the arena.
-            Some(mut tail) => unsafe { tail.as_mut().next = Some(link) },
-            None => self.head = Some(link),
-        }
-
-        self.tail = Some(link);
-    }
-}
-
-impl List {
-    pub fn insert(&mut self, value: Ref) -> Result<Ptr> {
-        let link = HR_ARENA.allocate()?;
-        // SAFETY: The pointer is allocated from the arena.
-        unsafe { link.as_ptr().write(value) };
-
-        // SAFETY: The node is freshly allocated.
-        unsafe { self.insert_node(link) };
-        self.len += 1;
-
-        Ok(link)
-    }
-
-    pub fn remove(&mut self, link: Ptr) -> Result<Ref> {
-        let mut cur = self.head;
-        loop {
-            cur = match cur {
-                Some(cur) if cur == link => {
-                    // SAFETY: The pointer is ours.
-                    unsafe { self.splice_nodes(cur, cur) };
-                    self.len -= 1;
-
-                    // SAFETY: The pointer will be no longer read again and the ownership is moved
-                    // to `value`.
-                    let value = unsafe { cur.as_ptr().read() };
-                    // SAFETY: The pointer is ours.
-                    let _ = unsafe { HR_ARENA.deallocate(cur) };
-
-                    break Ok(value);
-                }
-                // SAFETY: The pointer is allocated from the arena.
-                Some(cur) => unsafe { cur.as_ref().next },
-                None => break Err(sv_call::ENOENT),
-            }
-        }
-    }
-
-    pub(super) fn split<I, F>(&mut self, iter: I, check: F) -> Result<List>
-    where
-        I: Iterator<Item = Result<Ptr>>,
-        F: Fn(&Ref) -> Result,
-    {
-        let mut ret = List::new();
-
-        let mut cnt = 0;
-        for ptr in iter {
-            let link = match ptr {
-                Err(err) => {
-                    self.merge(&mut ret);
-                    return Err(err);
-                }
-                Ok(link) => match check(unsafe { link.as_ref() }) {
-                    Ok(()) => link,
-                    Err(err) => {
-                        self.merge(&mut ret);
-                        return Err(err);
-                    }
-                },
-            };
-            unsafe {
-                self.splice_nodes(link, link);
-                ret.insert_node(link);
-            }
-            cnt += 1;
-        }
-        ret.len = cnt;
-        self.len -= cnt;
-
-        Ok(ret)
-    }
-
-    pub(super) fn merge(&mut self, other: &mut List) -> Iter {
-        let mut start = match other.head {
-            Some(head) => head,
-            None => return Iter::empty(),
-        };
-        let mut end = match other.tail {
-            Some(tail) => tail,
-            None => return Iter::empty(),
-        };
-        let list = mem::take(other);
-        let len = list.len;
-        mem::forget(list);
-
-        let (start, end) = unsafe {
-            start.as_mut().prev = self.tail;
-            end.as_mut().next = None;
-            (Some(start), Some(end))
-        };
-
-        match self.tail {
-            // SAFETY: If tail is not null, then tail is allocated from the arena.
-            Some(mut tail) => unsafe { tail.as_mut().next = start },
-            None => self.head = start,
-        }
-
-        self.tail = end;
-        self.len += len;
-
-        Iter {
-            head: start,
-            len,
-            _marker: PhantomData,
-        }
-    }
-
-    pub fn iter(&self) -> Iter {
-        Iter {
-            head: self.head,
-            len: self.len,
-            _marker: PhantomData,
-        }
-    }
-}
-
-impl fmt::Debug for List {
-    #[inline]
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_list().entries(self.iter()).finish()
-    }
-}
-
-impl Drop for List {
-    fn drop(&mut self) {
-        while let Some(head) = self.head {
-            let _ = self.remove(head);
-        }
-    }
-}
-
-pub struct Iter<'a> {
-    head: Option<Ptr>,
-    len: usize,
-    _marker: PhantomData<&'a Ref>,
-}
-
-impl<'a> Iter<'a> {
-    #[inline]
-    pub fn empty() -> Self {
-        Iter {
-            head: None,
-            len: 0,
-            _marker: PhantomData,
-        }
-    }
-}
-
-impl<'a> Iterator for Iter<'a> {
-    type Item = &'a Ref;
-
-    #[inline]
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.len == 0 {
-            None
-        } else {
-            self.head.map(|head| unsafe {
-                let ret = head.as_ref();
-                self.head = ret.next;
-                self.len -= 1;
-                ret
-            })
-        }
-    }
-
-    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.len, Some(self.len))
-    }
-}
-
-impl<'a> ExactSizeIterator for Iter<'a> {}
-
-impl<'a> FusedIterator for Iter<'a> {}
-
-#[inline]
-pub fn decode(index: usize) -> Result<Ptr> {
-    PREEMPT.scope(|| HR_ARENA.get_ptr(index))
-}
-
-#[inline]
-pub fn encode(value: Ptr) -> Result<usize> {
-    PREEMPT.scope(|| HR_ARENA.get_index(value))
-}

+ 6 - 14
h2o/kernel/src/sched/task/idle.rs

@@ -1,7 +1,7 @@
-use core::hint;
+use crossbeam_queue::SegQueue;
 
 use super::*;
-use crate::{cpu::Lazy, mem::space, sched::deque};
+use crate::{cpu::Lazy, mem::space};
 
 /// Context dropper - used for dropping kernel stacks of threads.
 ///
@@ -12,8 +12,8 @@ use crate::{cpu::Lazy, mem::space, sched::deque};
 ///
 /// [`task_exit`]: crate::sched::task::syscall::task_exit
 #[thread_local]
-pub(super) static CTX_DROPPER: Lazy<deque::Injector<alloc::boxed::Box<Context>>> =
-    Lazy::new(deque::Injector::new);
+pub(super) static CTX_DROPPER: Lazy<SegQueue<alloc::boxed::Box<Context>>> =
+    Lazy::new(SegQueue::new);
 
 #[thread_local]
 pub(super) static IDLE: Lazy<Tid> = Lazy::new(|| {
@@ -22,7 +22,7 @@ pub(super) static IDLE: Lazy<Tid> = Lazy::new(|| {
     let ti = TaskInfo::builder()
         .from(Default::default())
         .excep_chan(Arsc::try_new(Default::default()).expect("Failed to create task info"))
-        .name(format!("IDLE{}", cpu))
+        .name(format!("IDLE{cpu}"))
         .ty(Type::Kernel)
         .affinity(crate::cpu::current_mask())
         .build()
@@ -57,16 +57,8 @@ fn idle(cpu: usize, fs_base: u64) -> ! {
         boot::setup();
     }
 
-    let worker = deque::Worker::new_fifo();
     loop {
-        match CTX_DROPPER.steal_batch(&worker) {
-            deque::Steal::Empty | deque::Steal::Retry => hint::spin_loop(),
-            deque::Steal::Success(_) => {
-                while let Some(obj) = worker.pop() {
-                    drop(obj);
-                }
-            }
-        }
+        drop(CTX_DROPPER.pop());
         let _ = crate::sched::SCHED.with_current(|cur| {
             cur.running_state = RunningState::NEED_RESCHED;
             Ok(())
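
The context dropper no longer needs work-stealing batches: `crossbeam_queue::SegQueue` is a lock-free MPMC queue, and `drop(queue.pop())` is a no-op when the queue is empty. A sketch of the pattern (assuming the crossbeam-queue crate as a dependency):

    use crossbeam_queue::SegQueue; // crossbeam-queue crate

    struct Context(&'static str);

    impl Drop for Context {
        fn drop(&mut self) {
            println!("dropping the kernel stack of {}", self.0);
        }
    }

    fn main() {
        let dropper: SegQueue<Box<Context>> = SegQueue::new();
        dropper.push(Box::new(Context("worker-1")));
        dropper.push(Box::new(Context("worker-2")));

        // The idle-loop body: `pop` returns an Option, and dropping `None`
        // is a no-op, so no emptiness check or batching is needed.
        while !dropper.is_empty() {
            drop(dropper.pop());
        }
    }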

+ 7 - 5
h2o/kernel/src/sched/task/sm.rs

@@ -1,6 +1,7 @@
 use alloc::{boxed::Box, string::String, sync::Arc};
 use core::{
     fmt,
+    mem::ManuallyDrop,
     ops::{Deref, DerefMut},
     time::Duration,
 };
@@ -86,7 +87,7 @@ impl TaskInfo {
 
 #[derive(Debug)]
 pub struct Context {
-    pub(in crate::sched) tid: Tid,
+    pub(in crate::sched) tid: ManuallyDrop<Tid>,
 
     pub(in crate::sched) space: Arc<Space>,
     pub(in crate::sched) kstack: ctx::Kstack,
@@ -166,7 +167,7 @@ impl RunningState {
 impl fmt::Debug for RunningState {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self.start_time() {
-            Some(st) => write!(f, "Running({:?})", st),
+            Some(st) => write!(f, "Running({st:?})"),
             None => {
                 if self.needs_resched() {
                     f.write_str("NeedResched")
@@ -210,7 +211,7 @@ impl Init {
     pub fn new(tid: Tid, space: Arc<Space>, kstack: ctx::Kstack, ext_frame: ctx::ExtFrame) -> Self {
         Init {
             ctx: Box::new(Context {
-                tid,
+                tid: ManuallyDrop::new(tid),
                 space,
                 kstack,
                 ext_frame,
@@ -252,8 +253,9 @@ impl Ready {
         }
     }
 
-    pub fn exit(this: Self, retval: usize) {
-        tid::deallocate(&this.ctx.tid);
+    pub fn exit(mut this: Self, retval: usize) {
+        // SAFETY: The context won't be dropped twice.
+        tid::deallocate(unsafe { ManuallyDrop::take(&mut this.ctx.tid) });
         *this.ctx.tid.ret_cell.lock() = Some(retval);
         this.ctx.tid.event.notify(0, SIG_READ);
         idle::CTX_DROPPER.push(this.ctx);
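
Wrapping `tid` in `ManuallyDrop` lets the exit path move the `Tid` out exactly once (deallocating it eagerly) while the surrounding `Context` is recycled later by the context dropper without dropping the field a second time. The core pattern in isolation:

    use std::mem::ManuallyDrop;

    struct Tid(u64);
    struct Context {
        tid: ManuallyDrop<Tid>,
    }

    fn exit(mut ctx: Context) {
        // SAFETY: the field is taken exactly once and never taken or
        // dropped again afterwards.
        let tid = unsafe { ManuallyDrop::take(&mut ctx.tid) };
        println!("deallocating TID {}", tid.0);
        // `ctx` is dropped here; `ctx.tid` is not dropped a second time.
    }

    fn main() {
        exit(Context { tid: ManuallyDrop::new(Tid(7)) });
    }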

+ 8 - 8
h2o/kernel/src/sched/task/space.rs

@@ -1,5 +1,5 @@
 use alloc::sync::Arc;
-use core::sync::atomic::{AtomicU32, Ordering::*};
+use core::sync::atomic::{AtomicU64, Ordering::*};
 
 use sv_call::Feature;
 
@@ -17,20 +17,20 @@ pub struct Space {
     mem: Arc<mem::space::Space>,
     handles: HandleMap,
     futexes: Futexes,
-    main: AtomicU32,
+    main: AtomicU64,
 }
 
 unsafe impl Send for Space {}
 unsafe impl Sync for Space {}
 
 impl Space {
-    pub fn new(ty: super::Type) -> sv_call::Result<Arc<Self>> {
-        let mem = mem::space::Space::try_new(ty)?;
+    pub fn new() -> sv_call::Result<Arc<Self>> {
+        let mem = mem::space::Space::try_new(super::Type::User)?;
         Ok(Arc::try_new(Space {
             mem,
             handles: HandleMap::new(),
             futexes: Default::default(),
-            main: AtomicU32::new(0),
+            main: AtomicU64::new(0),
         })?)
     }
 
@@ -39,7 +39,7 @@ impl Space {
             mem: mem::space::with_current(Arc::clone),
             handles: HandleMap::new(),
             futexes: Default::default(),
-            main: AtomicU32::new(0),
+            main: AtomicU64::new(0),
         })
     }
 
@@ -82,10 +82,10 @@ impl Space {
         let _ = self.futexes.remove_if(&key, |futex| futex.is_empty());
     }
 
-    pub fn child(&self, hdl: sv_call::Handle, need_feature: Feature) -> sv_call::Result<Tid> {
+    pub fn child(&self, hdl: sv_call::Handle) -> sv_call::Result<Tid> {
         super::PREEMPT.scope(|| {
             self.handles().get::<Tid>(hdl).and_then(|obj| {
-                if obj.features().contains(need_feature) {
+                if obj.features().contains(Feature::EXECUTE) {
                     Ok(Tid::clone(&obj))
                 } else {
                     Err(sv_call::EPERM)

+ 7 - 8
h2o/kernel/src/sched/task/syscall.rs

@@ -171,10 +171,7 @@ fn task_new(
             tid,
         }
     };
-    SCHED.with_current(|cur| {
-        let st_h = cur.space().handles().insert(st_data, None)?;
-        unsafe { st.write(st_h) }
-    })?;
+    SCHED.with_current(|cur| st.write(cur.space().handles().insert(st_data, None)?))?;
 
     Ok(hdl)
 }
@@ -185,8 +182,10 @@ fn task_join(hdl: Handle, retval: UserPtr<Out, usize>) -> Result {
 
     SCHED.with_current(|cur| {
         let handles = cur.space().handles();
-        let val =
-            { handles.get::<Tid>(hdl) }.and_then(|tid| tid.ret_cell().lock().ok_or(ENOENT))?;
+        let val = match handles.get::<Tid>(hdl) {
+            Ok(tid) => tid.ret_cell().lock().ok_or(ENOENT)?,
+            Err(e) => return Err(e),
+        };
 
         drop(handles.remove::<Tid>(hdl));
         unsafe { retval.write(val) }
@@ -201,7 +200,7 @@ fn task_ctl(hdl: Handle, op: u32, data: UserPtr<InOut, Handle>) -> Result {
 
     match op {
         task::TASK_CTL_KILL => {
-            let child = cur.child(hdl, Feature::EXECUTE)?;
+            let child = cur.child(hdl)?;
             child.with_signal(|sig| *sig = Some(Signal::Kill));
 
             Ok(())
@@ -209,7 +208,7 @@ fn task_ctl(hdl: Handle, op: u32, data: UserPtr<InOut, Handle>) -> Result {
         task::TASK_CTL_SUSPEND => {
             data.check()?;
 
-            let child = cur.child(hdl, Feature::EXECUTE)?;
+            let child = cur.child(hdl)?;
 
             let st = SuspendToken {
                 slot: Arsc::try_new(Mutex::new(None))?,

+ 32 - 20
h2o/kernel/src/sched/task/tid.rs

@@ -1,8 +1,13 @@
 use alloc::sync::{Arc, Weak};
-use core::{hash::BuildHasherDefault, num::NonZeroU32, ops::Deref};
+use core::{
+    hash::BuildHasherDefault,
+    num::NonZeroU64,
+    ops::Deref,
+    sync::atomic::{AtomicU64, AtomicUsize, Ordering::*},
+};
 
 use archop::Azy;
-use collection_ex::{CHashMap, FnvHasher, IdAllocator};
+use collection_ex::{CHashMap, FnvHasher};
 use sv_call::Feature;
 
 use super::{hdl::DefaultFeature, TaskInfo};
@@ -11,19 +16,18 @@ use crate::sched::PREEMPT;
 pub const NR_TASKS: usize = 65536;
 
 type BH = BuildHasherDefault<FnvHasher>;
-static TI_MAP: Azy<CHashMap<u32, Arc<TaskInfo>, BH>> = Azy::new(Default::default);
-static TID_ALLOC: Azy<spin::Mutex<IdAllocator>> =
-    Azy::new(|| spin::Mutex::new(IdAllocator::new(0..=(NR_TASKS as u64 - 1))));
+static TI_MAP: Azy<CHashMap<u64, Arc<TaskInfo>, BH>> = Azy::new(Default::default);
+static TASK_COUNT: AtomicUsize = AtomicUsize::new(0);
 
 #[derive(Debug, Clone)]
 pub struct Tid {
-    raw: NonZeroU32,
+    raw: NonZeroU64,
     ti: Arc<TaskInfo>,
 }
 
 #[derive(Debug, Clone)]
 pub struct WeakTid {
-    raw: Option<NonZeroU32>,
+    raw: Option<NonZeroU64>,
     ti: Weak<TaskInfo>,
 }
 
@@ -38,7 +42,7 @@ impl Deref for Tid {
 
 impl Tid {
     #[inline]
-    pub fn raw(&self) -> u32 {
+    pub fn raw(&self) -> u64 {
         self.raw.get()
     }
 
@@ -60,7 +64,7 @@ impl WeakTid {
         }
     }
 
-    pub fn raw(&self) -> Option<u32> {
+    pub fn raw(&self) -> Option<u64> {
         self.raw.map(|raw| raw.get())
     }
 
@@ -91,11 +95,18 @@ unsafe impl DefaultFeature for Tid {
     }
 }
 
-fn next() -> Option<NonZeroU32> {
-    let mut alloc = TID_ALLOC.lock();
-    alloc
-        .allocate()
-        .and_then(|id| NonZeroU32::new((id + 1) as u32))
+fn next() -> Option<NonZeroU64> {
+    static GEN: AtomicU64 = AtomicU64::new(1);
+    let mut old = TASK_COUNT.load(Acquire);
+    loop {
+        if old >= NR_TASKS {
+            return None;
+        }
+        match TASK_COUNT.compare_exchange(old, old + 1, SeqCst, SeqCst) {
+            Ok(_) => return NonZeroU64::new(GEN.fetch_add(1, Relaxed)),
+            Err(m) => old = m,
+        }
+    }
 }
 
 /// # Errors
@@ -114,16 +125,17 @@ pub fn allocate(ti: TaskInfo) -> sv_call::Result<Tid> {
     }
 }
 
-pub fn deallocate(tid: &Tid) -> bool {
+pub fn deallocate(tid: Tid) -> bool {
     let _flags = PREEMPT.lock();
-    TI_MAP.remove(&tid.raw.get()).map_or(false, |_| {
-        TID_ALLOC.lock().deallocate(u64::from(tid.raw.get()));
-        true
-    })
+    TI_MAP
+        .remove(&tid.raw.get())
+        .inspect(|_| {
+            TASK_COUNT.fetch_sub(1, SeqCst);
+        })
+        .is_some()
 }
 
 #[inline]
 pub fn init() {
     Azy::force(&TI_MAP);
-    Azy::force(&TID_ALLOC);
 }
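
The new allocator separates two concerns: `TASK_COUNT` only bounds the number of live tasks (which is why `deallocate` merely decrements it), while the ever-increasing `GEN` counter hands out IDs that are never reused. The same logic, runnable on its own:

    use std::num::NonZeroU64;
    use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering::*};

    const NR_TASKS: usize = 65536;

    static TASK_COUNT: AtomicUsize = AtomicUsize::new(0);
    static GEN: AtomicU64 = AtomicU64::new(1);

    fn next() -> Option<NonZeroU64> {
        let mut old = TASK_COUNT.load(Acquire);
        loop {
            if old >= NR_TASKS {
                return None; // too many live tasks
            }
            match TASK_COUNT.compare_exchange(old, old + 1, SeqCst, SeqCst) {
                Ok(_) => return NonZeroU64::new(GEN.fetch_add(1, Relaxed)),
                Err(current) => old = current, // lost the race; retry
            }
        }
    }

    fn main() {
        let a = next().unwrap();
        TASK_COUNT.fetch_sub(1, SeqCst); // what `deallocate` does
        let b = next().unwrap();
        assert_ne!(a, b); // the slot is reused, the ID is not
    }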

+ 2 - 4
h2o/kernel/src/sched/wait/futex.rs

@@ -123,10 +123,8 @@ mod syscall {
         let requeue = futex.requeue(&other, requeue)?;
         drop(pree);
 
-        unsafe {
-            wake_num.write(wake)?;
-            requeue_num.write(requeue)?;
-        }
+        wake_num.write(wake)?;
+        requeue_num.write(requeue)?;
 
         Ok(())
     }

+ 1 - 15
h2o/kernel/syscall/interrupt.json

@@ -22,32 +22,18 @@
             ]
         },
         {
-            "name": "sv_intr_wait",
+            "name": "sv_intr_query",
             "returns": "()",
             "args": [
                 {
                     "name": "hdl",
                     "ty": "Handle"
                 },
-                {
-                    "name": "timeout_us",
-                    "ty": "u64"
-                },
                 {
                     "name": "last_time",
                     "ty": "*mut ()"
                 }
             ]
-        },
-        {
-            "name": "sv_intr_drop",
-            "returns": "()",
-            "args": [
-                {
-                    "name": "hdl",
-                    "ty": "Handle"
-                }
-            ]
         }
     ]
 }

+ 1 - 1
h2o/libs/collection_ex/Cargo.toml

@@ -10,5 +10,5 @@ version = "0.1.0"
 bitop_ex = {path = "../bitop_ex"}
 iter_ex = {path = "../iter_ex"}
 # External crates
-bitvec = {version = "0.22", default-features = false, features = ["atomic", "alloc"]}
+bitvec = {version = "1.0", default-features = false, features = ["atomic", "alloc"]}
 spin = {version = "0.9", features = ["use_ticket_mutex"]}

+ 79 - 16
h2o/libs/collection_ex/src/chash_map.rs

@@ -36,7 +36,7 @@ impl<'a, K, V, S> Deref for ReadGuard<'a, K, V, S> {
 }
 
 pub struct WriteGuard<'a, K, V, S> {
-    _buckets: RwLockReadGuard<'a, inner::Buckets<K, V, S>>,
+    buckets: RwLockReadGuard<'a, inner::Buckets<K, V, S>>,
     inner: RwLockWriteGuard<'a, inner::Entry<(K, V)>>,
 }
 
@@ -47,7 +47,7 @@ impl<'a, K, V, S> WriteGuard<'a, K, V, S> {
 
     pub fn downgrade(self) -> ReadGuard<'a, K, V, S> {
         ReadGuard {
-            _buckets: self._buckets,
+            _buckets: self.buckets,
             inner: self.inner.downgrade(),
         }
     }
@@ -168,32 +168,41 @@ impl<K, V, S: BuildHasher + Default> CHashMap<K, V, S> {
         let entry = unsafe { &*(&buckets as *const RwLockReadGuard<inner::Buckets<K, V, S>>) }
             .find_write(key, |entry| entry.contains_key(key));
         entry.map(|entry| WriteGuard {
-            _buckets: buckets,
+            buckets,
             inner: entry,
         })
     }
 
-    pub fn insert(&self, key: K, value: V) -> Option<(K, V)>
+    pub fn insert(&self, mut key: K, mut value: V) -> Option<(K, V)>
     where
         K: Hash + PartialEq,
     {
         loop {
             let buckets = self.inner.read();
-            let old = match buckets.entry(&key) {
-                Some(mut entry) => mem::replace(&mut *entry, inner::Entry::Data((key, value))),
-                None => {
-                    hint::spin_loop();
-                    continue;
-                }
+
+            let res = match buckets.entry(&key) {
+                Some(mut entry) => Ok(mem::replace(&mut *entry, inner::Entry::Data((key, value)))),
+                None => Err((key, value)),
             };
-            if old.is_free() {
-                let len = self.len.fetch_add(1, SeqCst) + 1;
-                if len * LOAD_FACTOR_D >= buckets.len() * LOAD_FACTOR_N {
+
+            match res {
+                Ok(old) => {
+                    if old.is_free() {
+                        let len = self.len.fetch_add(1, SeqCst) + 1;
+                        if len * LOAD_FACTOR_D >= buckets.len() * LOAD_FACTOR_N {
+                            drop(buckets);
+                            self.grow(len);
+                        }
+                    }
+                    break old.into();
+                }
+                Err(new) => {
+                    let len = self.len.load(SeqCst);
                     drop(buckets);
-                    self.grow(len);
+                    self.grow(len * GROW_FACTOR);
+                    (key, value) = new;
                 }
             }
-            break old.into();
         }
     }
 
@@ -224,11 +233,27 @@ impl<K, V, S: BuildHasher + Default> CHashMap<K, V, S> {
         }
 
         WriteGuard {
-            _buckets: buckets,
+            buckets,
             inner: entry,
         }
     }
 
+    /// # Safety
+    ///
+    /// `guard` must come from this map's [`Self::get_mut`] method.
+    pub unsafe fn remove_from(&self, mut guard: WriteGuard<'_, K, V, S>) -> (K, V)
+    where
+        K: Hash,
+    {
+        let ret = mem::replace(&mut *guard.inner, inner::Entry::Removed);
+        let len = self.len.fetch_sub(1, SeqCst) - 1;
+        if len * GROW_FACTOR * LOAD_FACTOR_D < guard.buckets.len() * LOAD_FACTOR_N {
+            drop(guard);
+            self.shrink(len);
+        }
+        Option::<(K, V)>::from(ret).unwrap()
+    }
+
     pub fn remove_entry_if<Q, F>(&self, key: &Q, predicate: F) -> Option<(K, V)>
     where
         Q: Hash + PartialEq,
@@ -253,6 +278,33 @@ impl<K, V, S: BuildHasher + Default> CHashMap<K, V, S> {
         ret.into()
     }
 
+    pub fn try_remove_entry<Q, F, E>(&self, key: &Q, predicate: F) -> Result<(K, V), Option<E>>
+    where
+        Q: Hash + PartialEq,
+        K: Borrow<Q> + Hash,
+        F: FnOnce(&V) -> Result<(), E>,
+    {
+        let buckets = self.inner.read();
+        let ret = match buckets.entry(key) {
+            Some(mut entry) => match entry.get() {
+                Some((_, v)) => match predicate(v) {
+                    Ok(()) => mem::replace(&mut *entry, inner::Entry::Removed),
+                    Err(err) => return Err(Some(err)),
+                },
+                None => return Err(None),
+            },
+            None => return Err(None),
+        };
+        if !ret.is_free() {
+            let len = self.len.fetch_sub(1, SeqCst) - 1;
+            if len * GROW_FACTOR * LOAD_FACTOR_D < buckets.len() * LOAD_FACTOR_N {
+                drop(buckets);
+                self.shrink(len);
+            }
+        }
+        Option::from(ret).ok_or(None)
+    }
+
     #[inline]
     pub fn remove_entry<Q>(&self, key: &Q) -> Option<(K, V)>
     where
@@ -281,6 +333,17 @@ impl<K, V, S: BuildHasher + Default> CHashMap<K, V, S> {
         self.remove_entry(key).map(|ret| ret.1)
     }
 
+    #[inline]
+    pub fn try_remove<Q, F, E>(&self, key: &Q, predicate: F) -> Result<V, Option<E>>
+    where
+        Q: Hash + PartialEq,
+        K: Borrow<Q> + Hash,
+        F: FnOnce(&V) -> Result<(), E>,
+    {
+        self.try_remove_entry(key, predicate)
+            .map(|(_, value)| value)
+    }
+
     pub fn retain_mut<F>(&self, predicate: F)
     where
         F: Fn(&K, &mut V) -> bool,
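
`try_remove` gives the handle map its atomic check-then-remove: `Err(None)` means the key was absent, `Err(Some(e))` means the predicate vetoed the removal, and `Ok(v)` means the entry was removed. A sketch of the same contract over a plain `HashMap` (non-concurrent, for illustration only):

    use std::collections::HashMap;

    fn try_remove<K, V, E>(
        map: &mut HashMap<K, V>,
        key: &K,
        predicate: impl FnOnce(&V) -> Result<(), E>,
    ) -> Result<V, Option<E>>
    where
        K: std::hash::Hash + Eq,
    {
        match map.get(key) {
            None => Err(None), // no such key
            Some(v) => match predicate(v) {
                Err(e) => Err(Some(e)), // removal vetoed
                Ok(()) => Ok(map.remove(key).unwrap()),
            },
        }
    }

    fn main() {
        let mut map = HashMap::from([(1, "send-ok"), (2, "no-send")]);
        // Veto entries that may not be transferred, as `HandleMap::split` does.
        let veto = |v: &&str| if v.starts_with("no-") { Err("EPERM") } else { Ok(()) };
        assert_eq!(try_remove(&mut map, &1, veto), Ok("send-ok"));
        assert_eq!(try_remove(&mut map, &2, veto), Err(Some("EPERM")));
        assert_eq!(try_remove(&mut map, &3, veto), Err(None));
    }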

+ 1 - 1
h2o/libs/collection_ex/src/id_alloc.rs

@@ -90,7 +90,7 @@ impl IdAllocator {
             None => return,
         };
 
-        let r = match bvec.get_mut(sec) {
+        let mut r = match bvec.get_mut(sec) {
             Some(r) => r,
             None => return,
         };

+ 5 - 10
h2o/libs/collection_ex/src/range_map.rs

@@ -29,15 +29,10 @@ impl<K, V> RangeMap<K, V> {
         &self.range
     }
 
-    pub fn allocate_with<F, E, R>(
-        &mut self,
-        size: K,
-        value: F,
-        no_fit: impl Into<E>,
-    ) -> Result<(K, R), E>
+    pub fn allocate_with<F, E>(&mut self, size: K, value: F, no_fit: E) -> Result<K, E>
     where
         K: Ord + Sub<Output = K> + Add<Output = K> + Copy,
-        F: FnOnce(Range<K>) -> Result<(V, R), E>,
+        F: FnOnce(Range<K>) -> Result<V, E>,
     {
         let mut range = None;
 
@@ -56,11 +51,11 @@ impl<K, V> RangeMap<K, V> {
 
         if let Some(range) = range {
             let start = range.start;
-            let (value, ret) = value(range.clone())?;
+            let value = value(range.clone())?;
             self.inner.entry(start).or_insert((range, value));
-            Ok((start, ret))
+            Ok(start)
         } else {
-            Err(no_fit.into())
+            Err(no_fit)
         }
     }
 

+ 1 - 1
h2o/libs/heap/Cargo.toml

@@ -17,7 +17,7 @@ paging = {path = "../paging"}
 pmm = {path = "../pmm"}
 # External crates
 array-macro = "2.1"
-bitvec = {version = "0.22", default-features = false, features = ["atomic"]}
+bitvec = {version = "1.0", default-features = false, features = ["atomic"]}
 cfg-if = "1.0"
 intrusive-collections = {version = "0.9", default-features = false, features = ["nightly"]}
 log = "0.4"

+ 2 - 0
h2o/libs/heap/src/lib.rs

@@ -126,6 +126,7 @@ pub fn test(a: &impl core::alloc::GlobalAlloc, start_seed: usize) {
                 let layout = core::alloc::Layout::from_size_align(seed, seed.next_power_of_two())
                     .expect("Invalid layout");
                 *u = (allocator.alloc(layout), layout);
+                assert!(!u.0.is_null(), "allocation failed");
                 seed = random(seed);
             }
 
@@ -137,6 +138,7 @@ pub fn test(a: &impl core::alloc::GlobalAlloc, start_seed: usize) {
                 let layout = core::alloc::Layout::from_size_align(seed, seed.next_power_of_two())
                     .expect("Invalid layout");
                 *w = (allocator.alloc(layout), layout);
+                assert!(!w.0.is_null(), "allocation failed");
                 seed = random(seed);
             }
 

+ 1 - 1
h2o/libs/heap/src/page.rs

@@ -137,7 +137,7 @@ impl Page {
     pub fn init(&mut self, sz: usize) {
         self.link = RBTreeLink::new();
         self.objsize = sz;
-        self.used = BitArray::zeroed();
+        self.used.as_raw_mut_slice().fill(0);
         self.used_count = 0;
 
         let hdrcnt = self.header_count();

+ 6 - 1
h2o/libs/minfo/src/lib.rs

@@ -10,6 +10,8 @@ pub const KARGS_BASE: usize = 0x1000;
 
 pub const TRAMPOLINE_RANGE: core::ops::Range<usize> = 0..0x100000;
 
+pub const LAPIC_BASE: usize = 0xFEE0_0000;
+
 pub const INITIAL_ID_SPACE: usize = 0x1_0000_0000;
 
 pub use pmm::{KMEM_PHYS_BASE, PF_SIZE};
@@ -22,8 +24,11 @@ pub const USER_END: usize = 0x7FFF_0000_0000;
 
 pub const KERNEL_SPACE_START: usize = 0xFFFF_8000_0000_0000;
 
+/// WARN: The range must contain exactly one page of 512 GiB (i.e. the
+/// largest page size). If the kernel memory space is ever exhausted, be sure
+/// to make the corresponding modifications to `KERNEL_ROOT` in the kernel
+/// crate!
 pub const KERNEL_ALLOCABLE_RANGE: core::ops::Range<usize> =
-    0xFFFF_A000_0000_0000..0xFFFF_F000_0000_0000;
+    0xFFFF_A000_0000_0000..0xFFFF_A080_0000_0000;
 
 pub const ID_OFFSET: usize = KERNEL_SPACE_START;
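
A quick sanity check of the narrowed range above: it spans exactly one 512 GiB (2^39-byte) top-level page and is aligned to that size (assuming a 64-bit target):

    fn main() {
        const START: usize = 0xFFFF_A000_0000_0000;
        const END: usize = 0xFFFF_A080_0000_0000;
        const PAGE_512G: usize = 1 << 39; // 2^39 bytes = 512 GiB
        assert_eq!(END - START, PAGE_512G);
        assert_eq!(START % PAGE_512G, 0);
    }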
 

+ 4 - 4
h2o/libs/paging/src/addr.rs

@@ -127,16 +127,16 @@ impl From<u64> for LAddr {
     }
 }
 
-impl From<*const u8> for LAddr {
+impl<T> From<*const T> for LAddr {
     #[inline]
-    fn from(val: *const u8) -> Self {
+    fn from(val: *const T) -> Self {
         LAddr(val as _)
     }
 }
 
-impl From<*mut u8> for LAddr {
+impl<T> From<*mut T> for LAddr {
     #[inline]
-    fn from(val: *mut u8) -> Self {
+    fn from(val: *mut T) -> Self {
         LAddr(val as _)
     }
 }
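
Generalizing the `From` impls from `*const u8`/`*mut u8` to any `*const T`/`*mut T` removes the intermediate `as *const u8` cast at call sites. A stand-alone illustration, with `LAddr` re-modeled here as a simple newtype:

    #[derive(Debug, Clone, Copy)]
    struct LAddr(*mut u8);

    impl<T> From<*const T> for LAddr {
        fn from(val: *const T) -> Self {
            LAddr(val as _) // pointer cast to *mut u8
        }
    }

    fn main() {
        let x: u64 = 7;
        // No `as *const u8` needed at the call site anymore.
        let addr = LAddr::from(&x as *const u64);
        println!("{addr:?}");
    }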

+ 1 - 3
h2o/libs/syscall/src/lib.rs

@@ -19,12 +19,10 @@ pub mod task;
 
 pub use sv_gen::*;
 
-#[cfg(all(not(feature = "stub"), feature = "call"))]
-pub use self::call::*;
 #[cfg(feature = "stub")]
 pub use self::stub::*;
 pub use self::{
-    call::{hdl::Handle, reg::*, Syscall},
+    call::{hdl::Handle, reg::*, Syscall, *},
     error::*,
     feat::*,
 };

+ 3 - 3
h2o/tinit/build.rs

@@ -5,7 +5,7 @@ use std::{env, error::Error, path::Path};
 fn asm_build(input: &str, output: &str, flags: &[&str]) -> Result<(), Box<dyn Error>> {
     use std::process::Command;
 
-    println!("cargo:rerun-if-changed={}", input);
+    println!("cargo:rerun-if-changed={input}");
     let mut cmd = Command::new("nasm");
     cmd.args([input, "-o", output])
         .args(flags)
@@ -23,10 +23,10 @@ fn main() -> Result<(), Box<dyn Error>> {
         dst_name += ".o";
 
         let src_path = file.path();
-        let dst_path = format!("{}/{}", target_dir, dst_name);
+        let dst_path = format!("{target_dir}/{dst_name}");
 
         asm_build(src_path.to_str().unwrap(), &dst_path, &["-f", "elf64"])?;
-        println!("cargo:rustc-link-arg={}", dst_path);
+        println!("cargo:rustc-link-arg={dst_path}");
         println!("cargo:rerun-if-changed={}", src_path.to_str().unwrap());
     }
 

+ 6 - 1
h2o/tinit/src/test/ipc.rs

@@ -120,7 +120,9 @@ pub unsafe fn test(virt: &Virt, stack: (*mut u8, *mut u8, Handle)) {
                 .expect("Failed to send the response");
 
             ::log::trace!("Finished");
-            sv_task_exit(0, false).into_res().expect("Failed to exit the task");
+            sv_task_exit(0, false)
+                .into_res()
+                .expect("Failed to exit the task");
         }
 
         let other = {
@@ -168,6 +170,9 @@ pub unsafe fn test(virt: &Virt, stack: (*mut u8, *mut u8, Handle)) {
             .expect("Failed to drop the event in master");
 
         let mut retval = Default::default();
+        sv_obj_wait(other, u64::MAX, true, false, SIG_READ)
+            .into_res()
+            .expect("Failed to wait for the task");
         sv_task_join(other, &mut retval)
             .into_res()
             .expect("Failed to join the task");

+ 1 - 1
scripts/genimg.sh

@@ -8,7 +8,7 @@ tar vcf H2O.k KERNEL TINIT BOOT.fs
 cd img
 
 rm -f efi.img
-dd if=/dev/zero of=efi.img bs=1k count=23040
+dd if=/dev/zero of=efi.img bs=1k count=46080
 mkfs.vfat efi.img
 sudo mount efi.img mnt
 sudo mkdir -p mnt/EFI/BOOT

+ 3 - 1
scripts/install.sh

@@ -1,7 +1,9 @@
+#!/bin/bash
+
 mkdir -p target/img/mnt
 cd target
 
-tar vcf H2O.k KERNEL TINIT
+tar vcf H2O.k KERNEL TINIT BOOT.fs
 
 cd img
 

+ 15 - 0
src/bin/devm/Cargo.toml

@@ -0,0 +1,15 @@
+[package]
+edition = "2021"
+name = "devm"
+version = "0.1.0"
+
+[dependencies]
+# Local crates
+solvent = {path = "../../lib/h2o_rs"}
+solvent-async = {path = "../../lib/h2o_async"}
+solvent-fs = {path = "../../lib/h2o_fs"}
+solvent-rpc = {path = "../../lib/h2o_rpc"}
+solvent-std = {path = "../../lib/h2o_std"}
+# External crates
+log = "0.4"
+futures-lite = {version = "1.12", default-features = false, features = ["alloc"]}

+ 30 - 0
src/bin/devm/src/device.rs

@@ -0,0 +1,30 @@
+use futures_lite::StreamExt;
+use solvent_rpc::{
+    ddk::driver::{DriverRequest, DriverServer},
+    Server,
+};
+
+pub async fn handle_driver(server: DriverServer) {
+    let (mut stream, _) = server.serve();
+    while let Some(request) = stream.next().await {
+        let request = match request {
+            Ok(request) => request,
+            Err(err) => {
+                log::warn!("RPC receive error: {err}");
+                continue;
+            }
+        };
+
+        let res = match request {
+            DriverRequest::CloseConnection { responder } => responder.send(()),
+            DriverRequest::Unknown(_) => {
+                log::warn!("unknown request received");
+                continue;
+            }
+        };
+
+        if let Err(err) = res {
+            log::warn!("RPC send error: {err}")
+        }
+    }
+}

+ 61 - 0
src/bin/devm/src/main.rs

@@ -0,0 +1,61 @@
+#![no_std]
+#![no_main]
+
+mod device;
+
+use alloc::vec;
+
+use solvent::prelude::{Channel, Phys};
+use solvent_fs::{process::Process, rpc::RpcNode, spawner};
+use solvent_rpc::{
+    io::{self, file::PhysOptions, OpenOptions},
+    sync::Client,
+};
+
+extern crate alloc;
+
+async fn main() {
+    let drvhost = driver_host().expect("Failed to get driver host");
+
+    let root_driver = "boot/drv/libpc.so";
+
+    let bootfs = solvent_fs::open_dir("/boot", OpenOptions::READ).expect("Failed to open bootfs");
+    let bootfs = bootfs.into_async().expect("Failed to get loader");
+
+    let mut vfs = vec![];
+    solvent_fs::fs::local()
+        .export(&mut vfs)
+        .expect("Failed to export vfs");
+    let (instance, server) = Channel::new();
+
+    vfs.push(("use/devm".into(), instance.into()));
+
+    let mut task = Process::builder()
+        .executable(drvhost, "drvhost")
+        .expect("Failed to set executable")
+        .arg(root_driver)
+        .load_dirs(vec![bootfs])
+        .expect("Failed to set load dirs")
+        .local_fs(vfs)
+        .build()
+        .await
+        .expect("Failed to build the process");
+    log::debug!("Starting the root driver");
+
+    let node = RpcNode::new(|server, _| async move { device::handle_driver(server).await });
+    node.open_conn(spawner(), Default::default(), server);
+
+    let ret = task.ajoin().await.expect("Failed to join the process");
+    assert_eq!(ret, 0);
+}
+
+fn driver_host() -> Result<Phys, io::Error> {
+    let drvhost = solvent_fs::open(
+        "boot/bin/drvhost",
+        OpenOptions::READ | OpenOptions::EXECUTE | OpenOptions::EXPECT_FILE,
+    )?;
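+    // Two `?`s: the first unwraps the RPC transport result, the second the
+    // file operation's own result.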
+    let drvhost = drvhost.phys(PhysOptions::Copy)??;
+    Ok(drvhost)
+}
+
+solvent_async::entry!(main, solvent_std, Some(1));

+ 16 - 0
src/bin/drvhost/Cargo.toml

@@ -0,0 +1,16 @@
+[package]
+edition = "2021"
+name = "drvhost"
+version = "0.1.0"
+
+[dependencies]
+# Local crates
+solvent = {path = "../../lib/h2o_rs"}
+solvent-async = {path = "../../lib/h2o_async"}
+solvent-ddk = {path = "../../lib/h2o_ddk", default-features = false}
+solvent-fs = {path = "../../lib/h2o_fs"}
+solvent-rpc = {path = "../../lib/h2o_rpc"}
+solvent-std = {path = "../../lib/h2o_std"}
+# External crates
+async-task = {version = "4.3", default-features = false}
+log = "0.4"

+ 38 - 0
src/bin/drvhost/src/ffi.rs

@@ -0,0 +1,38 @@
+use alloc::alloc::Global;
+use core::{
+    alloc::{Allocator, Layout},
+    ptr::NonNull,
+};
+
+use solvent_async::{global_executor, local_executor};
+use solvent_ddk::ffi::VTable;
+use solvent_fs::fs;
+
+#[no_mangle]
+unsafe extern "C" fn __h2o_ddk_alloc(size: usize, align: usize) -> *mut () {
+    let layout = Layout::from_size_align(size, align).unwrap();
+    Global
+        .allocate(layout)
+        .map_or(core::ptr::null_mut(), |ptr| ptr.as_ptr().cast())
+}
+
+#[no_mangle]
+unsafe extern "C" fn __h2o_ddk_dealloc(ptr: *mut (), size: usize, align: usize) {
+    if let (Some(ptr), Ok(layout)) = (
+        NonNull::new(ptr.cast()),
+        Layout::from_size_align(size, align),
+    ) {
+        Global.deallocate(ptr, layout)
+    }
+}
+
+pub fn vtable() -> VTable {
+    VTable {
+        global_exe: global_executor() as _,
+        local_exe: local_executor(|exe| exe as *const _),
+        local_fs: fs::local() as *const _,
+
+        alloc: __h2o_ddk_alloc,
+        dealloc: __h2o_ddk_dealloc,
+    }
+}
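
These two shims are the host half of the allocation contract; the driver side presumably routes its global allocator through them. A minimal sketch under that assumption (`VTABLE` and `HostAlloc` are hypothetical names; the driver's entry glue would fill `VTABLE` from the `__h2o_ddk_enter` argument):

    use core::alloc::{GlobalAlloc, Layout};

    use solvent_ddk::ffi::VTable;

    // Hypothetical: filled in by the driver's entry glue before first use.
    static mut VTABLE: Option<&'static VTable> = None;

    struct HostAlloc;

    unsafe impl GlobalAlloc for HostAlloc {
        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
            match VTABLE {
                // Calls back into `__h2o_ddk_alloc` in the host.
                Some(vt) => (vt.alloc)(layout.size(), layout.align()).cast(),
                None => core::ptr::null_mut(),
            }
        }

        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
            if let Some(vt) = VTABLE {
                // Calls back into `__h2o_ddk_dealloc` in the host.
                (vt.dealloc)(ptr.cast(), layout.size(), layout.align())
            }
        }
    }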

+ 90 - 0
src/bin/drvhost/src/instance.rs

@@ -0,0 +1,90 @@
+use alloc::{boxed::Box, ffi::CString};
+use core::{
+    error::Error,
+    ffi::{c_char, c_void, CStr},
+    future::Future,
+    ptr,
+};
+
+use async_task::Task;
+use solvent::prelude::{Channel, Handle, Object, Phys};
+use solvent_fs::fs;
+use solvent_rpc::io::{
+    file::{FileSyncClient, PhysOptions},
+    OpenOptions,
+};
+use solvent_std::{c_str, path::Path};
+
+pub fn bootstrap(file_path: &Path) -> Result<impl Future<Output = ()>, Box<dyn Error>> {
+    let (driver, dserver) = Channel::new();
+    fs::local().open("use/devm", OpenOptions::READ | OpenOptions::WRITE, dserver)?;
+
+    let (file, fserver) = Channel::new();
+    fs::local().open(
+        file_path,
+        OpenOptions::READ | OpenOptions::EXECUTE | OpenOptions::EXPECT_FILE,
+        fserver,
+    )?;
+    let file = FileSyncClient::from(file);
+    let phys = file.phys(PhysOptions::Shared)??;
+
+    let name = CString::new(file_path.to_str().unwrap())?;
+    create(driver, phys, &name)
+}
+
+fn create(
+    driver: Channel,
+    phys: Phys,
+    name: &CStr,
+) -> Result<impl Future<Output = ()>, Box<dyn Error>> {
+    // Load the DSO.
+    let dso = {
+        let phys = Phys::into_raw(phys);
+        unsafe { dlphys(phys, name.as_ptr()) }
+    };
+
+    // Get `__h2o_ddk_enter` function.
+    let ddk_enter =
+        unsafe { ddk_fn::<Enter>(dso, c_str!("__h2o_ddk_enter")) }.ok_or("ddk_enter not found")?;
+
+    // And `__h2o_ddk_exit`.
+    let ddk_exit =
+        unsafe { ddk_fn::<Exit>(dso, c_str!("__h2o_ddk_exit")) }.ok_or("ddk_exit not found")?;
+
+    // Initialize the driver environment.
+    let task = unsafe {
+        let ptr = ddk_enter(&crate::ffi::vtable() as _, Channel::into_raw(driver));
+        Box::from_raw(ptr.cast::<Task<()>>())
+    };
+
+    Ok(async move {
+        task.await;
+        unsafe { ddk_exit() };
+    })
+}
+
+#[link(name = "ldso")]
+extern "C" {
+    fn dlphys(phys: Handle, name: *const c_char) -> *const c_void;
+
+    fn dlsym(handle: *const c_void, name: *const c_char) -> *mut c_void;
+}
+
+/// # Safety
+///
+/// `F` must be a plain `fn` pointer type whose signature matches the
+/// symbol's actual definition.
+unsafe fn ddk_fn<F>(dso: *const c_void, name: &CStr) -> Option<F> {
+    let func = dlsym(dso, name.as_ptr());
+    if func.is_null() {
+        return None;
+    }
+    Some(ptr::read(&func as *const _ as *const F))
+}
+
+type Enter = unsafe extern "C" fn(
+    vtable: *const solvent_ddk::ffi::VTable,
+    instance: solvent::obj::Handle,
+) -> *mut ();
+
+type Exit = unsafe extern "C" fn();
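
The `ptr::read` in `ddk_fn` above reinterprets the raw symbol address as a function pointer. An equivalent sketch using `transmute_copy`, which states that intent more directly (reusing the `dlsym` extern above; same safety contract as `ddk_fn`):

    use core::ffi::{c_void, CStr};

    /// # Safety
    ///
    /// Same as `ddk_fn`: `F` must be a `fn` pointer type matching the symbol.
    unsafe fn sym<F>(dso: *const c_void, name: &CStr) -> Option<F> {
        let raw = dlsym(dso, name.as_ptr());
        // A `fn` pointer has the same size and representation as a raw
        // pointer here, so the bit copy is sound under the contract above.
        (!raw.is_null()).then(|| core::mem::transmute_copy(&raw))
    }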

+ 23 - 0
src/bin/drvhost/src/main.rs

@@ -0,0 +1,23 @@
+#![no_std]
+#![no_main]
+#![feature(allocator_api)]
+#![feature(error_in_core)]
+
+mod ffi;
+mod instance;
+
+use alloc::boxed::Box;
+use core::error::Error;
+
+use solvent_std::env;
+
+extern crate alloc;
+
+fn main() -> Result<(), Box<dyn Error>> {
+    let driver = env::args().nth(1).expect("Failed to get the driver path");
+    let task = instance::bootstrap(driver.as_ref())?;
+    solvent_async::block_on(Some(1), task);
+    Ok(())
+}
+
+solvent_std::entry!(main);

+ 1 - 1
src/bin/progm/Cargo.toml

@@ -15,5 +15,5 @@ svrt = {path = "../../lib/svrt"}
 # External crates
 async-task = {version = "4.3", default-features = false}
 either = {version = "1.6", default-features = false}
-futures = {version = "0.3", default-features = false, features = ["alloc"]}
+futures-lite = {version = "1.12", default-features = false, features = ["alloc"]}
 log = "0.4"

+ 2 - 2
src/bin/progm/src/boot.rs

@@ -54,8 +54,7 @@ unsafe fn build_inner(
                 let offset = unsafe { data.as_ptr().offset_from(base.as_ptr()) as usize };
                 assert!(
                     offset & PAGE_MASK == 0,
-                    "offset is not aligned: {:#x}",
-                    offset
+                    "offset is not aligned: {offset:#x}"
                 );
                 let len = data.len();
                 let data = root_phys
@@ -104,6 +103,7 @@ pub fn mount() {
         let (client, server) = Directory::sync_channel();
         bootfs
             .open(
+                solvent_fs::spawner(),
                 Default::default(),
                 Path::new(""),
                 OpenOptions::READ | OpenOptions::EXECUTE,

+ 33 - 1
src/bin/progm/src/main.rs

@@ -4,6 +4,11 @@
 
 mod boot;
 
+use alloc::vec;
+
+use solvent_fs::process::Process;
+use solvent_rpc::{io::OpenOptions, sync::Client};
+
 extern crate alloc;
 
 async fn main() {
@@ -16,10 +21,37 @@ async fn main() {
 
     solvent_async::test::test_disp().await;
 
+    let bootfs = solvent_fs::open_dir("/boot", OpenOptions::READ).expect("Failed to open bootfs");
+    let bootfs = bootfs.into_async().expect("Failed to get loader");
+
+    let devm =
+        solvent_fs::loader::get_object_from_dir(solvent_async::dispatch(), &bootfs, "bin/devm")
+            .await
+            .expect("Failed to get executable");
+
+    let mut vfs = vec![];
+    solvent_fs::fs::local()
+        .export(&mut vfs)
+        .expect("Failed to export vfs");
+
+    let mut task = Process::builder()
+        .executable(devm, "devm")
+        .expect("Failed to add executable")
+        .load_dirs(vec![bootfs])
+        .expect("Failed to add loader client")
+        .local_fs(vfs)
+        .build()
+        .await
+        .expect("Failed to build a process");
+
+    log::debug!("Waiting for devm");
+    let retval = task.ajoin().await.expect("Failed to wait for devm");
+    assert_eq!(retval, 0, "The process failed: {retval:#x}");
+
     log::debug!("Goodbye!");
 }
 
-solvent_async::entry!(main, solvent_std);
+solvent_async::entry!(main, solvent_std, None);
 
 #[link(name = "ldso")]
 extern "C" {

+ 18 - 0
src/drv/.cargo/config.toml

@@ -0,0 +1,18 @@
+[build]
+target = "../../.cargo/x86_64-pc-oceanic.json"
+
+[unstable]
+# build-std = ["std", "panic_abort"]
+build-std = ["core", "compiler_builtins", "alloc", "panic_abort"]
+build-std-features = ["compiler-builtins-mem"]
+
+[profile.dev]
+incremental = true
+lto = 'thin'
+panic = 'abort'
+
+[profile.release]
+incremental = true
+lto = 'fat'
+opt-level = 3
+panic = 'abort'

+ 18 - 0
src/drv/pc/Cargo.toml

@@ -0,0 +1,18 @@
+[package]
+edition = "2021"
+name = "pc"
+version = "0.1.0"
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+# Local crates
+solvent = {path = "../../lib/h2o_rs", default-features = false}
+solvent-async = {path = "../../lib/h2o_async", default-features = false}
+solvent-core = {path = "../../lib/h2o_std/core"}
+solvent-ddk = {path = "../../lib/h2o_ddk"}
+solvent-fs = {path = "../../lib/h2o_fs", default-features = false}
+solvent-rpc = {path = "../../lib/h2o_rpc", default-features = false}
+# External crates
+log = "0.4"

+ 11 - 0
src/drv/pc/src/lib.rs

@@ -0,0 +1,11 @@
+#![no_std]
+
+use solvent::prelude::Channel;
+
+extern crate alloc;
+
+async fn init(_driver_instance: Channel) {
+    log::debug!("Hello from driver");
+}
+
+solvent_ddk::entry!(init);

+ 3 - 2
src/lib/bootfs/Cargo.toml

@@ -4,9 +4,10 @@ name = "bootfs"
 version = "0.1.0"
 
 [features]
-gen = []
+gen = ["dep:anyhow"]
 
 [dependencies]
+anyhow = {version = "1.0", optional = true}
 either = {version = "1.6", default-features = false}
+plain = "0.2"
 static_assertions = "1.1"
-plain = "0.2"

+ 3 - 3
src/lib/bootfs/src/gen.rs

@@ -1,4 +1,4 @@
-use std::{boxed::Box, collections::VecDeque, error::Error, io::Write, mem, vec::Vec};
+use std::{collections::VecDeque, io::Write, mem, vec::Vec};
 
 use plain::Plain;
 
@@ -59,7 +59,7 @@ fn write_typed<T: ?Sized + Plain>(
     data: &T,
     size: usize,
     output: &mut impl Write,
-) -> Result<(), Box<dyn Error>> {
+) -> anyhow::Result<()> {
     let alsize = mem::size_of_val(data);
     let size = alsize.max(size);
 
@@ -72,7 +72,7 @@ fn write_typed<T: ?Sized + Plain>(
     Ok(())
 }
 
-pub fn generate(input: &Entry, output: &mut impl Write) -> Result<(), Box<dyn Error>> {
+pub fn generate(input: &Entry, output: &mut impl Write) -> anyhow::Result<()> {
     let mut entries = Vec::new();
     let mut contents = Vec::new();
     split(input, &mut entries, &mut contents);

+ 5 - 8
src/lib/dbglog/src/lib.rs

@@ -24,14 +24,11 @@ struct Logger;
 impl Write for Buffer {
     fn write_str(&mut self, s: &str) -> fmt::Result {
         let bytes = s.as_bytes();
-        self.0
-            .get_mut(self.1..)
-            .and_then(|buf| buf.get_mut(..bytes.len()))
-            .map_or(Err(fmt::Error), |buf| {
-                buf.copy_from_slice(bytes);
-                self.1 += bytes.len();
-                Ok(())
-            })
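+        // Overflowing writes are now silently truncated instead of failing
+        // with `fmt::Error`.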
+        let start = self.1;
+        let end = (start + bytes.len()).min(BUFFER_SIZE);
+        self.0[start..end].copy_from_slice(&bytes[..(end - start)]);
+        self.1 = end;
+        Ok(())
     }
 }
 

+ 2 - 1
src/lib/h2o_async/Cargo.toml

@@ -14,6 +14,7 @@ solvent-core = {path = "../h2o_std/core"}
 # External crates
 async-task = {version = "4.3", default-features = false}
 cfg-if = "1.0"
-futures = {version = "0.3", default-features = false, features = ["alloc"]}
+crossbeam-queue = {version = "0.3", default-features = false, features = ["alloc"]}
+futures-lite = {version = "1.12", default-features = false, features = ["alloc"]}
 log = "0.4"
 waker-fn = "1.1"

+ 25 - 41
src/lib/h2o_async/src/dev.rs

@@ -1,13 +1,12 @@
 use core::{
+    future::Future,
     num::NonZeroUsize,
     pin::Pin,
     task::{Context, Poll},
-    time::Duration,
 };
 
-use futures::Future;
 use solvent::{
-    prelude::{PackIntrWait, Result, SerdeReg, Syscall, EPIPE, SIG_GENERIC},
+    prelude::{PackIntrWait, Result, SerdeReg, Syscall, ENOENT, EPIPE, SIG_GENERIC},
     time::Instant,
 };
 use solvent_core::{sync::channel::oneshot, thread::Backoff};
@@ -48,23 +47,17 @@ impl Interrupt {
 
     #[inline]
     pub fn last_time(&self) -> Result<Instant> {
-        self.inner.wait(Duration::ZERO)
+        self.inner.last_time()
     }
 
     #[inline]
-    pub fn wait_until(&self, deadline: Instant) -> WaitUntil<'_> {
-        WaitUntil {
+    pub fn wait_next(&self) -> WaitNext<'_> {
+        WaitNext {
             intr: self,
-            deadline,
             result: None,
             key: None,
         }
     }
-
-    #[inline]
-    pub async fn wait_next(&self) -> Result<Instant> {
-        self.wait_until(Instant::now()).await
-    }
 }
 
 unsafe impl PackedSyscall for (PackIntrWait, oneshot::Sender<Result<Instant>>) {
@@ -81,14 +74,13 @@ unsafe impl PackedSyscall for (PackIntrWait, oneshot::Sender<Result<Instant>>) {
 }
 
 #[must_use]
-pub struct WaitUntil<'a> {
+pub struct WaitNext<'a> {
     intr: &'a Interrupt,
-    deadline: Instant,
     result: Option<oneshot::Receiver<Result<Instant>>>,
     key: Option<usize>,
 }
 
-impl Future for WaitUntil<'_> {
+impl Future for WaitNext<'_> {
     type Output = Result<Instant>;
 
     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
@@ -101,38 +93,30 @@ impl Future for WaitUntil<'_> {
             return Poll::Pending;
         }
 
-        let mut last_time = self.intr.last_time()?;
-        if self.deadline <= last_time {
-            return Poll::Ready(Ok(last_time));
-        }
-
         let backoff = Backoff::new();
         let (mut tx, rx) = oneshot();
         self.result = Some(rx);
         loop {
-            let pack = if self.deadline > last_time {
-                let pack = self.intr.inner.pack_wait(self.deadline - last_time)?;
-                let res = self.intr.disp.poll_send(
-                    &self.intr.inner,
-                    true,
-                    SIG_GENERIC,
-                    (pack, tx),
-                    cx.waker(),
-                );
-                match res {
-                    Err((_, pack)) => pack,
-                    Ok(Err(err)) => panic!("poll send: {err:?}"),
-                    Ok(Ok(key)) => {
-                        self.key = Some(key);
-                        break Poll::Pending;
+            match self.intr.inner.last_time() {
+                Err(ENOENT) => {
+                    match self.intr.disp.poll_send(
+                        &self.intr.inner,
+                        true,
+                        SIG_GENERIC,
+                        (self.intr.inner.pack_query()?, tx),
+                        cx.waker(),
+                    ) {
+                        Err(pack) => tx = pack.1,
+                        Ok(Err(err)) => panic!("poll send: {err:?}"),
+                        Ok(Ok(key)) => {
+                            self.key = Some(key);
+                            return Poll::Pending;
+                        }
                     }
                 }
-            } else {
-                break Poll::Ready(Ok(last_time));
-            };
-            tx = pack;
-            backoff.snooze();
-            last_time = self.intr.last_time()?;
+                res => return Poll::Ready(res),
+            }
+            backoff.snooze()
         }
     }
 }
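
A hypothetical consumer of the reworked interrupt API (`irq_loop` is illustrative, not part of the crate): each `wait_next().await` resolves with the timestamp of the next interrupt, while `last_time` stays available as a non-blocking query.

    // Illustrative only: draining interrupts with the new `wait_next` future.
    async fn irq_loop(intr: &Interrupt) -> solvent::prelude::Result<()> {
        loop {
            let stamp = intr.wait_next().await?;
            log::trace!("interrupt delivered at {stamp:?}");
        }
    }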

+ 30 - 14
src/lib/h2o_async/src/disp.rs

@@ -1,11 +1,11 @@
 use alloc::boxed::Box;
 use core::{
+    hint,
     num::NonZeroUsize,
-    sync::atomic::{AtomicBool, AtomicUsize, Ordering::*},
+    sync::atomic::{AtomicUsize, Ordering::*},
     task::{Poll, Waker},
 };
 
-use futures::task::AtomicWaker;
 use solvent::prelude::{Dispatcher as Inner, Object, Syscall, ENOENT, ENOSPC};
 use solvent_core::sync::{Arsc, CHashMap};
 
@@ -13,7 +13,7 @@ use self::DispError::*;
 
 struct Task {
     pack: Box<dyn PackedSyscall>,
-    waker: AtomicWaker,
+    waker: Waker,
 }
 
 #[derive(Debug)]
@@ -27,8 +27,8 @@ pub enum DispError {
 }
 
 struct Dispatcher {
+    id: usize,
     inner: Inner,
-    stop: AtomicBool,
     num_recv: AtomicUsize,
     tasks: CHashMap<usize, Task>,
 }
@@ -41,9 +41,10 @@ unsafe impl Sync for Dispatcher {}
 impl Dispatcher {
     #[inline]
     fn new(capacity: usize) -> Self {
+        static ID: AtomicUsize = AtomicUsize::new(1);
         Dispatcher {
+            id: ID.fetch_add(1, SeqCst),
             inner: Inner::new(capacity),
-            stop: AtomicBool::new(false),
             num_recv: AtomicUsize::new(1),
             tasks: CHashMap::new(),
         }
@@ -51,13 +52,24 @@ impl Dispatcher {
 
     #[inline]
     fn disconnected(self: &Arsc<Self>) -> bool {
-        self.stop.load(Acquire) || Arsc::count(self) <= 1
+        self.num_recv.load(SeqCst) == 0 || Arsc::count(self) <= 1
     }
 
     fn poll_receive(self: &Arsc<Self>) -> Poll<Result<(), DispError>> {
         match self.inner.pop_raw() {
             Ok(res) => {
-                let Task { waker, mut pack } = self.tasks.remove(&res.key).ok_or(TimeOut)?;
+                let s = solvent::time::Instant::now();
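+                // `poll_send` registers the syscall with the kernel before it
+                // inserts the task into `tasks`, so a completion can briefly
+                // race ahead of the insertion; spin until the entry appears.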
+                let Task { waker, mut pack } = loop {
+                    match self.tasks.remove(&res.key) {
+                        Some(task) => break task,
+                        None => hint::spin_loop(),
+                    }
+                    assert!(
+                        s.elapsed() < core::time::Duration::from_secs(1),
+                        "The kernel object owns a key that the user space doesn't: {}",
+                        res.key
+                    );
+                };
                 // We need to inform the task in case an internal error occurred.
                 let res = pack.unpack(res.result, NonZeroUsize::new(res.signal));
                 waker.wake();
@@ -102,10 +114,10 @@ impl Dispatcher {
         };
         let task = Task {
             pack: Box::new(pack),
-            waker: AtomicWaker::new(),
+            waker: waker.clone(),
         };
-        task.waker.register(waker);
-        self.tasks.insert(key, task);
+        let old = self.tasks.insert(key, task);
+        assert!(old.is_none());
         Ok(Ok(key))
     }
 
@@ -114,8 +126,8 @@ impl Dispatcher {
             return Err(Disconnected);
         }
 
-        if let Some(task) = self.tasks.get_mut(&key) {
-            task.waker.register(waker);
+        if let Some(mut task) = self.tasks.get_mut(&key) {
+            task.waker = waker.clone();
             Ok(())
         } else {
             Err(DidntWait)
@@ -176,6 +188,11 @@ impl DispReceiver {
     pub fn poll_receive(&self) -> Poll<Result<(), DispError>> {
         self.disp.poll_receive()
     }
+
+    #[inline]
+    pub fn id(&self) -> usize {
+        self.disp.id
+    }
 }
 
 impl Clone for DispReceiver {
@@ -188,8 +205,7 @@ impl Clone for DispReceiver {
 
 impl Drop for DispReceiver {
     fn drop(&mut self) {
-        self.disp.stop.store(true, SeqCst);
-        if self.disp.num_recv.fetch_sub(1, SeqCst) == 0 {
+        if self.disp.num_recv.fetch_sub(1, SeqCst) == 1 {
             let tasks = self.disp.tasks.take();
             for (_, task) in tasks {
                 let Task { mut pack, waker } = task;

+ 272 - 189
src/lib/h2o_async/src/exe.rs

@@ -1,251 +1,334 @@
+#![allow(clippy::duplicate_mod)]
+
+#[cfg(feature = "runtime")]
 mod enter;
+#[cfg(feature = "runtime")]
 mod park;
 
-use alloc::vec::Vec;
+use alloc::collections::BTreeMap;
 use core::{
     iter,
-    pin::Pin,
+    marker::PhantomData,
     sync::atomic::{AtomicUsize, Ordering::*},
-    task::{Context, Poll},
+    task::Poll,
 };
 
 use async_task::{Runnable, Task};
-use futures::{
-    task::{FutureObj, Spawn, SpawnError},
-    Future,
-};
-#[cfg(feature = "runtime")]
-use solvent_core::{sync::Lazy, thread::available_parallelism, thread_local};
-use solvent_core::{
-    sync::{Arsc, Injector, Stealer, Worker},
-    thread::{self, Backoff},
-};
-
-use crate::disp::{DispError, DispReceiver, DispSender};
-
-struct Blocking<G>(Option<G>);
+use futures_lite::{future::yield_now, pin, stream, Future, FutureExt, StreamExt};
+use solvent_core::sync::{Arsc, Injector, Lazy, Steal, Stealer, Worker};
 
-impl<G> Unpin for Blocking<G> {}
+use crate::{disp::DispReceiver, sync::RwLock};
 
-impl<G, U> Future for Blocking<G>
-where
-    G: FnOnce() -> U + Send + 'static,
-{
-    type Output = U;
-
-    #[inline]
-    fn poll(mut self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Self::Output> {
-        let func = self.0.take().expect("Cannot run a task twice");
-        Poll::Ready(func())
-    }
+struct Inner {
+    global: Injector<Runnable>,
+    stealers: RwLock<BTreeMap<usize, Stealer<Runnable>>>,
 }
 
-#[derive(Debug)]
-pub struct ThreadPool {
-    inner: Arsc<Inner>,
+#[repr(transparent)]
+pub struct Executor {
+    inner: Lazy<Arsc<Inner>>,
 }
 
-#[derive(Debug)]
-struct Inner {
-    global: Injector<Runnable>,
-    stealers: Vec<Stealer<Runnable>>,
-    count: AtomicUsize,
-}
+impl Executor {
+    pub const fn new() -> Self {
+        #[inline(never)]
+        fn lazy_new() -> Arsc<Inner> {
+            Arsc::new(Inner {
+                global: Injector::new(),
+                stealers: RwLock::new(BTreeMap::new()),
+            })
+        }
+        Executor {
+            inner: Lazy::new(lazy_new),
+        }
+    }
 
-impl ThreadPool {
-    pub fn new(num: usize) -> Self {
-        log::trace!("solvent-async::exe: Create thread pool");
-        let injector = Injector::new();
-        let (workers, stealers) = (0..num).fold(
-            (Vec::with_capacity(num), Vec::with_capacity(num)),
-            |(mut workers, mut stealers), _| {
-                let worker = Worker::new_fifo();
-                let stealer = worker.stealer();
-                workers.push(worker);
-                stealers.push(stealer);
-                (workers, stealers)
-            },
-        );
-        let inner = Arsc::new(Inner {
-            global: injector,
-            stealers,
-            count: AtomicUsize::new(1),
-        });
+    pub async fn run<T>(&self, fut: impl Future<Output = T> + 'static) -> T {
+        fut.or(poller(self.inner.clone())).await
+    }
 
-        workers.into_iter().for_each(|worker| {
-            let inner = inner.clone();
-            thread::spawn(move || worker_thread(worker, inner));
-        });
-        ThreadPool { inner }
+    pub async fn clear(&self) {
+        poller_cleared(self.inner.clone()).await
     }
 
-    pub fn spawn<F, T>(&self, fut: F) -> Task<T>
+    pub fn spawn<T>(&self, fut: impl Future<Output = T> + Send + 'static) -> Task<T>
     where
-        F: Future<Output = T> + Send + 'static,
         T: Send + 'static,
     {
         let inner = self.inner.clone();
-        let (runnable, task) = async_task::spawn(fut, move |t| inner.global.push(t));
+        let (runnable, task) = async_task::spawn(fut, move |task| inner.global.push(task));
         runnable.schedule();
         task
     }
+}
 
-    pub fn spawn_blocking<F, T>(&self, func: F) -> Task<T>
-    where
-        F: FnOnce() -> T + Send + 'static,
-        T: Send + 'static,
-    {
-        self.spawn(Blocking(Some(func)))
+impl Default for Executor {
+    fn default() -> Self {
+        Self::new()
     }
+}
 
-    pub fn dispatch(&self, capacity: usize) -> DispSender {
-        let (tx, rx) = crate::disp::dispatch(capacity);
-        let inner = self.inner.clone();
-        log::trace!("solvent-async::exe: Dispatch I/O operations");
-        thread::spawn(move || io_thread(rx, inner));
-        tx
+impl Drop for Executor {
+    fn drop(&mut self) {
+        // log::debug!("Drop on EXE {:p}", self);
+        if Lazy::is_initialized(&self.inner) {
+            loop {
+                match self.inner.global.steal() {
+                    Steal::Empty => break,
+                    Steal::Success(task) => task.waker().wake(),
+                    Steal::Retry => {}
+                }
+            }
+        }
     }
+}
 
-    #[inline]
-    pub fn block_on<F, G, T>(&self, gen: G) -> T
-    where
-        F: Future<Output = T> + Send + 'static,
-        G: FnOnce(ThreadPool) -> F,
-    {
-        let fut = gen(self.clone());
-        enter::enter().block_on(fut)
-    }
+#[repr(transparent)]
+pub struct LocalExecutor {
+    exe: Executor,
+    _marker: PhantomData<*mut ()>,
 }
 
-impl Spawn for ThreadPool {
-    #[inline]
-    fn spawn_obj(&self, future: FutureObj<'static, ()>) -> Result<(), SpawnError> {
-        self.spawn(future).detach();
-        Ok(())
+impl LocalExecutor {
+    pub const fn new() -> Self {
+        LocalExecutor {
+            exe: Executor::new(),
+            _marker: PhantomData,
+        }
+    }
+
+    pub async fn run<T>(&self, fut: impl Future<Output = T> + 'static) -> T {
+        self.exe.run(fut).await
+    }
+
+    pub async fn clear(&self) {
+        self.exe.clear().await
+    }
+
+    pub fn spawn<T: 'static>(&self, fut: impl Future<Output = T> + 'static) -> Task<T> {
+        let inner = self.exe.inner.clone();
+        // SAFETY: The executor is not `Send`, so the future doesn't need to be `Send`.
+        let (runnable, task) =
+            unsafe { async_task::spawn_unchecked(fut, move |task| inner.global.push(task)) };
+        runnable.schedule();
+        task
     }
 }
 
-impl Clone for ThreadPool {
-    fn clone(&self) -> Self {
-        let inner = self.inner.clone();
-        inner.count.fetch_add(1, Release);
-        ThreadPool { inner }
+impl Default for LocalExecutor {
+    fn default() -> Self {
+        Self::new()
     }
 }
 
-impl Drop for ThreadPool {
-    fn drop(&mut self) {
-        self.inner.count.fetch_sub(1, Release);
+async fn tick(inner: &Inner, local: &Worker<Runnable>) -> bool {
+    let stream = stream::iter(iter::repeat_with(|| {
+        inner.global.steal_batch_and_pop(local)
+    }))
+    .then(|steal| async {
+        let steal_from_others = async {
+            let stealers = inner.stealers.read().await;
+            stealers.values().map(Stealer::steal).collect()
+        };
+        match steal {
+            Steal::Empty => steal_from_others.await,
+            Steal::Success(_) => steal,
+            Steal::Retry => match steal_from_others.await {
+                Steal::Success(res) => Steal::Success(res),
+                _ => Steal::Retry,
+            },
+        }
+    });
+    pin!(stream);
+
+    let task = match local.pop() {
+        Some(task) => Some(task),
+        None => stream
+            .find(|steal| !steal.is_retry())
+            .await
+            .and_then(|steal| steal.success()),
+    };
+
+    match task {
+        Some(task) => {
+            task.run();
+            true
+        }
+        None => false,
     }
 }
 
-fn worker_thread(local: Worker<Runnable>, pool: Arsc<Inner>) {
-    log::trace!(
-        "solvent-async::exe: worker thread #{}",
-        thread::current().id()
-    );
-    #[inline]
-    fn next_task<T>(local: &Worker<T>, global: &Injector<T>, stealers: &[Stealer<T>]) -> Option<T> {
-        local.pop().or_else(|| {
-            iter::repeat_with(|| {
-                global
-                    .steal_batch_and_pop(local)
-                    .or_else(|| stealers.iter().map(|s| s.steal()).collect())
-            })
-            .find(|s| !s.is_retry())
-            .and_then(|s| s.success())
-        })
+static ID: AtomicUsize = AtomicUsize::new(1);
+
+async fn poller<T>(inner: Arsc<Inner>) -> T {
+    let local = Worker::new_fifo();
+
+    let mut stealers = inner.stealers.write().await;
+    let id = ID.fetch_add(1, SeqCst);
+    assert!(id != 0);
+    stealers.insert(id, local.stealer());
+    drop(stealers);
+
+    let mut num = 0;
+    loop {
+        let add = tick(&inner, &local).await;
+        if !add {
+            num = 0;
+            yield_now().await;
+        }
+        num += 1;
+        if num > u8::MAX as u32 {
+            num = 0;
+            yield_now().await
+        }
     }
+}
+
+async fn poller_cleared(inner: Arsc<Inner>) {
+    let local = Worker::new_fifo();
+
+    let mut stealers = inner.stealers.write().await;
+    let id = ID.fetch_add(1, SeqCst);
+    assert!(id != 0);
+    stealers.insert(id, local.stealer());
+    drop(stealers);
 
-    let backoff = Backoff::new();
     loop {
-        match next_task(&local, &pool.global, &pool.stealers) {
-            Some(runnable) => {
-                runnable.run();
-                backoff.reset();
-            }
-            None => {
-                if pool.count.load(Acquire) == 0 {
-                    break;
-                }
-                log::trace!("W#{}: Waiting for next task...", thread::current().id());
-                backoff.snooze()
-            }
+        if !tick(&inner, &local).await {
+            break;
         }
     }
+
+    let mut stealers = inner.stealers.write().await;
+    stealers.remove(&id);
 }
 
-fn io_thread(rx: DispReceiver, pool: Arsc<Inner>) {
-    log::trace!("solvent-async::exe: io thread #{}", thread::current().id());
-    let backoff = Backoff::new();
+pub async fn io_task(rx: DispReceiver) {
     loop {
-        match rx.poll_receive() {
-            Poll::Ready(res) => match res {
-                Ok(()) => backoff.reset(),
-                Err(DispError::Disconnected) => break,
-                Err(err) => log::warn!("Error while polling for dispatcher: {:?}", err),
-            },
-            Poll::Pending => {
-                if pool.count.load(Acquire) == 0 {
-                    break;
-                }
-                log::trace!("IO#{}: Waiting for next task...", thread::current().id());
-                backoff.snooze()
-            }
+        if let Poll::Ready(Err(e)) = rx.poll_receive() {
+            log::trace!("IO task polled error: {e:?}");
         }
+        yield_now().await
     }
 }
 
-cfg_if::cfg_if! { if #[cfg(feature = "runtime")] {
+#[cfg(feature = "runtime")]
+pub(crate) mod runtime {
+    use alloc::vec::Vec;
 
-static POOL: Lazy<ThreadPool> = Lazy::new(|| ThreadPool::new(available_parallelism().into()));
-thread_local! {
-    static DISP: DispSender = POOL.dispatch(4096);
-}
+    use futures_lite::future::pending;
+    use solvent_core::{
+        thread::{self, available_parallelism},
+        thread_local,
+    };
 
-#[inline]
-pub fn spawn<F, T>(fut: F) -> Task<T>
-where
-    F: Future<Output = T> + Send + 'static,
-    T: Send + 'static,
-{
-    POOL.spawn(fut)
-}
+    use crate::{disp::DispSender, exe::*, sync::channel};
 
-#[inline]
-pub fn spawn_blocking<F, T>(func: F) -> Task<T>
-where
-    F: FnOnce() -> T + Send + 'static,
-    T: Send + 'static,
-{
-    POOL.spawn_blocking(func)
-}
+    static GLOBAL: Executor = Executor::new();
 
-#[inline]
-pub fn dispatch() -> DispSender {
-    DISP.with(|tx| tx.clone())
-}
+    thread_local! {
+        static LOCAL: LocalExecutor = LocalExecutor::new();
+    }
 
-#[inline]
-pub fn block_on<F, T>(fut: F) -> T
-where
-    F: Future<Output = T> + Send + 'static,
-{
-    POOL.block_on(|_| fut)
-}
+    #[inline]
+    pub fn spawn_local<T: 'static>(fut: impl Future<Output = T> + 'static) -> Task<T> {
+        LOCAL.with(|local| local.spawn(fut))
+    }
 
-#[macro_export]
-macro_rules! entry {
-    ($func:ident, $std:path) => {
-        mod __h2o_async_inner {
-            fn main() {
-                $crate::block_on(async { (super::$func)().await })
-            }
+    #[inline]
+    pub fn spawn<T: Send + 'static>(fut: impl Future<Output = T> + Send + 'static) -> Task<T> {
+        GLOBAL.spawn(fut)
+    }
 
-            use $std as std;
-            std::entry!(main);
-        }
-    };
-}
+    #[inline]
+    pub fn global_executor() -> &'static Executor {
+        &GLOBAL
+    }
+
+    #[inline]
+    pub fn local_executor<T, F: FnOnce(&LocalExecutor) -> T>(f: F) -> T {
+        LOCAL.with(f)
+    }
 
-} }
+    /// Starts a set of executors to run async tasks, with the main task `fut`
+    /// running on the local executor of the current thread.
+    ///
+    /// When the function returns, all previously spawned worker threads (if
+    /// any) have been joined, except for the current thread, so the caller
+    /// can safely reclaim any resources used by the async tasks.
+    ///
+    /// # Arguments
+    ///
+    /// - `num` - The expected total number of worker threads. Defaults to
+    ///   `available_parallelism()` if `None`.
+    /// - `fut` - The main async task to be run on the local executor of the
+    ///   current thread.
+    pub fn block_on<T: 'static>(num: Option<usize>, fut: impl Future<Output = T> + 'static) -> T {
+        let num = num
+            .unwrap_or_else(|| available_parallelism().get())
+            .saturating_sub(1);
+
+        let data = (num > 0).then(|| {
+            let (tx, rx) = channel::bounded(num);
+
+            let threads = (0..num).map(|_| {
+                let rx = rx.clone();
+                thread::spawn(move || {
+                    let stop = async move {
+                        let _ = rx.recv().await;
+                    };
+                    LOCAL.with(|local| {
+                        let local = local.run(stop);
+                        let global = GLOBAL.run(pending());
+                        let fut = local.or(global);
+
+                        enter::enter().block_on(fut);
+                    });
+                })
+            });
+            (tx, threads.collect::<Vec<_>>())
+        });
+
+        LOCAL.with(|local| {
+            let local = local.run(fut);
+            let global = GLOBAL.run(pending());
+            let fut = local.or(global);
+
+            enter::enter().block_on(async {
+                let ret = fut.await;
+                if let Some((tx, threads)) = data {
+                    for _ in 0..num {
+                        let _ = tx.send(()).await;
+                    }
+                    threads.into_iter().for_each(|t| t.join())
+                }
+                ret
+            })
+        })
+    }
+
+    static DISP: Lazy<DispSender> = Lazy::new(|| {
+        let (tx, rx) = crate::disp::dispatch(4096);
+        spawn(io_task(rx)).detach();
+        tx
+    });
+
+    #[inline]
+    pub fn dispatch() -> DispSender {
+        DISP.clone()
+    }
+
+    #[macro_export]
+    macro_rules! entry {
+        ($func:ident, $std:path, $num:expr) => {
+            mod __h2o_async_inner {
+                fn main() {
+                    $crate::block_on($num, async { (super::$func)().await })
+                }
+
+                use $std as std;
+                std::entry!(main);
+            }
+        };
+    }
+}
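
For reference, a hypothetical program against the reworked runtime (mirroring the real call sites in devm and progm above): `entry!` now takes the worker-thread count, and `spawn`/`spawn_local` target the global and thread-local executors respectively.

    async fn main() {
        // Polled by any worker thread via the global executor.
        let global = solvent_async::spawn(async { 40 + 2 });
        // Stays on this thread's local executor; needn't be `Send`.
        let local = solvent_async::spawn_local(async { 42 });
        assert_eq!(global.await, local.await);
    }

    // `Some(2)` requests two workers in total: the current thread plus one
    // spawned thread (`None` falls back to `available_parallelism`).
    solvent_async::entry!(main, solvent_std, Some(2));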

+ 1 - 2
src/lib/h2o_async/src/exe/enter.rs

@@ -1,6 +1,5 @@
-use core::{cell::Cell, marker::PhantomData};
+use core::{cell::Cell, future::Future, marker::PhantomData};
 
-use futures::Future;
 use solvent_core::thread_local;
 
 thread_local! {

+ 15 - 15
src/lib/h2o_async/src/exe/park.rs

@@ -1,29 +1,29 @@
-use core::task::{Context, Poll, Waker};
+use core::{
+    future::Future,
+    task::{Context, Poll, Waker},
+};
 
-use futures::{pin_mut, Future};
-use solvent_core::{sync::Parker, thread_local};
+use futures_lite::pin;
+use solvent_core::{thread, thread_local};
 use waker_fn::waker_fn;
 
 thread_local! {
-    static CURRENT: (Parker, Waker) = {
-        let parker = Parker::new();
-        let unparker = parker.unparker().clone();
-        let waker = waker_fn(move || unparker.unpark());
-        (parker, waker)
+    static CURRENT: Waker = {
+        let thread = thread::current();
+        waker_fn(move || thread.unpark())
     }
 }
 
-pub(crate) fn block_on<F: Future>(fut: F) -> F::Output {
-    pin_mut!(fut);
+pub(super) fn block_on<F: Future>(fut: F) -> F::Output {
+    pin!(fut);
 
-    CURRENT.with(|(parker, waker)| {
+    CURRENT.with(|waker| {
         let mut cx = Context::from_waker(waker);
         loop {
-            if let Poll::Ready(v) = fut.as_mut().poll(&mut cx) {
-                break v;
+            match fut.as_mut().poll(&mut cx) {
+                Poll::Ready(v) => break v,
+                Poll::Pending => thread::park(),
             }
-
-            parker.park();
         }
     })
 }

+ 17 - 8
src/lib/h2o_async/src/ipc.rs

@@ -19,6 +19,12 @@ use solvent_core::{
 pub use self::channel::*;
 use crate::disp::{DispSender, PackedSyscall};
 
+#[cfg(feature = "runtime")]
+pub fn channel() -> (Channel, Channel) {
+    let (a, b) = solvent::ipc::Channel::new();
+    (Channel::new(a), Channel::new(b))
+}
+
 pub trait AsyncObject: Object {
     type TryWait<'a>: Future<Output = Result<usize>> + 'a
     where
@@ -92,18 +98,21 @@ impl<'a, T: Object> Future for TryWait<'a, T> {
     type Output = Result<usize>;
 
     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
-        if let Some(rx) = self.result.take() {
+        if let Some(ref rx) = self.result {
             match rx.try_recv() {
                 Ok(result) => return Poll::Ready(result),
                 Err(TryRecvError::Empty) => {
-                    self.result = Some(rx);
-                    if let Err(err) = self
-                        .key
-                        .ok_or(ENOENT)
-                        .map(|key| self.disp.update(key, cx.waker()).expect("update"))
-                    {
-                        return Poll::Ready(Err(err));
+                    let Some(key) = self.key else {
+                        return Poll::Ready(Err(ENOENT))
+                    };
+                    if let Err(err) = self.disp.update(key, cx.waker()) {
+                        if let Ok(res) = rx.recv() {
+                            return Poll::Ready(res);
+                        }
+                        panic!("Update future error with key {key}: {err:?}");
                     }
+
+                    return Poll::Pending;
                 }
                 Err(TryRecvError::Disconnected) => {}
             }

+ 23 - 16
src/lib/h2o_async/src/ipc/channel.rs

@@ -134,38 +134,45 @@ pub struct Receive<'a> {
 
 impl<'a> Receive<'a> {
     fn result_recv(&mut self, cx: &mut Context<'_>) -> ControlFlow<Poll<Result<Packet>>, Packet> {
-        let packet = match self.result.take() {
-            Some(rx) => match rx.try_recv() {
-                // Has a result
-                Ok(send_data) => match send_data.id {
+        macro_rules! has_a_result {
+            ($send_data:ident) => {
+                match $send_data.id {
                     // Packet transferring successful, return it
                     Ok(id) => {
-                        let mut packet = send_data.packet;
+                        let mut packet = $send_data.packet;
                         packet.id = NonZeroUsize::new(id);
                         return ControlFlow::Break(Poll::Ready(Ok(packet)));
                     }
 
                     // Packet buffer too small, reserve enough memory and restart polling
                     Err(EBUFFER) => {
-                        let mut packet = send_data.packet;
-                        packet.buffer.reserve(send_data.buffer_size);
-                        packet.handles.reserve(send_data.handle_count);
+                        let mut packet = $send_data.packet;
+                        packet.buffer.reserve($send_data.buffer_size);
+                        packet.handles.reserve($send_data.handle_count);
                         Some(packet)
                     }
 
                     // Actual error occurred, return it
                     Err(err) => return ControlFlow::Break(Poll::Ready(Err(err))),
-                },
+                }
+            };
+        }
+
+        let packet = match self.result {
+            Some(ref rx) => match rx.try_recv() {
+                // Has a result
+                Ok(send_data) => has_a_result!(send_data),
 
                 // Not yet, continue waiting
                 Err(TryRecvError::Empty) => {
-                    self.result = Some(rx);
-                    if let Err(err) = self
-                        .key
-                        .ok_or(ENOENT)
-                        .map(|key| self.channel.disp.update(key, cx.waker()).expect("update"))
-                    {
-                        return ControlFlow::Break(Poll::Ready(Err(err)));
+                    let Some(key) = self.key else {
+                        return ControlFlow::Break(Poll::Ready(Err(ENOENT)))
+                    };
+                    if let Err(err) = self.channel.disp.update(key, cx.waker()) {
+                        if let Ok(send_data) = rx.recv() {
+                            has_a_result!(send_data);
+                        }
+                        panic!("Update future error: {err:?}");
                     }
 
                     return ControlFlow::Break(Poll::Pending);

+ 16 - 14
src/lib/h2o_async/src/lib.rs

@@ -1,5 +1,6 @@
 #![no_std]
 #![feature(control_flow_enum)]
+#![feature(error_in_core)]
 
 pub mod dev;
 pub mod disp;
@@ -11,17 +12,16 @@ pub mod sync;
 pub mod time;
 mod utils;
 
-pub use solvent_core as reexport_std;
-
 extern crate alloc;
 
 #[cfg(feature = "runtime")]
-pub use self::exe::{block_on, dispatch, spawn, spawn_blocking};
+pub use self::exe::runtime::*;
 
 #[cfg(feature = "runtime")]
 pub mod test {
     use core::future::Future;
 
+    use futures_lite::future::{yield_now, zip};
     use solvent::{
         ipc::Packet,
         prelude::{Handle, PhysOptions},
@@ -46,7 +46,6 @@ pub mod test {
                 i2.receive(&mut packet)
                     .await
                     .expect("Failed to receive packet");
-                // log::debug!("\t\t\tGot #{index}");
                 assert_eq!(packet.buffer[0], index as u8);
             }
             log::debug!("\t\t\tReceive finished");
@@ -63,11 +62,11 @@ pub mod test {
                 packet
                     .buffer
                     .extend(core::iter::repeat_with(|| random() as u8).take(199));
-                async {
-                    // log::debug!("Send #{index}");
-                    i1.send(&mut packet).expect("Failed to send packet")
+                // log::debug!("Send #{index}");
+                i1.send(&mut packet).expect("Failed to send packet");
+                if index % 10 == 5 {
+                    yield_now().await
                 }
-                .await;
             }
             log::debug!("Send finished");
         };
@@ -75,9 +74,7 @@ pub mod test {
         (send, recv)
     }
 
-    pub async fn test_disp() {
-        log::debug!("Has {} cpus available", solvent::task::cpu_num());
-
+    async fn test_stream() {
         let phys = solvent::mem::Phys::allocate(5, PhysOptions::ZEROED | PhysOptions::RESIZABLE)
             .expect("Failed to allocate memory");
         let stream =
@@ -90,11 +87,16 @@ pub mod test {
         let mut buf = [0; 10];
         let len = stream.read(&mut buf).await.unwrap();
         assert_eq!(&buf[..len], [4, 5, 6, 7]);
+    }
+
+    pub async fn test_disp() {
+        log::debug!("Has {} cpus available", solvent::task::cpu_num());
+
+        test_stream().await;
 
         let (send, recv) = test_tx();
         let recv = crate::spawn(recv);
-        let send = crate::spawn(send);
-        recv.await;
-        send.await;
+        let send = crate::spawn_local(send);
+        zip(send, recv).await;
     }
 }

+ 22 - 25
src/lib/h2o_async/src/mem.rs

@@ -1,5 +1,6 @@
 use alloc::vec::Vec;
 use core::{
+    future::Future,
     mem,
     num::NonZeroUsize,
     ops::ControlFlow,
@@ -7,7 +8,6 @@ use core::{
     task::{Context, Poll},
 };
 
-use futures::Future;
 use solvent::prelude::{
     IoSlice, IoSliceMut, PackRead, PackResize, PackWrite, Result, SerdeReg, Syscall, EAGAIN,
     ENOENT, EPIPE, SIG_READ, SIG_WRITE,
@@ -163,39 +163,39 @@ struct FutInner<'a> {
 
 impl<'a> FutInner<'a> {
     fn result_recv<T>(
-        &mut self,
-        result: &mut Option<oneshot::Receiver<Result<T>>>,
+        &self,
+        result: &Option<oneshot::Receiver<Result<T>>>,
         cx: &mut Context,
     ) -> ControlFlow<Poll<Result<T>>> {
-        match result.take() {
+        match result {
             Some(rx) => match rx.try_recv() {
                 // Has a result
                 Ok(res) => match res {
                     // The lock is already taken, restart
-                    Err(EAGAIN) => ControlFlow::CONTINUE,
+                    Err(EAGAIN) => ControlFlow::Continue(()),
                     res => ControlFlow::Break(Poll::Ready(res)),
                 },
 
                 // Not yet, continue waiting
                 Err(TryRecvError::Empty) => {
-                    *result = Some(rx);
-                    ControlFlow::Break(
-                        if let Err(err) = self
-                            .key
-                            .ok_or(ENOENT)
-                            .map(|key| self.phys.disp.update(key, cx.waker()).expect("update"))
-                        {
-                            Poll::Ready(Err(err))
-                        } else {
-                            Poll::Pending
-                        },
-                    )
+                    let Some(key) = self.key else {
+                        return ControlFlow::Break(Poll::Ready(Err(ENOENT)))
+                    };
+
+                    if let Err(err) = self.phys.disp.update(key, cx.waker()) {
+                        if let Ok(res) = rx.recv() {
+                            return ControlFlow::Break(Poll::Ready(res));
+                        }
+                        panic!("Update future error with key {key}: {err:?}");
+                    }
+
+                    ControlFlow::Break(Poll::Pending)
                 }
 
                 // Channel early disconnected, restart the default process
-                Err(TryRecvError::Disconnected) => ControlFlow::CONTINUE,
+                Err(TryRecvError::Disconnected) => ControlFlow::Continue(()),
             },
-            None => ControlFlow::CONTINUE,
+            None => ControlFlow::Continue(()),
         }
     }
 }
@@ -212,8 +212,7 @@ impl Future for Read<'_> {
     type Output = Result<Vec<u8>>;
 
     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
-        let mut result = mem::take(&mut self.result);
-        let mut buf = match self.f.result_recv(&mut result, cx) {
+        let mut buf = match self.f.result_recv(&self.result, cx) {
             ControlFlow::Continue(()) => mem::take(&mut self.buf),
             ControlFlow::Break(value) => return value,
         };
@@ -279,8 +278,7 @@ impl Future for Write<'_> {
     type Output = Result<(Vec<u8>, usize)>;
 
     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
-        let mut result = mem::take(&mut self.result);
-        let mut buf = match self.f.result_recv(&mut result, cx) {
+        let mut buf = match self.f.result_recv(&self.result, cx) {
             ControlFlow::Continue(()) => mem::take(&mut self.buf),
             ControlFlow::Break(value) => return value,
         };
@@ -336,8 +334,7 @@ impl Future for Resize<'_> {
     type Output = Result;
 
     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
-        let mut result = mem::take(&mut self.result);
-        if let ControlFlow::Break(res) = self.f.result_recv(&mut result, cx) {
+        if let ControlFlow::Break(res) = self.f.result_recv(&self.result, cx) {
             return res;
         }
 

+ 1 - 0
src/lib/h2o_async/src/sync.rs

@@ -1,3 +1,4 @@
+pub mod channel;
 mod event;
 mod mutex;
 mod rw_lock;

+ 1272 - 0
src/lib/h2o_async/src/sync/channel.rs

@@ -0,0 +1,1272 @@
+//! Imported from crate `async-channel`.
+//!
+//! An async multi-producer multi-consumer channel, where each message can be
+//! received by only one of all existing consumers.
+//!
+//! There are two kinds of channels:
+//!
+//! 1. [Bounded][`bounded()`] channel with limited capacity.
+//! 2. [Unbounded][`unbounded()`] channel with unlimited capacity.
+//!
+//! A channel has the [`Sender`] and [`Receiver`] side. Both sides are cloneable
+//! and can be shared among multiple threads.
+//!
+//! When all [`Sender`]s or all [`Receiver`]s are dropped, the channel becomes
+//! closed. When a channel is closed, no more messages can be sent, but
+//! remaining messages can still be received.
+//!
+//! The channel can also be closed manually by calling [`Sender::close()`] or
+//! [`Receiver::close()`].
+//!
+//! # Examples
+//!
+//! ```
+//! # futures_lite::future::block_on(async {
+//! let (s, r) = async_channel::unbounded();
+//!
+//! assert_eq!(s.send("Hello").await, Ok(()));
+//! assert_eq!(r.recv().await, Ok("Hello"));
+//! # });
+//! ```
+
+#![forbid(unsafe_code)]
+#![warn(missing_docs, missing_debug_implementations)]
+
+use alloc::boxed::Box;
+use core::{
+    error, fmt,
+    pin::Pin,
+    sync::atomic::{AtomicBool, AtomicUsize, Ordering},
+    task::{Context, Poll},
+};
+
+use crossbeam_queue::{ArrayQueue, SegQueue};
+use futures_lite::{ready, stream::Stream, Future};
+use solvent_core::sync::Arsc;
+
+use super::{Event, EventListener};
+
+enum Flavor<T> {
+    Bounded(Box<ArrayQueue<T>>),
+    Unbounded(SegQueue<T>),
+}
+
+impl<T> Flavor<T> {
+    fn push(&self, value: T) -> Option<T> {
+        match self {
+            Flavor::Bounded(q) => q.push(value).err(),
+            Flavor::Unbounded(q) => {
+                q.push(value);
+                None
+            }
+        }
+    }
+
+    fn pop(&self) -> Option<T> {
+        match self {
+            Flavor::Bounded(q) => q.pop(),
+            Flavor::Unbounded(q) => q.pop(),
+        }
+    }
+
+    fn is_empty(&self) -> bool {
+        match self {
+            Flavor::Bounded(q) => q.is_empty(),
+            Flavor::Unbounded(q) => q.is_empty(),
+        }
+    }
+
+    fn len(&self) -> usize {
+        match self {
+            Flavor::Bounded(q) => q.len(),
+            Flavor::Unbounded(q) => q.len(),
+        }
+    }
+
+    fn is_full(&self) -> bool {
+        match self {
+            Flavor::Bounded(q) => q.is_full(),
+            Flavor::Unbounded(_) => false,
+        }
+    }
+
+    fn capacity(&self) -> Option<usize> {
+        match self {
+            Flavor::Bounded(q) => Some(q.capacity()),
+            Flavor::Unbounded(_) => None,
+        }
+    }
+}
+
+struct Channel<T> {
+    /// Inner message queue.
+    queue: Flavor<T>,
+
+    /// Indicates whether the channel is closed.
+    closed: AtomicBool,
+
+    /// Send operations waiting while the channel is full.
+    send_ops: Event,
+
+    /// Receive operations waiting while the channel is empty and not closed.
+    recv_ops: Event,
+
+    /// Stream operations while the channel is empty and not closed.
+    stream_ops: Event,
+
+    /// The number of currently active `Sender`s.
+    sender_count: AtomicUsize,
+
+    /// The number of currently active `Receiver`s.
+    receiver_count: AtomicUsize,
+}
+
+impl<T> Channel<T> {
+    /// Closes the channel and notifies all blocked operations.
+    ///
+    /// Returns `true` if this call has closed the channel and it was not closed
+    /// already.
+    fn close(&self) -> bool {
+        if !self.closed.swap(true, Ordering::SeqCst) {
+            // Notify all send operations.
+            self.send_ops.notify(usize::MAX);
+
+            // Notify all receive and stream operations.
+            self.recv_ops.notify(usize::MAX);
+            self.stream_ops.notify(usize::MAX);
+
+            true
+        } else {
+            false
+        }
+    }
+}
+
+/// Creates a bounded channel.
+///
+/// The created channel has space to hold at most `cap` messages at a time.
+///
+/// # Panics
+///
+/// Capacity must be a positive number. If `cap` is zero, this function will
+/// panic.
+///
+/// # Examples
+///
+/// ```
+/// # futures_lite::future::block_on(async {
+/// use async_channel::{bounded, TryRecvError, TrySendError};
+///
+/// let (s, r) = bounded(1);
+///
+/// assert_eq!(s.send(10).await, Ok(()));
+/// assert_eq!(s.try_send(20), Err(TrySendError::Full(20)));
+///
+/// assert_eq!(r.recv().await, Ok(10));
+/// assert_eq!(r.try_recv(), Err(TryRecvError::Empty));
+/// # });
+/// ```
+pub fn bounded<T>(cap: usize) -> (Sender<T>, Receiver<T>) {
+    assert!(cap > 0, "capacity cannot be zero");
+
+    let channel = Arsc::new(Channel {
+        queue: Flavor::Bounded(Box::new(ArrayQueue::new(cap))),
+        closed: AtomicBool::new(false),
+        send_ops: Event::new(),
+        recv_ops: Event::new(),
+        stream_ops: Event::new(),
+        sender_count: AtomicUsize::new(1),
+        receiver_count: AtomicUsize::new(1),
+    });
+
+    let s = Sender {
+        channel: channel.clone(),
+    };
+    let r = Receiver {
+        channel,
+        listener: None,
+    };
+    (s, r)
+}
+
+/// Creates an unbounded channel.
+///
+/// The created channel can hold an unlimited number of messages.
+///
+/// # Examples
+///
+/// ```
+/// # futures_lite::future::block_on(async {
+/// use async_channel::{unbounded, TryRecvError};
+///
+/// let (s, r) = unbounded();
+///
+/// assert_eq!(s.send(10).await, Ok(()));
+/// assert_eq!(s.send(20).await, Ok(()));
+///
+/// assert_eq!(r.recv().await, Ok(10));
+/// assert_eq!(r.recv().await, Ok(20));
+/// assert_eq!(r.try_recv(), Err(TryRecvError::Empty));
+/// # });
+/// ```
+pub fn unbounded<T>() -> (Sender<T>, Receiver<T>) {
+    let channel = Arsc::new(Channel {
+        queue: Flavor::Unbounded(SegQueue::new()),
+        closed: AtomicBool::new(false),
+        send_ops: Event::new(),
+        recv_ops: Event::new(),
+        stream_ops: Event::new(),
+        sender_count: AtomicUsize::new(1),
+        receiver_count: AtomicUsize::new(1),
+    });
+
+    let s = Sender {
+        channel: channel.clone(),
+    };
+    let r = Receiver {
+        channel,
+        listener: None,
+    };
+    (s, r)
+}
+
+/// The sending side of a channel.
+///
+/// Senders can be cloned and shared among threads. When all senders associated
+/// with a channel are dropped, the channel becomes closed.
+///
+/// The channel can also be closed manually by calling [`Sender::close()`].
+pub struct Sender<T> {
+    /// Inner channel state.
+    channel: Arsc<Channel<T>>,
+}
+
+impl<T> Sender<T> {
+    /// Attempts to send a message into the channel.
+    ///
+    /// If the channel is full or closed, this method returns an error.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use async_channel::{bounded, TrySendError};
+    ///
+    /// let (s, r) = bounded(1);
+    ///
+    /// assert_eq!(s.try_send(1), Ok(()));
+    /// assert_eq!(s.try_send(2), Err(TrySendError::Full(2)));
+    ///
+    /// drop(r);
+    /// assert_eq!(s.try_send(3), Err(TrySendError::Closed(3)));
+    /// ```
+    pub fn try_send(&self, msg: T) -> Result<(), TrySendError<T>> {
+        if self.channel.closed.load(Ordering::SeqCst) {
+            return Err(TrySendError::Closed(msg));
+        }
+        match self.channel.queue.push(msg) {
+            None => {
+                // Notify a blocked receive operation. If the notified operation gets canceled,
+                // it will notify another blocked receive operation.
+                self.channel.recv_ops.notify_additional(1);
+
+                // Notify all blocked streams.
+                self.channel.stream_ops.notify(usize::MAX);
+
+                Ok(())
+            }
+            Some(msg) => Err(TrySendError::Full(msg)),
+        }
+    }
+
+    /// Sends a message into the channel.
+    ///
+    /// If the channel is full, this method waits until there is space for a
+    /// message.
+    ///
+    /// If the channel is closed, this method returns an error.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::{unbounded, SendError};
+    ///
+    /// let (s, r) = unbounded();
+    ///
+    /// assert_eq!(s.send(1).await, Ok(()));
+    /// drop(r);
+    /// assert_eq!(s.send(2).await, Err(SendError(2)));
+    /// # });
+    /// ```
+    pub fn send(&self, msg: T) -> Send<'_, T> {
+        Send {
+            sender: self,
+            listener: None,
+            msg: Some(msg),
+        }
+    }
+
+    /// Sends a message into this channel using the blocking strategy.
+    ///
+    /// If the channel is full, this method will block until there is room.
+    /// If the channel is closed, this method returns an error.
+    ///
+    /// # Blocking
+    ///
+    /// Rather than using asynchronous waiting, like the [`send`](Self::send)
+    /// method, this method will block the current thread until the message
+    /// is sent.
+    ///
+    /// This method should not be used in an asynchronous context. It exists
+    /// so that the same channel can be used from both asynchronous and
+    /// synchronous code. Calling this method in an asynchronous context may
+    /// result in deadlocks.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use async_channel::{unbounded, SendError};
+    ///
+    /// let (s, r) = unbounded();
+    ///
+    /// assert_eq!(s.send_blocking(1), Ok(()));
+    /// drop(r);
+    /// assert_eq!(s.send_blocking(2), Err(SendError(2)));
+    /// ```
+    pub fn send_blocking(&self, msg: T) -> Result<(), SendError<T>> {
+        self.send(msg).wait()
+    }
+
+    /// Closes the channel.
+    ///
+    /// Returns `true` if this call has closed the channel and it was not closed
+    /// already.
+    ///
+    /// The remaining messages can still be received.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::{unbounded, RecvError};
+    ///
+    /// let (s, r) = unbounded();
+    /// assert_eq!(s.send(1).await, Ok(()));
+    /// assert!(s.close());
+    ///
+    /// assert_eq!(r.recv().await, Ok(1));
+    /// assert_eq!(r.recv().await, Err(RecvError));
+    /// # });
+    /// ```
+    pub fn close(&self) -> bool {
+        self.channel.close()
+    }
+
+    /// Returns `true` if the channel is closed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::{unbounded, RecvError};
+    ///
+    /// let (s, r) = unbounded::<()>();
+    /// assert!(!s.is_closed());
+    ///
+    /// drop(r);
+    /// assert!(s.is_closed());
+    /// # });
+    /// ```
+    pub fn is_closed(&self) -> bool {
+        self.channel.closed.load(Ordering::SeqCst)
+    }
+
+    /// Returns `true` if the channel is empty.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded();
+    ///
+    /// assert!(s.is_empty());
+    /// s.send(1).await;
+    /// assert!(!s.is_empty());
+    /// # });
+    /// ```
+    pub fn is_empty(&self) -> bool {
+        self.channel.queue.is_empty()
+    }
+
+    /// Returns `true` if the channel is full.
+    ///
+    /// Unbounded channels are never full.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::bounded;
+    ///
+    /// let (s, r) = bounded(1);
+    ///
+    /// assert!(!s.is_full());
+    /// s.send(1).await;
+    /// assert!(s.is_full());
+    /// # });
+    /// ```
+    pub fn is_full(&self) -> bool {
+        self.channel.queue.is_full()
+    }
+
+    /// Returns the number of messages in the channel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded();
+    /// assert_eq!(s.len(), 0);
+    ///
+    /// s.send(1).await;
+    /// s.send(2).await;
+    /// assert_eq!(s.len(), 2);
+    /// # });
+    /// ```
+    pub fn len(&self) -> usize {
+        self.channel.queue.len()
+    }
+
+    /// Returns the channel capacity if it's bounded.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use async_channel::{bounded, unbounded};
+    ///
+    /// let (s, r) = bounded::<i32>(5);
+    /// assert_eq!(s.capacity(), Some(5));
+    ///
+    /// let (s, r) = unbounded::<i32>();
+    /// assert_eq!(s.capacity(), None);
+    /// ```
+    pub fn capacity(&self) -> Option<usize> {
+        self.channel.queue.capacity()
+    }
+
+    /// Returns the number of receivers for the channel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded::<()>();
+    /// assert_eq!(s.receiver_count(), 1);
+    ///
+    /// let r2 = r.clone();
+    /// assert_eq!(s.receiver_count(), 2);
+    /// # });
+    /// ```
+    pub fn receiver_count(&self) -> usize {
+        self.channel.receiver_count.load(Ordering::SeqCst)
+    }
+
+    /// Returns the number of senders for the channel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded::<()>();
+    /// assert_eq!(s.sender_count(), 1);
+    ///
+    /// let s2 = s.clone();
+    /// assert_eq!(s.sender_count(), 2);
+    /// # });
+    /// ```
+    pub fn sender_count(&self) -> usize {
+        self.channel.sender_count.load(Ordering::SeqCst)
+    }
+
+    /// Downgrade the sender to a weak reference.
+    pub fn downgrade(&self) -> WeakSender<T> {
+        WeakSender {
+            channel: self.channel.clone(),
+        }
+    }
+}
+
+impl<T> Drop for Sender<T> {
+    fn drop(&mut self) {
+        // Decrement the sender count and close the channel if it drops down to zero.
+        if self.channel.sender_count.fetch_sub(1, Ordering::AcqRel) == 1 {
+            self.channel.close();
+        }
+    }
+}
+
+impl<T> fmt::Debug for Sender<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Sender {{ .. }}")
+    }
+}
+
+impl<T> Clone for Sender<T> {
+    fn clone(&self) -> Sender<T> {
+        let count = self.channel.sender_count.fetch_add(1, Ordering::Relaxed);
+
+        // Make sure the count never overflows, even if lots of sender clones are
+        // leaked.
+        assert!(count <= usize::MAX / 2);
+
+        Sender {
+            channel: self.channel.clone(),
+        }
+    }
+}
+
+/// The receiving side of a channel.
+///
+/// Receivers can be cloned and shared among threads. When all receivers
+/// associated with a channel are dropped, the channel becomes closed.
+///
+/// The channel can also be closed manually by calling [`Receiver::close()`].
+///
+/// Receivers implement the [`Stream`] trait.
+pub struct Receiver<T> {
+    /// Inner channel state.
+    channel: Arsc<Channel<T>>,
+
+    /// Listens for a send or close event to unblock this stream.
+    listener: Option<EventListener>,
+}
+
+impl<T> Receiver<T> {
+    /// Attempts to receive a message from the channel.
+    ///
+    /// If the channel is empty, or empty and closed, this method returns an
+    /// error.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::{unbounded, TryRecvError};
+    ///
+    /// let (s, r) = unbounded();
+    /// assert_eq!(s.send(1).await, Ok(()));
+    ///
+    /// assert_eq!(r.try_recv(), Ok(1));
+    /// assert_eq!(r.try_recv(), Err(TryRecvError::Empty));
+    ///
+    /// drop(s);
+    /// assert_eq!(r.try_recv(), Err(TryRecvError::Closed));
+    /// # });
+    /// ```
+    pub fn try_recv(&self) -> Result<T, TryRecvError> {
+        match self.channel.queue.pop() {
+            Some(msg) => {
+                // Notify a blocked send operation. If the notified operation gets canceled, it
+                // will notify another blocked send operation.
+                self.channel.send_ops.notify_additional(1);
+
+                Ok(msg)
+            }
+            None => Err(if self.channel.closed.load(Ordering::SeqCst) {
+                TryRecvError::Closed
+            } else {
+                TryRecvError::Empty
+            }),
+        }
+    }
+
+    /// Receives a message from the channel.
+    ///
+    /// If the channel is empty, this method waits until there is a message.
+    ///
+    /// If the channel is closed, this method receives a message or returns an
+    /// error if there are no more messages.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::{unbounded, RecvError};
+    ///
+    /// let (s, r) = unbounded();
+    ///
+    /// assert_eq!(s.send(1).await, Ok(()));
+    /// drop(s);
+    ///
+    /// assert_eq!(r.recv().await, Ok(1));
+    /// assert_eq!(r.recv().await, Err(RecvError));
+    /// # });
+    /// ```
+    pub fn recv(&self) -> Recv<'_, T> {
+        Recv {
+            receiver: self,
+            listener: None,
+        }
+    }
+
+    /// Receives a message from the channel using the blocking strategy.
+    ///
+    /// If the channel is empty, this method waits until there is a message.
+    /// If the channel is closed, this method receives a message or returns an
+    /// error if there are no more messages.
+    ///
+    /// # Blocking
+    ///
+    /// Rather than using asynchronous waiting, like the [`recv`](Self::recv)
+    /// method, this method will block the current thread until a message
+    /// is received.
+    ///
+    /// This method should not be used in an asynchronous context. It exists
+    /// so that the same channel can be used from both asynchronous and
+    /// synchronous code. Calling this method in an asynchronous context may
+    /// result in deadlocks.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use async_channel::{unbounded, RecvError};
+    ///
+    /// let (s, r) = unbounded();
+    ///
+    /// assert_eq!(s.send_blocking(1), Ok(()));
+    /// drop(s);
+    ///
+    /// assert_eq!(r.recv_blocking(), Ok(1));
+    /// assert_eq!(r.recv_blocking(), Err(RecvError));
+    /// ```
+    pub fn recv_blocking(&self) -> Result<T, RecvError> {
+        self.recv().wait()
+    }
+
+    /// Closes the channel.
+    ///
+    /// Returns `true` if this call has closed the channel and it was not closed
+    /// already.
+    ///
+    /// The remaining messages can still be received.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::{unbounded, RecvError};
+    ///
+    /// let (s, r) = unbounded();
+    /// assert_eq!(s.send(1).await, Ok(()));
+    ///
+    /// assert!(r.close());
+    /// assert_eq!(r.recv().await, Ok(1));
+    /// assert_eq!(r.recv().await, Err(RecvError));
+    /// # });
+    /// ```
+    pub fn close(&self) -> bool {
+        self.channel.close()
+    }
+
+    /// Returns `true` if the channel is closed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::{unbounded, RecvError};
+    ///
+    /// let (s, r) = unbounded::<()>();
+    /// assert!(!r.is_closed());
+    ///
+    /// drop(s);
+    /// assert!(r.is_closed());
+    /// # });
+    /// ```
+    pub fn is_closed(&self) -> bool {
+        self.channel.closed.load(Ordering::SeqCst)
+    }
+
+    /// Returns `true` if the channel is empty.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded();
+    ///
+    /// assert!(s.is_empty());
+    /// s.send(1).await;
+    /// assert!(!s.is_empty());
+    /// # });
+    /// ```
+    pub fn is_empty(&self) -> bool {
+        self.channel.queue.is_empty()
+    }
+
+    /// Returns `true` if the channel is full.
+    ///
+    /// Unbounded channels are never full.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::bounded;
+    ///
+    /// let (s, r) = bounded(1);
+    ///
+    /// assert!(!r.is_full());
+    /// s.send(1).await;
+    /// assert!(r.is_full());
+    /// # });
+    /// ```
+    pub fn is_full(&self) -> bool {
+        self.channel.queue.is_full()
+    }
+
+    /// Returns the number of messages in the channel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded();
+    /// assert_eq!(r.len(), 0);
+    ///
+    /// s.send(1).await;
+    /// s.send(2).await;
+    /// assert_eq!(r.len(), 2);
+    /// # });
+    /// ```
+    pub fn len(&self) -> usize {
+        self.channel.queue.len()
+    }
+
+    /// Returns the channel capacity if it's bounded.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use async_channel::{bounded, unbounded};
+    ///
+    /// let (s, r) = bounded::<i32>(5);
+    /// assert_eq!(r.capacity(), Some(5));
+    ///
+    /// let (s, r) = unbounded::<i32>();
+    /// assert_eq!(r.capacity(), None);
+    /// ```
+    pub fn capacity(&self) -> Option<usize> {
+        self.channel.queue.capacity()
+    }
+
+    /// Returns the number of receivers for the channel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded::<()>();
+    /// assert_eq!(r.receiver_count(), 1);
+    ///
+    /// let r2 = r.clone();
+    /// assert_eq!(r.receiver_count(), 2);
+    /// # });
+    /// ```
+    pub fn receiver_count(&self) -> usize {
+        self.channel.receiver_count.load(Ordering::SeqCst)
+    }
+
+    /// Returns the number of senders for the channel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # futures_lite::future::block_on(async {
+    /// use async_channel::unbounded;
+    ///
+    /// let (s, r) = unbounded::<()>();
+    /// assert_eq!(r.sender_count(), 1);
+    ///
+    /// let s2 = s.clone();
+    /// assert_eq!(r.sender_count(), 2);
+    /// # });
+    /// ```
+    pub fn sender_count(&self) -> usize {
+        self.channel.sender_count.load(Ordering::SeqCst)
+    }
+
+    /// Downgrade the receiver to a weak reference.
+    pub fn downgrade(&self) -> WeakReceiver<T> {
+        WeakReceiver {
+            channel: self.channel.clone(),
+        }
+    }
+}
+
+impl<T> Unpin for Receiver<T> {}
+
+impl<T> Drop for Receiver<T> {
+    fn drop(&mut self) {
+        // Decrement the receiver count and close the channel if it drops down to zero.
+        if self.channel.receiver_count.fetch_sub(1, Ordering::AcqRel) == 1 {
+            self.channel.close();
+        }
+    }
+}
+
+impl<T> fmt::Debug for Receiver<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Receiver {{ .. }}")
+    }
+}
+
+impl<T> Clone for Receiver<T> {
+    fn clone(&self) -> Receiver<T> {
+        let count = self.channel.receiver_count.fetch_add(1, Ordering::Relaxed);
+
+        // Make sure the count never overflows, even if lots of receiver clones are
+        // leaked.
+        assert!(count <= usize::MAX / 2);
+
+        Receiver {
+            channel: self.channel.clone(),
+            listener: None,
+        }
+    }
+}
+
+impl<T> Stream for Receiver<T> {
+    type Item = T;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        loop {
+            // If this stream is listening for events, first wait for a notification.
+            if let Some(listener) = self.listener.as_mut() {
+                ready!(Pin::new(listener).poll(cx));
+                self.listener = None;
+            }
+
+            loop {
+                // Attempt to receive a message.
+                match self.try_recv() {
+                    Ok(msg) => {
+                        // The stream is not blocked on an event - drop the listener.
+                        self.listener = None;
+                        return Poll::Ready(Some(msg));
+                    }
+                    Err(TryRecvError::Closed) => {
+                        // The stream is not blocked on an event - drop the listener.
+                        self.listener = None;
+                        return Poll::Ready(None);
+                    }
+                    Err(TryRecvError::Empty) => {}
+                }
+
+                // Receiving failed - now start listening for notifications or wait for one.
+                match self.listener.as_mut() {
+                    None => {
+                        // Create a listener and try receiving again.
+                        self.listener = Some(self.channel.stream_ops.listen());
+                    }
+                    Some(_) => {
+                        // Go back to the outer loop to poll the listener.
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+// impl<T> FusedStream for Receiver<T> {
+//     fn is_terminated(&self) -> bool {
+//         self.channel.closed.load(Ordering::SeqCst) && self.channel.queue.is_empty()
+//     }
+// }
+
+/// A [`Sender`] that does not prevent the channel from being closed.
+///
+/// This is created through the [`Sender::downgrade`] method. In order to use
+/// it, it needs to be upgraded into a [`Sender`] through the `upgrade` method.
+#[derive(Clone)]
+pub struct WeakSender<T> {
+    channel: Arsc<Channel<T>>,
+}
+
+impl<T> WeakSender<T> {
+    /// Upgrade the [`WeakSender`] into a [`Sender`].
+    pub fn upgrade(&self) -> Option<Sender<T>> {
+        if self.channel.closed.load(Ordering::SeqCst) {
+            None
+        } else {
+            let old_count = self.channel.sender_count.fetch_add(1, Ordering::Relaxed);
+            if old_count == 0 {
+                // Channel was closed while we were incrementing the count.
+                self.channel.sender_count.store(0, Ordering::Release);
+                None
+            } else {
+                // Make sure the count never overflows, even if lots of sender clones are
+                // leaked.
+                assert!(old_count <= usize::MAX / 2);
+
+                Some(Sender {
+                    channel: self.channel.clone(),
+                })
+            }
+        }
+    }
+}
+
+impl<T> fmt::Debug for WeakSender<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "WeakSender {{ .. }}")
+    }
+}
+
+/// A [`Receiver`] that does not prevent the channel from being closed.
+///
+/// This is created through the [`Receiver::downgrade`] method. In order to use
+/// it, it needs to be upgraded into a [`Receiver`] through the `upgrade`
+/// method.
+#[derive(Clone)]
+pub struct WeakReceiver<T> {
+    channel: Arsc<Channel<T>>,
+}
+
+impl<T> WeakReceiver<T> {
+    /// Upgrade the [`WeakReceiver`] into a [`Receiver`].
+    pub fn upgrade(&self) -> Option<Receiver<T>> {
+        if self.channel.closed.load(Ordering::SeqCst) {
+            None
+        } else {
+            let old_count = self.channel.receiver_count.fetch_add(1, Ordering::Relaxed);
+            if old_count == 0 {
+                // Channel was closed while we were incrementing the count.
+                self.channel.receiver_count.store(0, Ordering::Release);
+                None
+            } else {
+                // Make sure the count never overflows, even if lots of receiver clones are
+                // leaked.
+                assert!(old_count <= usize::MAX / 2);
+
+                Some(Receiver {
+                    channel: self.channel.clone(),
+                    listener: None,
+                })
+            }
+        }
+    }
+}
+
+impl<T> fmt::Debug for WeakReceiver<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "WeakReceiver {{ .. }}")
+    }
+}
+
+/// An error returned from [`Sender::send()`].
+///
+/// Returned because the channel is closed.
+#[derive(PartialEq, Eq, Clone, Copy)]
+pub struct SendError<T>(pub T);
+
+impl<T> SendError<T> {
+    /// Unwraps the message that couldn't be sent.
+    pub fn into_inner(self) -> T {
+        self.0
+    }
+}
+
+impl<T> error::Error for SendError<T> {}
+
+impl<T> fmt::Debug for SendError<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "SendError(..)")
+    }
+}
+
+impl<T> fmt::Display for SendError<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "sending into a closed channel")
+    }
+}
+
+/// An error returned from [`Sender::try_send()`].
+#[derive(PartialEq, Eq, Clone, Copy)]
+pub enum TrySendError<T> {
+    /// The channel is full but not closed.
+    Full(T),
+
+    /// The channel is closed.
+    Closed(T),
+}
+
+impl<T> TrySendError<T> {
+    /// Unwraps the message that couldn't be sent.
+    pub fn into_inner(self) -> T {
+        match self {
+            TrySendError::Full(t) => t,
+            TrySendError::Closed(t) => t,
+        }
+    }
+
+    /// Returns `true` if the channel is full but not closed.
+    pub fn is_full(&self) -> bool {
+        match self {
+            TrySendError::Full(_) => true,
+            TrySendError::Closed(_) => false,
+        }
+    }
+
+    /// Returns `true` if the channel is closed.
+    pub fn is_closed(&self) -> bool {
+        match self {
+            TrySendError::Full(_) => false,
+            TrySendError::Closed(_) => true,
+        }
+    }
+}
+
+impl<T> error::Error for TrySendError<T> {}
+
+impl<T> fmt::Debug for TrySendError<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            TrySendError::Full(..) => write!(f, "Full(..)"),
+            TrySendError::Closed(..) => write!(f, "Closed(..)"),
+        }
+    }
+}
+
+impl<T> fmt::Display for TrySendError<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            TrySendError::Full(..) => write!(f, "sending into a full channel"),
+            TrySendError::Closed(..) => write!(f, "sending into a closed channel"),
+        }
+    }
+}
+
+/// An error returned from [`Receiver::recv()`].
+///
+/// Returned because the channel is empty and closed.
+#[derive(PartialEq, Eq, Clone, Copy, Debug)]
+pub struct RecvError;
+
+impl error::Error for RecvError {}
+
+impl fmt::Display for RecvError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "receiving from an empty and closed channel")
+    }
+}
+
+/// An error returned from [`Receiver::try_recv()`].
+#[derive(PartialEq, Eq, Clone, Copy, Debug)]
+pub enum TryRecvError {
+    /// The channel is empty but not closed.
+    Empty,
+
+    /// The channel is empty and closed.
+    Closed,
+}
+
+impl TryRecvError {
+    /// Returns `true` if the channel is empty but not closed.
+    pub fn is_empty(&self) -> bool {
+        match self {
+            TryRecvError::Empty => true,
+            TryRecvError::Closed => false,
+        }
+    }
+
+    /// Returns `true` if the channel is empty and closed.
+    pub fn is_closed(&self) -> bool {
+        match self {
+            TryRecvError::Empty => false,
+            TryRecvError::Closed => true,
+        }
+    }
+}
+
+impl error::Error for TryRecvError {}
+
+impl fmt::Display for TryRecvError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            TryRecvError::Empty => write!(f, "receiving from an empty channel"),
+            TryRecvError::Closed => write!(f, "receiving from an empty and closed channel"),
+        }
+    }
+}
+
+/// A future returned by [`Sender::send()`].
+#[derive(Debug)]
+#[must_use = "futures do nothing unless you `.await` or poll them"]
+pub struct Send<'a, T> {
+    sender: &'a Sender<T>,
+    listener: Option<EventListener>,
+    msg: Option<T>,
+}
+
+impl<'a, T> Send<'a, T> {
+    /// Run this future with the given `Strategy`.
+    fn run_with_strategy<S: Strategy>(
+        &mut self,
+        cx: &mut S::Context,
+    ) -> Poll<Result<(), SendError<T>>> {
+        loop {
+            let msg = self.msg.take().unwrap();
+            // Attempt to send a message.
+            match self.sender.try_send(msg) {
+                Ok(()) => return Poll::Ready(Ok(())),
+                Err(TrySendError::Closed(msg)) => return Poll::Ready(Err(SendError(msg))),
+                Err(TrySendError::Full(m)) => self.msg = Some(m),
+            }
+
+            // Sending failed - now start listening for notifications or wait for one.
+            match self.listener.take() {
+                None => {
+                    // Start listening and then try sending again.
+                    self.listener = Some(self.sender.channel.send_ops.listen());
+                }
+                Some(l) => {
+                    // Poll using the given strategy
+                    if let Err(l) = S::poll(l, cx) {
+                        self.listener = Some(l);
+                        return Poll::Pending;
+                    }
+                }
+            }
+        }
+    }
+
+    /// Run using the blocking strategy.
+    fn wait(mut self) -> Result<(), SendError<T>> {
+        match self.run_with_strategy::<Blocking>(&mut ()) {
+            Poll::Ready(res) => res,
+            Poll::Pending => unreachable!(),
+        }
+    }
+}
+
+impl<'a, T> Unpin for Send<'a, T> {}
+
+impl<'a, T> Future for Send<'a, T> {
+    type Output = Result<(), SendError<T>>;
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        self.run_with_strategy::<NonBlocking<'_>>(cx)
+    }
+}
+
+/// A future returned by [`Receiver::recv()`].
+#[derive(Debug)]
+#[must_use = "futures do nothing unless you `.await` or poll them"]
+pub struct Recv<'a, T> {
+    receiver: &'a Receiver<T>,
+    listener: Option<EventListener>,
+}
+
+impl<'a, T> Unpin for Recv<'a, T> {}
+
+impl<'a, T> Recv<'a, T> {
+    /// Run this future with the given `Strategy`.
+    fn run_with_strategy<S: Strategy>(
+        &mut self,
+        cx: &mut S::Context,
+    ) -> Poll<Result<T, RecvError>> {
+        loop {
+            // Attempt to receive a message.
+            match self.receiver.try_recv() {
+                Ok(msg) => return Poll::Ready(Ok(msg)),
+                Err(TryRecvError::Closed) => return Poll::Ready(Err(RecvError)),
+                Err(TryRecvError::Empty) => {}
+            }
+
+            // Receiving failed - now start listening for notifications or wait for one.
+            match self.listener.take() {
+                None => {
+                    // Start listening and then try receiving again.
+                    self.listener = Some(self.receiver.channel.recv_ops.listen());
+                }
+                Some(l) => {
+                    // Poll using the given strategy.
+                    if let Err(l) = S::poll(l, cx) {
+                        self.listener = Some(l);
+                        return Poll::Pending;
+                    }
+                }
+            }
+        }
+    }
+
+    /// Run with the blocking strategy.
+    fn wait(mut self) -> Result<T, RecvError> {
+        match self.run_with_strategy::<Blocking>(&mut ()) {
+            Poll::Ready(res) => res,
+            Poll::Pending => unreachable!(),
+        }
+    }
+}
+
+impl<'a, T> Future for Recv<'a, T> {
+    type Output = Result<T, RecvError>;
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        self.run_with_strategy::<NonBlocking<'_>>(cx)
+    }
+}
+
+/// A strategy used to poll an `EventListener`.
+trait Strategy {
+    /// Context needed to be provided to the `poll` method.
+    type Context;
+
+    /// Polls the given `EventListener`.
+    ///
+    /// Returns `Ok(())` if the listener completed; otherwise returns the
+    /// `EventListener` back as an `Err` so it can be polled again.
+    fn poll(evl: EventListener, cx: &mut Self::Context) -> Result<(), EventListener>;
+}
+
+/// Non-blocking strategy for use in asynchronous code.
+struct NonBlocking<'a>(&'a mut ());
+
+impl<'a> Strategy for NonBlocking<'a> {
+    type Context = Context<'a>;
+
+    fn poll(mut evl: EventListener, cx: &mut Context<'a>) -> Result<(), EventListener> {
+        match Pin::new(&mut evl).poll(cx) {
+            Poll::Ready(()) => Ok(()),
+            Poll::Pending => Err(evl),
+        }
+    }
+}
+
+/// Blocking strategy for use in synchronous code.
+struct Blocking;
+
+impl Strategy for Blocking {
+    type Context = ();
+
+    fn poll(evl: EventListener, _cx: &mut ()) -> Result<(), EventListener> {
+        evl.wait();
+        Ok(())
+    }
+}
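
Taken together, the pieces above compose as in the following sketch. It mirrors the `async_channel`-style paths used in the doc examples (the in-tree module path may differ) and assumes `futures_lite` for `block_on` and `StreamExt`:

```
use async_channel::{bounded, TryRecvError};
use futures_lite::{future::block_on, StreamExt};

fn main() {
    block_on(async {
        // Capacity-2 bounded channel; `send().await` applies backpressure.
        let (s, mut r) = bounded(2);
        s.send("a").await.unwrap();
        s.send("b").await.unwrap();
        assert!(s.is_full());

        // `Receiver` implements `Stream`, so it can be drained with `next()`.
        assert_eq!(r.next().await, Some("a"));

        // A weak handle does not keep the channel open, but it can be
        // upgraded while a strong sender still exists.
        let weak = s.downgrade();
        let s2 = weak.upgrade().expect("channel still open");
        drop(s);
        drop(s2);

        // All strong senders gone: the channel closes, buffered messages
        // drain, and then receiving reports `Closed`.
        assert_eq!(r.try_recv(), Ok("b"));
        assert_eq!(r.try_recv(), Err(TryRecvError::Closed));
        assert!(weak.upgrade().is_none());
    });
}
```

Note how `send_blocking`/`recv_blocking` reuse the same `Send`/`Recv` futures via the `Blocking` strategy, so the synchronous and asynchronous paths share one state machine.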

+ 6 - 26
src/lib/h2o_async/src/time.rs

@@ -1,10 +1,6 @@
-use core::{
-    pin::Pin,
-    task::{Context, Poll},
-    time::Duration,
-};
+use core::time::Duration;
 
-use futures::{pin_mut, Future, Stream};
+use futures_lite::{stream, Stream};
 use solvent::{
     error::Result,
     prelude::SIG_TIMER,
@@ -80,25 +76,9 @@ impl Timer {
     }
 
     #[inline]
-    pub fn interval(&self, period: Duration) -> Intervals {
-        Intervals {
-            timer: self,
-            period,
-        }
-    }
-}
-
-pub struct Intervals<'a> {
-    timer: &'a Timer,
-    period: Duration,
-}
-
-impl Stream for Intervals<'_> {
-    type Item = Result;
-
-    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        let fut = self.timer.wait_after(self.period);
-        pin_mut!(fut);
-        fut.poll(cx).map(Some)
+    pub fn interval(&self, period: Duration) -> impl Stream<Item = Result> + '_ {
+        stream::unfold((), move |_| async move {
+            Some((self.wait_after(period).await, ()))
+        })
     }
 }
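
The rewrite replaces the hand-rolled `Intervals` stream with `stream::unfold`, which keeps a single `wait_after` future alive across polls instead of recreating (and dropping) one on every `poll_next`; the old version could lose wakeups registered by the dropped future. A consumption sketch follows; the module path and the already-constructed `Timer` are assumptions, since the constructor is not part of this diff:

```
use core::time::Duration;

use futures_lite::StreamExt;
use solvent_async::time::Timer; // path assumed from src/lib/h2o_async/src/time.rs

async fn tick_twice(timer: &Timer) {
    // `interval` now returns an anonymous `impl Stream + '_`; unlike the old
    // named `Intervals` struct it is not `Unpin`, so pin it before polling.
    let ticks = timer.interval(Duration::from_millis(100));
    futures_lite::pin!(ticks);

    // Each item is the `solvent::error::Result` produced by `wait_after`,
    // and the unfold stream never yields `None`.
    ticks.next().await.expect("stream never ends").unwrap();
    ticks.next().await.expect("stream never ends").unwrap();
}
```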

+ 19 - 0
src/lib/h2o_ddk/Cargo.toml

@@ -0,0 +1,19 @@
+[package]
+edition = "2021"
+name = "solvent-ddk"
+version = "0.1.0"
+
+[features]
+ddk = []
+default = ["ddk"]
+
+[dependencies]
+# Local crates
+dbglog = {path = "../dbglog"}
+solvent = {path = "../h2o_rs", default-features = false}
+solvent-async = {path = "../h2o_async", default-features = false}
+solvent-core = {path = "../h2o_std/core"}
+solvent-fs = {path = "../h2o_fs", default-features = false}
+solvent-rpc = {path = "../h2o_rpc", default-features = false}
+# External crates
+log = "0.4"
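
A downstream driver crate would then pull the new DDK in along the same lines as the other in-tree path dependencies; the manifest below is a hypothetical sketch (crate name and layout are placeholders, not part of this diff):

```
[package]
edition = "2021"
name = "my-driver"   # hypothetical downstream driver crate
version = "0.1.0"

[dependencies]
solvent-ddk = {path = "../h2o_ddk"}
# or, opting out of the default `ddk` feature:
# solvent-ddk = {path = "../h2o_ddk", default-features = false}
```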

Some files were not shown in this diff because too many files have changed.