// ostd/mm/kspace/mod.rs
// SPDX-License-Identifier: MPL-2.0
//! Kernel memory space management.
//!
//! The kernel memory space is currently managed as follows, if the
//! address width is 48 bits (with 47 bits kernel space).
//!
//! TODO: the cap of linear mapping (the start of vm alloc) is raised
//! to work around high IO in TDX. We need an actual vm alloc API to have
//! a proper fix.
//!
//! ```text
//! +-+ <- the highest used address (0xffff_ffff_ffff_0000)
//! | | For the kernel code, 1 GiB.
//! +-+ <- 0xffff_ffff_8000_0000
//! | |
//! | | Unused hole.
//! +-+ <- 0xffff_e100_0000_0000
//! | | For frame metadata, 1 TiB.
//! +-+ <- 0xffff_e000_0000_0000
//! | | For [`KVirtArea`], 32 TiB.
//! +-+ <- the middle of the higher half (0xffff_c000_0000_0000)
//! | |
//! | |
//! | |
//! | | For linear mappings, 64 TiB.
//! | | Mapped physical addresses are untracked.
//! | |
//! | |
//! | |
//! +-+ <- the base of high canonical address (0xffff_8000_0000_0000)
//! ```
//!
//! If the address width is (according to [`crate::arch::mm::PagingConsts`])
//! 39 bits or 57 bits, the memory space is adjusted proportionally.
35use vstd::prelude::*;
36use vstd::atomic::PermissionU64;
37use vstd::simple_pptr::PointsTo;
38use core::ops::Range;
39
40//use log::info;
41use crate::sync::{OnceImpl, TrivialPred};
42#[cfg(ktest)]
43mod test;
44pub mod kvirt_area;
45
46use super::{
47 frame::{
48 meta::{mapping, AnyFrameMeta, MetaPageMeta, MetaSlot},
49 Frame, Segment,
50 },
51 page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
52 page_table::{PageTable, PageTableConfig},
53 Paddr, PagingConstsTrait, Vaddr,
54};
55use crate::specs::mm::frame::meta_owners::MetaSlotStorage;
56use crate::specs::arch::mm::NR_LEVELS;
57use crate::mm::frame::DynFrame;
58use crate::{
59 boot::memory_region::MemoryRegionType,
60 mm::{largest_pages, PagingLevel},
61 specs::arch::{PageTableEntry, PagingConsts},
62 //task::disable_preempt,
63};
64use crate::mm::page_table::RCClone;
65use crate::specs::mm::frame::meta_owners::MetaPerm;
66
67verus! {
68
/// The shortest supported address width is 39 bits. And the literal
/// values are written for 48 bits address width. Adjust the values
/// by arithmetic left shift.
///
/// For the 48-bit layout this shift is `48 - 48 == 0`, so every
/// `<< ADDR_WIDTH_SHIFT` below is a no-op; the subtraction is kept
/// explicit so that other widths only need the minuend changed.
pub const ADDR_WIDTH_SHIFT: isize = 48 - 48;

/// Start of the kernel address space.
/// This is the _lowest_ address of the x86-64's _high_ canonical addresses.
#[cfg(not(target_arch = "loongarch64"))]
pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;

/// Start of the kernel address space on LoongArch64.
#[cfg(target_arch = "loongarch64")]
pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;

/// End of the kernel address space (non inclusive).
pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000 << ADDR_WIDTH_SHIFT;

/*
/// The kernel code is linear mapped to this address.
///
/// FIXME: This offset should be randomly chosen by the loader or the
/// boot compatibility layer. But we disabled it because OSTD
/// doesn't support relocatable kernel yet.
pub fn kernel_loaded_offset() -> usize {
    KERNEL_CODE_BASE_VADDR
}*/

/// Base virtual address at which the kernel code itself is mapped (x86_64).
#[cfg(target_arch = "x86_64")]
pub const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000 << ADDR_WIDTH_SHIFT;

/// Base virtual address at which the kernel code itself is mapped (riscv64).
#[cfg(target_arch = "riscv64")]
pub const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000 << ADDR_WIDTH_SHIFT;

/// Base virtual address at which the kernel code itself is mapped (loongarch64).
#[cfg(target_arch = "loongarch64")]
pub const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;

/// Exclusive upper bound of the frame-metadata mapping region.
///
/// NOTE(review): the module-level diagram places frame metadata at
/// `0xffff_e000_0000_0000..0xffff_e100_0000_0000`, but these constants use
/// `0xffff_fff0_0000_0000..0xffff_fff0_8000_0000`. One of the two appears
/// stale — confirm which and update the other.
pub const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_fff0_8000_0000 << ADDR_WIDTH_SHIFT;

/// Base of the frame-metadata mapping region (see the note above).
pub const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_fff0_0000_0000 << ADDR_WIDTH_SHIFT;

/// Base of the [`KVirtArea`] (vm-alloc) region.
pub const VMALLOC_BASE_VADDR: Vaddr = 0xffff_c000_0000_0000 << ADDR_WIDTH_SHIFT;

/// The vm-alloc region spans from its base up to the frame-metadata base.
pub const VMALLOC_VADDR_RANGE: Range<Vaddr> = VMALLOC_BASE_VADDR..FRAME_METADATA_BASE_VADDR;

/// The base address of the linear mapping of all physical
/// memory in the kernel address space.
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;

/// The linear mapping occupies everything below the vm-alloc region.
pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;

/*
#[cfg(not(target_arch = "loongarch64"))]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
#[cfg(target_arch = "loongarch64")]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;
*/
125
/// Convert physical address to virtual address using offset, only available inside `ostd`.
///
/// Spec-level definition: the linear-mapping translation is a fixed offset
/// by [`LINEAR_MAPPING_BASE_VADDR`].
pub open spec fn paddr_to_vaddr_spec(pa: Paddr) -> usize {
    (pa + LINEAR_MAPPING_BASE_VADDR) as usize
}

/// Exec twin of [`paddr_to_vaddr_spec`]: translates a physical address to
/// its linearly-mapped kernel virtual address.
///
/// The `requires` clause rules out overflow of the `usize` addition, so the
/// exec body needs no runtime check.
#[verifier::when_used_as_spec(paddr_to_vaddr_spec)]
pub fn paddr_to_vaddr(pa: Paddr) -> usize
    requires
        pa + LINEAR_MAPPING_BASE_VADDR < usize::MAX,
    returns
        paddr_to_vaddr_spec(pa),
{
    //debug_assert!(pa < VMALLOC_BASE_VADDR - LINEAR_MAPPING_BASE_VADDR);
    pa + LINEAR_MAPPING_BASE_VADDR
}
141
142
/// The kernel page table instance.
///
/// It manages the kernel mapping of all address spaces by sharing the kernel part. And it
/// is unlikely to be activated.
///
/// Stored in a `OnceImpl` so it is initialized exactly once; `TrivialPred`
/// presumably imposes no extra invariant on the stored value — TODO confirm
/// against the `OnceImpl` definition in `crate::sync`.
pub exec static KERNEL_PAGE_TABLE: OnceImpl<PageTable<KernelPtConfig>, TrivialPred> =
    OnceImpl::new(Ghost(TrivialPred));

/// Marker type carrying the kernel-space [`PageTableConfig`] implementation.
#[derive(Clone, Debug)]
pub(crate) struct KernelPtConfig {}
153
// We use the first available PTE bit to mark the frame as tracked.
// SAFETY: `item_into_raw` and `item_from_raw` are implemented correctly,
// i.e. as mutual inverses, with the `AVAIL1` flag unambiguously
// distinguishing tracked from untracked mappings (see `item_roundtrip`).
unsafe impl PageTableConfig for KernelPtConfig {
    // The kernel owns the higher half of the top-level page table:
    // indices 256..512 (of 512 entries) in the 48-bit layout.
    open spec fn TOP_LEVEL_INDEX_RANGE_spec() -> Range<usize> {
        256..512
    }

    /// Exec twin of [`Self::TOP_LEVEL_INDEX_RANGE_spec`].
    fn TOP_LEVEL_INDEX_RANGE() -> (r: Range<usize>)
        ensures
            r == Self::TOP_LEVEL_INDEX_RANGE_spec(),
    {
        256..512
    }

    // Kernel top-level entries are shared by all address spaces and are
    // never unmapped.
    open spec fn TOP_LEVEL_CAN_UNMAP_spec() -> bool {
        false
    }

    /// Exec twin of [`Self::TOP_LEVEL_CAN_UNMAP_spec`].
    fn TOP_LEVEL_CAN_UNMAP() -> (b: bool)
        ensures
            b == Self::TOP_LEVEL_CAN_UNMAP_spec(),
    {
        false
    }

    type E = PageTableEntry;
    type C = PagingConsts;

    type Item = MappedItem;

    // `item_into_raw` below is `external_body`, so its meaning is carried
    // solely by this uninterpreted spec plus the axioms in the inherent
    // `impl KernelPtConfig` block.
    uninterp spec fn item_into_raw_spec(item: Self::Item) -> (Paddr, PagingLevel, PageProperty);

    // #[verifier::when_used_as_spec(item_into_raw_spec)]
    /// Lowers an item to `(paddr, level, prop)` for storage in a PTE.
    ///
    /// For tracked frames this sets `AVAIL1` and consumes the frame handle
    /// via `into_raw`, transferring its reference count into the page table.
    #[verifier::external_body]
    fn item_into_raw(item: Self::Item) -> (res: (Paddr, PagingLevel, PageProperty))
        ensures
            1 <= res.1 <= crate::specs::arch::mm::NR_LEVELS,
            res == Self::item_into_raw_spec(item),
    {
        match item {
            MappedItem::Tracked(frame, mut prop) => {
                // `AVAIL1` must be clear so it can serve as the "tracked" tag.
                debug_assert!(!prop.flags.contains(PageFlags::AVAIL1()));
                prop.flags = prop.flags | PageFlags::AVAIL1();
                let level = frame.map_level();
                let paddr = frame.into_raw();
                (paddr, level, prop)
            }
            MappedItem::Untracked(pa, level, mut prop) => {
                debug_assert!(!prop.flags.contains(PageFlags::AVAIL1()));
                // Removing `AVAIL1` is a no-op given the assert above, but
                // it keeps release builds safe if a caller ever set it.
                prop.flags = prop.flags - PageFlags::AVAIL1();
                (pa, level, prop)
            }
        }
    }

    uninterp spec fn item_from_raw_spec(
        paddr: Paddr,
        level: PagingLevel,
        prop: PageProperty,
    ) -> Self::Item;

    //#[verifier::when_used_as_spec(item_from_raw_spec)]
    /// Restores an item from its raw PTE form, dispatching on the `AVAIL1`
    /// tag written by [`Self::item_into_raw`].
    #[verifier::external_body]
    fn item_from_raw(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> (res: Self::Item)
        ensures
            res == Self::item_from_raw_spec(paddr, level, prop),
    {
        if prop.flags.contains(PageFlags::AVAIL1()) {
            // Tracked frames are always base-page (level 1) mappings; see
            // `item_into_raw_spec_tracked_level` below.
            debug_assert_eq!(level, 1);
            // SAFETY: The caller ensures safety.
            let frame = unsafe { Frame::<MetaSlotStorage>::from_raw(paddr) };
            MappedItem::Tracked(frame, prop)
        } else {
            MappedItem::Untracked(paddr, level, prop)
        }
    }

    // Trusted assumption relating the number of subpages per huge page to
    // the number of page-table entries for this config.
    axiom fn axiom_nr_subpage_per_huge_eq_nr_entries();

    // Trusted roundtrip property tying `item_into_raw_spec` and
    // `item_from_raw_spec` together. NOTE(review): no `ensures` appears
    // here — presumably it is stated on the `PageTableConfig` trait
    // declaration; confirm.
    axiom fn item_roundtrip(item: Self::Item, paddr: Paddr, level: PagingLevel, prop: PageProperty);
}
235
// Trusted axioms about `item_into_raw_spec`, mirroring properties of the
// `external_body` exec implementation above. Being axioms, the verifier
// assumes (does not prove) them.
impl KernelPtConfig {
    /// The spec agrees with the exec, which ensures 1 <= level <= NR_LEVELS.
    pub axiom fn item_into_raw_spec_level_bounds(item: MappedItem)
        ensures
            1 <= KernelPtConfig::item_into_raw_spec(item).1 <= crate::specs::arch::mm::NR_LEVELS;

    /// Tracked frames use 4K pages (level 1). Used to prove alignment in map_frames.
    pub axiom fn item_into_raw_spec_tracked_level(item: MappedItem)
        requires
            matches!(item, MappedItem::Tracked(_, _)),
        ensures
            KernelPtConfig::item_into_raw_spec(item).1 == 1;

    /// For untracked items, `item_into_raw_spec` preserves PA, level, and prop.
    /// This is correct when the AVAIL1 bit is not set in `prop`, which is assumed
    /// for untracked MMIO frames (enforced by the debug_assert in the exec).
    pub axiom fn item_into_raw_spec_untracked(pa: Paddr, level: PagingLevel, prop: PageProperty)
        ensures
            KernelPtConfig::item_into_raw_spec(MappedItem::Untracked(pa, level, prop)).0 == pa,
            KernelPtConfig::item_into_raw_spec(MappedItem::Untracked(pa, level, prop)).1 == level,
            KernelPtConfig::item_into_raw_spec(MappedItem::Untracked(pa, level, prop)).2 == prop;

    /// For KernelPtConfig (x86_64): HIGHEST_TRANSLATION_LEVEL = 2 < NR_LEVELS = 4.
    pub axiom fn axiom_kernel_htl_lt_nr_levels()
        ensures
            (KernelPtConfig::HIGHEST_TRANSLATION_LEVEL() as int) < NR_LEVELS as int;
}
263
/*
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum MappedItem {
    Tracked(Frame<dyn AnyFrameMeta>, PageProperty),
    Untracked(Paddr, PagingLevel, PageProperty),
}
*/

/// What a kernel page-table entry maps.
pub enum MappedItem {
    /// A metadata-tracked (reference-counted) frame plus its mapping
    /// properties; always a level-1 (base page) mapping.
    Tracked(DynFrame, PageProperty),
    /// An untracked physical range — used e.g. for the linear mapping and
    /// MMIO — as (physical address, mapping level, properties).
    Untracked(Paddr, PagingLevel, PageProperty),
}
276
impl RCClone for MappedItem {
    /// Precondition for `clone`: untracked items can always be cloned;
    /// tracked items delegate to the frame's own `clone_requires`, which
    /// relates the metadata-slot and refcount permissions to the frame.
    open spec fn clone_requires(self, slot_perm: PointsTo<MetaSlot>, rc_perm: PermissionU64) -> bool {
        match self {
            MappedItem::Tracked(frame, _) => frame.clone_requires(slot_perm, rc_perm),
            MappedItem::Untracked(_, _, _) => true,
        }
    }

    // NOTE(review): stub — calling this at runtime panics. The exec clone
    // (presumably bumping the frame refcount under `rc_perm` for the
    // Tracked variant) is not implemented yet; confirm no code path
    // reaches this before relying on it.
    #[verifier::external_body]
    fn clone(&self, Tracked(slot_perm): Tracked<&PointsTo<MetaSlot>>, Tracked(rc_perm): Tracked<&mut PermissionU64>) -> (res: Self)
    {
        unimplemented!();
    }
}
291
292} // verus!
293// /// Initializes the kernel page table.
294// ///
295// /// This function should be called after:
296// /// - the page allocator and the heap allocator are initialized;
297// /// - the memory regions are initialized.
298// ///
299// /// This function should be called before:
300// /// - any initializer that modifies the kernel page table.
301// pub fn init_kernel_page_table(meta_pages: Segment<MetaPageMeta>) {
302// info!("Initializing the kernel page table");
303// // Start to initialize the kernel page table.
304// let kpt = PageTable::<KernelPtConfig>::new_kernel_page_table();
305// let preempt_guard = disable_preempt();
306// // In LoongArch64, we don't need to do linear mappings for the kernel because of DMW0.
307// #[cfg(not(target_arch = "loongarch64"))]
308// // Do linear mappings for the kernel.
309// {
310// let max_paddr = crate::mm::frame::max_paddr();
311// let from = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + max_paddr;
312// let prop = PageProperty {
313// flags: PageFlags::RW,
314// cache: CachePolicy::Writeback,
315// priv_flags: PrivilegedPageFlags::GLOBAL,
316// };
317// let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
318// for (pa, level) in largest_pages::<KernelPtConfig>(from.start, 0, max_paddr) {
319// // SAFETY: we are doing the linear mapping for the kernel.
320// unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
321// .expect("Kernel linear address space is mapped twice");
322// }
323// }
324// // Map the metadata pages.
325// {
326// let start_va = mapping::frame_to_meta::<PagingConsts>(0);
327// let from = start_va..start_va + meta_pages.size();
328// let prop = PageProperty {
329// flags: PageFlags::RW,
330// cache: CachePolicy::Writeback,
331// priv_flags: PrivilegedPageFlags::GLOBAL,
332// };
333// let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
334// // We use untracked mapping so that we can benefit from huge pages.
335// // We won't unmap them anyway, so there's no leaking problem yet.
336// // TODO: support tracked huge page mapping.
337// let pa_range = meta_pages.into_raw();
338// for (pa, level) in
339// largest_pages::<KernelPtConfig>(from.start, pa_range.start, pa_range.len())
340// {
341// // SAFETY: We are doing the metadata mappings for the kernel.
342// unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
343// .expect("Frame metadata address space is mapped twice");
344// }
345// }
346// // In LoongArch64, we don't need to do linear mappings for the kernel code because of DMW0.
347// #[cfg(not(target_arch = "loongarch64"))]
348// // Map for the kernel code itself.
349// // TODO: set separated permissions for each segments in the kernel.
350// {
351// let regions = &crate::boot::EARLY_INFO.get().unwrap().memory_regions;
352// let region = regions
353// .iter()
354// .find(|r| r.typ() == MemoryRegionType::Kernel)
355// .unwrap();
356// let offset = kernel_loaded_offset();
357// let from = region.base() + offset..region.end() + offset;
358// let prop = PageProperty {
359// flags: PageFlags::RWX,
360// cache: CachePolicy::Writeback,
361// priv_flags: PrivilegedPageFlags::GLOBAL,
362// };
363// let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
364// for (pa, level) in largest_pages::<KernelPtConfig>(from.start, region.base(), from.len()) {
365// // SAFETY: we are doing the kernel code mapping.
366// unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
367// .expect("Kernel code mapped twice");
368// }
369// }
370// KERNEL_PAGE_TABLE.call_once(|| kpt);
371// }
372// /// Activates the kernel page table.
373// ///
374// /// # Safety
375// ///
376// /// This function should only be called once per CPU.
377// pub unsafe fn activate_kernel_page_table() {
378// let kpt = KERNEL_PAGE_TABLE
379// .get()
380// .expect("The kernel page table is not initialized yet");
381// // SAFETY: the kernel page table is initialized properly.
382// unsafe {
383// kpt.first_activate_unchecked();
384// crate::arch::mm::tlb_flush_all_including_global();
385// }
386// // SAFETY: the boot page table is OK to be dismissed now since
387// // the kernel page table is activated just now.
388// unsafe {
389// crate::mm::page_table::boot_pt::dismiss();
390// }
391// }