ostd/mm/kspace/mod.rs
// SPDX-License-Identifier: MPL-2.0
//! Kernel memory space management.
//!
//! The kernel memory space is currently managed as follows, assuming the
//! address width is 48 bits (with a 47-bit kernel space).
//!
//! TODO: the cap of the linear mapping (the start of vm alloc) is raised
//! to work around high IO in TDX. We need an actual vm alloc API for a
//! proper fix.
//!
//! ```text
//! +-+ <- the highest used address (0xffff_ffff_ffff_0000)
//! | | For the kernel code, 1 GiB.
//! +-+ <- 0xffff_ffff_8000_0000
//! | |
//! | | Unused hole.
//! +-+ <- 0xffff_e100_0000_0000
//! | | For frame metadata, 1 TiB.
//! +-+ <- 0xffff_e000_0000_0000
//! | | For [`KVirtArea`], 32 TiB.
//! +-+ <- the middle of the higher half (0xffff_c000_0000_0000)
//! | |
//! | |
//! | |
//! | | For linear mappings, 64 TiB.
//! | | Mapped physical addresses are untracked.
//! | |
//! | |
//! | |
//! +-+ <- the base of the high canonical addresses (0xffff_8000_0000_0000)
//! ```
//!
//! If the address width is (according to [`crate::arch::mm::PagingConsts`])
//! 39 bits or 57 bits, the memory space adjusts proportionally.
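//!
//! For instance (a sketch of the intended scaling, not constants defined in
//! this file): with a 57-bit address width every boundary shifts left by
//! `57 - 48 = 9` bits, so the high canonical base becomes
//! `0xffff_8000_0000_0000 << 9 == 0xff00_0000_0000_0000`; with a 39-bit width
//! the shift is `39 - 48 = -9`, an arithmetic right shift yielding
//! `0xffff_ffc0_0000_0000`.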
use vstd::prelude::*;
use vstd::atomic::PermissionU64;
use vstd::simple_pptr::PointsTo;
use core::ops::Range;

//use log::info;
use crate::sync::{OnceImpl, TrivialPred};
#[cfg(ktest)]
mod test;
pub mod kvirt_area;

use super::{
    frame::{
        meta::{mapping, AnyFrameMeta, MetaPageMeta, MetaSlot},
        Frame, Segment,
    },
    page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
    page_table::{PageTable, PageTableConfig},
    Paddr, PagingConstsTrait, Vaddr,
};
use crate::specs::mm::frame::meta_owners::MetaSlotStorage;
use crate::specs::arch::mm::NR_LEVELS;
use crate::mm::frame::DynFrame;
use crate::{
    boot::memory_region::MemoryRegionType,
    mm::{largest_pages, PagingLevel},
    specs::arch::{PageTableEntry, PagingConsts},
    //task::disable_preempt,
};
use crate::mm::page_table::RCClone;
use crate::specs::mm::frame::meta_owners::MetaPerm;
use crate::specs::mm::frame::meta_region_owners::MetaRegionOwners;

use vstd_extra::ownership::*;

verus! {

/// The shortest supported address width is 39 bits, and the literal
/// values below are written for a 48-bit address width. Adjust the values
/// by arithmetic left shift.
pub const ADDR_WIDTH_SHIFT: isize = 48 - 48;

/// Start of the kernel address space.
/// This is the _lowest_ address of the x86-64's _high_ canonical addresses.
#[cfg(not(target_arch = "loongarch64"))]
pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;

#[cfg(target_arch = "loongarch64")]
pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;

/// End of the kernel address space (exclusive).
pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000 << ADDR_WIDTH_SHIFT;

/*
/// The kernel code is linear mapped to this address.
///
/// FIXME: This offset should be randomly chosen by the loader or the
/// boot compatibility layer. But we disabled it because OSTD
/// doesn't support relocatable kernel yet.
pub fn kernel_loaded_offset() -> usize {
    KERNEL_CODE_BASE_VADDR
}*/

#[cfg(target_arch = "x86_64")]
pub const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000 << ADDR_WIDTH_SHIFT;

#[cfg(target_arch = "riscv64")]
pub const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000 << ADDR_WIDTH_SHIFT;

#[cfg(target_arch = "loongarch64")]
pub const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;

pub const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_fff0_8000_0000 << ADDR_WIDTH_SHIFT;

pub const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_fff0_0000_0000 << ADDR_WIDTH_SHIFT;

pub const VMALLOC_BASE_VADDR: Vaddr = 0xffff_c000_0000_0000 << ADDR_WIDTH_SHIFT;

pub const VMALLOC_VADDR_RANGE: Range<Vaddr> = VMALLOC_BASE_VADDR..FRAME_METADATA_BASE_VADDR;

/// The base address of the linear mapping of all physical
/// memory in the kernel address space.
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;

pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;
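
// The linear mapping spans 0xffff_c000_0000_0000 - 0xffff_8000_0000_0000
// = 0x4000_0000_0000 bytes = 2^46 bytes = 64 TiB, as in the layout diagram.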

/*
#[cfg(not(target_arch = "loongarch64"))]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_8000_0000_0000 << ADDR_WIDTH_SHIFT;
#[cfg(target_arch = "loongarch64")]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000 << ADDR_WIDTH_SHIFT;
pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;
*/

/// Converts a physical address to a virtual address using the linear-mapping
/// offset; only available inside `ostd`.
pub open spec fn paddr_to_vaddr_spec(pa: Paddr) -> usize {
    (pa + LINEAR_MAPPING_BASE_VADDR) as usize
}

#[verifier::when_used_as_spec(paddr_to_vaddr_spec)]
pub fn paddr_to_vaddr(pa: Paddr) -> usize
    requires
        pa + LINEAR_MAPPING_BASE_VADDR < usize::MAX,
    returns
        paddr_to_vaddr_spec(pa),
{
    //debug_assert!(pa < VMALLOC_BASE_VADDR - LINEAR_MAPPING_BASE_VADDR);
    pa + LINEAR_MAPPING_BASE_VADDR
}
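
// A minimal usage sketch (the physical address is hypothetical; callers must
// ensure it lies within the linearly mapped range):
//
//     let va = paddr_to_vaddr(0x8000_0000);
//     assert!(va == 0x8000_0000 + LINEAR_MAPPING_BASE_VADDR);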

/// The kernel page table instance.
///
/// It manages the kernel mapping of all address spaces by sharing the kernel
/// part, and it is unlikely to be activated.
pub exec static KERNEL_PAGE_TABLE: OnceImpl<PageTable<KernelPtConfig>, TrivialPred> =
    OnceImpl::new(Ghost(TrivialPred));

#[derive(Clone, Debug)]
pub(crate) struct KernelPtConfig {}

// We use the first available PTE bit to mark the frame as tracked.
// SAFETY: `item_into_raw` and `item_from_raw` are implemented correctly.
unsafe impl PageTableConfig for KernelPtConfig {
    open spec fn TOP_LEVEL_INDEX_RANGE_spec() -> Range<usize> {
        256..512
    }

    open spec fn LEADING_BITS_spec() -> usize {
        0xffff
    }

    fn TOP_LEVEL_INDEX_RANGE() -> (r: Range<usize>)
        ensures
            r == Self::TOP_LEVEL_INDEX_RANGE_spec(),
    {
        256..512
    }

    open spec fn TOP_LEVEL_CAN_UNMAP_spec() -> bool {
        false
    }

    fn TOP_LEVEL_CAN_UNMAP() -> (b: bool)
        ensures
            b == Self::TOP_LEVEL_CAN_UNMAP_spec(),
    {
        false
    }

    type E = PageTableEntry;
    type C = PagingConsts;

    type Item = MappedItem;

    uninterp spec fn item_into_raw_spec(item: Self::Item) -> (Paddr, PagingLevel, PageProperty);

    // #[verifier::when_used_as_spec(item_into_raw_spec)]
    #[verifier::external_body]
    fn item_into_raw(item: Self::Item) -> (res: (Paddr, PagingLevel, PageProperty))
        ensures
            1 <= res.1 <= crate::specs::arch::mm::NR_LEVELS,
            res == Self::item_into_raw_spec(item),
    {
        match item {
            MappedItem::Tracked(frame, mut prop) => {
                debug_assert!(!prop.flags.contains(PageFlags::AVAIL1()));
                prop.flags = prop.flags | PageFlags::AVAIL1();
                let level = frame.map_level();
                let paddr = frame.into_raw();
                (paddr, level, prop)
            }
            MappedItem::Untracked(pa, level, mut prop) => {
                debug_assert!(!prop.flags.contains(PageFlags::AVAIL1()));
                prop.flags = prop.flags - PageFlags::AVAIL1();
                (pa, level, prop)
            }
        }
    }

    uninterp spec fn item_from_raw_spec(
        paddr: Paddr,
        level: PagingLevel,
        prop: PageProperty,
    ) -> Self::Item;

    //#[verifier::when_used_as_spec(item_from_raw_spec)]
    #[verifier::external_body]
    fn item_from_raw(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> (res: Self::Item)
        ensures
            res == Self::item_from_raw_spec(paddr, level, prop),
    {
        if prop.flags.contains(PageFlags::AVAIL1()) {
            debug_assert_eq!(level, 1);
            // SAFETY: The caller ensures safety.
            let frame = unsafe { Frame::<MetaSlotStorage>::from_raw(paddr) };
            MappedItem::Tracked(frame, prop)
        } else {
            MappedItem::Untracked(paddr, level, prop)
        }
    }

    axiom fn axiom_nr_subpage_per_huge_eq_nr_entries();

    axiom fn item_roundtrip(item: Self::Item, paddr: Paddr, level: PagingLevel, prop: PageProperty);

    proof fn clone_ensures_concrete(
        item: Self::Item,
        pa: Paddr,
        old_regions: MetaRegionOwners,
        new_regions: MetaRegionOwners,
        res: Self::Item,
    ) {
        admit();
    }

    proof fn clone_requires_concrete(
        item: Self::Item,
        pa: Paddr,
        level: PagingLevel,
        prop: PageProperty,
        regions: MetaRegionOwners,
    ) {
        admit();
    }
}
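
// A hedged round-trip sketch of the `AVAIL1` convention above (`frame` and
// `prop` stand for hypothetical values):
//
//     // Tracked: `item_into_raw` sets `AVAIL1` and leaks the frame as a raw
//     // physical address; `item_from_raw` sees `AVAIL1` and rebuilds the frame.
//     let (pa, level, raw_prop) =
//         KernelPtConfig::item_into_raw(MappedItem::Tracked(frame, prop));
//     let item = KernelPtConfig::item_from_raw(pa, level, raw_prop);
//
//     // Untracked: `AVAIL1` stays clear, so the triple round-trips unchanged.
//     let (pa, level, raw_prop) =
//         KernelPtConfig::item_into_raw(MappedItem::Untracked(0x8000_0000, 2, prop));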

impl KernelPtConfig {
    /// The spec agrees with the exec, which ensures 1 <= level <= NR_LEVELS.
    pub axiom fn item_into_raw_spec_level_bounds(item: MappedItem)
        ensures
            1 <= KernelPtConfig::item_into_raw_spec(item).1 <= crate::specs::arch::mm::NR_LEVELS;

    /// Tracked frames use 4K pages (level 1). Used to prove alignment in `map_frames`.
    pub axiom fn item_into_raw_spec_tracked_level(item: MappedItem)
        requires
            matches!(item, MappedItem::Tracked(_, _)),
        ensures
            KernelPtConfig::item_into_raw_spec(item).1 == 1;

    /// For untracked items, `item_into_raw_spec` preserves PA, level, and prop.
    /// This is correct when the AVAIL1 bit is not set in `prop`, which is assumed
    /// for untracked MMIO frames (enforced by the debug_assert in the exec).
    pub axiom fn item_into_raw_spec_untracked(pa: Paddr, level: PagingLevel, prop: PageProperty)
        ensures
            KernelPtConfig::item_into_raw_spec(MappedItem::Untracked(pa, level, prop)).0 == pa,
            KernelPtConfig::item_into_raw_spec(MappedItem::Untracked(pa, level, prop)).1 == level,
            KernelPtConfig::item_into_raw_spec(MappedItem::Untracked(pa, level, prop)).2 == prop;

    /// For tracked items, the physical address from `item_into_raw_spec` equals
    /// the frame's metadata-derived physical address.
    pub axiom fn item_into_raw_spec_tracked_pa(frame: DynFrame, prop: PageProperty)
        ensures
            KernelPtConfig::item_into_raw_spec(MappedItem::Tracked(frame, prop)).0
                == crate::mm::frame::meta::mapping::meta_to_frame(frame.ptr.addr());

    /// For `KernelPtConfig` (x86_64): HIGHEST_TRANSLATION_LEVEL = 2 < NR_LEVELS = 4.
    pub axiom fn axiom_kernel_htl_lt_nr_levels()
        ensures
            (KernelPtConfig::HIGHEST_TRANSLATION_LEVEL() as int) < NR_LEVELS as int;
}
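
// A hedged sketch of how these axioms are consumed in proofs (`pa`, `level`,
// and `prop` are hypothetical values from a caller's context):
//
//     proof {
//         KernelPtConfig::item_into_raw_spec_untracked(pa, level, prop);
//         // The verifier now knows that
//         // `item_into_raw_spec(Untracked(pa, level, prop)) == (pa, level, prop)`.
//     }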

/*
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum MappedItem {
    Tracked(Frame<dyn AnyFrameMeta>, PageProperty),
    Untracked(Paddr, PagingLevel, PageProperty),
}
*/

pub enum MappedItem {
    Tracked(DynFrame, PageProperty),
    Untracked(Paddr, PagingLevel, PageProperty),
}
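
// A hedged construction sketch (`frame` and `prop` are hypothetical values):
//
//     // Untracked mappings may use any level; the kernel linear mapping uses
//     // them with the largest possible pages.
//     let untracked = MappedItem::Untracked(0x8000_0000, 2, prop);
//     // Tracked mappings always map level-1 (4K) pages, per
//     // `item_into_raw_spec_tracked_level` above.
//     let tracked = MappedItem::Tracked(frame, prop);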

impl RCClone for MappedItem {
    open spec fn clone_requires(self, perm: MetaRegionOwners) -> bool {
        match self {
            MappedItem::Tracked(frame, _) => frame.clone_requires(perm),
            MappedItem::Untracked(_, _, _) => perm.inv(),
        }
    }

    open spec fn clone_ensures(self, old_perm: MetaRegionOwners, new_perm: MetaRegionOwners, res: Self) -> bool {
        match (self, res) {
            (MappedItem::Tracked(frame, _), MappedItem::Tracked(res_frame, _)) =>
                frame.clone_ensures(old_perm, new_perm, res_frame),
            (MappedItem::Untracked(_, _, _), _) => old_perm == new_perm,
            _ => true,
        }
    }

    #[verifier::external_body]
    fn clone(&self, Tracked(perm): Tracked<&mut MetaRegionOwners>) -> (res: Self) {
        unimplemented!();
    }
}

} // verus!

// /// Initializes the kernel page table.
// ///
// /// This function should be called after:
// /// - the page allocator and the heap allocator are initialized;
// /// - the memory regions are initialized.
// ///
// /// This function should be called before:
// /// - any initializer that modifies the kernel page table.
// pub fn init_kernel_page_table(meta_pages: Segment<MetaPageMeta>) {
//     info!("Initializing the kernel page table");
//     // Start to initialize the kernel page table.
//     let kpt = PageTable::<KernelPtConfig>::new_kernel_page_table();
//     let preempt_guard = disable_preempt();
//     // In LoongArch64, we don't need to do linear mappings for the kernel because of DMW0.
//     #[cfg(not(target_arch = "loongarch64"))]
//     // Do linear mappings for the kernel.
//     {
//         let max_paddr = crate::mm::frame::max_paddr();
//         let from = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + max_paddr;
//         let prop = PageProperty {
//             flags: PageFlags::RW,
//             cache: CachePolicy::Writeback,
//             priv_flags: PrivilegedPageFlags::GLOBAL,
//         };
//         let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
//         for (pa, level) in largest_pages::<KernelPtConfig>(from.start, 0, max_paddr) {
//             // SAFETY: we are doing the linear mapping for the kernel.
//             unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
//                 .expect("Kernel linear address space is mapped twice");
//         }
//     }
//     // Map the metadata pages.
//     {
//         let start_va = mapping::frame_to_meta::<PagingConsts>(0);
//         let from = start_va..start_va + meta_pages.size();
//         let prop = PageProperty {
//             flags: PageFlags::RW,
//             cache: CachePolicy::Writeback,
//             priv_flags: PrivilegedPageFlags::GLOBAL,
//         };
//         let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
//         // We use untracked mapping so that we can benefit from huge pages.
//         // We won't unmap them anyway, so there's no leaking problem yet.
//         // TODO: support tracked huge page mapping.
//         let pa_range = meta_pages.into_raw();
//         for (pa, level) in
//             largest_pages::<KernelPtConfig>(from.start, pa_range.start, pa_range.len())
//         {
//             // SAFETY: We are doing the metadata mappings for the kernel.
//             unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
//                 .expect("Frame metadata address space is mapped twice");
//         }
//     }
//     // In LoongArch64, we don't need to do linear mappings for the kernel code because of DMW0.
//     #[cfg(not(target_arch = "loongarch64"))]
//     // Map for the kernel code itself.
//     // TODO: set separate permissions for each segment in the kernel.
//     {
//         let regions = &crate::boot::EARLY_INFO.get().unwrap().memory_regions;
//         let region = regions
//             .iter()
//             .find(|r| r.typ() == MemoryRegionType::Kernel)
//             .unwrap();
//         let offset = kernel_loaded_offset();
//         let from = region.base() + offset..region.end() + offset;
//         let prop = PageProperty {
//             flags: PageFlags::RWX,
//             cache: CachePolicy::Writeback,
//             priv_flags: PrivilegedPageFlags::GLOBAL,
//         };
//         let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
//         for (pa, level) in largest_pages::<KernelPtConfig>(from.start, region.base(), from.len()) {
//             // SAFETY: we are doing the kernel code mapping.
//             unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
//                 .expect("Kernel code mapped twice");
//         }
//     }
//     KERNEL_PAGE_TABLE.call_once(|| kpt);
// }
// /// Activates the kernel page table.
// ///
// /// # Safety
// ///
// /// This function should only be called once per CPU.
// pub unsafe fn activate_kernel_page_table() {
//     let kpt = KERNEL_PAGE_TABLE
//         .get()
//         .expect("The kernel page table is not initialized yet");
//     // SAFETY: the kernel page table is initialized properly.
//     unsafe {
//         kpt.first_activate_unchecked();
//         crate::arch::mm::tlb_flush_all_including_global();
//     }
//     // SAFETY: the boot page table is OK to be dismissed now since
//     // the kernel page table is activated just now.
//     unsafe {
//         crate::mm::page_table::boot_pt::dismiss();
//     }
// }