ostd/mm/kspace/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2
3//! Kernel memory space management.
4//!
5//! The kernel memory space is currently managed as follows, if the
6//! address width is 48 bits (with 47 bits kernel space).
7//!
//! TODO: the cap of the linear mapping (the start of vm alloc) is raised
//! to work around high I/O in TDX. We need an actual vm alloc API to
//! have a proper fix.
11//!
12//! ```text
13//! +-+ <- the highest used address (0xffff_ffff_ffff_0000)
14//! | |         For the kernel code, 1 GiB.
15//! +-+ <- 0xffff_ffff_8000_0000
16//! | |
17//! | |         Unused hole.
18//! +-+ <- 0xffff_e100_0000_0000
19//! | |         For frame metadata, 1 TiB.
20//! +-+ <- 0xffff_e000_0000_0000
21//! | |         For [`KVirtArea`], 32 TiB.
22//! +-+ <- the middle of the higher half (0xffff_c000_0000_0000)
23//! | |
24//! | |
25//! | |
26//! | |         For linear mappings, 64 TiB.
27//! | |         Mapped physical addresses are untracked.
28//! | |
29//! | |
30//! | |
31//! +-+ <- the base of high canonical address (0xffff_8000_0000_0000)
32//! ```
33//!
34//! If the address width is (according to [`crate::arch::mm::PagingConsts`])
//! 39 bits or 57 bits, the memory space just adjusts proportionally.
36
37#![cfg_attr(target_arch = "loongarch64", expect(unused_imports))]
38
39pub(crate) mod kvirt_area;
40
41use core::ops::Range;
42
43use log::info;
44use spin::Once;
45#[cfg(ktest)]
46mod test;
47
48use super::{
49    Frame, HasSize, Paddr, PagingConstsTrait, Vaddr,
50    frame::{
51        Segment,
52        meta::{AnyFrameMeta, MetaPageMeta, mapping},
53    },
54    page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
55    page_table::{PageTable, PageTableConfig},
56};
57use crate::{
58    arch::mm::{PageTableEntry, PagingConsts},
59    boot::memory_region::MemoryRegionType,
60    const_assert,
61    mm::{HasPaddr, PAGE_SIZE, PagingLevel, frame::FrameRef, page_table::largest_pages},
62    task::disable_preempt,
63};
64
// The shortest supported address width is 39 bits. So the literal
// values are written for 39 bits address width and we adjust the values
// by arithmetic left shift.
const_assert!(PagingConsts::ADDRESS_WIDTH >= 39);
// How far to left-shift the 39-bit-width address literals below to scale
// them to the configured address width (0 for 39 bits, 9 for 48, 18 for 57).
const ADDR_WIDTH_SHIFT: usize = PagingConsts::ADDRESS_WIDTH - 39;
70
/// Start of the kernel address space.
#[cfg(not(target_arch = "loongarch64"))]
pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
/// Start of the kernel address space.
///
/// On LoongArch64 the kernel lives in the direct-mapped window (DMW0), so
/// the base is a fixed address rather than one scaled by the address width.
#[cfg(target_arch = "loongarch64")]
pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;
/// End of the kernel address space (non inclusive).
///
/// This is the highest used address in the module-level layout diagram;
/// the topmost page is deliberately left unused.
pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000;

/// The maximum virtual address of user space (non inclusive).
///
/// A typical way to reserve half of the address space for the kernel is
/// to use the highest `ADDRESS_WIDTH`-bit virtual address space.
///
/// Also, the top page is not regarded as usable since it's a workaround
/// for some x86_64 CPUs' bugs. See
/// <https://github.com/torvalds/linux/blob/480e035fc4c714fb5536e64ab9db04fedc89e910/arch/x86/include/asm/page_64.h#L68-L78>
/// for the rationale.
pub const MAX_USERSPACE_VADDR: Vaddr = (0x0000_0040_0000_0000 << ADDR_WIDTH_SHIFT) - PAGE_SIZE;

/// The kernel address space.
///
/// They are the high canonical addresses (i.e., the negative part of the
/// address space, with the most significant bits in the addresses set).
pub const KERNEL_VADDR_RANGE: Range<Vaddr> = KERNEL_BASE_VADDR..KERNEL_END_VADDR;
95
/// The kernel code is linear mapped to this address.
///
/// The returned offset is added to the kernel's physical load address to
/// obtain its virtual address (see the kernel code mapping in
/// [`init_kernel_page_table`]).
///
/// FIXME: This offset should be randomly chosen by the loader or the
/// boot compatibility layer. But we disabled it because OSTD
/// doesn't support relocatable kernel yet.
pub fn kernel_loaded_offset() -> usize {
    KERNEL_CODE_BASE_VADDR
}
104
// The base virtual address where the kernel code is mapped, chosen per
// architecture. On LoongArch64 it coincides with the DMW0 window base.
#[cfg(target_arch = "x86_64")]
const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000;
#[cfg(target_arch = "riscv64")]
const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000;
#[cfg(target_arch = "loongarch64")]
const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000;

// Non-inclusive end of the virtual region reserved for frame metadata.
const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_fff0_8000_0000 << ADDR_WIDTH_SHIFT;
// Base of the virtual region reserved for frame metadata.
const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_fff0_0000_0000 << ADDR_WIDTH_SHIFT;
/// The virtual address range that holds frame metadata pages.
pub(in crate::mm) const FRAME_METADATA_RANGE: Range<Vaddr> =
    FRAME_METADATA_BASE_VADDR..FRAME_METADATA_CAP_VADDR;

// Base of the kernel virtual allocation ([`KVirtArea`]) region.
const VMALLOC_BASE_VADDR: Vaddr = 0xffff_ffe0_0000_0000 << ADDR_WIDTH_SHIFT;
/// The kernel virtual allocation range; it ends where the frame metadata
/// region begins.
pub const VMALLOC_VADDR_RANGE: Range<Vaddr> = VMALLOC_BASE_VADDR..FRAME_METADATA_BASE_VADDR;

/// The base address of the linear mapping of all physical
/// memory in the kernel address space.
#[cfg(not(target_arch = "loongarch64"))]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
/// The base address of the linear mapping of all physical
/// memory in the kernel address space.
///
/// On LoongArch64 this is the DMW0 direct-mapped window base, so no
/// page-table entries are needed for the linear mapping.
#[cfg(target_arch = "loongarch64")]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;
/// The linear-mapping range; it ends where the kernel virtual allocation
/// region begins.
pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;
127
128/// Convert physical address to virtual address using offset, only available inside `ostd`
129pub fn paddr_to_vaddr(pa: Paddr) -> usize {
130    debug_assert!(pa < VMALLOC_BASE_VADDR - LINEAR_MAPPING_BASE_VADDR);
131    pa + LINEAR_MAPPING_BASE_VADDR
132}
133
/// The kernel page table instance.
///
/// It manages the kernel mapping of all address spaces by sharing the kernel
/// part. And it is unlikely to be activated directly (see
/// [`activate_kernel_page_table`] for the one-time activation per CPU).
///
/// Initialized once by [`init_kernel_page_table`].
pub(super) static KERNEL_PAGE_TABLE: Once<PageTable<KernelPtConfig>> = Once::new();

/// The [`PageTableConfig`] used by the kernel page table.
#[derive(Clone, Debug)]
pub(super) struct KernelPtConfig {}
142
// We use the first available PTE bit to mark the frame as tracked.
//
// Encoding: `item_raw_info` sets `AVAIL1` in the privileged flags for a
// tracked [`Frame`] and clears it for an untracked mapping; the decode
// functions below use this bit to tell the two cases apart.
//
// NOTE(review): `item_from_raw`/`item_ref_from_raw` return the property with
// `AVAIL1` still set for tracked items, while `item_raw_info` debug-asserts
// the bit is clear on its input. Round-tripping a decoded tracked item back
// through `item_raw_info` would therefore trip the assert in debug builds —
// confirm whether the decode paths should strip the bit.
//
// SAFETY: `item_raw_info`, `item_into_raw`, `item_from_raw`, and
// `item_ref_from_raw` are correctly implemented with respect to the `Item` and
// `ItemRef` types.
unsafe impl PageTableConfig for KernelPtConfig {
    // The kernel occupies the higher half of the top-level table (indices
    // 256..512), and those entries are never unmapped.
    const TOP_LEVEL_INDEX_RANGE: Range<usize> = 256..512;
    const TOP_LEVEL_CAN_UNMAP: bool = false;

    type E = PageTableEntry;
    type C = PagingConsts;

    type Item = MappedItem;
    type ItemRef<'a> = MappedItemRef<'a>;

    // Encodes an item into the raw `(paddr, level, prop)` triple stored in
    // a PTE, tagging tracked frames with `AVAIL1`.
    fn item_raw_info(item: &Self::Item) -> (Paddr, PagingLevel, PageProperty) {
        match *item {
            MappedItem::Tracked(ref frame, mut prop) => {
                // The caller must not use the bit we reserve for tracking.
                debug_assert!(!prop.priv_flags.contains(PrivilegedPageFlags::AVAIL1));
                prop.priv_flags |= PrivilegedPageFlags::AVAIL1;
                let level = frame.map_level();
                let paddr = frame.paddr();
                (paddr, level, prop)
            }
            MappedItem::Untracked(ref pa, ref level, mut prop) => {
                debug_assert!(!prop.priv_flags.contains(PrivilegedPageFlags::AVAIL1));
                // Defensively keep the tracking bit clear for untracked
                // mappings (a no-op given the assertion above).
                prop.priv_flags -= PrivilegedPageFlags::AVAIL1;
                (*pa, *level, prop)
            }
        }
    }

    // Decodes raw PTE info back into an owned item; `AVAIL1` distinguishes
    // tracked frames (level-1 only) from untracked mappings.
    unsafe fn item_from_raw(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self::Item {
        if prop.priv_flags.contains(PrivilegedPageFlags::AVAIL1) {
            // Tracked frames are only mapped at the base page level.
            debug_assert_eq!(level, 1);
            // SAFETY: The caller ensures safety.
            let frame = unsafe { Frame::<dyn AnyFrameMeta>::from_raw(paddr) };
            MappedItem::Tracked(frame, prop)
        } else {
            MappedItem::Untracked(paddr, level, prop)
        }
    }

    // Decodes raw PTE info into a borrowed view without taking ownership of
    // the underlying frame.
    unsafe fn item_ref_from_raw<'a>(
        paddr: Paddr,
        level: PagingLevel,
        prop: PageProperty,
    ) -> Self::ItemRef<'a> {
        if prop.priv_flags.contains(PrivilegedPageFlags::AVAIL1) {
            debug_assert_eq!(level, 1);
            // SAFETY: The caller ensures that the frame outlives `'a` and that
            // the type matches the frame.
            let frame = unsafe { FrameRef::<dyn AnyFrameMeta>::borrow_paddr(paddr) };
            MappedItemRef::Tracked(frame, prop)
        } else {
            MappedItemRef::Untracked(paddr, level, prop)
        }
    }
}
201
/// An owned item mapped in the kernel page table.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) enum MappedItem {
    /// A metadata-tracked frame mapping; owns the [`Frame`] handle.
    Tracked(Frame<dyn AnyFrameMeta>, PageProperty),
    /// An untracked mapping of a raw physical address at the given paging
    /// level (used e.g. for the linear mapping and metadata mappings).
    Untracked(Paddr, PagingLevel, PageProperty),
}
207
/// A borrowed view of an item mapped in the kernel page table.
///
/// The non-owning counterpart of [`MappedItem`], produced by
/// `KernelPtConfig::item_ref_from_raw`.
#[derive(Debug)]
pub(crate) enum MappedItemRef<'a> {
    /// A borrowed, metadata-tracked frame mapping.
    #[cfg_attr(not(ktest), expect(dead_code))]
    Tracked(FrameRef<'a, dyn AnyFrameMeta>, PageProperty),
    /// An untracked mapping of a raw physical address.
    #[cfg_attr(not(ktest), expect(dead_code))]
    Untracked(Paddr, PagingLevel, PageProperty),
}
215
/// Initializes the kernel page table.
///
/// This builds the kernel page table and installs three groups of mappings:
/// the linear mapping of all physical memory, the frame-metadata mapping,
/// and the mapping of the kernel code itself. The finished table is stored
/// in [`KERNEL_PAGE_TABLE`].
///
/// This function should be called after:
///  - the page allocator and the heap allocator are initialized;
///  - the memory regions are initialized.
///
/// This function should be called before:
///  - any initializer that modifies the kernel page table.
pub fn init_kernel_page_table(meta_pages: Segment<MetaPageMeta>) {
    info!("Initializing the kernel page table");

    // Start to initialize the kernel page table.
    let kpt = PageTable::<KernelPtConfig>::new_kernel_page_table();
    // Held for the whole function: every cursor below borrows this guard.
    let preempt_guard = disable_preempt();

    // In LoongArch64, we don't need to do linear mappings for the kernel because of DMW0.
    #[cfg(not(target_arch = "loongarch64"))]
    // Do linear mappings for the kernel.
    {
        // Map physical `[0, max_paddr)` at `LINEAR_MAPPING_BASE_VADDR` as
        // RW, write-back, global pages.
        let max_paddr = crate::mm::frame::max_paddr();
        let from = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + max_paddr;
        let prop = PageProperty {
            flags: PageFlags::RW,
            cache: CachePolicy::Writeback,
            priv_flags: PrivilegedPageFlags::GLOBAL,
        };
        let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
        // `largest_pages` yields page sizes as large as alignment permits,
        // minimizing the number of page-table entries.
        for (pa, level) in largest_pages::<KernelPtConfig>(from.start, 0, max_paddr) {
            // SAFETY: we are doing the linear mapping for the kernel.
            unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) };
        }
    }

    // Map the metadata pages.
    {
        // The metadata region starts at the virtual address that frame 0's
        // metadata maps to.
        let start_va = mapping::frame_to_meta::<PagingConsts>(0);
        let from = start_va..start_va + meta_pages.size();
        let prop = PageProperty {
            flags: PageFlags::RW,
            cache: CachePolicy::Writeback,
            priv_flags: PrivilegedPageFlags::GLOBAL,
        };
        let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
        // We use untracked mapping so that we can benefit from huge pages.
        // We won't unmap them anyway, so there's no leaking problem yet.
        // TODO: support tracked huge page mapping.
        let pa_range = meta_pages.into_raw();
        for (pa, level) in
            largest_pages::<KernelPtConfig>(from.start, pa_range.start, pa_range.len())
        {
            // SAFETY: We are doing the metadata mappings for the kernel.
            unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) };
        }
    }

    // In LoongArch64, we don't need to do linear mappings for the kernel code because of DMW0.
    #[cfg(not(target_arch = "loongarch64"))]
    // Map for the kernel code itself.
    // TODO: set separated permissions for each segments in the kernel.
    {
        // Find the kernel's physical region in the boot memory map and map
        // it at `kernel_loaded_offset()` above its physical address.
        let regions = &crate::boot::EARLY_INFO.get().unwrap().memory_regions;
        let region = regions
            .iter()
            .find(|r| r.typ() == MemoryRegionType::Kernel)
            .unwrap();
        let offset = kernel_loaded_offset();
        let from = region.base() + offset..region.end() + offset;
        // RWX because per-segment permissions are not split yet (see the
        // TODO above this block).
        let prop = PageProperty {
            flags: PageFlags::RWX,
            cache: CachePolicy::Writeback,
            priv_flags: PrivilegedPageFlags::GLOBAL,
        };
        let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
        for (pa, level) in largest_pages::<KernelPtConfig>(from.start, region.base(), from.len()) {
            // SAFETY: we are doing the kernel code mapping.
            unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) };
        }
    }

    KERNEL_PAGE_TABLE.call_once(|| kpt);
}
297
/// Activates the kernel page table.
///
/// All address translation of symbols in the boot sections must be manually
/// done from now on.
///
/// # Panics
///
/// Panics if [`init_kernel_page_table`] has not been called yet.
///
/// # Safety
///
/// This function must only be called once per CPU.
pub unsafe fn activate_kernel_page_table() {
    let kpt = KERNEL_PAGE_TABLE
        .get()
        .expect("The kernel page table is not initialized yet");
    // SAFETY: the kernel page table is initialized properly.
    unsafe {
        // Switch to the kernel page table, then flush all TLB entries
        // (including global ones) so no stale translations survive.
        kpt.first_activate_unchecked();
        crate::arch::mm::tlb_flush_all_including_global();
    }
}
315}