ostd/mm/kspace/
mod.rs

// SPDX-License-Identifier: MPL-2.0

//! Kernel memory space management.
//!
//! The kernel memory space is currently laid out as follows, assuming the
//! address width is 48 bits (with a 47-bit kernel space).
//!
//! TODO: the cap of the linear mapping (the start of vm alloc) is raised
//! as a workaround for high I/O in TDX. We need an actual vm alloc API for
//! a proper fix.
//!
//! ```text
//! +-+ <- the highest used address (0xffff_ffff_ffff_0000)
//! | |         For the kernel code, 1 GiB.
//! +-+ <- 0xffff_ffff_8000_0000
//! | |
//! | |         Unused hole.
//! +-+ <- 0xffff_e100_0000_0000
//! | |         For frame metadata, 1 TiB.
//! +-+ <- 0xffff_e000_0000_0000
//! | |         For [`KVirtArea`], 32 TiB.
//! +-+ <- the middle of the higher half (0xffff_c000_0000_0000)
//! | |
//! | |
//! | |
//! | |         For linear mappings, 64 TiB.
//! | |         Mapped physical addresses are untracked.
//! | |
//! | |
//! | |
//! +-+ <- the base of high canonical address (0xffff_8000_0000_0000)
//! ```
//!
//! If the address width is (according to [`crate::arch::mm::PagingConsts`])
//! 39 bits or 57 bits, the memory space is adjusted proportionally.
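//!
//! For example, with a 48-bit address width, `ADDR_WIDTH_SHIFT` below is
//! `48 - 39 = 9`, so the linear mapping base `0xffff_ffc0_0000_0000 << 9`
//! evaluates to `0xffff_8000_0000_0000`, the base of the high canonical
//! half shown in the diagram above.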

#![cfg_attr(target_arch = "loongarch64", expect(unused_imports))]

pub(crate) mod kvirt_area;
#[cfg(ktest)]
mod test;

use core::ops::Range;

use log::info;
use spin::Once;

use super::{
    Frame, HasSize, Paddr, PagingConstsTrait, Vaddr,
    frame::{
        Segment,
        meta::{AnyFrameMeta, MetaPageMeta, mapping},
    },
    page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
    page_table::{PageTable, PageTableConfig},
};
use crate::{
    arch::mm::{PageTableEntry, PagingConsts},
    boot::memory_region::MemoryRegionType,
    const_assert,
    mm::{PAGE_SIZE, PagingLevel, page_table::largest_pages},
    task::disable_preempt,
};

// The shortest supported address width is 39 bits. So the literal values
// below are written for a 39-bit address width, and we adjust them with an
// arithmetic left shift.
const_assert!(PagingConsts::ADDRESS_WIDTH >= 39);
const ADDR_WIDTH_SHIFT: usize = PagingConsts::ADDRESS_WIDTH - 39;

/// Start of the kernel address space.
#[cfg(not(target_arch = "loongarch64"))]
pub const KERNEL_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
#[cfg(target_arch = "loongarch64")]
pub const KERNEL_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;
/// End of the kernel address space (exclusive).
pub const KERNEL_END_VADDR: Vaddr = 0xffff_ffff_ffff_0000;

/// The maximum virtual address of user space (exclusive).
///
/// A typical way to reserve half of the address space for the kernel is
/// to use the highest `ADDRESS_WIDTH`-bit virtual address space.
///
/// Also, the top page is not regarded as usable, as a workaround for
/// bugs in some x86_64 CPUs. See
/// <https://github.com/torvalds/linux/blob/480e035fc4c714fb5536e64ab9db04fedc89e910/arch/x86/include/asm/page_64.h#L68-L78>
/// for the rationale.
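///
/// With a 48-bit address width (`ADDR_WIDTH_SHIFT == 9`), this evaluates to
/// `0x0000_7fff_ffff_f000`, i.e., one page below the 128 TiB boundary.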
pub const MAX_USERSPACE_VADDR: Vaddr = (0x0000_0040_0000_0000 << ADDR_WIDTH_SHIFT) - PAGE_SIZE;

/// The kernel address space.
///
/// It consists of the high canonical addresses (i.e., the negative part of
/// the address space, with the most significant bits of the addresses set).
pub const KERNEL_VADDR_RANGE: Range<Vaddr> = KERNEL_BASE_VADDR..KERNEL_END_VADDR;

/// The kernel code is linearly mapped to this address.
///
/// FIXME: This offset should be randomly chosen by the loader or the
/// boot compatibility layer, but randomization is disabled because OSTD
/// doesn't support relocatable kernels yet.
pub fn kernel_loaded_offset() -> usize {
    KERNEL_CODE_BASE_VADDR
}

#[cfg(target_arch = "x86_64")]
const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_8000_0000;
#[cfg(target_arch = "riscv64")]
const KERNEL_CODE_BASE_VADDR: usize = 0xffff_ffff_0000_0000;
#[cfg(target_arch = "loongarch64")]
const KERNEL_CODE_BASE_VADDR: usize = 0x9000_0000_0000_0000;

const FRAME_METADATA_CAP_VADDR: Vaddr = 0xffff_fff0_8000_0000 << ADDR_WIDTH_SHIFT;
const FRAME_METADATA_BASE_VADDR: Vaddr = 0xffff_fff0_0000_0000 << ADDR_WIDTH_SHIFT;
pub(in crate::mm) const FRAME_METADATA_RANGE: Range<Vaddr> =
    FRAME_METADATA_BASE_VADDR..FRAME_METADATA_CAP_VADDR;

const VMALLOC_BASE_VADDR: Vaddr = 0xffff_ffe0_0000_0000 << ADDR_WIDTH_SHIFT;
pub const VMALLOC_VADDR_RANGE: Range<Vaddr> = VMALLOC_BASE_VADDR..FRAME_METADATA_BASE_VADDR;

/// The base address of the linear mapping of all physical
/// memory in the kernel address space.
#[cfg(not(target_arch = "loongarch64"))]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0xffff_ffc0_0000_0000 << ADDR_WIDTH_SHIFT;
#[cfg(target_arch = "loongarch64")]
pub const LINEAR_MAPPING_BASE_VADDR: Vaddr = 0x9000_0000_0000_0000;
pub const LINEAR_MAPPING_VADDR_RANGE: Range<Vaddr> = LINEAR_MAPPING_BASE_VADDR..VMALLOC_BASE_VADDR;

/// Converts a physical address to a virtual address using the linear mapping
/// offset (only available inside `ostd`).
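///
/// A minimal usage sketch (the concrete physical address is illustrative
/// only):
///
/// ```ignore
/// let pa: Paddr = 0x8000;
/// // The returned address lies within `LINEAR_MAPPING_VADDR_RANGE`.
/// assert_eq!(paddr_to_vaddr(pa), LINEAR_MAPPING_BASE_VADDR + 0x8000);
/// ```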
pub fn paddr_to_vaddr(pa: Paddr) -> usize {
    debug_assert!(pa < VMALLOC_BASE_VADDR - LINEAR_MAPPING_BASE_VADDR);
    pa + LINEAR_MAPPING_BASE_VADDR
}

/// The kernel page table instance.
///
/// It manages the kernel mappings of all address spaces by sharing the kernel
/// part, and it is rarely activated directly.
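///
/// Once initialized, it can be obtained from the [`Once`] cell, e.g.:
///
/// ```ignore
/// let kpt = KERNEL_PAGE_TABLE
///     .get()
///     .expect("The kernel page table is not initialized yet");
/// ```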
pub static KERNEL_PAGE_TABLE: Once<PageTable<KernelPtConfig>> = Once::new();

#[derive(Clone, Debug)]
pub(crate) struct KernelPtConfig {}

// We use the first available PTE bit to mark the frame as tracked.
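// A `Tracked` frame has `AVAIL1` set when it is turned into a raw mapping,
// while an `Untracked` mapping has it cleared, so `item_from_raw` can tell
// the two variants apart from the stored `PageProperty` alone.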
// SAFETY: `item_into_raw` and `item_from_raw` are implemented correctly; they
// round-trip items through the raw representation described above.
unsafe impl PageTableConfig for KernelPtConfig {
    const TOP_LEVEL_INDEX_RANGE: Range<usize> = 256..512;
    const TOP_LEVEL_CAN_UNMAP: bool = false;

    type E = PageTableEntry;
    type C = PagingConsts;

    type Item = MappedItem;

    fn item_into_raw(item: Self::Item) -> (Paddr, PagingLevel, PageProperty) {
        match item {
            MappedItem::Tracked(frame, mut prop) => {
                debug_assert!(!prop.priv_flags.contains(PrivilegedPageFlags::AVAIL1));
                prop.priv_flags |= PrivilegedPageFlags::AVAIL1;
                let level = frame.map_level();
                let paddr = frame.into_raw();
                (paddr, level, prop)
            }
            MappedItem::Untracked(pa, level, mut prop) => {
                debug_assert!(!prop.priv_flags.contains(PrivilegedPageFlags::AVAIL1));
                prop.priv_flags -= PrivilegedPageFlags::AVAIL1;
                (pa, level, prop)
            }
        }
    }

    unsafe fn item_from_raw(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self::Item {
        if prop.priv_flags.contains(PrivilegedPageFlags::AVAIL1) {
            debug_assert_eq!(level, 1);
            // SAFETY: The caller ensures safety.
            let frame = unsafe { Frame::<dyn AnyFrameMeta>::from_raw(paddr) };
            MappedItem::Tracked(frame, prop)
        } else {
            MappedItem::Untracked(paddr, level, prop)
        }
    }
}

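/// An item mapped in the kernel page table: either a tracked [`Frame`], or an
/// untracked raw physical address range mapped at the given paging level.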
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum MappedItem {
    Tracked(Frame<dyn AnyFrameMeta>, PageProperty),
    Untracked(Paddr, PagingLevel, PageProperty),
}

/// Initializes the kernel page table.
///
/// This function should be called after:
///  - the page allocator and the heap allocator are initialized;
///  - the memory regions are initialized.
///
/// This function should be called before:
///  - any initializer that modifies the kernel page table.
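///
/// A rough sketch of the expected ordering during boot (illustrative only,
/// not the actual boot code):
///
/// ```ignore
/// // ... the frame allocator, the heap allocator, and the memory regions
/// // are initialized ...
/// init_kernel_page_table(meta_pages);
/// // Later, on each CPU:
/// // SAFETY: this is called only once per CPU.
/// unsafe { activate_kernel_page_table() };
/// ```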
pub fn init_kernel_page_table(meta_pages: Segment<MetaPageMeta>) {
    info!("Initializing the kernel page table");

    // Start to initialize the kernel page table.
    let kpt = PageTable::<KernelPtConfig>::new_kernel_page_table();
    let preempt_guard = disable_preempt();

    // On LoongArch64, we don't need to do linear mappings for the kernel because of DMW0.
    #[cfg(not(target_arch = "loongarch64"))]
    // Do linear mappings for the kernel.
    {
        let max_paddr = crate::mm::frame::max_paddr();
        let from = LINEAR_MAPPING_BASE_VADDR..LINEAR_MAPPING_BASE_VADDR + max_paddr;
        let prop = PageProperty {
            flags: PageFlags::RW,
            cache: CachePolicy::Writeback,
            priv_flags: PrivilegedPageFlags::GLOBAL,
        };
        let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
        for (pa, level) in largest_pages::<KernelPtConfig>(from.start, 0, max_paddr) {
            // SAFETY: we are doing the linear mapping for the kernel.
            unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
                .expect("Kernel linear address space is mapped twice");
        }
    }

    // Map the metadata pages.
    {
        let start_va = mapping::frame_to_meta::<PagingConsts>(0);
        let from = start_va..start_va + meta_pages.size();
        let prop = PageProperty {
            flags: PageFlags::RW,
            cache: CachePolicy::Writeback,
            priv_flags: PrivilegedPageFlags::GLOBAL,
        };
        let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
        // We use untracked mapping so that we can benefit from huge pages.
        // We won't unmap them anyway, so there's no leaking problem yet.
        // TODO: support tracked huge page mapping.
        let pa_range = meta_pages.into_raw();
        for (pa, level) in
            largest_pages::<KernelPtConfig>(from.start, pa_range.start, pa_range.len())
        {
            // SAFETY: We are doing the metadata mappings for the kernel.
            unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
                .expect("Frame metadata address space is mapped twice");
        }
    }

    // On LoongArch64, we don't need to do linear mappings for the kernel code because of DMW0.
    #[cfg(not(target_arch = "loongarch64"))]
    // Map the kernel code itself.
    // TODO: set separate permissions for each segment of the kernel.
    {
        let regions = &crate::boot::EARLY_INFO.get().unwrap().memory_regions;
        let region = regions
            .iter()
            .find(|r| r.typ() == MemoryRegionType::Kernel)
            .unwrap();
        let offset = kernel_loaded_offset();
        let from = region.base() + offset..region.end() + offset;
        let prop = PageProperty {
            flags: PageFlags::RWX,
            cache: CachePolicy::Writeback,
            priv_flags: PrivilegedPageFlags::GLOBAL,
        };
        let mut cursor = kpt.cursor_mut(&preempt_guard, &from).unwrap();
        for (pa, level) in largest_pages::<KernelPtConfig>(from.start, region.base(), from.len()) {
            // SAFETY: we are doing the kernel code mapping.
            unsafe { cursor.map(MappedItem::Untracked(pa, level, prop)) }
                .expect("Kernel code mapped twice");
        }
    }

    KERNEL_PAGE_TABLE.call_once(|| kpt);
}

/// Activates the kernel page table.
///
/// From now on, all address translations of symbols in the boot sections must
/// be done manually.
///
/// # Safety
///
/// This function must only be called once per CPU.
pub unsafe fn activate_kernel_page_table() {
    let kpt = KERNEL_PAGE_TABLE
        .get()
        .expect("The kernel page table is not initialized yet");
    // SAFETY: the kernel page table is initialized properly.
    unsafe {
        kpt.first_activate_unchecked();
        crate::arch::mm::tlb_flush_all_including_global();
    }
}