Skip to main content

ostd/mm/page_table/node/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2//! This module defines page table node abstractions and the handle.
3//!
4//! The page table node is also frequently referred to as a page table in many architectural
5//! documentations. It is essentially a page that contains page table entries (PTEs) that map
6//! to child page tables nodes or mapped pages.
7//!
8//! This module leverages the page metadata to manage the page table pages, which makes it
9//! easier to provide the following guarantees:
10//!
11//! The page table node is not freed when it is still in use by:
12//!    - a parent page table node,
13//!    - or a handle to a page table node,
14//!    - or a processor.
15//!
16//! This is implemented by using a reference counter in the page metadata. If the above
17//! conditions are not met, the page table node is ensured to be freed upon dropping the last
18//! reference.
19//!
20//! One can acquire exclusive access to a page table node using merely the physical address of
21//! the page table node. This is implemented by a lock in the page metadata. Here the
22//! exclusiveness is only ensured for kernel code, and the processor's MMU is able to access the
23//! page table node while a lock is held. So the modification to the PTEs should be done after
24//! the initialization of the entity that the PTE points to. This is taken care in this module.
25//!
26mod child;
27mod entry;
28
29#[path = "../../../../specs/mm/page_table/node/child.rs"]
30mod child_specs;
31#[path = "../../../../specs/mm/page_table/node/entry.rs"]
32mod entry_specs;
33
34pub use crate::specs::mm::page_table::node::{entry_owners::*, owners::*};
35pub use child::*;
36pub use entry::*;
37
38use vstd::cell::pcell_maybe_uninit;
39use vstd::prelude::*;
40
41use vstd::atomic::PAtomicU8;
42use vstd::simple_pptr::{self, PPtr};
43
44use vstd_extra::array_ptr;
45use vstd_extra::cast_ptr::*;
46use vstd_extra::ghost_tree::*;
47use vstd_extra::ownership::*;
48
49use crate::mm::frame::allocator::FrameAllocOptions;
50use crate::mm::frame::meta::MetaSlot;
51use crate::mm::frame::{frame_to_index, AnyFrameMeta, Frame};
52use crate::mm::kspace::VMALLOC_BASE_VADDR;
53use crate::mm::page_table::*;
54use crate::mm::{kspace::LINEAR_MAPPING_BASE_VADDR, paddr_to_vaddr, Paddr, Vaddr};
55use crate::specs::arch::kspace::FRAME_METADATA_RANGE;
56use crate::specs::mm::frame::mapping::{meta_to_frame, META_SLOT_SIZE};
57use crate::specs::mm::frame::meta_owners::{
58    MetaSlotOwner, MetaSlotStorage, Metadata, REF_COUNT_UNUSED,
59};
60use crate::specs::mm::frame::meta_region_owners::MetaRegionOwners;
61use crate::specs::mm::page_table::node::owners::*;
62
63use core::{marker::PhantomData, ops::Deref, sync::atomic::Ordering};
64
65use super::nr_subpage_per_huge;
66
67use crate::{
68    mm::{
69        page_table::{load_pte, store_pte},
70        //        FrameAllocOptions, Infallible,
71        //        VmReader,
72    },
73    specs::task::InAtomicMode,
74};
75
76verus! {
77
78/// The metadata of any kinds of page table pages.
79/// Make sure the the generic parameters don't effect the memory layout.
80pub struct PageTablePageMeta<C: PageTableConfig> {
81    /// The number of valid PTEs. It is mutable if the lock is held.
82    pub nr_children: pcell_maybe_uninit::PCell<u16>,
83    /// If the page table is detached from its parent.
84    ///
85    /// A page table can be detached from its parent while still being accessed,
86    /// since we use a RCU scheme to recycle page tables. If this flag is set,
87    /// it means that the parent is recycling the page table.
88    pub stray: pcell_maybe_uninit::PCell<bool>,
89    /// The level of the page table page. A page table page cannot be
90    /// referenced by page tables of different levels.
91    pub level: PagingLevel,
92    /// The lock for the page table page.
93    pub lock: PAtomicU8,
94    pub _phantom: core::marker::PhantomData<C>,
95}
96
97/// A smart pointer to a page table node.
98///
99/// This smart pointer is an owner of a page table node. Thus creating and
100/// dropping it will affect the reference count of the page table node. If
101/// dropped it as the last reference, the page table node and subsequent
102/// children will be freed.
103///
104/// [`PageTableNode`] is read-only. To modify the page table node, lock and use
105/// [`PageTableGuard`].
106pub type PageTableNode<C> = Frame<PageTablePageMeta<C>>;
107
108impl<C: PageTableConfig> AnyFrameMeta for PageTablePageMeta<C> {
109    fn on_drop(&mut self, _reader: &mut crate::mm::VmReader<'_, crate::mm::Infallible>) {
110    }
111
112    fn is_untyped(&self) -> bool {
113        false
114    }
115
116    uninterp spec fn vtable_ptr(&self) -> usize;
117}
118
119#[verus_verify]
120impl<C: PageTableConfig> PageTableNode<C> {
121    /// Gets the level of a page table node.
122    /// # Verified Properties
123    /// ## Preconditions
124    /// - The node must be well-formed, and the caller must provide a permission token for its metadata.
125    /// ## Postconditions
126    /// - Returns the level of the node.
127    /// ## Safety
128    /// - We require the caller to provide a permission token to ensure that this function is only called on a valid page table node.
129    #[verus_spec(
130        with Tracked(perm) : Tracked<&PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>
131    )]
132    pub(super) fn level(&self) -> PagingLevel
133        requires
134            self.ptr.addr() == perm.addr(),
135            self.ptr.addr() == perm.points_to.addr(),
136            perm.is_init(),
137            perm.wf(&perm.inner_perms),
138        returns
139            perm.value().metadata.level,
140    {
141        #[verus_spec(with Tracked(perm))]
142        let meta = self.meta();
143        meta.level
144    }
145
146    /// Allocates a new empty page table node.
147    #[verus_spec(res =>
148        with Tracked(parent_owner): Tracked<&mut NodeOwner<C>>,
149            Tracked(regions): Tracked<&mut MetaRegionOwners>,
150            Tracked(guards): Tracked<&Guards<'rcu, C>>,
151            Ghost(idx): Ghost<usize>,
152        -> owner: Tracked<OwnerSubtree<C>>
153        requires
154            1 <= level < NR_LEVELS,
155            idx < NR_ENTRIES,
156            old(regions).inv(),
157            old(parent_owner).inv(),
158        ensures
159            final(regions).inv(),
160            final(parent_owner).inv(),
161            allocated_empty_node_owner(owner@, level),
162            res.ptr.addr() == owner@.value.node.unwrap().meta_perm.addr(),
163            guards.unlocked(owner@.value.node.unwrap().meta_perm.addr()),
164            MetaSlot::get_from_unused_spec(meta_to_frame(owner@.value.node.unwrap().meta_perm.addr()), false, *old(regions), *final(regions)),
165            old(regions).slots.contains_key(frame_to_index(meta_to_frame(owner@.value.node.unwrap().meta_perm.addr()))),
166            owner@.value.metaregion_sound(*final(regions)),
167            owner@.value.in_scope,
168            owner@.value.match_pte(C::E::new_pt_spec(meta_to_frame(owner@.value.node.unwrap().meta_perm.addr())), level as PagingLevel),
169            *final(parent_owner) == old(parent_owner).set_children_perm(idx, C::E::new_pt_spec(meta_to_frame(owner@.value.node.unwrap().meta_perm.addr()))),
170    )]
171    #[verifier::external_body]
172    pub fn alloc<'rcu>(level: PagingLevel) -> Self {
173        let tracked entry_owner = EntryOwner::new_absent(TreePath::new(Seq::empty()), level);
174
175        let tracked mut owner = OwnerSubtree::<C>::new_val_tracked(entry_owner, level as nat);
176        let meta = PageTablePageMeta::new(level);
177        let mut frame = FrameAllocOptions::new();
178        frame.zeroed(true);
179        let allocated_frame = frame.alloc_frame_with(meta).expect(
180            "Failed to allocate a page table node",
181        );
182        // The allocated frame is zeroed. Make sure zero is absent PTE.
183        //debug_assert_eq!(C::E::new_absent().as_usize(), 0);
184
185        proof_with!(|= Tracked(owner));
186
187        allocated_frame
188    }/*
189    /// Activates the page table assuming it is a root page table.
190    ///
191    /// Here we ensure not dropping an active page table by making a
192    /// processor a page table owner. When activating a page table, the
193    /// reference count of the last activated page table is decremented.
194    /// And that of the current page table is incremented.
195    ///
196    /// # Safety
197    ///
198    /// The caller must ensure that the page table to be activated has
199    /// proper mappings for the kernel and has the correct const parameters
200    /// matching the current CPU.
201    ///
202    /// # Panics
203    ///
204    /// Only top-level page tables can be activated using this function.
205    pub(crate) unsafe fn activate(&self) {
206        use crate::{
207            arch::mm::{activate_page_table, current_page_table_paddr},
208            mm::page_prop::CachePolicy,
209        };
210
211        #[cfg(feature = "allow_panic")]
212        assert_eq!(self.level(), C::NR_LEVELS());
213
214        let last_activated_paddr = current_page_table_paddr();
215        if last_activated_paddr == self.start_paddr() {
216            return;
217        }
218
219        // SAFETY: The safety is upheld by the caller.
220        unsafe { activate_page_table(self.clone().into_raw(), CachePolicy::Writeback) };
221
222        // Restore and drop the last activated page table.
223        // SAFETY: The physical address is valid and points to a forgotten page table node.
224        drop(unsafe { Self::from_raw(last_activated_paddr) });
225    }
226
227    /// Activates the (root) page table assuming it is the first activation.
228    ///
229    /// It will not try dropping the last activate page table. It is the same
230    /// with [`Self::activate()`] in other senses.
231    pub(super) unsafe fn first_activate(&self) {
232        use crate::{arch::mm::activate_page_table, mm::page_prop::CachePolicy};
233
234        // SAFETY: The safety is upheld by the caller.
235        unsafe { activate_page_table(self.clone().into_raw(), CachePolicy::Writeback) };
236    }*/
237
238}
239
240#[verus_verify]
241impl<'a, C: PageTableConfig> PageTableNodeRef<'a, C> {
242    pub open spec fn locks_preserved_except<'rcu>(
243        addr: usize,
244        guards0: Guards<'rcu, C>,
245        guards1: Guards<'rcu, C>,
246    ) -> bool {
247        &&& OwnerSubtree::implies(
248            CursorOwner::node_unlocked(guards0),
249            CursorOwner::node_unlocked_except(guards1, addr),
250        )
251        &&& forall|i: usize| guards0.lock_held(i) ==> guards1.lock_held(i)
252        &&& forall|i: usize| guards0.unlocked(i) && i != addr ==> guards1.unlocked(i)
253    }
254
255    /// Locks the page table node.
256    ///
257    /// An atomic mode guard is required to
258    ///  1. prevent deadlocks;
259    ///  2. provide a lifetime (`'rcu`) that the nodes are guaranteed to outlive.
260    /// # Verification Design
261    /// As of when we verified this library, we didn't have a spin lock implementation, so we axiomatize
262    /// what happens when it's successful.
263    #[verifier::external_body]
264    #[verus_spec(res =>
265        with Tracked(owner): Tracked<&NodeOwner<C>>,
266            Tracked(guards): Tracked<&mut Guards<'rcu, C>>
267        -> guard_perm: Tracked<GuardPerm<'rcu, C>>
268        requires
269            self.inner@.invariants(*owner),
270            old(guards).unlocked(owner.meta_perm.addr()),
271        ensures
272            final(guards).lock_held(owner.meta_perm.addr()),
273            Self::locks_preserved_except(owner.meta_perm.addr(), *old(guards), *final(guards)),
274            res.addr() == guard_perm@.addr(),
275            owner.relate_guard_perm(guard_perm@),
276    )]
277    pub fn lock<'rcu, A: InAtomicMode>(self, _guard: &'rcu A) -> PPtr<
278        PageTableGuard<'rcu, C>,
279    > where 'a: 'rcu {
280        unimplemented!()
281    }
282
283    /// Creates a new [`PageTableGuard`] without checking if the page table lock is held.
284    ///
285    /// # Safety
286    ///
287    /// This function must be called if this task logically holds the lock.
288    ///
289    /// Calling this function when a guard is already created is undefined behavior
290    /// unless that guard was already forgotten.
291    #[verus_spec(res =>
292        with Tracked(owner): Tracked<&NodeOwner<C>>,
293            Tracked(guards): Tracked<&mut Guards<'rcu, C>>
294            -> guard_perm: Tracked<GuardPerm<'rcu, C>>
295        requires
296            self.inner@.invariants(*owner),
297            old(guards).unlocked(owner.meta_perm.addr()),
298        ensures
299            final(guards).lock_held(owner.meta_perm.addr()),
300            Self::locks_preserved_except(owner.meta_perm.addr(), *old(guards), *final(guards)),
301            res.addr() == guard_perm@.addr(),
302            owner.relate_guard_perm(guard_perm@),
303    )]
304    pub fn make_guard_unchecked<'rcu, A: InAtomicMode>(self, _guard: &'rcu A) -> PPtr<
305        PageTableGuard<'rcu, C>,
306    > where 'a: 'rcu {
307        let guard = PageTableGuard { inner: self };
308        let (ptr, guard_perm) = PPtr::<PageTableGuard<C>>::new(guard);
309
310        proof {
311            let ghost guards0 = *guards;
312            guards.guards.tracked_insert(owner.meta_perm.addr(), None);
313            assert(owner.relate_guard_perm(guard_perm@));
314
315            assert(forall|other: EntryOwner<C>, path: TreePath<NR_ENTRIES>|
316                owner.inv() && CursorOwner::node_unlocked(guards0)(other, path)
317                    ==> #[trigger] CursorOwner::node_unlocked_except(
318                    *guards,
319                    owner.meta_perm.addr(),
320                )(other, path));
321        }
322
323        proof_with!{|= guard_perm}
324        ptr
325    }
326}
327
328//}
329impl<'rcu, C: PageTableConfig> PageTableGuard<'rcu, C> {
330    /// Borrows an entry in the node at a given index.
331    ///
332    /// # Panics
333    ///
334    /// Panics if the index is not within the bound of
335    /// [`nr_subpage_per_huge<C>`].
336    #[verus_spec(res =>
337        with Tracked(owner): Tracked<&NodeOwner<C>>,
338            Tracked(child_owner): Tracked<&EntryOwner<C>>,
339            Tracked(guard_perm): Tracked<&GuardPerm<'rcu, C>>,
340    )]
341    pub fn entry(guard: PPtr<Self>, idx: usize) -> (res: Entry<'rcu, C>)
342        requires
343            owner.inv(),
344            !child_owner.in_scope,
345            child_owner.inv(),
346            owner.relate_guard_perm(*guard_perm),
347            guard_perm.addr() == guard.addr(),
348            child_owner.match_pte(
349                owner.children_perm.value()[idx as int],
350                child_owner.parent_level,
351            ),
352            // Panic condition
353            idx < NR_ENTRIES,
354        ensures
355            res.wf(*child_owner),
356            res.node.addr() == guard_perm.addr(),
357            res.idx == idx,
358    {
359        #[cfg(feature = "allow_panic")]
360        assert!(idx < nr_subpage_per_huge::<C>());
361        // SAFETY: The index is within the bound.
362        #[verus_spec(with Tracked(child_owner), Tracked(owner), Tracked(guard_perm))]
363        Entry::new_at(guard, idx);
364    }
365
366    /// Gets the number of valid PTEs in a page table node.
367    /// # Verified Properties
368    /// ## Preconditions
369    /// - The node must be well-formed.
370    /// ## Postconditions
371    /// - Returns the number of valid PTEs in the node.
372    /// ## Safety
373    /// - We require the caller to provide a permission token to ensure that this function is only called on a valid page table node.
374    #[verus_spec(
375        with Tracked(owner) : Tracked<&NodeOwner<C>>
376    )]
377    pub fn nr_children(&self) -> (nr: u16)
378        requires
379    // Node invariants: owner well-formedness and node-owner consistency
380
381            self.inner.inner@.invariants(*owner),
382        returns
383            owner.meta_own.nr_children.value(),
384    {
385        // SAFETY: The lock is held so we have an exclusive access.
386        #[verus_spec(with Tracked(&owner.meta_perm))]
387        let meta = self.meta();
388
389        *meta.nr_children.borrow(Tracked(&owner.meta_own.nr_children))
390    }
391
392    /// Returns if the page table node is detached from its parent.
393    #[verus_spec(
394        with Tracked(meta_perm): Tracked<&'a PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>
395    )]
396    pub(super) fn stray_mut<'a>(&'a mut self) -> (res: &'a pcell_maybe_uninit::PCell<bool>)
397        requires
398            old(self).inner.inner@.ptr.addr() == meta_perm.addr(),
399            old(self).inner.inner@.ptr.addr() == meta_perm.points_to.addr(),
400            meta_perm.is_init(),
401            meta_perm.wf(&meta_perm.inner_perms),
402        ensures
403            res.id() == meta_perm.value().metadata.stray.id(),
404            *final(self) == *old(self),
405    {
406        // SAFETY: The lock is held so we have an exclusive access.
407        #[verus_spec(with Tracked(meta_perm))]
408        let meta = self.meta();
409        &meta.stray
410    }
411
412    /// Reads a non-owning PTE at the given index.
413    ///
414    /// A non-owning PTE means that it does not account for a reference count
415    /// of the a page if the PTE points to a page. The original PTE still owns
416    /// the child page.
417    ///
418    /// # Safety
419    ///
420    /// The caller must ensure that the index is within the bound.
421    #[verus_spec(
422        with Tracked(owner): Tracked<&NodeOwner<C>>
423    )]
424    pub fn read_pte(&self, idx: usize) -> (pte: C::E)
425        requires
426            self.inner.inner@.invariants(*owner),
427            idx < NR_ENTRIES,
428        ensures
429            pte == owner.children_perm.value()[idx as int],
430    {
431        // debug_assert!(idx < nr_subpage_per_huge::<C>());
432        let ptr = vstd_extra::array_ptr::ArrayPtr::<C::E, NR_ENTRIES>::from_addr(
433            paddr_to_vaddr(
434                #[verus_spec(with Tracked(&owner.meta_perm.points_to))]
435                self.start_paddr(),
436            ),
437        );
438
439        // SAFETY:
440        // - The page table node is alive. The index is inside the bound, so the page table entry is valid.
441        // - All page table entries are aligned and accessed with atomic operations only.
442        #[verus_spec(with Tracked(&owner.children_perm))]
443        load_pte(ptr.add(idx), Ordering::Relaxed)
444    }
445
446    /// Writes a page table entry at a given index.
447    ///
448    /// This operation will leak the old child if the old PTE is present.
449    ///
450    /// # Safety
451    ///
452    /// The caller must ensure that:
453    ///  1. The index must be within the bound;
454    ///  2. The PTE must represent a valid [`Child`] whose level is compatible
455    ///     with the page table node.
456    ///  3. The page table node will have the ownership of the [`Child`]
457    ///     after this method.
458    #[verus_spec(
459        with Tracked(owner): Tracked<&mut NodeOwner<C>>
460    )]
461    pub fn write_pte(&mut self, idx: usize, pte: C::E)
462        requires
463            old(self).inner.inner@.invariants(*old(owner)),
464            idx < NR_ENTRIES,
465        ensures
466            final(owner).inv(),
467            final(owner).meta_perm.addr() == old(owner).meta_perm.addr(),
468            final(owner).level == old(owner).level,
469            final(owner).meta_own == old(owner).meta_own,
470            final(owner).meta_perm.points_to == old(owner).meta_perm.points_to,
471            final(owner).meta_perm.inner_perms == old(owner).meta_perm.inner_perms,
472            final(owner).children_perm.value() == old(owner).children_perm.value().update(idx as int, pte),
473            *final(self) == *old(self),
474    {
475        // debug_assert!(idx < nr_subpage_per_huge::<C>());
476        #[verusfmt::skip]
477        let ptr = vstd_extra::array_ptr::ArrayPtr::<C::E, NR_ENTRIES>::from_addr(
478            paddr_to_vaddr(
479                #[verus_spec(with Tracked(&owner.meta_perm.points_to))]
480                self.start_paddr()
481            ),
482        );
483
484        // SAFETY:
485        // - The page table node is alive. The index is inside the bound, so the page table entry is valid.
486        // - All page table entries are aligned and accessed with atomic operations only.
487        #[verus_spec(with Tracked(&mut owner.children_perm))]
488        store_pte(ptr.add(idx), pte, Ordering::Release)
489    }
490
491    /// Gets the mutable reference to the number of valid PTEs in the node.
492    #[verus_spec(
493        with Tracked(meta_perm): Tracked<&'a PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>
494    )]
495    fn nr_children_mut<'a>(&'a mut self) -> (res: &'a pcell_maybe_uninit::PCell<u16>)
496        requires
497            old(self).inner.inner@.ptr.addr() == meta_perm.addr(),
498            old(self).inner.inner@.ptr.addr() == meta_perm.points_to.addr(),
499            meta_perm.is_init(),
500            meta_perm.wf(&meta_perm.inner_perms),
501        ensures
502            res.id() == meta_perm.value().metadata.nr_children.id(),
503            *final(self) == *old(self),
504    {
505        // SAFETY: The lock is held so we have an exclusive access.
506        #[verus_spec(with Tracked(meta_perm))]
507        let meta = self.meta();
508        &meta.nr_children
509    }
510}
511
512/*impl<C: PageTableConfig> Drop for PageTableGuard<'_, C> {
513    fn drop(&mut self) {
514        self.inner.meta().lock.store(0, Ordering::Release);
515    }
516}*/
517
518impl<C: PageTableConfig> PageTablePageMeta<C> {
519    pub fn new(level: PagingLevel) -> Self {
520        Self {
521            nr_children: pcell_maybe_uninit::PCell::new(0).0,
522            stray: pcell_maybe_uninit::PCell::new(false).0,
523            level,
524            lock: PAtomicU8::new(0).0,
525            _phantom: PhantomData,
526        }
527    }
528}
529
530} // verus!
531/* TODO: Come back after VMReader
532// FIXME: The safe APIs in the `page_table/node` module allow `Child::Frame`s with
533// arbitrary addresses to be stored in the page table nodes. Therefore, they may not
534// be valid `C::Item`s. The soundness of the following `on_drop` implementation must
535// be reasoned in conjunction with the `page_table/cursor` implementation.
536unsafe impl<C: PageTableConfig> AnyFrameMeta for PageTablePageMeta<C> {
537    fn on_drop(&mut self, reader: &mut VmReader<Infallible>) {
538        let nr_children = self.nr_children.get_mut();
539        if *nr_children == 0 {
540            return;
541        }
542
543        let level = self.level;
544        let range = if level == C::NR_LEVELS() {
545            C::TOP_LEVEL_INDEX_RANGE.clone()
546        } else {
547            0..nr_subpage_per_huge::<C>()
548        };
549
550        // Drop the children.
551        reader.skip(range.start * size_of::<C::E>());
552        for _ in range {
553            // Non-atomic read is OK because we have mutable access.
554            let pte = reader.read_once::<C::E>().unwrap();
555            if pte.is_present() {
556                let paddr = pte.paddr();
557                // As a fast path, we can ensure that the type of the child frame
558                // is `Self` if the PTE points to a child page table. Then we don't
559                // need to check the vtable for the drop method.
560                if !pte.is_last(level) {
561                    // SAFETY: The PTE points to a page table node. The ownership
562                    // of the child is transferred to the child then dropped.
563                    drop(unsafe { Frame::<Self>::from_raw(paddr) });
564                } else {
565                    // SAFETY: The PTE points to a mapped item. The ownership
566                    // of the item is transferred here then dropped.
567                    drop(unsafe { C::item_from_raw(paddr, level, pte.prop()) });
568                }
569            }
570        }
571    }
572}*/