Skip to main content

ostd/mm/page_table/node/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2//! This module defines page table node abstractions and the handle.
3//!
4//! The page table node is also frequently referred to as a page table in many architectural
5//! documentations. It is essentially a page that contains page table entries (PTEs) that map
6//! to child page tables nodes or mapped pages.
7//!
8//! This module leverages the page metadata to manage the page table pages, which makes it
9//! easier to provide the following guarantees:
10//!
11//! The page table node is not freed when it is still in use by:
12//!    - a parent page table node,
13//!    - or a handle to a page table node,
14//!    - or a processor.
15//!
16//! This is implemented by using a reference counter in the page metadata. If the above
17//! conditions are not met, the page table node is ensured to be freed upon dropping the last
18//! reference.
19//!
20//! One can acquire exclusive access to a page table node using merely the physical address of
21//! the page table node. This is implemented by a lock in the page metadata. Here the
22//! exclusiveness is only ensured for kernel code, and the processor's MMU is able to access the
23//! page table node while a lock is held. So the modification to the PTEs should be done after
24//! the initialization of the entity that the PTE points to. This is taken care in this module.
25//!
26mod child;
27mod entry;
28
29#[path = "../../../../specs/mm/page_table/node/child.rs"]
30mod child_specs;
31#[path = "../../../../specs/mm/page_table/node/entry.rs"]
32mod entry_specs;
33
34pub use crate::specs::mm::page_table::node::{entry_owners::*, owners::*};
35pub use child::*;
36pub use entry::*;
37
38use vstd::cell::pcell_maybe_uninit;
39use vstd::prelude::*;
40
41use vstd::atomic::PAtomicU8;
42
43use vstd_extra::array_ptr;
44use vstd_extra::cast_ptr::*;
45use vstd_extra::drop_tracking::{Drop as VerifiedDrop, TrackDrop};
46use vstd_extra::ghost_tree::*;
47use vstd_extra::ownership::*;
48
49use crate::mm::frame::allocator::FrameAllocOptions;
50use crate::mm::frame::meta::{
51    META_SLOT_SIZE,
52    mapping::{frame_to_meta, meta_to_frame},
53};
54use crate::mm::frame::meta::{MetaSlot, REF_COUNT_MAX, REF_COUNT_UNUSED};
55use crate::mm::frame::{AnyFrameMeta, Frame};
56use crate::mm::kspace::VMALLOC_BASE_VADDR;
57use crate::mm::page_table::*;
58use crate::mm::{Paddr, Vaddr, kspace::LINEAR_MAPPING_BASE_VADDR, paddr_to_vaddr};
59use crate::specs::mm::frame::meta_owners::{MetaSlotOwner, MetaSlotStorage, Metadata};
60use crate::specs::mm::frame::{
61    mapping::{frame_to_index, lemma_frame_to_index_injective},
62    meta_region_owners::MetaRegionOwners,
63};
64use crate::specs::mm::page_table::node::owners::*;
65
66use core::{marker::PhantomData, ops::Deref, sync::atomic::Ordering};
67
68use super::nr_subpage_per_huge;
69
70use crate::{
71    mm::{
72        page_table::{load_pte, store_pte},
73        //        FrameAllocOptions, Infallible,
74        //        VmReader,
75    },
76    specs::task::InAtomicMode,
77};
78
79verus! {
80
81/// The metadata of any kinds of page table pages.
82/// Make sure the the generic parameters don't effect the memory layout.
83pub struct PageTablePageMeta<C: PageTableConfig> {
84    /// The number of valid PTEs. It is mutable if the lock is held.
85    pub nr_children: pcell_maybe_uninit::PCell<u16>,
86    /// If the page table is detached from its parent.
87    ///
88    /// A page table can be detached from its parent while still being accessed,
89    /// since we use a RCU scheme to recycle page tables. If this flag is set,
90    /// it means that the parent is recycling the page table.
91    pub stray: pcell_maybe_uninit::PCell<bool>,
92    /// The level of the page table page. A page table page cannot be
93    /// referenced by page tables of different levels.
94    pub level: PagingLevel,
95    /// The lock for the page table page.
96    pub lock: PAtomicU8,
97    pub _phantom: core::marker::PhantomData<C>,
98}
99
100/// A smart pointer to a page table node.
101///
102/// This smart pointer is an owner of a page table node. Thus creating and
103/// dropping it will affect the reference count of the page table node. If
104/// dropped it as the last reference, the page table node and subsequent
105/// children will be freed.
106///
107/// [`PageTableNode`] is read-only. To modify the page table node, lock and use
108/// [`PageTableGuard`].
109pub type PageTableNode<C> = Frame<PageTablePageMeta<C>>;
110
111unsafe impl<C: PageTableConfig> AnyFrameMeta for PageTablePageMeta<C> {
112    /// Caller invariants the PT-node `on_drop` body relies on:
113    /// - Reader well-formedness + `vm_io_owner` matching + read view
114    ///   initialized + at least `PAGE_SIZE` bytes remaining for the
115    ///   PT-node walk.
116    /// - Global region table invariant.
117    /// - Embedding ([`child_perms_embedding`]): for every paddr in
118    ///   `child_perms.dom()`, the slot and perm match `from_raw` /
119    ///   `VerifiedDrop::drop`'s expected shape.
120    /// - Walk coverage ([`walk_coverage_from_view`]): for every present
121    ///   non-last PTE in the page bytes, `frame_to_index(pte.paddr()) ∈
122    ///   child_perms.dom()`.
123    /// - Walk uniqueness ([`walk_uniqueness_from_view`]): distinct PTE
124    ///   positions with present non-last PTEs have distinct paddrs.
125    ///
126    /// The body now discharges the dom-membership obligation in full via
127    /// byte-level chaining (`decode_pod` + `read_once`'s strengthened
128    /// ensures + the byte-preservation loop invariant) plus the two
129    /// walk-* preconditions; see [`lemma_coverage_at`] and
130    /// [`lemma_uniqueness_at_pair`].
131    open spec fn on_drop_pre(
132        &self,
133        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
134        regions: crate::specs::mm::frame::meta_region_owners::MetaRegionOwners,
135        vm_io_owner: crate::specs::mm::io::VmIoOwner,
136    ) -> bool {
137        &&& reader.inv()
138        &&& reader.wf(vm_io_owner)
139        &&& reader.remain_spec() >= crate::specs::arch::PAGE_SIZE
140        &&& reader.cursor.vaddr % core::mem::align_of::<C::E>() == 0
141        &&& vm_io_owner.inv()
142        &&& vm_io_owner.read_view_initialized()
143        &&& regions.inv()
144        &&& Self::child_perms_embedding(regions, vstd::set::Set::empty())
145        &&& self.walk_coverage_from_view(reader, vm_io_owner.read_view_of(), regions.slots.dom())
146        &&& self.walk_uniqueness_from_view(reader, vm_io_owner.read_view_of())
147    }
148
149    /// Drops the children of a page-table node: walks each present PTE and
150    /// drops the referenced child page-table-node frame or mapped item.
151    #[verifier::spinoff_prover]
152    fn on_drop(
153        &mut self,
154        reader: &mut crate::mm::VmReader<'_, crate::mm::Infallible>,
155        Tracked(regions): Tracked<
156            &mut crate::specs::mm::frame::meta_region_owners::MetaRegionOwners,
157        >,
158        Tracked(vm_io_owner): Tracked<&mut crate::specs::mm::io::VmIoOwner>,
159    ) {
160        let level = self.level;
161        let range = if level == C::NR_LEVELS() {
162            C::TOP_LEVEL_INDEX_RANGE()
163        } else {
164            0..nr_subpage_per_huge::<C>()
165        };
166
167        proof {
168            C::lemma_pte_walk_fills_page();
169            C::lemma_page_table_config_constant_properties();
170            vstd::arithmetic::mul::lemma_mul_inequality(
171                range.start as int,
172                NR_ENTRIES as int,
173                core::mem::size_of::<C::E>() as int,
174            );
175        }
176
177        let ghost size_of_e: int = core::mem::size_of::<C::E>() as int;
178        let ghost align_of_e: int = core::mem::align_of::<C::E>() as int;
179        let ghost pre_skip_cursor: int = reader.cursor.vaddr as int;
180
181        let ghost initial_view: crate::specs::mm::virt_mem::MemView = vm_io_owner.read_view_of();
182        let ghost initial_dom: vstd::set::Set<usize> = regions.slots.dom();
183        let ghost initial_reader: crate::mm::VmReader<'_, crate::mm::Infallible> = *reader;
184
185        #[verus_spec(with Tracked(vm_io_owner))]
186        reader.skip_in_place(range.start * core::mem::size_of::<C::E>());
187
188        proof {
189            C::lemma_pte_align_divides_size();
190            let k = size_of_e / align_of_e;
191            vstd::arithmetic::div_mod::lemma_fundamental_div_mod(size_of_e, align_of_e);
192            vstd::arithmetic::mul::lemma_mul_is_commutative(align_of_e, k);
193            vstd::arithmetic::mul::lemma_mul_is_associative(range.start as int, k, align_of_e);
194            vstd::arithmetic::div_mod::lemma_mod_multiples_basic(range.start * k, align_of_e);
195            vstd::arithmetic::div_mod::lemma_mod_adds(
196                pre_skip_cursor,
197                range.start * size_of_e,
198                align_of_e,
199            );
200        }
201
202        let ghost post_skip_remain: int = reader.remain_spec() as int;
203        let ghost range_start: int = range.start as int;
204        let ghost range_end: int = range.end as int;
205        let n_iters: usize = range.end - range.start;
206        let mut iter_count: usize = 0;
207        let ghost mut removed_indices: vstd::set::Set<usize> = vstd::set::Set::empty();
208
209        proof {
210            C::lemma_pte_walk_fills_page();
211            C::lemma_page_table_config_constant_properties();
212            C::lemma_paging_consts_properties();
213            vstd::arithmetic::mul::lemma_mul_is_distributive_sub_other_way(
214                size_of_e,
215                NR_ENTRIES as int,
216                range_start,
217            );
218            vstd::arithmetic::mul::lemma_mul_inequality(
219                range_end - range_start,
220                NR_ENTRIES - range_start,
221                size_of_e,
222            );
223        }
224
225        while iter_count < n_iters
226            invariant
227                reader.inv(),
228                reader.wf(*vm_io_owner),
229                vm_io_owner.inv(),
230                vm_io_owner.read_view_initialized(),
231                regions.inv(),
232                reader.cursor.vaddr as int % align_of_e == 0,
233                size_of_e == core::mem::size_of::<C::E>(),
234                align_of_e == core::mem::align_of::<C::E>(),
235                size_of_e % align_of_e == 0,
236                align_of_e > 0,
237                size_of_e > 0,
238                iter_count <= n_iters,
239                n_iters == range_end - range_start,
240                // Verus loses non-negativity of `range_start` / `range_end`
241                // across the loop boundary; pin it via these invariants so
242                // `lemma_mul_nonnegative` preconditions discharge in the body.
243                0 <= range_start,
244                range_start <= range_end,
245                range_end <= NR_ENTRIES,
246                reader.remain_spec() == post_skip_remain - iter_count * size_of_e,
247                post_skip_remain >= (range_end - range_start) * size_of_e,
248                regions.slots.dom() == initial_dom,
249                Self::child_perms_embedding(*regions, removed_indices),
250                self.walk_coverage_from_view(initial_reader, initial_view, initial_dom),
251                self.walk_uniqueness_from_view(initial_reader, initial_view),
252                // Without this, Verus treats `self.level` as potentially
253                // mutated by `&mut self` and the level-comparison facts go
254                // missing inside walk_coverage / walk_uniqueness instances.
255                self.level == level,
256                reader.end == initial_reader.end,
257                reader.cursor.vaddr == initial_reader.cursor.vaddr + range_start * size_of_e
258                    + iter_count * size_of_e,
259                forall|i: usize|
260                    #![trigger initial_view.addr_transl(i)]
261                    initial_reader.cursor.vaddr <= i < initial_reader.end.vaddr ==> {
262                        &&& initial_view.addr_transl(i) is Some
263                        &&& initial_view.memory.contains_key(initial_view.addr_transl(i).unwrap().0)
264                    },
265                forall|va: usize|
266                    #![trigger vm_io_owner.read_view_of().read(va)]
267                    reader.cursor.vaddr <= va < initial_reader.end.vaddr ==> {
268                        &&& initial_view.addr_transl(va) == vm_io_owner.read_view_of().addr_transl(
269                            va,
270                        )
271                        &&& initial_view.read(va) == vm_io_owner.read_view_of().read(va)
272                    },
273                removed_indices.subset_of(initial_dom),
274                // Witness past iter for each removed idx — the discharge
275                // proof picks it up via `choose|j|` and invokes
276                // `walk_uniqueness` at (current_cursor, witness_cursor).
277                forall|idx: usize| #[trigger]
278                    removed_indices.contains(idx) ==> exists|j: int|
279                        #![trigger Self::walk_pte_at_view(
280                            initial_view,
281                            (initial_reader.cursor.vaddr
282                                + range_start * size_of_e
283                                + j * size_of_e) as usize,
284                        )]
285                        0 <= j < iter_count && {
286                            let cj = (initial_reader.cursor.vaddr + range_start * size_of_e + j
287                                * size_of_e) as usize;
288                            let pte_j = Self::walk_pte_at_view(initial_view, cj);
289                            &&& pte_j.is_present()
290                            &&& !pte_j.is_last(self.level)
291                            &&& idx == frame_to_index(pte_j.paddr())
292                        },
293            decreases n_iters - iter_count,
294        {
295            proof {
296                vstd::arithmetic::mul::lemma_mul_is_distributive_sub(
297                    size_of_e,
298                    range_end - range_start,
299                    iter_count as int,
300                );
301                vstd::arithmetic::mul::lemma_mul_inequality(
302                    1,
303                    range_end - range_start - iter_count,
304                    size_of_e,
305                );
306            }
307            let ghost cursor_pre_read: usize = reader.cursor.vaddr;
308            let ghost pre_view: crate::specs::mm::virt_mem::MemView = vm_io_owner.read_view_of();
309            proof {
310                crate::specs::mm::virt_mem::MemView::lemma_read_bytes_eq_pointwise(
311                    pre_view,
312                    initial_view,
313                    cursor_pre_read,
314                    core::mem::size_of::<C::E>(),
315                );
316            }
317            let pte = #[verus_spec(with Tracked(vm_io_owner))]
318            reader.read_once::<C::E>();
319            let pte = pte.unwrap();
320            proof {
321                ostd_pod::lemma_decode_pod_inverse::<C::E>(pte);
322                vstd::arithmetic::mul::lemma_mul_nonnegative(range_start, size_of_e);
323                vstd::arithmetic::mul::lemma_mul_nonnegative(iter_count as int, size_of_e);
324            }
325            if pte.is_present() {
326                let paddr = pte.paddr();
327                if !pte.is_last(level) {
328                    proof {
329                        vstd::arithmetic::mul::lemma_mul_is_distributive_add_other_way(
330                            size_of_e,
331                            range_start,
332                            iter_count as int,
333                        );
334                        vstd::arithmetic::div_mod::lemma_mod_multiples_basic(
335                            range_start + iter_count,
336                            size_of_e,
337                        );
338                        Self::lemma_coverage_at(
339                            *self,
340                            initial_reader,
341                            initial_view,
342                            initial_dom,
343                            cursor_pre_read,
344                        );
345                        broadcast use lemma_frame_to_index_injective;
346
347                        assert forall|idx: usize| #[trigger]
348                            removed_indices.contains(idx) implies idx != frame_to_index(
349                            pte.paddr(),
350                        ) by {
351                            let j = choose|j: int|
352                                #![trigger Self::walk_pte_at_view(
353                                    initial_view,
354                                    (initial_reader.cursor.vaddr
355                                        + range_start * size_of_e
356                                        + j * size_of_e) as usize,
357                                )]
358                                0 <= j < iter_count && {
359                                    let cj = (initial_reader.cursor.vaddr + range_start * size_of_e
360                                        + j * size_of_e) as usize;
361                                    let pte_j = Self::walk_pte_at_view(initial_view, cj);
362                                    &&& pte_j.is_present()
363                                    &&& !pte_j.is_last(self.level)
364                                    &&& idx == frame_to_index(pte_j.paddr())
365                                };
366                            let cj: usize = (initial_reader.cursor.vaddr + range_start * size_of_e
367                                + j * size_of_e) as usize;
368                            let pte_j = Self::walk_pte_at_view(initial_view, cj);
369                            vstd::arithmetic::mul::lemma_mul_nonnegative(range_start, size_of_e);
370                            vstd::arithmetic::mul::lemma_mul_nonnegative(j, size_of_e);
371                            vstd::arithmetic::mul::lemma_mul_strict_inequality(
372                                j,
373                                iter_count as int,
374                                size_of_e,
375                            );
376                            vstd::arithmetic::mul::lemma_mul_is_distributive_add_other_way(
377                                size_of_e,
378                                range_start,
379                                j,
380                            );
381                            vstd::arithmetic::mul::lemma_mul_inequality(
382                                range_start + j + 1,
383                                range_end,
384                                size_of_e,
385                            );
386                            vstd::arithmetic::mul::lemma_mul_is_distributive_sub_other_way(
387                                size_of_e,
388                                range_end,
389                                range_start,
390                            );
391                            vstd::arithmetic::div_mod::lemma_mod_multiples_basic(
392                                range_start + j,
393                                size_of_e,
394                            );
395                            Self::lemma_uniqueness_at_pair(
396                                *self,
397                                initial_reader,
398                                initial_view,
399                                cursor_pre_read,
400                                cj,
401                            );
402                            pte.lemma_paddr_is_page_aligned();
403                            pte_j.lemma_paddr_is_page_aligned();
404                        };
405                        // Pinning these in SMT context lets `tracked_remove`'s
406                        // dom-containment precondition and `from_raw`'s
407                        // `from_raw_requires_safety` (via embedding) discharge.
408                    }
409                    proof {
410                        removed_indices = removed_indices.insert(frame_to_index(paddr));
411                        assert({
412                            let cj = (initial_reader.cursor.vaddr + range_start * size_of_e
413                                + iter_count * size_of_e) as usize;
414                            let pte_j = Self::walk_pte_at_view(initial_view, cj);
415                            &&& cj == cursor_pre_read
416                            &&& pte_j == pte
417                            &&& pte_j.is_present()
418                            &&& !pte_j.is_last(self.level)
419                            &&& frame_to_index(paddr) == frame_to_index(pte_j.paddr())
420                        });
421                    }
422                    proof_decl! {
423                        let tracked from_raw_obl: vstd_extra::drop_tracking::DropObligation<usize>;
424                    }
425                    let frame = unsafe {
426                        #[verus_spec(with Tracked(regions) => Tracked(from_raw_obl))]
427                        Frame::<Self>::from_raw(paddr)
428                    };
429                    // `from_raw` minted the obligation; `frame.drop`
430                    // consumes it directly. No redeem dance needed.
431                    VerifiedDrop::drop(frame, Tracked(regions), Tracked(from_raw_obl));
432                } else {
433                    // SAFETY: The PTE points to a mapped item. The ownership
434                    // of the item is transferred here then dropped.
435                    let _item = unsafe { C::item_from_raw(paddr, level, pte.prop()) };
436                }
437            }
438            proof {
439                vstd::arithmetic::div_mod::lemma_mod_adds(
440                    reader.cursor.vaddr - size_of_e,
441                    size_of_e,
442                    align_of_e,
443                );
444            }
445            let ghost iter_count_old: int = iter_count as int;
446            iter_count = iter_count + 1;
447            proof {
448                vstd::arithmetic::mul::lemma_mul_is_distributive_add_other_way(
449                    size_of_e,
450                    iter_count_old,
451                    1,
452                );
453            }
454        }
455    }
456
457    fn is_untyped(&self) -> bool {
458        false
459    }
460
461    uninterp spec fn vtable_ptr(&self) -> usize;
462}
463
464#[verus_verify]
465impl<C: PageTableConfig> PageTableNode<C> {
466    /// Gets the level of a page table node.
467    /// # Verified Properties
468    /// ## Preconditions
469    /// - The node must be well-formed, and the caller must provide a permission token for its metadata.
470    /// ## Postconditions
471    /// - Returns the level of the node.
472    /// ## Safety
473    /// - We require the caller to provide a permission token to ensure that this function is only called on a valid page table node.
474    #[verus_spec(
475        with Tracked(perm) : Tracked<&PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>
476    )]
477    pub(super) fn level(&self) -> PagingLevel
478        requires
479            self.ptr.addr() == perm.addr(),
480            self.ptr.addr() == perm.points_to.addr(),
481            perm.is_init(),
482            perm.wf(&perm.inner_perms),
483        returns
484            perm.value().metadata.level,
485    {
486        #[verus_spec(with Tracked(perm))]
487        let meta = self.meta();
488        meta.level
489    }
490
491    /// Allocates a new empty page table node.
492    #[verus_spec(res =>
493        with Tracked(parent_owner): Tracked<&mut NodeOwner<C>>,
494             Tracked(regions): Tracked<&mut MetaRegionOwners>,
495             Tracked(guards): Tracked<&Guards<'rcu>>,
496             Ghost(idx): Ghost<usize>,
497                 -> owner: Tracked<OwnerSubtree<C>>,
498        requires
499            1 <= level < NR_LEVELS,
500            idx < NR_ENTRIES,
501            old(regions).inv(),
502            old(parent_owner).inv(),
503        ensures
504            final(regions).inv(),
505            final(parent_owner).inv(),
506            allocated_empty_node_owner(owner@, level),
507            allocated_empty_node_grandchildren_none(owner@),
508            res.ptr.addr() == owner@.value.node().meta_addr_self(),
509            guards.unlocked(owner@.value.node().meta_addr_self()),
510            MetaSlot::get_node_from_unused_spec(meta_to_frame(owner@.value.node().meta_addr_self()), *old(regions), *final(regions)),
511            MetaSlot::slot_perm_reparked_spec(meta_to_frame(owner@.value.node().meta_addr_self()), *old(regions), *final(regions)),
512
513            final(regions).frame_obligations == old(regions).frame_obligations.insert(
514                frame_to_index(meta_to_frame(owner@.value.node().meta_addr_self()))),
515            old(regions).slots.contains_key(frame_to_index(meta_to_frame(owner@.value.node().meta_addr_self()))),
516
517            !crate::specs::mm::frame::meta_owners::is_mmio_paddr(
518                meta_to_frame(owner@.value.node().meta_addr_self())),
519            owner@.value.metaregion_sound(*final(regions)),
520            forall|i: usize|
521                #[trigger] old(regions).slot_owners[i].inner_perms.ref_count.value() != REF_COUNT_UNUSED
522                ==> i != frame_to_index(meta_to_frame(owner@.value.node().meta_addr_self())),
523            owner@.value.match_pte(C::E::new_pt_spec(meta_to_frame(owner@.value.node().meta_addr_self())), level as PagingLevel),
524            *final(parent_owner) == old(parent_owner).set_children_perm(idx, C::E::new_pt_spec(meta_to_frame(owner@.value.node().meta_addr_self()))),
525            final(regions).slots.contains_key(owner@.value.node().slot_index),
526            owner@.value.node().metaregion_sound_node(*final(regions)),
527    )]
528    #[verifier::external_body]
529    pub fn alloc<'rcu>(level: PagingLevel) -> Self {
530        let tracked entry_owner = EntryOwner::tracked_new_absent(
531            TreePath::new(Seq::empty()),
532            level,
533        );
534
535        let tracked mut owner = OwnerSubtree::<C>::new_val_tracked(entry_owner, level as nat);
536        let meta = PageTablePageMeta::new(level);
537        let mut frame = FrameAllocOptions::new();
538        frame.zeroed(true);
539        let allocated_frame = frame.alloc_frame_with(meta).expect(
540            "Failed to allocate a page table node",
541        );
542        // The allocated frame is zeroed. Make sure zero is absent PTE.
543        //debug_assert_eq!(C::E::new_absent().as_usize(), 0);
544
545        proof_with!(|= Tracked(owner));
546
547        allocated_frame
548    }/*
549    /// Activates the page table assuming it is a root page table.
550    ///
551    /// Here we ensure not dropping an active page table by making a
552    /// processor a page table owner. When activating a page table, the
553    /// reference count of the last activated page table is decremented.
554    /// And that of the current page table is incremented.
555    ///
556    /// # Safety
557    ///
558    /// The caller must ensure that the page table to be activated has
559    /// proper mappings for the kernel and has the correct const parameters
560    /// matching the current CPU.
561    ///
562    /// # Panics
563    ///
564    /// Only top-level page tables can be activated using this function.
565    pub(crate) unsafe fn activate(&self) {
566        use crate::{
567            arch::mm::{activate_page_table, current_page_table_paddr},
568            mm::page_prop::CachePolicy,
569        };
570
571        #[cfg(feature = "allow_panic")]
572        assert_eq!(self.level(), C::NR_LEVELS());
573
574        let last_activated_paddr = current_page_table_paddr();
575        if last_activated_paddr == self.start_paddr() {
576            return;
577        }
578
579        // SAFETY: The safety is upheld by the caller.
580        unsafe { activate_page_table(self.clone().into_raw(), CachePolicy::Writeback) };
581
582        // Restore and drop the last activated page table.
583        // SAFETY: The physical address is valid and points to a forgotten page table node.
584        drop(unsafe { Self::from_raw(last_activated_paddr) });
585    }
586
587    /// Activates the (root) page table assuming it is the first activation.
588    ///
589    /// It will not try dropping the last activate page table. It is the same
590    /// with [`Self::activate()`] in other senses.
591    pub(super) unsafe fn first_activate(&self) {
592        use crate::{arch::mm::activate_page_table, mm::page_prop::CachePolicy};
593
594        // SAFETY: The safety is upheld by the caller.
595        unsafe { activate_page_table(self.clone().into_raw(), CachePolicy::Writeback) };
596    }*/
597
598}
599
600#[verus_verify]
601impl<'a, C: PageTableConfig> PageTableNodeRef<'a, C> {
602    pub open spec fn locks_preserved_except<'rcu>(
603        addr: usize,
604        guards0: Guards<'rcu>,
605        guards1: Guards<'rcu>,
606    ) -> bool {
607        &&& OwnerSubtree::implies(
608            CursorOwner::<'rcu, C>::node_unlocked(guards0),
609            CursorOwner::<'rcu, C>::node_unlocked_except(guards1, addr),
610        )
611        &&& forall|i: usize| guards0.lock_held(i) ==> guards1.lock_held(i)
612        &&& forall|i: usize| guards0.unlocked(i) && i != addr ==> guards1.unlocked(i)
613    }
614
615    /// Locks the page table node.
616    ///
617    /// An atomic mode guard is required to
618    ///  1. prevent deadlocks;
619    ///  2. provide a lifetime (`'rcu`) that the nodes are guaranteed to outlive.
620    /// # Verification Design
621    /// As of when we verified this library, we didn't have a spin lock implementation, so we axiomatize
622    /// what happens when it's successful.
623    #[verifier::external_body]
624    #[verus_spec(res =>
625        with Tracked(owner): Tracked<&NodeOwner<C>>,
626            Tracked(guards): Tracked<&mut Guards<'rcu>>
627        requires
628            self.inner@.invariants(*owner),
629            old(guards).unlocked(owner.meta_addr_self()),
630        ensures
631            final(guards).lock_held(owner.meta_addr_self()),
632            Self::locks_preserved_except(owner.meta_addr_self(), *old(guards), *final(guards)),
633            owner.relate_guard(res),
634    )]
635    pub fn lock<'rcu, A: InAtomicMode>(self, _guard: &'rcu A) -> PageTableGuard<'rcu, C> where
636        'a: 'rcu,
637     {
638        unimplemented!()
639    }
640
641    /// Creates a new [`PageTableGuard`] without checking if the page table lock is held.
642    ///
643    /// # Safety
644    ///
645    /// This function must be called if this task logically holds the lock.
646    ///
647    /// Calling this function when a guard is already created is undefined behavior
648    /// unless that guard was already forgotten.
649    #[verus_spec(res =>
650        with Tracked(owner): Tracked<&NodeOwner<C>>,
651             Tracked(guards): Tracked<&mut Guards<'rcu>>,
652        requires
653            self.inner@.invariants(*owner),
654            old(guards).unlocked(owner.meta_addr_self()),
655        ensures
656            final(guards).lock_held(owner.meta_addr_self()),
657            Self::locks_preserved_except(owner.meta_addr_self(), *old(guards), *final(guards)),
658            owner.relate_guard(res),
659    )]
660    pub unsafe fn make_guard_unchecked<'rcu, A: InAtomicMode>(
661        self,
662        _guard: &'rcu A,
663    ) -> PageTableGuard<'rcu, C> where 'a: 'rcu {
664        let guard = PageTableGuard { inner: self };
665
666        proof {
667            let ghost guards0 = *guards;
668            guards.guards = guards.guards.insert(owner.meta_addr_self());
669
670        }
671
672        guard
673    }
674}
675
676impl<'rcu, C: PageTableConfig> PageTableGuard<'rcu, C> {
677    /// Borrows an entry in the node at a given index.
678    ///
679    /// # Panics
680    ///
681    /// Panics if the index is not within the bound of
682    /// [`nr_subpage_per_huge<C>`].
683    #[verus_spec(res =>
684        with Tracked(owner): Tracked<&NodeOwner<C>>,
685             Tracked(child_owner): Tracked<&EntryOwner<C>>,
686             Tracked(regions): Tracked<&MetaRegionOwners>,
687        requires
688            owner.inv(),
689            child_owner.inv(),
690            owner.relate_guard(*old(self)),
691            child_owner.match_pte(
692                owner.children_perm.value()[idx as int],
693                child_owner.parent_level,
694            ),
695            regions.inv(),
696            regions.slots.contains_key(owner.slot_index),
697            // Panic condition
698            idx < NR_ENTRIES,
699        ensures
700            res.wf(*child_owner),
701            res.idx == idx,
702            *res.node == *old(self),
703            *final(self) == *final(res.node),
704            owner.relate_guard(*res.node),
705    )]
706    pub fn entry<'a>(&'a mut self, idx: usize) -> Entry<'a, 'rcu, C> {
707        #[cfg(feature = "allow_panic")]
708        assert!(idx < nr_subpage_per_huge::<C>());
709        // SAFETY: The index is within the bound. `Entry::new_at` returns an
710        // entry whose node is the guard value we were handed.
711        unsafe {
712            #[verus_spec(with Tracked(child_owner), Tracked(owner), Tracked(regions))]
713            Entry::new_at(self, idx)
714        }
715    }
716
717    /// Gets the number of valid PTEs in a page table node.
718    /// # Verified Properties
719    /// ## Preconditions
720    /// - The node must be well-formed.
721    /// ## Postconditions
722    /// - Returns the number of valid PTEs in the node.
723    /// ## Safety
724    /// - We require the caller to provide a permission token to ensure that this function is only called on a valid page table node.
725    #[verus_spec(nr =>
726        with Tracked(owner) : Tracked<&NodeOwner<C>>,
727             Tracked(regions): Tracked<&MetaRegionOwners>,
728        requires
729            self.inner.inner@.invariants(*owner),
730            regions.inv(),
731            owner.metaregion_sound_node(*regions),
732        returns
733            owner.meta_own.nr_children.value(),
734    )]
735    pub fn nr_children(&self) -> u16 {
736        let tracked owner_meta_perm = regions.borrow_typed_perm::<PageTablePageMeta<C>>(
737            owner.slot_index,
738        );
739        #[verus_spec(with Tracked(owner_meta_perm))]
740        let meta = self.meta();
741
742        *meta.nr_children.borrow(Tracked(&owner.meta_own.nr_children))
743    }
744
745    /// Returns if the page table node is detached from its parent.
746    #[verus_spec(res =>
747        with
748            Tracked(meta_perm): Tracked<&'a PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>,
749        requires
750            old(self).inner.inner@.ptr.addr() == meta_perm.addr(),
751            old(self).inner.inner@.ptr.addr() == meta_perm.points_to.addr(),
752            meta_perm.is_init(),
753            meta_perm.wf(&meta_perm.inner_perms),
754        ensures
755            res.id() == meta_perm.value().metadata.stray.id(),
756            *final(self) == *old(self),
757    )]
758    pub(super) fn stray_mut<'a>(&'a mut self) -> &'a pcell_maybe_uninit::PCell<bool> {
759        // SAFETY: The lock is held so we have an exclusive access.
760        #[verus_spec(with Tracked(meta_perm))]
761        let meta = self.meta();
762        &meta.stray
763    }
764
765    /// Reads a non-owning PTE at the given index.
766    ///
767    /// A non-owning PTE means that it does not account for a reference count
768    /// of the a page if the PTE points to a page. The original PTE still owns
769    /// the child page.
770    ///
771    /// # Safety
772    ///
773    /// The caller must ensure that the index is within the bound.
774    #[verus_spec(pte =>
775        with Tracked(owner): Tracked<&NodeOwner<C>>,
776             Tracked(regions): Tracked<&MetaRegionOwners>,
777        requires
778            self.inner.inner@.invariants(*owner),
779            regions.inv(),
780            regions.slots.contains_key(owner.slot_index),
781            idx < NR_ENTRIES,
782        ensures
783            pte == owner.children_perm.value()[idx as int],
784    )]
785    pub unsafe fn read_pte(&self, idx: usize) -> C::E {
786        // debug_assert!(idx < nr_subpage_per_huge::<C>());
787        let tracked owner_slot_perm = regions.slots.tracked_borrow(owner.slot_index);
788        let ptr = vstd_extra::array_ptr::ArrayPtr::<C::E, NR_ENTRIES>::from_addr(
789            paddr_to_vaddr(
790                #[verus_spec(with Tracked(owner_slot_perm))]
791                self.start_paddr(),
792            ),
793        );
794
795        // SAFETY:
796        // - The page table node is alive. The index is inside the bound, so the page table entry is valid.
797        // - All page table entries are aligned and accessed with atomic operations only.
798        unsafe {
799            #[verus_spec(with Tracked(&owner.children_perm))]
800            load_pte(ptr.add(idx), Ordering::Relaxed)
801        }
802    }
803
804    /// Writes a page table entry at a given index.
805    ///
806    /// This operation will leak the old child if the old PTE is present.
807    ///
808    /// # Safety
809    ///
810    /// The caller must ensure that:
811    ///  1. The index must be within the bound;
812    ///  2. The PTE must represent a valid [`Child`] whose level is compatible
813    ///     with the page table node.
814    ///  3. The page table node will have the ownership of the [`Child`]
815    ///     after this method.
816    #[verus_spec(
817        with Tracked(owner): Tracked<&mut NodeOwner<C>>,
818             Tracked(regions): Tracked<&MetaRegionOwners>,
819        requires
820            old(self).inner.inner@.invariants(*old(owner)),
821            regions.inv(),
822            regions.slots.contains_key(old(owner).slot_index),
823            idx < NR_ENTRIES,
824        ensures
825            final(owner).inv(),
826            final(owner).level == old(owner).level,
827            final(owner).meta_own == old(owner).meta_own,
828            final(owner).slot_index == old(owner).slot_index,
829            final(owner).children_perm.value() == old(owner).children_perm.value().update(
830                idx as int,
831                pte,
832            ),
833            *final(self) == *old(self),
834    )]
835    pub unsafe fn write_pte(&mut self, idx: usize, pte: C::E) {
836        // debug_assert!(idx < nr_subpage_per_huge::<C>());
837        let tracked owner_slot_perm = regions.slots.tracked_borrow(owner.slot_index);
838        #[verusfmt::skip]
839        let ptr = vstd_extra::array_ptr::ArrayPtr::<C::E, NR_ENTRIES>::from_addr(
840            paddr_to_vaddr(
841                #[verus_spec(with Tracked(owner_slot_perm))]
842                self.start_paddr()
843            ),
844        );
845
846        // SAFETY:
847        // - The page table node is alive. The index is inside the bound, so the page table entry is valid.
848        // - All page table entries are aligned and accessed with atomic operations only.
849        unsafe {
850            #[verus_spec(with Tracked(&mut owner.children_perm))]
851            store_pte(ptr.add(idx), pte, Ordering::Release)
852        }
853    }
854
855    /// Gets the mutable reference to the number of valid PTEs in the node.
856    #[verus_spec(res =>
857        with
858            Tracked(meta_perm): Tracked<&'a PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>,
859        requires
860            old(self).inner.inner@.ptr.addr() == meta_perm.addr(),
861            old(self).inner.inner@.ptr.addr() == meta_perm.points_to.addr(),
862            meta_perm.is_init(),
863            meta_perm.wf(&meta_perm.inner_perms),
864        ensures
865            res.id() == meta_perm.value().metadata.nr_children.id(),
866            *final(self) == *old(self),
867    )]
868    fn nr_children_mut<'a>(&'a mut self) -> &'a pcell_maybe_uninit::PCell<u16> {
869        // SAFETY: The lock is held so we have an exclusive access.
870        #[verus_spec(with Tracked(meta_perm))]
871        let meta = self.meta();
872        &meta.nr_children
873    }
874}
875
876/*impl<C: PageTableConfig> Drop for PageTableGuard<'_, C> {
877    fn drop(&mut self) {
878        self.inner.meta().lock.store(0, Ordering::Release);
879    }
880}*/
881
882impl<C: PageTableConfig> PageTablePageMeta<C> {
883    pub fn new(level: PagingLevel) -> Self {
884        Self {
885            nr_children: pcell_maybe_uninit::PCell::new(0).0,
886            stray: pcell_maybe_uninit::PCell::new(false).0,
887            level,
888            lock: PAtomicU8::new(0).0,
889            _phantom: PhantomData,
890        }
891    }
892
893    /// The PTE value that `read_once::<C::E>` would produce at cursor `c`
894    /// against the given memory view. Linked to `read_once` via
895    /// `pod_bytes(v) == read_view.read_bytes(...)` (strengthened ensures)
896    /// + [`lemma_decode_pod_inverse`].
897    pub open spec fn walk_pte_at_view(view: crate::specs::mm::virt_mem::MemView, c: usize) -> C::E {
898        ostd_pod::decode_pod::<C::E>(view.read_bytes(c, core::mem::size_of::<C::E>()))
899    }
900
901    /// Single-cursor projection of [`walk_coverage_from_view`]. Extracting
902    /// the forall body to a named predicate lets the body invoke
903    /// [`lemma_coverage_at`] for one specific `c` instead of relying on
904    /// auto-trigger matching across the loop invariant's `forall|c|`.
905    pub open spec fn walk_coverage_at(
906        self,
907        view: crate::specs::mm::virt_mem::MemView,
908        dom: vstd::set::Set<usize>,
909        c: usize,
910    ) -> bool {
911        let pte = Self::walk_pte_at_view(view, c);
912        pte.is_present() && !pte.is_last(self.level) ==> dom.contains(frame_to_index(pte.paddr()))
913    }
914
915    /// Instantiate [`walk_coverage_from_view`]'s forall at one cursor.
916    pub proof fn lemma_coverage_at(
917        self,
918        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
919        view: crate::specs::mm::virt_mem::MemView,
920        dom: vstd::set::Set<usize>,
921        c: usize,
922    )
923        requires
924            self.walk_coverage_from_view(reader, view, dom),
925            reader.cursor.vaddr <= c,
926            c + core::mem::size_of::<C::E>() <= reader.cursor.vaddr + reader.remain_spec(),
927            (c - reader.cursor.vaddr) % core::mem::size_of::<C::E>() as int == 0,
928        ensures
929            self.walk_coverage_at(view, dom, c),
930    {
931    }
932
933    /// Instantiate [`walk_uniqueness_from_view`]'s forall at one cursor pair.
934    pub proof fn lemma_uniqueness_at_pair(
935        self,
936        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
937        view: crate::specs::mm::virt_mem::MemView,
938        c1: usize,
939        c2: usize,
940    )
941        requires
942            self.walk_uniqueness_from_view(reader, view),
943            reader.cursor.vaddr <= c1,
944            c1 + core::mem::size_of::<C::E>() <= reader.cursor.vaddr + reader.remain_spec(),
945            (c1 - reader.cursor.vaddr) % core::mem::size_of::<C::E>() as int == 0,
946            reader.cursor.vaddr <= c2,
947            c2 + core::mem::size_of::<C::E>() <= reader.cursor.vaddr + reader.remain_spec(),
948            (c2 - reader.cursor.vaddr) % core::mem::size_of::<C::E>() as int == 0,
949            c1 != c2,
950            Self::walk_pte_at_view(view, c1).is_present(),
951            !Self::walk_pte_at_view(view, c1).is_last(self.level),
952            Self::walk_pte_at_view(view, c2).is_present(),
953            !Self::walk_pte_at_view(view, c2).is_last(self.level),
954        ensures
955            Self::walk_pte_at_view(view, c1).paddr() != Self::walk_pte_at_view(view, c2).paddr(),
956    {
957    }
958
959    /// Caller-side dom-membership obligation: every present non-last PTE
960    /// position in the walk (over `view`) has its child-frame index in
961    /// `dom`. Phrased over a frozen `(view, dom)` pair so the body can
962    /// carry it as a loop invariant against an entry-state snapshot
963    /// while `vm_io_owner` advances per iteration.
964    pub open spec fn walk_coverage_from_view(
965        self,
966        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
967        view: crate::specs::mm::virt_mem::MemView,
968        dom: vstd::set::Set<usize>,
969    ) -> bool {
970        forall|c: usize|
971            #![trigger Self::walk_pte_at_view(view, c)]
972            reader.cursor.vaddr <= c && c + core::mem::size_of::<C::E>() <= reader.cursor.vaddr
973                + reader.remain_spec() && (c - reader.cursor.vaddr) % core::mem::size_of::<
974                C::E,
975            >() as int == 0 ==> {
976                let pte = Self::walk_pte_at_view(view, c);
977                pte.is_present() && !pte.is_last(self.level) ==> dom.contains(
978                    frame_to_index(pte.paddr()),
979                )
980            }
981    }
982
983    /// Caller-side uniqueness obligation: distinct cursor positions with
984    /// present non-last PTEs (in `view`) map to distinct paddrs.
985    pub open spec fn walk_uniqueness_from_view(
986        self,
987        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
988        view: crate::specs::mm::virt_mem::MemView,
989    ) -> bool {
990        forall|c1: usize, c2: usize|
991            #![trigger Self::walk_pte_at_view(view, c1), Self::walk_pte_at_view(view, c2)]
992            reader.cursor.vaddr <= c1 && c1 + core::mem::size_of::<C::E>() <= reader.cursor.vaddr
993                + reader.remain_spec() && (c1 - reader.cursor.vaddr) % core::mem::size_of::<
994                C::E,
995            >() as int == 0 && reader.cursor.vaddr <= c2 && c2 + core::mem::size_of::<C::E>()
996                <= reader.cursor.vaddr + reader.remain_spec() && (c2 - reader.cursor.vaddr)
997                % core::mem::size_of::<C::E>() as int == 0 && c1 != c2 ==> {
998                let pte1 = Self::walk_pte_at_view(view, c1);
999                let pte2 = Self::walk_pte_at_view(view, c2);
1000                pte1.is_present() && !pte1.is_last(self.level) && pte2.is_present()
1001                    && !pte2.is_last(self.level) ==> pte1.paddr() != pte2.paddr()
1002            }
1003    }
1004
1005    /// Caller-side shape obligation: every paddr in `child_perms.dom()`
1006    /// has a slot perm matching the shape `from_raw` + `VerifiedDrop::drop`
1007    /// expect (init, alignment, refcount within bounds, last-reference
1008    /// shape when refcount == 1).
1009    pub open spec fn child_perms_embedding(
1010        regions: crate::specs::mm::frame::meta_region_owners::MetaRegionOwners,
1011        excluded: vstd::set::Set<usize>,
1012    ) -> bool {
1013        forall|paddr: crate::mm::Paddr|
1014            #![trigger regions.slot_owners[frame_to_index(paddr)]]
1015            regions.slots.dom().contains(frame_to_index(paddr)) && !excluded.contains(
1016                frame_to_index(paddr),
1017            ) ==> {
1018                let idx = frame_to_index(paddr);
1019                let so = regions.slot_owners[idx];
1020                &&& <Frame<Self>>::from_raw_requires_safety(
1021                    regions,
1022                    paddr,
1023                )
1024                // Borrow-protocol transition: `raw_count` is dormant.
1025                &&& so.inner_perms.ref_count.value() > 0
1026                &&& so.inner_perms.ref_count.value() != REF_COUNT_UNUSED
1027                &&& so.inner_perms.ref_count.value() <= REF_COUNT_MAX
1028                &&& so.inner_perms.ref_count.value() == 1 ==> {
1029                    &&& so.inner_perms.storage.is_init()
1030                    &&& so.inner_perms.in_list.value() == 0
1031                    &&& so.paths_in_pt.is_empty()
1032                }
1033                // Borrow-protocol redesign: in steady state between
1034                // `into_pte`'s consume and `on_drop`'s `from_raw`-mint,
1035                // the per-child `frame_obligations` count is 0.
1036
1037            }
1038    }
1039}
1040
1041} // verus!