Skip to main content

ostd/mm/page_table/node/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2//! This module defines page table node abstractions and the handle.
3//!
4//! The page table node is also frequently referred to as a page table in many architectural
5//! documentations. It is essentially a page that contains page table entries (PTEs) that map
6//! to child page tables nodes or mapped pages.
7//!
8//! This module leverages the page metadata to manage the page table pages, which makes it
9//! easier to provide the following guarantees:
10//!
11//! The page table node is not freed when it is still in use by:
12//!    - a parent page table node,
13//!    - or a handle to a page table node,
14//!    - or a processor.
15//!
16//! This is implemented by using a reference counter in the page metadata. If the above
17//! conditions are not met, the page table node is ensured to be freed upon dropping the last
18//! reference.
19//!
20//! One can acquire exclusive access to a page table node using merely the physical address of
21//! the page table node. This is implemented by a lock in the page metadata. Here the
22//! exclusiveness is only ensured for kernel code, and the processor's MMU is able to access the
23//! page table node while a lock is held. So the modification to the PTEs should be done after
24//! the initialization of the entity that the PTE points to. This is taken care in this module.
25//!
26mod child;
27mod entry;
28
29#[path = "../../../../specs/mm/page_table/node/child.rs"]
30mod child_specs;
31#[path = "../../../../specs/mm/page_table/node/entry.rs"]
32mod entry_specs;
33
34pub use crate::specs::mm::page_table::node::{entry_owners::*, owners::*};
35pub use child::*;
36pub use entry::*;
37
38use vstd::cell::pcell_maybe_uninit;
39use vstd::prelude::*;
40
41use vstd::atomic::PAtomicU8;
42
43use vstd_extra::array_ptr;
44use vstd_extra::cast_ptr::*;
45use vstd_extra::drop_tracking::{Drop as VerifiedDrop, TrackDrop};
46use vstd_extra::ghost_tree::*;
47use vstd_extra::ownership::*;
48
49use crate::mm::frame::allocator::FrameAllocOptions;
50use crate::mm::frame::meta::MetaSlot;
51use crate::mm::frame::meta::mapping::{META_SLOT_SIZE, frame_to_meta, meta_to_frame};
52use crate::mm::frame::{AnyFrameMeta, Frame, frame_to_index};
53use crate::mm::kspace::VMALLOC_BASE_VADDR;
54use crate::mm::page_table::*;
55use crate::mm::{Paddr, Vaddr, kspace::LINEAR_MAPPING_BASE_VADDR, paddr_to_vaddr};
56use crate::specs::arch::kspace::FRAME_METADATA_RANGE;
57use crate::specs::mm::frame::meta_owners::{
58    MetaSlotOwner, MetaSlotStorage, Metadata, REF_COUNT_UNUSED,
59};
60use crate::specs::mm::frame::meta_region_owners::MetaRegionOwners;
61use crate::specs::mm::page_table::node::owners::*;
62
63use core::{marker::PhantomData, ops::Deref, sync::atomic::Ordering};
64
65use super::nr_subpage_per_huge;
66
67use crate::{
68    mm::{
69        page_table::{load_pte, store_pte},
70        //        FrameAllocOptions, Infallible,
71        //        VmReader,
72    },
73    specs::task::InAtomicMode,
74};
75
76verus! {
77
78/// The metadata of any kinds of page table pages.
79/// Make sure the the generic parameters don't effect the memory layout.
80pub struct PageTablePageMeta<C: PageTableConfig> {
81    /// The number of valid PTEs. It is mutable if the lock is held.
82    pub nr_children: pcell_maybe_uninit::PCell<u16>,
83    /// If the page table is detached from its parent.
84    ///
85    /// A page table can be detached from its parent while still being accessed,
86    /// since we use a RCU scheme to recycle page tables. If this flag is set,
87    /// it means that the parent is recycling the page table.
88    pub stray: pcell_maybe_uninit::PCell<bool>,
89    /// The level of the page table page. A page table page cannot be
90    /// referenced by page tables of different levels.
91    pub level: PagingLevel,
92    /// The lock for the page table page.
93    pub lock: PAtomicU8,
94    pub _phantom: core::marker::PhantomData<C>,
95}
96
97/// A smart pointer to a page table node.
98///
99/// This smart pointer is an owner of a page table node. Thus creating and
100/// dropping it will affect the reference count of the page table node. If
101/// dropped it as the last reference, the page table node and subsequent
102/// children will be freed.
103///
104/// [`PageTableNode`] is read-only. To modify the page table node, lock and use
105/// [`PageTableGuard`].
106pub type PageTableNode<C> = Frame<PageTablePageMeta<C>>;
107
108unsafe impl<C: PageTableConfig> AnyFrameMeta for PageTablePageMeta<C> {
109    /// Caller invariants the PT-node `on_drop` body relies on:
110    /// - Reader well-formedness + `vm_io_owner` matching + read view
111    ///   initialized + at least `PAGE_SIZE` bytes remaining for the
112    ///   PT-node walk.
113    /// - Global region table invariant.
114    /// - Embedding ([`child_perms_embedding`]): for every paddr in
115    ///   `child_perms.dom()`, the slot and perm match `from_raw` /
116    ///   `VerifiedDrop::drop`'s expected shape.
117    /// - Walk coverage ([`walk_coverage_from_view`]): for every present
118    ///   non-last PTE in the page bytes, `frame_to_index(pte.paddr()) ∈
119    ///   child_perms.dom()`.
120    /// - Walk uniqueness ([`walk_uniqueness_from_view`]): distinct PTE
121    ///   positions with present non-last PTEs have distinct paddrs.
122    ///
123    /// The body now discharges the dom-membership obligation in full via
124    /// byte-level chaining (`decode_pod` + `read_once`'s strengthened
125    /// ensures + the byte-preservation loop invariant) plus the two
126    /// walk-* preconditions; see [`lemma_coverage_at`] and
127    /// [`lemma_uniqueness_at_pair`].
128    open spec fn on_drop_pre(
129        &self,
130        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
131        regions: crate::specs::mm::frame::meta_region_owners::MetaRegionOwners,
132        vm_io_owner: crate::specs::mm::io::VmIoOwner,
133    ) -> bool {
134        &&& reader.inv()
135        &&& reader.wf(vm_io_owner)
136        &&& reader.remain_spec() >= crate::specs::arch::mm::PAGE_SIZE
137        &&& reader.cursor.vaddr % core::mem::align_of::<C::E>() == 0
138        &&& vm_io_owner.inv()
139        &&& vm_io_owner.read_view_initialized()
140        &&& regions.inv()
141        &&& Self::child_perms_embedding(regions, vstd::set::Set::empty())
142        &&& self.walk_coverage_from_view(reader, vm_io_owner.read_view_of(), regions.slots.dom())
143        &&& self.walk_uniqueness_from_view(reader, vm_io_owner.read_view_of())
144    }
145
146    /// Drops the children of a page-table node: walks each present PTE and
147    /// drops the referenced child page-table-node frame or mapped item.
148    #[verifier::rlimit(400)]
149    #[verifier::spinoff_prover]
150    fn on_drop(
151        &mut self,
152        reader: &mut crate::mm::VmReader<'_, crate::mm::Infallible>,
153        Tracked(regions): Tracked<
154            &mut crate::specs::mm::frame::meta_region_owners::MetaRegionOwners,
155        >,
156        Tracked(vm_io_owner): Tracked<&mut crate::specs::mm::io::VmIoOwner>,
157    ) {
158        let level = self.level;
159        let range = if level == C::NR_LEVELS() {
160            C::TOP_LEVEL_INDEX_RANGE()
161        } else {
162            0..nr_subpage_per_huge::<C>()
163        };
164
165        proof {
166            C::axiom_pte_walk_fills_page();
167            C::axiom_top_level_index_range_within_nr_entries();
168            C::axiom_nr_subpage_per_huge_eq_nr_entries();
169            crate::mm::page_table::axiom_top_level_index_range_bounds::<C>();
170            vstd::arithmetic::mul::lemma_mul_inequality(
171                range.start as int,
172                NR_ENTRIES as int,
173                core::mem::size_of::<C::E>() as int,
174            );
175        }
176
177        let ghost size_of_e: int = core::mem::size_of::<C::E>() as int;
178        let ghost align_of_e: int = core::mem::align_of::<C::E>() as int;
179        let ghost pre_skip_cursor: int = reader.cursor.vaddr as int;
180
181        let ghost initial_view: crate::specs::mm::virt_mem::MemView = vm_io_owner.read_view_of();
182        let ghost initial_dom: vstd::set::Set<usize> = regions.slots.dom();
183        let ghost initial_reader: crate::mm::VmReader<'_, crate::mm::Infallible> = *reader;
184
185        #[verus_spec(with Tracked(vm_io_owner))]
186        reader.skip_in_place(range.start * core::mem::size_of::<C::E>());
187
188        proof {
189            C::axiom_pte_align_divides_size();
190            let k = size_of_e / align_of_e;
191            vstd::arithmetic::div_mod::lemma_fundamental_div_mod(size_of_e, align_of_e);
192            assert(size_of_e == align_of_e * k);
193            vstd::arithmetic::mul::lemma_mul_is_commutative(align_of_e, k);
194            vstd::arithmetic::mul::lemma_mul_is_associative(range.start as int, k, align_of_e);
195            vstd::arithmetic::div_mod::lemma_mod_multiples_basic(
196                range.start as int * k,
197                align_of_e,
198            );
199            assert((range.start as int * size_of_e) % align_of_e == 0);
200            vstd::arithmetic::div_mod::lemma_mod_adds(
201                pre_skip_cursor,
202                range.start as int * size_of_e,
203                align_of_e,
204            );
205        }
206
207        let ghost post_skip_remain: int = reader.remain_spec() as int;
208        let ghost range_start: int = range.start as int;
209        let ghost range_end: int = range.end as int;
210        let n_iters: usize = range.end - range.start;
211        let mut iter_count: usize = 0;
212        let ghost mut removed_indices: vstd::set::Set<usize> = vstd::set::Set::empty();
213
214        proof {
215            C::axiom_pte_walk_fills_page();
216            C::axiom_top_level_index_range_within_nr_entries();
217            C::axiom_nr_subpage_per_huge_eq_nr_entries();
218            crate::mm::page_table::axiom_top_level_index_range_bounds::<C>();
219            vstd::arithmetic::mul::lemma_mul_is_distributive_sub_other_way(
220                size_of_e,
221                NR_ENTRIES as int,
222                range_start,
223            );
224            vstd::arithmetic::mul::lemma_mul_inequality(
225                range_end - range_start,
226                NR_ENTRIES as int - range_start,
227                size_of_e,
228            );
229            assert(post_skip_remain >= (range_end - range_start) * size_of_e);
230        }
231
232        while iter_count < n_iters
233            invariant
234                reader.inv(),
235                reader.wf(*vm_io_owner),
236                vm_io_owner.inv(),
237                vm_io_owner.read_view_initialized(),
238                regions.inv(),
239                reader.cursor.vaddr as int % align_of_e == 0,
240                size_of_e == core::mem::size_of::<C::E>() as int,
241                align_of_e == core::mem::align_of::<C::E>() as int,
242                size_of_e % align_of_e == 0,
243                align_of_e > 0,
244                size_of_e > 0,
245                iter_count <= n_iters,
246                n_iters as int == range_end - range_start,
247                // Verus loses non-negativity of `range_start` / `range_end`
248                // across the loop boundary; pin it via these invariants so
249                // `lemma_mul_nonnegative` preconditions discharge in the body.
250                0 <= range_start,
251                range_start <= range_end,
252                range_end <= NR_ENTRIES as int,
253                reader.remain_spec() as int == post_skip_remain - iter_count as int * size_of_e,
254                post_skip_remain >= (range_end - range_start) * size_of_e,
255                regions.slots.dom() == initial_dom,
256                Self::child_perms_embedding(*regions, removed_indices),
257                self.walk_coverage_from_view(initial_reader, initial_view, initial_dom),
258                self.walk_uniqueness_from_view(initial_reader, initial_view),
259                // Without this, Verus treats `self.level` as potentially
260                // mutated by `&mut self` and the level-comparison facts go
261                // missing inside walk_coverage / walk_uniqueness instances.
262                self.level == level,
263                reader.end == initial_reader.end,
264                reader.cursor.vaddr == initial_reader.cursor.vaddr + range_start * size_of_e
265                    + iter_count as int * size_of_e,
266                forall|i: usize|
267                    #![trigger initial_view.addr_transl(i)]
268                    initial_reader.cursor.vaddr <= i < initial_reader.end.vaddr ==> {
269                        &&& initial_view.addr_transl(i) is Some
270                        &&& initial_view.memory.contains_key(initial_view.addr_transl(i).unwrap().0)
271                    },
272                forall|va: usize|
273                    #![trigger vm_io_owner.read_view_of().read(va)]
274                    reader.cursor.vaddr <= va < initial_reader.end.vaddr ==> {
275                        &&& initial_view.addr_transl(va) == vm_io_owner.read_view_of().addr_transl(
276                            va,
277                        )
278                        &&& initial_view.read(va) == vm_io_owner.read_view_of().read(va)
279                    },
280                removed_indices.subset_of(initial_dom),
281                // Witness past iter for each removed idx — the discharge
282                // proof picks it up via `choose|j|` and invokes
283                // `walk_uniqueness` at (current_cursor, witness_cursor).
284                forall|idx: usize| #[trigger]
285                    removed_indices.contains(idx) ==> exists|j: int|
286                        #![trigger Self::walk_pte_at_view(
287                            initial_view,
288                            (initial_reader.cursor.vaddr
289                                + range_start * size_of_e
290                                + j * size_of_e) as usize,
291                        )]
292                        0 <= j < iter_count as int && {
293                            let cj = (initial_reader.cursor.vaddr + range_start * size_of_e + j
294                                * size_of_e) as usize;
295                            let pte_j = Self::walk_pte_at_view(initial_view, cj);
296                            &&& pte_j.is_present()
297                            &&& !pte_j.is_last(self.level)
298                            &&& idx == frame_to_index(pte_j.paddr())
299                        },
300            decreases n_iters - iter_count,
301        {
302            proof {
303                vstd::arithmetic::mul::lemma_mul_is_distributive_sub(
304                    size_of_e,
305                    range_end - range_start,
306                    iter_count as int,
307                );
308                vstd::arithmetic::mul::lemma_mul_inequality(
309                    1,
310                    range_end - range_start - iter_count as int,
311                    size_of_e,
312                );
313            }
314            let ghost cursor_pre_read: usize = reader.cursor.vaddr;
315            let ghost pre_view: crate::specs::mm::virt_mem::MemView = vm_io_owner.read_view_of();
316            proof {
317                crate::specs::mm::virt_mem::MemView::lemma_read_bytes_eq_pointwise(
318                    pre_view,
319                    initial_view,
320                    cursor_pre_read,
321                    core::mem::size_of::<C::E>(),
322                );
323            }
324            let pte = #[verus_spec(with Tracked(vm_io_owner))]
325            reader.read_once::<C::E>();
326            let pte = pte.unwrap();
327            proof {
328                ostd_pod::lemma_decode_pod_inverse::<C::E>(pte);
329                assert(pte == Self::walk_pte_at_view(initial_view, cursor_pre_read));
330                vstd::arithmetic::mul::lemma_mul_nonnegative(range_start, size_of_e);
331                vstd::arithmetic::mul::lemma_mul_nonnegative(iter_count as int, size_of_e);
332                assert(initial_reader.cursor.vaddr <= cursor_pre_read);
333            }
334            if pte.is_present() {
335                let paddr = pte.paddr();
336                if !pte.is_last(level) {
337                    proof {
338                        vstd::arithmetic::mul::lemma_mul_is_distributive_add_other_way(
339                            size_of_e,
340                            range_start,
341                            iter_count as int,
342                        );
343                        vstd::arithmetic::div_mod::lemma_mod_multiples_basic(
344                            range_start + iter_count as int,
345                            size_of_e,
346                        );
347                        Self::lemma_coverage_at(
348                            *self,
349                            initial_reader,
350                            initial_view,
351                            initial_dom,
352                            cursor_pre_read,
353                        );
354                        broadcast use crate::mm::frame::meta::mapping::lemma_frame_to_index_injective;
355
356                        assert forall|idx: usize| #[trigger]
357                            removed_indices.contains(idx) implies idx != frame_to_index(
358                            pte.paddr(),
359                        ) by {
360                            let j = choose|j: int|
361                                #![trigger Self::walk_pte_at_view(
362                                    initial_view,
363                                    (initial_reader.cursor.vaddr
364                                        + range_start * size_of_e
365                                        + j * size_of_e) as usize,
366                                )]
367                                0 <= j < iter_count as int && {
368                                    let cj = (initial_reader.cursor.vaddr + range_start * size_of_e
369                                        + j * size_of_e) as usize;
370                                    let pte_j = Self::walk_pte_at_view(initial_view, cj);
371                                    &&& pte_j.is_present()
372                                    &&& !pte_j.is_last(self.level)
373                                    &&& idx == frame_to_index(pte_j.paddr())
374                                };
375                            let cj: usize = (initial_reader.cursor.vaddr + range_start * size_of_e
376                                + j * size_of_e) as usize;
377                            let pte_j = Self::walk_pte_at_view(initial_view, cj);
378                            vstd::arithmetic::mul::lemma_mul_nonnegative(range_start, size_of_e);
379                            vstd::arithmetic::mul::lemma_mul_nonnegative(j, size_of_e);
380                            vstd::arithmetic::mul::lemma_mul_strict_inequality(
381                                j,
382                                iter_count as int,
383                                size_of_e,
384                            );
385                            vstd::arithmetic::mul::lemma_mul_is_distributive_add_other_way(
386                                size_of_e,
387                                range_start,
388                                j,
389                            );
390                            vstd::arithmetic::mul::lemma_mul_inequality(
391                                range_start + j + 1,
392                                range_end,
393                                size_of_e,
394                            );
395                            vstd::arithmetic::mul::lemma_mul_is_distributive_sub_other_way(
396                                size_of_e,
397                                range_end,
398                                range_start,
399                            );
400                            vstd::arithmetic::div_mod::lemma_mod_multiples_basic(
401                                range_start + j,
402                                size_of_e,
403                            );
404                            Self::lemma_uniqueness_at_pair(
405                                *self,
406                                initial_reader,
407                                initial_view,
408                                cursor_pre_read,
409                                cj,
410                            );
411                            pte.axiom_present_paddr_aligned();
412                            pte_j.axiom_present_paddr_aligned();
413                        };
414                        // Pinning these in SMT context lets `tracked_remove`'s
415                        // dom-containment precondition and `from_raw`'s
416                        // `from_raw_requires_safety` (via embedding) discharge.
417                        assert(regions.slots.dom().contains(frame_to_index(paddr)));
418                    }
419                    proof {
420                        removed_indices = removed_indices.insert(frame_to_index(paddr));
421                        assert({
422                            let cj = (initial_reader.cursor.vaddr + range_start * size_of_e
423                                + iter_count as int * size_of_e) as usize;
424                            let pte_j = Self::walk_pte_at_view(initial_view, cj);
425                            &&& cj == cursor_pre_read
426                            &&& pte_j == pte
427                            &&& pte_j.is_present()
428                            &&& !pte_j.is_last(self.level)
429                            &&& frame_to_index(paddr) == frame_to_index(pte_j.paddr())
430                        });
431                    }
432                    proof_decl! {
433                        let tracked from_raw_obl: vstd_extra::drop_tracking::DropObligation<usize>;
434                    }
435                    let frame = unsafe {
436                        #[verus_spec(with Tracked(regions) => Tracked(from_raw_obl))]
437                        Frame::<Self>::from_raw(paddr)
438                    };
439                    // `from_raw` minted the obligation; `frame.drop`
440                    // consumes it directly. No redeem dance needed.
441                    VerifiedDrop::drop(frame, Tracked(regions), Tracked(from_raw_obl));
442                } else {
443                    // SAFETY: The PTE points to a mapped item. The ownership
444                    // of the item is transferred here then dropped.
445                    let _item = unsafe { C::item_from_raw(paddr, level, pte.prop()) };
446                }
447            }
448            proof {
449                vstd::arithmetic::div_mod::lemma_mod_adds(
450                    reader.cursor.vaddr as int - size_of_e,
451                    size_of_e,
452                    align_of_e,
453                );
454            }
455            let ghost iter_count_old: int = iter_count as int;
456            iter_count = iter_count + 1;
457            proof {
458                vstd::arithmetic::mul::lemma_mul_is_distributive_add_other_way(
459                    size_of_e,
460                    iter_count_old,
461                    1,
462                );
463            }
464        }
465    }
466
467    fn is_untyped(&self) -> bool {
468        false
469    }
470
471    uninterp spec fn vtable_ptr(&self) -> usize;
472}
473
474#[verus_verify]
475impl<C: PageTableConfig> PageTableNode<C> {
476    /// Gets the level of a page table node.
477    /// # Verified Properties
478    /// ## Preconditions
479    /// - The node must be well-formed, and the caller must provide a permission token for its metadata.
480    /// ## Postconditions
481    /// - Returns the level of the node.
482    /// ## Safety
483    /// - We require the caller to provide a permission token to ensure that this function is only called on a valid page table node.
484    #[verus_spec(
485        with Tracked(perm) : Tracked<&PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>
486    )]
487    pub(super) fn level(&self) -> PagingLevel
488        requires
489            self.ptr.addr() == perm.addr(),
490            self.ptr.addr() == perm.points_to.addr(),
491            perm.is_init(),
492            perm.wf(&perm.inner_perms),
493        returns
494            perm.value().metadata.level,
495    {
496        #[verus_spec(with Tracked(perm))]
497        let meta = self.meta();
498        meta.level
499    }
500
501    /// Allocates a new empty page table node.
502    #[verus_spec(res =>
503        with Tracked(parent_owner): Tracked<&mut NodeOwner<C>>,
504            Tracked(regions): Tracked<&mut MetaRegionOwners>,
505            Tracked(guards): Tracked<&Guards<'rcu>>,
506            Ghost(idx): Ghost<usize>,
507        -> owner: Tracked<OwnerSubtree<C>>
508        requires
509            1 <= level < NR_LEVELS,
510            idx < NR_ENTRIES,
511            old(regions).inv(),
512            old(parent_owner).inv(),
513        ensures
514            final(regions).inv(),
515            final(parent_owner).inv(),
516            allocated_empty_node_owner(owner@, level),
517            allocated_empty_node_grandchildren_none(owner@),
518            res.ptr.addr() == owner@.value.node.unwrap().meta_addr_self(),
519            guards.unlocked(owner@.value.node.unwrap().meta_addr_self()),
520            MetaSlot::get_from_unused_spec(meta_to_frame(owner@.value.node.unwrap().meta_addr_self()), false, *old(regions), *final(regions)),
521            MetaSlot::slot_perm_reparked_spec(meta_to_frame(owner@.value.node.unwrap().meta_addr_self()), *old(regions), *final(regions)),
522
523            final(regions).frame_obligations =~= old(regions).frame_obligations.insert(
524                frame_to_index(meta_to_frame(owner@.value.node.unwrap().meta_addr_self()))),
525            old(regions).slots.contains_key(frame_to_index(meta_to_frame(owner@.value.node.unwrap().meta_addr_self()))),
526
527            !crate::specs::mm::frame::meta_owners::is_mmio_paddr(
528                meta_to_frame(owner@.value.node.unwrap().meta_addr_self())),
529            owner@.value.metaregion_sound(*final(regions)),
530            forall|i: usize|
531                #[trigger] old(regions).slot_owners[i].inner_perms.ref_count.value() != REF_COUNT_UNUSED
532                ==> i != frame_to_index(meta_to_frame(owner@.value.node.unwrap().meta_addr_self())),
533            owner@.value.match_pte(C::E::new_pt_spec(meta_to_frame(owner@.value.node.unwrap().meta_addr_self())), level as PagingLevel),
534            *final(parent_owner) == old(parent_owner).set_children_perm(idx, C::E::new_pt_spec(meta_to_frame(owner@.value.node.unwrap().meta_addr_self()))),
535            final(regions).slots.contains_key(owner@.value.node.unwrap().slot_index),
536            owner@.value.node.unwrap().metaregion_sound_node(*final(regions)),
537    )]
538    #[verifier::external_body]
539    pub fn alloc<'rcu>(level: PagingLevel) -> Self {
540        let tracked entry_owner = EntryOwner::tracked_new_absent(
541            TreePath::new(Seq::empty()),
542            level,
543        );
544
545        let tracked mut owner = OwnerSubtree::<C>::new_val_tracked(entry_owner, level as nat);
546        let meta = PageTablePageMeta::new(level);
547        let mut frame = FrameAllocOptions::new();
548        frame.zeroed(true);
549        let allocated_frame = frame.alloc_frame_with(meta).expect(
550            "Failed to allocate a page table node",
551        );
552        // The allocated frame is zeroed. Make sure zero is absent PTE.
553        //debug_assert_eq!(C::E::new_absent().as_usize(), 0);
554
555        proof_with!(|= Tracked(owner));
556
557        allocated_frame
558    }/*
559    /// Activates the page table assuming it is a root page table.
560    ///
561    /// Here we ensure not dropping an active page table by making a
562    /// processor a page table owner. When activating a page table, the
563    /// reference count of the last activated page table is decremented.
564    /// And that of the current page table is incremented.
565    ///
566    /// # Safety
567    ///
568    /// The caller must ensure that the page table to be activated has
569    /// proper mappings for the kernel and has the correct const parameters
570    /// matching the current CPU.
571    ///
572    /// # Panics
573    ///
574    /// Only top-level page tables can be activated using this function.
575    pub(crate) unsafe fn activate(&self) {
576        use crate::{
577            arch::mm::{activate_page_table, current_page_table_paddr},
578            mm::page_prop::CachePolicy,
579        };
580
581        #[cfg(feature = "allow_panic")]
582        assert_eq!(self.level(), C::NR_LEVELS());
583
584        let last_activated_paddr = current_page_table_paddr();
585        if last_activated_paddr == self.start_paddr() {
586            return;
587        }
588
589        // SAFETY: The safety is upheld by the caller.
590        unsafe { activate_page_table(self.clone().into_raw(), CachePolicy::Writeback) };
591
592        // Restore and drop the last activated page table.
593        // SAFETY: The physical address is valid and points to a forgotten page table node.
594        drop(unsafe { Self::from_raw(last_activated_paddr) });
595    }
596
597    /// Activates the (root) page table assuming it is the first activation.
598    ///
599    /// It will not try dropping the last activate page table. It is the same
600    /// with [`Self::activate()`] in other senses.
601    pub(super) unsafe fn first_activate(&self) {
602        use crate::{arch::mm::activate_page_table, mm::page_prop::CachePolicy};
603
604        // SAFETY: The safety is upheld by the caller.
605        unsafe { activate_page_table(self.clone().into_raw(), CachePolicy::Writeback) };
606    }*/
607
608}
609
610#[verus_verify]
611impl<'a, C: PageTableConfig> PageTableNodeRef<'a, C> {
612    pub open spec fn locks_preserved_except<'rcu>(
613        addr: usize,
614        guards0: Guards<'rcu>,
615        guards1: Guards<'rcu>,
616    ) -> bool {
617        &&& OwnerSubtree::implies(
618            CursorOwner::<'rcu, C>::node_unlocked(guards0),
619            CursorOwner::<'rcu, C>::node_unlocked_except(guards1, addr),
620        )
621        &&& forall|i: usize| guards0.lock_held(i) ==> guards1.lock_held(i)
622        &&& forall|i: usize| guards0.unlocked(i) && i != addr ==> guards1.unlocked(i)
623    }
624
625    /// Locks the page table node.
626    ///
627    /// An atomic mode guard is required to
628    ///  1. prevent deadlocks;
629    ///  2. provide a lifetime (`'rcu`) that the nodes are guaranteed to outlive.
630    /// # Verification Design
631    /// As of when we verified this library, we didn't have a spin lock implementation, so we axiomatize
632    /// what happens when it's successful.
633    #[verifier::external_body]
634    #[verus_spec(res =>
635        with Tracked(owner): Tracked<&NodeOwner<C>>,
636            Tracked(guards): Tracked<&mut Guards<'rcu>>
637        requires
638            self.inner@.invariants(*owner),
639            old(guards).unlocked(owner.meta_addr_self()),
640        ensures
641            final(guards).lock_held(owner.meta_addr_self()),
642            Self::locks_preserved_except(owner.meta_addr_self(), *old(guards), *final(guards)),
643            owner.relate_guard(res),
644    )]
645    pub fn lock<'rcu, A: InAtomicMode>(self, _guard: &'rcu A) -> PageTableGuard<'rcu, C> where
646        'a: 'rcu,
647     {
648        unimplemented!()
649    }
650
651    /// Creates a new [`PageTableGuard`] without checking if the page table lock is held.
652    ///
653    /// # Safety
654    ///
655    /// This function must be called if this task logically holds the lock.
656    ///
657    /// Calling this function when a guard is already created is undefined behavior
658    /// unless that guard was already forgotten.
659    #[verus_spec(res =>
660        with Tracked(owner): Tracked<&NodeOwner<C>>,
661            Tracked(guards): Tracked<&mut Guards<'rcu>>
662        requires
663            self.inner@.invariants(*owner),
664            old(guards).unlocked(owner.meta_addr_self()),
665        ensures
666            final(guards).lock_held(owner.meta_addr_self()),
667            Self::locks_preserved_except(owner.meta_addr_self(), *old(guards), *final(guards)),
668            owner.relate_guard(res),
669    )]
670    pub unsafe fn make_guard_unchecked<'rcu, A: InAtomicMode>(
671        self,
672        _guard: &'rcu A,
673    ) -> PageTableGuard<'rcu, C> where 'a: 'rcu {
674        let guard = PageTableGuard { inner: self };
675
676        proof {
677            let ghost guards0 = *guards;
678            guards.guards = guards.guards.insert(owner.meta_addr_self());
679            assert(owner.relate_guard(guard));
680
681            assert(forall|other: EntryOwner<C>, path: TreePath<NR_ENTRIES>|
682                owner.inv() && CursorOwner::node_unlocked(guards0)(other, path)
683                    ==> #[trigger] CursorOwner::node_unlocked_except(
684                    *guards,
685                    owner.meta_addr_self(),
686                )(other, path));
687        }
688
689        guard
690    }
691}
692
693impl<'rcu, C: PageTableConfig> PageTableGuard<'rcu, C> {
694    /// Borrows an entry in the node at a given index.
695    ///
696    /// # Panics
697    ///
698    /// Panics if the index is not within the bound of
699    /// [`nr_subpage_per_huge<C>`].
700    #[verus_spec(res =>
701        with Tracked(owner): Tracked<&NodeOwner<C>>,
702            Tracked(child_owner): Tracked<&EntryOwner<C>>,
703            Tracked(regions): Tracked<&MetaRegionOwners>,
704    )]
705    pub fn entry<'a>(&'a mut self, idx: usize) -> (res: Entry<'a, 'rcu, C>)
706        requires
707            owner.inv(),
708            !child_owner.in_scope,
709            child_owner.inv(),
710            owner.relate_guard(*old(self)),
711            child_owner.match_pte(
712                owner.children_perm.value()[idx as int],
713                child_owner.parent_level,
714            ),
715            regions.inv(),
716            regions.slots.contains_key(owner.slot_index),
717            // Panic condition
718            idx < NR_ENTRIES,
719        ensures
720            res.wf(*child_owner),
721            res.idx == idx,
722            owner.relate_guard(*res.node),
723            *final(self) == *old(self),
724    {
725        #[cfg(feature = "allow_panic")]
726        assert!(idx < nr_subpage_per_huge::<C>());
727        // SAFETY: The index is within the bound. `*self` is unchanged because
728        // Entry::new_at's `*res.node == *old(guard)` ensures says the wrapped
729        // node equals the input guard's value, and the reborrow makes
730        // `*final(self) == *res.node`.
731        unsafe {
732            #[verus_spec(with Tracked(child_owner), Tracked(owner), Tracked(regions))]
733            Entry::new_at(self, idx)
734        }
735    }
736
737    /// Gets the number of valid PTEs in a page table node.
738    /// # Verified Properties
739    /// ## Preconditions
740    /// - The node must be well-formed.
741    /// ## Postconditions
742    /// - Returns the number of valid PTEs in the node.
743    /// ## Safety
744    /// - We require the caller to provide a permission token to ensure that this function is only called on a valid page table node.
745    #[verus_spec(
746        with Tracked(owner) : Tracked<&NodeOwner<C>>,
747            Tracked(regions): Tracked<&MetaRegionOwners>,
748    )]
749    pub fn nr_children(&self) -> (nr: u16)
750        requires
751            self.inner.inner@.invariants(*owner),
752            regions.inv(),
753            owner.metaregion_sound_node(*regions),
754        returns
755            owner.meta_own.nr_children.value(),
756    {
757        let tracked owner_meta_perm = regions.borrow_typed_perm::<PageTablePageMeta<C>>(
758            owner.slot_index,
759        );
760        #[verus_spec(with Tracked(owner_meta_perm))]
761        let meta = self.meta();
762
763        *meta.nr_children.borrow(Tracked(&owner.meta_own.nr_children))
764    }
765
766    /// Returns if the page table node is detached from its parent.
767    #[verus_spec(
768        with Tracked(meta_perm): Tracked<&'a PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>
769    )]
770    pub(super) fn stray_mut<'a>(&'a mut self) -> (res: &'a pcell_maybe_uninit::PCell<bool>)
771        requires
772            old(self).inner.inner@.ptr.addr() == meta_perm.addr(),
773            old(self).inner.inner@.ptr.addr() == meta_perm.points_to.addr(),
774            meta_perm.is_init(),
775            meta_perm.wf(&meta_perm.inner_perms),
776        ensures
777            res.id() == meta_perm.value().metadata.stray.id(),
778            *final(self) == *old(self),
779    {
780        // SAFETY: The lock is held so we have an exclusive access.
781        #[verus_spec(with Tracked(meta_perm))]
782        let meta = self.meta();
783        &meta.stray
784    }
785
786    /// Reads a non-owning PTE at the given index.
787    ///
788    /// A non-owning PTE means that it does not account for a reference count
789    /// of the a page if the PTE points to a page. The original PTE still owns
790    /// the child page.
791    ///
792    /// # Safety
793    ///
794    /// The caller must ensure that the index is within the bound.
795    #[verus_spec(
796        with Tracked(owner): Tracked<&NodeOwner<C>>,
797            Tracked(regions): Tracked<&MetaRegionOwners>,
798    )]
799    pub unsafe fn read_pte(&self, idx: usize) -> (pte: C::E)
800        requires
801            self.inner.inner@.invariants(*owner),
802            regions.inv(),
803            regions.slots.contains_key(owner.slot_index),
804            idx < NR_ENTRIES,
805        ensures
806            pte == owner.children_perm.value()[idx as int],
807    {
808        // debug_assert!(idx < nr_subpage_per_huge::<C>());
809        let tracked owner_slot_perm = regions.slots.tracked_borrow(owner.slot_index);
810        let ptr = vstd_extra::array_ptr::ArrayPtr::<C::E, NR_ENTRIES>::from_addr(
811            paddr_to_vaddr(
812                #[verus_spec(with Tracked(owner_slot_perm))]
813                self.start_paddr(),
814            ),
815        );
816
817        // SAFETY:
818        // - The page table node is alive. The index is inside the bound, so the page table entry is valid.
819        // - All page table entries are aligned and accessed with atomic operations only.
820        unsafe {
821            #[verus_spec(with Tracked(&owner.children_perm))]
822            load_pte(ptr.add(idx), Ordering::Relaxed)
823        }
824    }
825
826    /// Writes a page table entry at a given index.
827    ///
828    /// This operation will leak the old child if the old PTE is present.
829    ///
830    /// # Safety
831    ///
832    /// The caller must ensure that:
833    ///  1. The index must be within the bound;
834    ///  2. The PTE must represent a valid [`Child`] whose level is compatible
835    ///     with the page table node.
836    ///  3. The page table node will have the ownership of the [`Child`]
837    ///     after this method.
838    #[verus_spec(
839        with Tracked(owner): Tracked<&mut NodeOwner<C>>,
840            Tracked(regions): Tracked<&MetaRegionOwners>,
841    )]
842    pub unsafe fn write_pte(&mut self, idx: usize, pte: C::E)
843        requires
844            old(self).inner.inner@.invariants(*old(owner)),
845            regions.inv(),
846            regions.slots.contains_key(old(owner).slot_index),
847            idx < NR_ENTRIES,
848        ensures
849            final(owner).inv(),
850            final(owner).level == old(owner).level,
851            final(owner).meta_own == old(owner).meta_own,
852            final(owner).slot_index == old(owner).slot_index,
853            final(owner).children_perm.value() == old(owner).children_perm.value().update(
854                idx as int,
855                pte,
856            ),
857            *final(self) == *old(self),
858    {
859        // debug_assert!(idx < nr_subpage_per_huge::<C>());
860        let tracked owner_slot_perm = regions.slots.tracked_borrow(owner.slot_index);
861        #[verusfmt::skip]
862        let ptr = vstd_extra::array_ptr::ArrayPtr::<C::E, NR_ENTRIES>::from_addr(
863            paddr_to_vaddr(
864                #[verus_spec(with Tracked(owner_slot_perm))]
865                self.start_paddr()
866            ),
867        );
868
869        // SAFETY:
870        // - The page table node is alive. The index is inside the bound, so the page table entry is valid.
871        // - All page table entries are aligned and accessed with atomic operations only.
872        unsafe {
873            #[verus_spec(with Tracked(&mut owner.children_perm))]
874            store_pte(ptr.add(idx), pte, Ordering::Release)
875        }
876    }
877
878    /// Gets the mutable reference to the number of valid PTEs in the node.
879    #[verus_spec(
880        with Tracked(meta_perm): Tracked<&'a PointsTo<MetaSlot, Metadata<PageTablePageMeta<C>>>>
881    )]
882    fn nr_children_mut<'a>(&'a mut self) -> (res: &'a pcell_maybe_uninit::PCell<u16>)
883        requires
884            old(self).inner.inner@.ptr.addr() == meta_perm.addr(),
885            old(self).inner.inner@.ptr.addr() == meta_perm.points_to.addr(),
886            meta_perm.is_init(),
887            meta_perm.wf(&meta_perm.inner_perms),
888        ensures
889            res.id() == meta_perm.value().metadata.nr_children.id(),
890            *final(self) == *old(self),
891    {
892        // SAFETY: The lock is held so we have an exclusive access.
893        #[verus_spec(with Tracked(meta_perm))]
894        let meta = self.meta();
895        &meta.nr_children
896    }
897}
898
899/*impl<C: PageTableConfig> Drop for PageTableGuard<'_, C> {
900    fn drop(&mut self) {
901        self.inner.meta().lock.store(0, Ordering::Release);
902    }
903}*/
904
905impl<C: PageTableConfig> PageTablePageMeta<C> {
906    pub fn new(level: PagingLevel) -> Self {
907        Self {
908            nr_children: pcell_maybe_uninit::PCell::new(0).0,
909            stray: pcell_maybe_uninit::PCell::new(false).0,
910            level,
911            lock: PAtomicU8::new(0).0,
912            _phantom: PhantomData,
913        }
914    }
915
916    /// The PTE value that `read_once::<C::E>` would produce at cursor `c`
917    /// against the given memory view. Linked to `read_once` via
918    /// `pod_bytes(v) == read_view.read_bytes(...)` (strengthened ensures)
919    /// + [`lemma_decode_pod_inverse`].
920    pub open spec fn walk_pte_at_view(view: crate::specs::mm::virt_mem::MemView, c: usize) -> C::E {
921        ostd_pod::decode_pod::<C::E>(view.read_bytes(c, core::mem::size_of::<C::E>()))
922    }
923
924    /// Single-cursor projection of [`walk_coverage_from_view`]. Extracting
925    /// the forall body to a named predicate lets the body invoke
926    /// [`lemma_coverage_at`] for one specific `c` instead of relying on
927    /// auto-trigger matching across the loop invariant's `forall|c|`.
928    pub open spec fn walk_coverage_at(
929        self,
930        view: crate::specs::mm::virt_mem::MemView,
931        dom: vstd::set::Set<usize>,
932        c: usize,
933    ) -> bool {
934        let pte = Self::walk_pte_at_view(view, c);
935        pte.is_present() && !pte.is_last(self.level) ==> dom.contains(frame_to_index(pte.paddr()))
936    }
937
938    /// Instantiate [`walk_coverage_from_view`]'s forall at one cursor.
939    pub proof fn lemma_coverage_at(
940        self,
941        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
942        view: crate::specs::mm::virt_mem::MemView,
943        dom: vstd::set::Set<usize>,
944        c: usize,
945    )
946        requires
947            self.walk_coverage_from_view(reader, view, dom),
948            reader.cursor.vaddr <= c,
949            c + core::mem::size_of::<C::E>() <= reader.cursor.vaddr + reader.remain_spec(),
950            (c - reader.cursor.vaddr) % core::mem::size_of::<C::E>() as int == 0,
951        ensures
952            self.walk_coverage_at(view, dom, c),
953    {
954        assert(Self::walk_pte_at_view(view, c) == Self::walk_pte_at_view(view, c));
955    }
956
957    /// Instantiate [`walk_uniqueness_from_view`]'s forall at one cursor pair.
958    pub proof fn lemma_uniqueness_at_pair(
959        self,
960        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
961        view: crate::specs::mm::virt_mem::MemView,
962        c1: usize,
963        c2: usize,
964    )
965        requires
966            self.walk_uniqueness_from_view(reader, view),
967            reader.cursor.vaddr <= c1,
968            c1 + core::mem::size_of::<C::E>() <= reader.cursor.vaddr + reader.remain_spec(),
969            (c1 - reader.cursor.vaddr) % core::mem::size_of::<C::E>() as int == 0,
970            reader.cursor.vaddr <= c2,
971            c2 + core::mem::size_of::<C::E>() <= reader.cursor.vaddr + reader.remain_spec(),
972            (c2 - reader.cursor.vaddr) % core::mem::size_of::<C::E>() as int == 0,
973            c1 != c2,
974            Self::walk_pte_at_view(view, c1).is_present(),
975            !Self::walk_pte_at_view(view, c1).is_last(self.level),
976            Self::walk_pte_at_view(view, c2).is_present(),
977            !Self::walk_pte_at_view(view, c2).is_last(self.level),
978        ensures
979            Self::walk_pte_at_view(view, c1).paddr() != Self::walk_pte_at_view(view, c2).paddr(),
980    {
981    }
982
983    /// Caller-side dom-membership obligation: every present non-last PTE
984    /// position in the walk (over `view`) has its child-frame index in
985    /// `dom`. Phrased over a frozen `(view, dom)` pair so the body can
986    /// carry it as a loop invariant against an entry-state snapshot
987    /// while `vm_io_owner` advances per iteration.
988    pub open spec fn walk_coverage_from_view(
989        self,
990        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
991        view: crate::specs::mm::virt_mem::MemView,
992        dom: vstd::set::Set<usize>,
993    ) -> bool {
994        forall|c: usize|
995            #![trigger Self::walk_pte_at_view(view, c)]
996            reader.cursor.vaddr <= c && c + core::mem::size_of::<C::E>() <= reader.cursor.vaddr
997                + reader.remain_spec() && (c - reader.cursor.vaddr) % core::mem::size_of::<
998                C::E,
999            >() as int == 0 ==> {
1000                let pte = Self::walk_pte_at_view(view, c);
1001                pte.is_present() && !pte.is_last(self.level) ==> dom.contains(
1002                    frame_to_index(pte.paddr()),
1003                )
1004            }
1005    }
1006
1007    /// Caller-side uniqueness obligation: distinct cursor positions with
1008    /// present non-last PTEs (in `view`) map to distinct paddrs.
1009    pub open spec fn walk_uniqueness_from_view(
1010        self,
1011        reader: crate::mm::VmReader<'_, crate::mm::Infallible>,
1012        view: crate::specs::mm::virt_mem::MemView,
1013    ) -> bool {
1014        forall|c1: usize, c2: usize|
1015            #![trigger Self::walk_pte_at_view(view, c1), Self::walk_pte_at_view(view, c2)]
1016            reader.cursor.vaddr <= c1 && c1 + core::mem::size_of::<C::E>() <= reader.cursor.vaddr
1017                + reader.remain_spec() && (c1 - reader.cursor.vaddr) % core::mem::size_of::<
1018                C::E,
1019            >() as int == 0 && reader.cursor.vaddr <= c2 && c2 + core::mem::size_of::<C::E>()
1020                <= reader.cursor.vaddr + reader.remain_spec() && (c2 - reader.cursor.vaddr)
1021                % core::mem::size_of::<C::E>() as int == 0 && c1 != c2 ==> {
1022                let pte1 = Self::walk_pte_at_view(view, c1);
1023                let pte2 = Self::walk_pte_at_view(view, c2);
1024                pte1.is_present() && !pte1.is_last(self.level) && pte2.is_present()
1025                    && !pte2.is_last(self.level) ==> pte1.paddr() != pte2.paddr()
1026            }
1027    }
1028
1029    /// Caller-side shape obligation: every paddr in `child_perms.dom()`
1030    /// has a slot perm matching the shape `from_raw` + `VerifiedDrop::drop`
1031    /// expect (init, alignment, refcount within bounds, last-reference
1032    /// shape when refcount == 1).
1033    pub open spec fn child_perms_embedding(
1034        regions: crate::specs::mm::frame::meta_region_owners::MetaRegionOwners,
1035        excluded: vstd::set::Set<usize>,
1036    ) -> bool {
1037        forall|paddr: crate::mm::Paddr|
1038            #![trigger regions.slot_owners[frame_to_index(paddr)]]
1039            regions.slots.dom().contains(frame_to_index(paddr)) && !excluded.contains(
1040                frame_to_index(paddr),
1041            ) ==> {
1042                let idx = frame_to_index(paddr);
1043                let so = regions.slot_owners[idx];
1044                &&& <Frame<Self>>::from_raw_requires_safety(
1045                    regions,
1046                    paddr,
1047                )
1048                // Borrow-protocol transition: `raw_count` is dormant.
1049                &&& so.inner_perms.ref_count.value() > 0
1050                &&& so.inner_perms.ref_count.value()
1051                    != crate::specs::mm::frame::meta_owners::REF_COUNT_UNUSED
1052                &&& so.inner_perms.ref_count.value()
1053                    <= crate::specs::mm::frame::meta_owners::REF_COUNT_MAX
1054                &&& so.inner_perms.ref_count.value() == 1 ==> {
1055                    &&& so.inner_perms.storage.is_init()
1056                    &&& so.inner_perms.in_list.value() == 0
1057                    &&& so.paths_in_pt.is_empty()
1058                }
1059                // Borrow-protocol redesign: in steady state between
1060                // `into_pte`'s consume and `on_drop`'s `from_raw`-mint,
1061                // the per-child `frame_obligations` count is 0.
1062
1063            }
1064    }
1065}
1066
1067} // verus!