Skip to main content

ostd/mm/page_table/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2use vstd::arithmetic::power2::*;
3use vstd::atomic::PermissionU64;
4use vstd::prelude::*;
5use vstd::simple_pptr;
6use vstd::std_specs::clone::*;
7
8use vstd_extra::prelude::{lemma_usize_ilog2_ordered, lemma_usize_pow2_ilog2};
9
10use core::{
11    fmt::Debug,
12    intrinsics::transmute_unchecked,
13    ops::{Range, RangeInclusive},
14    sync::atomic::{AtomicUsize, Ordering},
15};
16
17use crate::mm::frame::meta::MetaSlot;
18
19use super::{
20    Paddr, PagingConstsTrait, PagingLevel, Vaddr, io::PodOnce, kspace::KernelPtConfig,
21    lemma_nr_subpage_per_huge_bounded, nr_subpage_per_huge, page_prop::PageProperty,
22    vm_space::UserPtConfig,
23};
24
25use crate::Pod;
26use crate::specs::mm::page_table::*;
27
28use crate::specs::arch::mm::*;
29use crate::specs::arch::paging_consts::PagingConsts;
30use crate::specs::mm::page_table::cursor::*;
31use crate::specs::task::InAtomicMode;
32
33use crate::mm::frame::meta::mapping::frame_to_index;
34use crate::mm::kspace::kvirt_area::disable_preempt;
35use crate::specs::arch::PageTableEntry;
36use crate::specs::mm::frame::meta_owners::MetaPerm;
37use crate::specs::mm::frame::meta_region_owners::MetaRegionOwners;
38use vstd_extra::ownership::Inv;
39use vstd_extra::panic::may_panic;
40
41mod node;
42pub use node::*;
43mod cursor;
44
45pub(crate) use cursor::*;
46
47#[cfg(ktest)]
48mod test;
49
50//pub(crate) mod boot_pt;
51
52verus! {
53
54#[derive(Clone, Copy, PartialEq, Eq, Debug)]
55pub enum PageTableError {
56    /// The provided virtual address range is invalid.
57    InvalidVaddrRange(Vaddr, Vaddr),
58    /// The provided virtual address is invalid.
59    InvalidVaddr(Vaddr),
60    /// Using virtual address not aligned.
61    UnalignedVaddr,
62}
63
64pub trait RCClone: Sized {
65    spec fn clone_requires(self, perm: MetaRegionOwners) -> bool;
66
67    spec fn clone_ensures(
68        self,
69        old_perm: MetaRegionOwners,
70        new_perm: MetaRegionOwners,
71        res: Self,
72    ) -> bool;
73
74    fn clone(&self, Tracked(perm): Tracked<&mut MetaRegionOwners>) -> (res: Self)
75        requires
76            self.clone_requires(*old(perm)),
77        ensures
78            res == *self,
79            self.clone_ensures(*old(perm), *final(perm), res),
80            final(perm).inv(),
81            final(perm).slots =~= old(perm).slots,
82            final(perm).slot_owners.dom() =~= old(
83                perm,
84            ).slot_owners.dom(),
85    // Linear-drop pilot: `RCClone::clone` doesn't mint/redeem
86    // segment obligations. The per-frame `frame_obligations` effect
87    // is left to each impl's `clone_ensures` — canonically a clone
88    // creates a fresh live value, so `Frame::clone` MINTS one entry
89    // (`.insert(idx)`); ref-count-only clones (`Segment`) stay
90    // net-zero. Hardcoding `=~= old` here would forbid the mint.
91
92    ;
93}
94
95/// The configurations of a page table.
96///
97/// It abstracts away both the usage and the architecture specifics from the
98/// general page table implementation. For examples:
99///  - the managed virtual address range;
100///  - the trackedness of physical mappings;
101///  - the PTE layout;
102///  - the number of page table levels, etc.
103///
104/// # Safety
105///
106/// The implementor must ensure that the `item_into_raw` and `item_from_raw`
107/// are implemented correctly so that:
108///  - `item_into_raw` consumes the ownership of the item;
109///  - if the provided raw form matches the item that was consumed by
110///    `item_into_raw`, `item_from_raw` restores the exact item that was
111///    consumed by `item_into_raw`.
112pub unsafe trait PageTableConfig: Clone + Debug + Send + Sync + 'static {
113    /// The index range at the top level (`C::NR_LEVELS()`) page table.
114    ///
115    /// When configured with this value, the [`PageTable`] instance will only
116    /// be allowed to manage the virtual address range that is covered by
117    /// this range. The range can be smaller than the actual allowed range
118    /// specified by the hardware MMU (limited by `C::ADDRESS_WIDTH`).
119    spec fn TOP_LEVEL_INDEX_RANGE_spec() -> Range<usize>;
120
121    /// The leading bits `[48, 64)` of every virtual address managed by this
122    /// config.
123    ///
124    /// Concretely, a mapping `m` in this page table has
125    /// `m.va_range.start / 2^48 == LEADING_BITS_spec()`. For non-sign-extended
126    /// configurations (e.g. `UserPtConfig`) this is `0`. For x86-64 kernel
127    /// PT it is `0xffff` (sign-extended high half). The type is wide enough
128    /// to carry arbitrary bit patterns, so the model can accommodate future
129    /// configurations that place their managed range at a non-canonical
130    /// fixed offset.
131    ///
132    /// Combined with `TOP_LEVEL_INDEX_RANGE_spec`, this fully determines
133    /// the managed VA range, computed as
134    /// [`vaddr_range_bounds_spec::<Self>`]. Callers that previously used
135    /// `VADDR_RANGE_spec()` should use `vaddr_range_bounds_spec::<C>()`
136    /// directly — the inclusive `(start, end_inclusive)` form avoids the
137    /// `end == usize::MAX + 1` overflow that plagues `Range<Vaddr>` for
138    /// sign-extended kernel configurations.
139    open spec fn LEADING_BITS_spec() -> usize {
140        0
141    }
142
143    fn TOP_LEVEL_INDEX_RANGE() -> (r: Range<usize>)
144        ensures
145            r == Self::TOP_LEVEL_INDEX_RANGE_spec(),
146    ;
147
148    /// If we can remove the top-level page table entries.
149    ///
150    /// This is for the kernel page table, whose second-top-level page
151    /// tables need `'static` lifetime to be shared with user page tables.
152    /// Other page tables do not need to set this to `false`.
153    open spec fn TOP_LEVEL_CAN_UNMAP_spec() -> bool {
154        true
155    }
156
157    fn TOP_LEVEL_CAN_UNMAP() -> (b: bool)
158        ensures
159            b == Self::TOP_LEVEL_CAN_UNMAP_spec(),
160    ;
161
162    /// Upper bound on `locked_range().end as int` for cursors of this config.
163    ///
164    /// May be tighter than the structural `vaddr_range_bounds_spec().1 + 1`
165    /// when the actual sources of cursor ranges (e.g. the kvirt allocator
166    /// for `KernelPtConfig`) draw from a sub-window of the configured VA
167    /// range. `KernelPtConfig` overrides this to `FRAME_METADATA_BASE_VADDR`,
168    /// which the `kvirt_alloc_range_bounds` axiom enforces. This bound is
169    /// what allows the cursor's `move_forward` proof to discharge
170    /// `prefix.idx[NR_LEVELS - 1] + 1 < NR_ENTRIES` at the top-level
171    /// boundary — the structural bound only gives `<= NR_ENTRIES` for
172    /// configurations whose `TOP_LEVEL_INDEX_RANGE.end == NR_ENTRIES`.
173    ///
174    /// Default: `usize::MAX + 1` (no tightening over the structural bound).
175    open spec fn LOCKED_END_BOUND_spec() -> int {
176        0x1_0000_0000_0000_0000int
177    }
178
179    /// The type of the page table entry.
180    type E: PageTableEntryTrait;
181
182    /// The paging constants.
183    type C: PagingConstsTrait;
184
185    proof fn axiom_nr_subpage_per_huge_eq_nr_entries()
186        ensures
187            Self::C::BASE_PAGE_SIZE() / Self::C::PTE_SIZE() == NR_ENTRIES,
188    ;
189
190    /// Layout identity: the PTE type's Rust `size_of` matches the config's
191    /// `PTE_SIZE_spec`. Concrete impls satisfy this via their `global
192    /// layout` declaration. Exposed for generic code that calls
193    /// `core::mem::size_of::<Self::E>()`.
194    proof fn axiom_pte_size_eq_size_of()
195        ensures
196            core::mem::size_of::<Self::E>() == Self::C::PTE_SIZE_spec(),
197    ;
198
199    /// A full PT-node's worth of PTEs fills exactly one base page.
200    /// `NR_ENTRIES * size_of::<E>() == PAGE_SIZE`. Bundles the
201    /// `pow2-divides-pow2 ⇒ mul-equals-div` arithmetic Verus doesn't
202    /// auto-derive from `axiom_nr_subpage_per_huge` + `axiom_pte_size`.
203    proof fn axiom_pte_walk_fills_page()
204        ensures
205            NR_ENTRIES * core::mem::size_of::<Self::E>() == crate::specs::arch::mm::PAGE_SIZE,
206    ;
207
208    /// The top-level index range fits within a single PT-node. Concretely
209    /// `0..256` (UserPtConfig) or `256..512` (KernelPtConfig); both have
210    /// `end <= NR_ENTRIES`. Used by PT-node `on_drop` to bound
211    /// `range.start * size_of::<C::E>() <= PAGE_SIZE`.
212    proof fn axiom_top_level_index_range_within_nr_entries()
213        ensures
214            Self::TOP_LEVEL_INDEX_RANGE_spec().end <= NR_ENTRIES,
215    ;
216
217    /// `align_of::<E>()` divides `size_of::<E>()`. True for any sized Rust
218    /// type (the alignment divides the size by the layout rules), but
219    /// Verus's `size_of`/`align_of` are uninterpreted so we expose it as
220    /// an axiom. Used by PT-node `on_drop` to prove cursor alignment is
221    /// preserved across `read_once` iterations.
222    proof fn axiom_pte_align_divides_size()
223        ensures
224            core::mem::size_of::<Self::E>() % core::mem::align_of::<Self::E>() == 0,
225            core::mem::align_of::<Self::E>() > 0,
226    ;
227
228    /// The item that can be mapped into the virtual memory space using the
229    /// page table.
230    ///
231    /// Usually, this item is a [`crate::mm::Frame`], which we call a "tracked"
232    /// frame. The page table can also do "untracked" mappings that only maps
233    /// to certain physical addresses without tracking the ownership of the
234    /// mapped physical frame. The user of the page table APIs can choose by
235    /// defining this type and the corresponding methods [`item_into_raw`] and
236    /// [`item_from_raw`].
237    ///
238    /// [`item_from_raw`]: PageTableConfig::item_from_raw
239    /// [`item_into_raw`]: PageTableConfig::item_into_raw
240    type Item: RCClone;
241
242    /// Consumes the item and returns the physical address, the paging level,
243    /// and the page property.
244    ///
245    /// The ownership of the item will be consumed, i.e., the item will be
246    /// forgotten after this function is called.
247    spec fn item_into_raw_spec(item: Self::Item) -> (Paddr, PagingLevel, PageProperty);
248
249    #[verifier::when_used_as_spec(item_into_raw_spec)]
250    fn item_into_raw(item: Self::Item) -> (res: (Paddr, PagingLevel, PageProperty))
251        ensures
252            1 <= res.1 <= NR_LEVELS,
253            res == Self::item_into_raw_spec(item),
254            res.0 % crate::specs::arch::mm::PAGE_SIZE == 0,
255            res.0 < crate::specs::arch::mm::MAX_PADDR,
256            res.0 % crate::mm::page_table::cursor::page_size(res.1) == 0,
257            res.0 + crate::mm::page_table::cursor::page_size(res.1)
258                <= crate::specs::arch::mm::MAX_PADDR,
259    ;
260
261    /// Restores the item from the physical address and the paging level.
262    ///
263    /// There could be transformations after [`PageTableConfig::item_into_raw`]
264    /// and before [`PageTableConfig::item_from_raw`], which include:
265    ///  - splitting and coalescing the items, for example, splitting one item
266    ///    into 512 `level - 1` items with and contiguous physical addresses;
267    ///  - protecting the items, for example, changing the page property.
268    ///
269    /// Splitting and coalescing maintains ownership rules, i.e., if one
270    /// physical address is within the range of one item, after splitting/
271    /// coalescing, there should be exactly one item that contains the address.
272    ///
273    /// # Safety
274    ///
275    /// The caller must ensure that:
276    ///  - the physical address and the paging level represent a page table
277    ///    item or part of it (as described above);
278    ///  - either the ownership of the item is properly transferred to the
279    ///    return value, or the return value is wrapped in a
280    ///    [`core::mem::ManuallyDrop`] that won't outlive the original item.
281    ///
282    /// A concrete trait implementation may require the caller to ensure that
283    ///  - the [`super::PageFlags::AVAIL1`] flag is the same as that returned
284    ///    from [`PageTableConfig::item_into_raw`].
285    spec fn item_from_raw_spec(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self::Item;
286
287    #[verifier::when_used_as_spec(item_from_raw_spec)]
288    unsafe fn item_from_raw(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self::Item
289        returns
290            Self::item_from_raw_spec(paddr, level, prop),
291    ;
292
293    /// Whether cloning this item bumps a slot's refcount. For ref-counted items
294    /// (e.g. `MappedItem::Tracked`), `true`; for items where clone is a no-op
295    /// (e.g. `MappedItem::Untracked` for kernel MMIO frames), `false`.
296    spec fn tracked(item: Self::Item) -> bool;
297
298    /// Per-config predicate that captures the structural well-formedness an item
299    /// reconstructed via `item_from_raw_spec` must satisfy. Typically: the
300    /// `Frame::inv()` of the tracked-frame component (if any).
301    ///
302    /// `KernelPtConfig` defines this as `match item { Tracked(f, _) => f.inv(),
303    /// Untracked => true }`. `UserPtConfig` defines it as `item.frame.inv()`.
304    spec fn item_well_formed(item: Self::Item) -> bool;
305
306    /// The item produced by `item_from_raw_spec` is structurally
307    /// well-formed (see `item_well_formed`).
308    proof fn item_from_raw_well_formed(pa: Paddr, level: PagingLevel, prop: PageProperty)
309        requires
310            crate::mm::frame::meta::has_safe_slot(pa),
311        ensures
312            Self::item_well_formed(Self::item_from_raw_spec(pa, level, prop)),
313    ;
314
315    /// Proves that `clone_ensures` for `Self::Item` implies concrete per-field
316    /// properties on `MetaRegionOwners`. Each `PageTableConfig` implementor proves
317    /// this by unfolding its `MappedItem::clone_ensures` → `Frame::clone_ensures`.
318    /// Proves that after `clone`, the slot at `frame_to_index(pa)` has the expected
319    /// per-field properties. Implementors unfold their `MappedItem::clone_ensures` to
320    /// `Frame::clone_ensures` and connect `pa` to the frame's internal pointer address.
321    proof fn clone_ensures_concrete(
322        item: Self::Item,
323        pa: Paddr,
324        old_regions: MetaRegionOwners,
325        new_regions: MetaRegionOwners,
326        res: Self::Item,
327    )
328        requires
329            item.clone_ensures(old_regions, new_regions, res),
330            Self::item_into_raw_spec(item).0 == pa,
331            res == item,
332            new_regions.inv(),
333            new_regions.slots =~= old_regions.slots,
334            new_regions.slot_owners.dom() =~= old_regions.slot_owners.dom(),
335        ensures
336    // Other slots always unchanged.
337
338            forall|i: usize|
339                i != frame_to_index(pa) ==> (#[trigger] new_regions.slot_owners[i]
340                    == old_regions.slot_owners[i]),
341            // The frame's slot: bumped if the item is ref-counted, otherwise unchanged.
342            Self::tracked(item) ==> {
343                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.value()
344                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.value() + 1
345                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.id()
346                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.id()
347                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.storage
348                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.storage
349                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.vtable_ptr
350                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.vtable_ptr
351                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.in_list
352                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.in_list
353                &&& new_regions.slot_owners[frame_to_index(pa)].paths_in_pt
354                    == old_regions.slot_owners[frame_to_index(pa)].paths_in_pt
355                &&& new_regions.slot_owners[frame_to_index(pa)].self_addr
356                    == old_regions.slot_owners[frame_to_index(pa)].self_addr
357                &&& new_regions.slot_owners[frame_to_index(pa)].usage
358                    == old_regions.slot_owners[frame_to_index(pa)].usage
359            },
360            !Self::tracked(item) ==> new_regions.slot_owners[frame_to_index(pa)]
361                == old_regions.slot_owners[frame_to_index(pa)],
362            // Canonical model: a tracked clone MINTS one per-frame obligation
363            // at the slot (`Frame::clone`); an untracked clone is net-zero.
364            Self::tracked(item) ==> new_regions.frame_obligations
365                =~= old_regions.frame_obligations.insert(frame_to_index(pa)),
366            !Self::tracked(item) ==> new_regions.frame_obligations
367                =~= old_regions.frame_obligations,
368    ;
369
370    proof fn item_roundtrip(item: Self::Item, paddr: Paddr, level: PagingLevel, prop: PageProperty)
371        ensures
372            Self::item_into_raw_spec(item) == (paddr, level, prop) <==> Self::item_from_raw_spec(
373                paddr,
374                level,
375                prop,
376            ) == item,
377    ;
378
379    /// Proves `item.clone_requires(regions)` from the concrete frame-slot facts
380    /// delivered by `metaregion_sound` plus the non-saturation bound propagated
381    /// from `Cursor::query`. Implementors unfold their `MappedItem::clone_requires`
382    /// to `Frame::clone_requires` and connect `pa` to the frame's internal pointer
383    /// address.
384    proof fn clone_requires_concrete(
385        item: Self::Item,
386        pa: Paddr,
387        level: PagingLevel,
388        prop: PageProperty,
389        regions: MetaRegionOwners,
390    )
391        requires
392            regions.inv(),
393            Self::item_from_raw_spec(pa, level, prop) == item,
394            crate::mm::frame::meta::has_safe_slot(pa),
395            regions.slots.contains_key(frame_to_index(pa)),
396            regions.slot_owners.contains_key(frame_to_index(pa)),
397            Self::tracked(item) ==> regions.slot_owners[frame_to_index(
398                pa,
399            )].inner_perms.ref_count.value() > 0,
400            // `rc != UNUSED` is needed only for tracked frames (untracked clone is a no-op).
401            Self::tracked(item) ==> regions.slot_owners[frame_to_index(
402                pa,
403            )].inner_perms.ref_count.value()
404                != crate::specs::mm::frame::meta_owners::REF_COUNT_UNUSED,
405            // Saturation aborts (Arc-style) via `inc_ref_count`'s diverging panic.
406            Self::tracked(item) ==> (regions.slot_owners[frame_to_index(
407                pa,
408            )].inner_perms.ref_count.value() < crate::specs::mm::frame::meta_owners::REF_COUNT_MAX
409                || may_panic()),
410        ensures
411            item.clone_requires(regions),
412    ;
413}
414
415// Implement it so that we can comfortably use low level functions
416// like `page_size::<C>` without typing `C::C` everywhere.
417impl<C: PageTableConfig> PagingConstsTrait for C {
418    open spec fn BASE_PAGE_SIZE_spec() -> usize {
419        C::C::BASE_PAGE_SIZE_spec()
420    }
421
422    fn BASE_PAGE_SIZE() -> usize {
423        C::C::BASE_PAGE_SIZE()
424    }
425
426    open spec fn NR_LEVELS_spec() -> PagingLevel {
427        C::C::NR_LEVELS_spec()
428    }
429
430    fn NR_LEVELS() -> PagingLevel {
431        C::C::NR_LEVELS()
432    }
433
434    open spec fn HIGHEST_TRANSLATION_LEVEL_spec() -> PagingLevel {
435        C::C::HIGHEST_TRANSLATION_LEVEL_spec()
436    }
437
438    fn HIGHEST_TRANSLATION_LEVEL() -> PagingLevel {
439        C::C::HIGHEST_TRANSLATION_LEVEL()
440    }
441
442    open spec fn PTE_SIZE_spec() -> usize {
443        C::C::PTE_SIZE_spec()
444    }
445
446    fn PTE_SIZE() -> usize {
447        C::C::PTE_SIZE()
448    }
449
450    open spec fn ADDRESS_WIDTH_spec() -> usize {
451        C::C::ADDRESS_WIDTH_spec()
452    }
453
454    fn ADDRESS_WIDTH() -> usize {
455        C::C::ADDRESS_WIDTH()
456    }
457
458    open spec fn VA_SIGN_EXT_spec() -> bool {
459        C::C::VA_SIGN_EXT_spec()
460    }
461
462    fn VA_SIGN_EXT() -> bool {
463        C::C::VA_SIGN_EXT()
464    }
465
466    proof fn lemma_BASE_PAGE_SIZE_properties()
467        ensures
468            0 < Self::BASE_PAGE_SIZE_spec(),
469            is_pow2(Self::BASE_PAGE_SIZE_spec() as int),
470    {
471        C::C::lemma_BASE_PAGE_SIZE_properties();
472    }
473
474    proof fn lemma_NR_LEVELS_eq()
475        ensures
476            Self::NR_LEVELS_spec() as int == NR_LEVELS as int,
477    {
478        C::C::lemma_NR_LEVELS_eq();
479    }
480
481    proof fn lemma_PTE_SIZE_properties()
482        ensures
483            0 < Self::PTE_SIZE_spec() <= Self::BASE_PAGE_SIZE(),
484            is_pow2(Self::PTE_SIZE_spec() as int),
485    {
486        C::C::lemma_PTE_SIZE_properties();
487    }
488}
489
490/// The interface for defining architecture-specific page table entries.
491///
492/// Note that a default PTE should be a PTE that points to nothing.
493pub trait PageTableEntryTrait:
494    Clone + Copy + Debug + Sized + Send + Sync + Pod + PodOnce + 'static {
495    spec fn default_spec() -> Self;
496
497    /// For implement `Default` trait.
498    #[verifier::when_used_as_spec(default_spec)]
499    fn default() -> (res: Self)
500        ensures
501            res == Self::default_spec(),
502    ;
503
504    /// Create a set of new invalid page table flags that indicates an absent page.
505    ///
506    /// Note that currently the implementation requires an all zero PTE to be an absent PTE.
507    spec fn new_absent_spec() -> Self;
508
509    #[verifier(when_used_as_spec(new_absent_spec))]
510    fn new_absent() -> (res: Self)
511        ensures
512            res == Self::new_absent_spec(),
513            res.paddr() % PAGE_SIZE == 0,
514            res.paddr() < MAX_PADDR,
515    ;
516
517    /// If the flags are present with valid mappings.
518    spec fn is_present_spec(&self) -> bool;
519
520    #[verifier::when_used_as_spec(is_present_spec)]
521    fn is_present(&self) -> (res: bool)
522        ensures
523            res == self.is_present_spec(),
524    ;
525
526    /// Create a new PTE with the given physical address and flags that map to a page.
527    spec fn new_page_spec(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self;
528
529    #[verifier::when_used_as_spec(new_page_spec)]
530    fn new_page(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> (res: Self)
531        requires
532            paddr % PAGE_SIZE == 0,
533            paddr < MAX_PADDR,
534        ensures
535            res == Self::new_page_spec(paddr, level, prop),
536            res.paddr() == paddr,
537            res.paddr() % PAGE_SIZE == 0,
538            res.paddr() < MAX_PADDR,
539    ;
540
541    /// Create a new PTE that map to a child page table.
542    spec fn new_pt_spec(paddr: Paddr) -> Self;
543
544    #[verifier::when_used_as_spec(new_pt_spec)]
545    fn new_pt(paddr: Paddr) -> (res: Self)
546        requires
547            paddr % PAGE_SIZE == 0,
548            paddr < MAX_PADDR,
549        ensures
550            res == Self::new_pt_spec(paddr),
551            res.paddr() == paddr,
552            res.paddr() % PAGE_SIZE == 0,
553            res.paddr() < MAX_PADDR,
554    ;
555
556    proof fn new_properties()
557        ensures
558            !Self::new_absent_spec().is_present(),
559            forall|level: PagingLevel|
560                #![trigger Self::new_absent_spec().is_last(level)]
561                1 < level ==> !Self::new_absent_spec().is_last(level),
562            forall|paddr: Paddr, level: PagingLevel, prop: PageProperty|
563                #![trigger Self::new_page_spec(paddr, level, prop)]
564                {
565                    &&& Self::new_page_spec(paddr, level, prop).is_present()
566                    &&& Self::new_page_spec(paddr, level, prop).paddr() == paddr
567                    &&& Self::new_page_spec(paddr, level, prop).prop() == prop
568                    &&& Self::new_page_spec(paddr, level, prop).is_last(level)
569                },
570            forall|paddr: Paddr|
571                #![trigger Self::new_pt_spec(paddr)]
572                {
573                    &&& Self::new_pt_spec(paddr).is_present()
574                    &&& Self::new_pt_spec(paddr).paddr() == paddr
575                    &&& forall|level: PagingLevel| !Self::new_pt_spec(paddr).is_last(level)
576                },
577    ;
578
579    /// Get the physical address from the PTE.
580    /// The physical address recorded in the PTE is either:
581    /// - the physical address of the next level page table;
582    /// - or the physical address of the page it maps to.
583    spec fn paddr_spec(&self) -> Paddr;
584
585    #[verifier::when_used_as_spec(paddr_spec)]
586    fn paddr(&self) -> (res: Paddr)
587        ensures
588            res == self.paddr_spec(),
589    ;
590
591    /// AXIOM: a present PTE's `paddr()` is page-aligned. PT entries record
592    /// the base address of either a child PT page or a mapped frame; both
593    /// are page-aligned by construction. Used by `PageTablePageMeta::on_drop`
594    /// to combine `lemma_frame_to_index_injective` with walk uniqueness.
595    proof fn axiom_present_paddr_aligned(&self)
596        requires
597            self.is_present(),
598        ensures
599            self.paddr() % crate::specs::arch::mm::PAGE_SIZE == 0,
600    ;
601
602    spec fn prop_spec(&self) -> PageProperty;
603
604    #[verifier::when_used_as_spec(prop_spec)]
605    fn prop(&self) -> (res: PageProperty)
606        ensures
607            res == self.prop_spec(),
608    ;
609
610    /// Set the page property of the PTE.
611    ///
612    /// This will be only done if the PTE is present. If not, this method will
613    /// do nothing.
614    spec fn set_prop_spec(&self, prop: PageProperty) -> Self;
615
616    fn set_prop(&mut self, prop: PageProperty)
617        ensures
618            old(self).set_prop_spec(prop) == *final(self),
619    ;
620
621    proof fn set_prop_properties(self, prop: PageProperty)
622        ensures
623            self.set_prop_spec(prop).prop() == prop,
624            self.set_prop_spec(prop).paddr() == self.paddr(),
625            self.is_present() ==> self.set_prop_spec(prop).is_present(),
626            forall|level: PagingLevel|
627                #![trigger self.is_last(level)]
628                self.is_last(level) ==> self.set_prop_spec(prop).is_last(level),
629    ;
630
631    /// If the PTE maps a page rather than a child page table.
632    ///
633    /// The level of the page table the entry resides is given since architectures
634    /// like amd64 only uses a huge bit in intermediate levels.
635    spec fn is_last_spec(&self, level: PagingLevel) -> bool;
636
637    #[verifier::when_used_as_spec(is_last_spec)]
638    fn is_last(&self, level: PagingLevel) -> (b: bool)
639        ensures
640            b == self.is_last_spec(level),
641    ;
642
643    /// Converts the PTE into its corresponding `usize` value.
644    spec fn as_usize_spec(self) -> usize;
645
646    #[verifier::external_body]
647    #[verifier::when_used_as_spec(as_usize_spec)]
648    fn as_usize(self) -> (res: usize)
649        ensures
650            res == self.as_usize_spec(),
651    {
652        unimplemented!()
653        // SAFETY: `Self` is `Pod` and has the same memory representation as `usize`.
654        //        unsafe { transmute_unchecked(self) }
655
656    }
657
658    /// Converts a usize `pte_raw` into a PTE.
659    #[verifier::external_body]
660    fn from_usize(pte_raw: usize) -> Self {
661        unimplemented!()
662        // SAFETY: `Self` is `Pod` and has the same memory representation as `usize`.
663        //        unsafe { transmute_unchecked(pte_raw) }
664
665    }
666}
667
668/// A handle to a page table.
669/// A page table can track the lifetime of the mapped physical pages.
670pub struct PageTable<C: PageTableConfig> {
671    pub root: PageTableNode<C>,
672}
673
674#[verifier::inline]
675pub open spec fn nr_pte_index_bits_spec<C: PagingConstsTrait>() -> usize {
676    nr_subpage_per_huge::<C>().ilog2() as usize
677}
678
679/// The number of virtual address bits used to index a PTE in a page.
680#[inline(always)]
681#[verifier::when_used_as_spec(nr_pte_index_bits_spec)]
682pub fn nr_pte_index_bits<C: PagingConstsTrait>() -> (res: usize)
683    ensures
684        res == nr_pte_index_bits_spec::<C>(),
685{
686    proof {
687        lemma_nr_subpage_per_huge_bounded::<C>();
688    }
689    nr_subpage_per_huge::<C>().ilog2() as usize
690}
691
692pub proof fn lemma_nr_pte_index_bits_bounded<C: PagingConstsTrait>()
693    ensures
694        0 <= nr_pte_index_bits::<C>() <= C::BASE_PAGE_SIZE().ilog2(),
695{
696    lemma_nr_subpage_per_huge_bounded::<C>();
697    let nr = nr_subpage_per_huge::<C>();
698    assert(1 <= nr <= C::BASE_PAGE_SIZE());
699    let bits = nr.ilog2();
700    assert(0 <= bits) by {
701        lemma_usize_ilog2_ordered(1, nr);
702    }
703    assert(bits <= C::BASE_PAGE_SIZE().ilog2()) by {
704        lemma_usize_ilog2_ordered(nr, C::BASE_PAGE_SIZE());
705    }
706}
707
708/// Splits the address range into largest page table items.
709///
710/// Each of the returned items is a tuple of the physical address and the
711/// paging level. It is helpful when you want to map a physical address range
712/// into the provided virtual address.
713///
714/// For example, on x86-64, `C: PageTableConfig` may specify level 1 page as
715/// 4KiB, level 2 page as 2MiB, and level 3 page as 1GiB. Suppose that the
716/// supplied physical address range is from `0x3fdff000` to `0x80002000`,
717/// and the virtual address is also `0x3fdff000`, the following 5 items will
718/// be returned:
719///
720/// ```text
721/// 0x3fdff000                                                 0x80002000
722/// start                                                             end
723///   |----|----------------|--------------------------------|----|----|
724///    4KiB      2MiB                       1GiB              4KiB 4KiB
725/// ```
726///
727/// # Panics
728///
729/// Panics if:
730///  - any of `va`, `pa`, or `len` is not aligned to the base page size;
731///  - the range `va..(va + len)` is not valid for the page table.
732#[verifier::external_body]
733pub fn largest_pages<C: PageTableConfig>(
734    mut va: Vaddr,
735    mut pa: Paddr,
736    mut len: usize,
737) -> impl Iterator<Item = (Paddr, PagingLevel)> {
738    assert_eq!(va % C::BASE_PAGE_SIZE(), 0);
739    assert_eq!(pa % C::BASE_PAGE_SIZE(), 0);
740    assert_eq!(len % C::BASE_PAGE_SIZE(), 0);
741    assert!(is_valid_range::<C>(&(va..(va + len))));
742
743    core::iter::from_fn(
744        move ||
745            {
746                if len == 0 {
747                    return None;
748                }
749                let mut level = C::HIGHEST_TRANSLATION_LEVEL();
750                while page_size(level) > len || va % page_size(level) != 0 || pa % page_size(level)
751                    != 0 {
752                    level -= 1;
753                }
754
755                let item_start = pa;
756                va += page_size(level);
757                pa += page_size(level);
758                len -= page_size(level);
759
760                Some((item_start, level))
761            },
762    )
763}
764
765/// Gets the managed virtual addresses range for the page table.
766///
767/// It returns a [`RangeInclusive`] because the end address, if being
768/// [`Vaddr::MAX`], overflows [`Range<Vaddr>`].
769#[verifier::external_body]
770fn top_level_index_width<C: PageTableConfig>() -> usize {
771    C::ADDRESS_WIDTH() - pte_index_bit_offset::<C>(C::NR_LEVELS())
772}
773
774/// Concrete positional start of the VA range: `idx_range.start * 2^offset`.
775#[verifier::external_body]
776fn pt_va_range_start<C: PageTableConfig>() -> (ret: Vaddr)
777    ensures
778        ret as int == C::TOP_LEVEL_INDEX_RANGE_spec().start as int * pow2(
779            pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat,
780        ) as int,
781{
782    C::TOP_LEVEL_INDEX_RANGE().start << pte_index_bit_offset::<C>(C::NR_LEVELS())
783}
784
785/// Concrete positional end of the VA range (inclusive):
786/// `(idx_range.end * 2^offset) - 1`, with wrap semantics when
787/// `idx_range.end * 2^offset == 2^64` (e.g. kernel top-level with all
788/// entries mapped — `2^64 - 1 = usize::MAX`).
789#[verifier::external_body]
790fn pt_va_range_end<C: PageTableConfig>() -> (ret: Vaddr)
791    ensures
792        ret as int == (C::TOP_LEVEL_INDEX_RANGE_spec().end as int * pow2(
793            pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat,
794        ) as int - 1) % 0x1_0000_0000_0000_0000int,
795{
796    C::TOP_LEVEL_INDEX_RANGE().end.unbounded_shl(
797        pte_index_bit_offset::<C>(C::NR_LEVELS()) as u32,
798    ).wrapping_sub(1)  // Inclusive end.
799
800}
801
802/// Test whether bit `ADDRESS_WIDTH - 1` of `va` is set.
803#[verifier::external_body]
804fn sign_bit_of_va<C: PageTableConfig>(va: Vaddr) -> (ret: bool)
805    ensures
806        ret == ((va as int / pow2((C::ADDRESS_WIDTH() - 1) as nat) as int) % 2 == 1),
807{
808    (va >> (C::ADDRESS_WIDTH() - 1)) & 1 != 0
809}
810
811/// Trait-level invariant the upstream enforces via `const` assertions in
812/// `vaddr_range`'s prologue. Captured here as an axiom so we can use it in
813/// proofs (Verus has no `const { ... }` form for trait constants).
814///
815/// `idx.start < 2^(ADDRESS_WIDTH - offset)` and `idx.end <= 2^(...)`
816/// together bound the positional VA at `pt_va_range_start` /
817/// `pt_va_range_end` by `2^ADDRESS_WIDTH`.
818pub axiom fn axiom_top_level_index_range_bounds<C: PageTableConfig>()
819    ensures
820        (C::TOP_LEVEL_INDEX_RANGE_spec().start as int) < (pow2(
821            (C::ADDRESS_WIDTH() as int - pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS())) as nat,
822        ) as int),
823        C::TOP_LEVEL_INDEX_RANGE_spec().end as int <= pow2(
824            (C::ADDRESS_WIDTH() as int - pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS())) as nat,
825        ) as int,
826        pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) <= C::ADDRESS_WIDTH() as int,
827        pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) >= 0,
828        // Non-empty index range: idx.start < idx.end (matches the upstream
829        // `const` assertion).
830        (C::TOP_LEVEL_INDEX_RANGE_spec().start as int) < (
831        C::TOP_LEVEL_INDEX_RANGE_spec().end as int),
832        // 64-bit hardware bound (Rust `usize` is 64 bits on supported targets).
833        C::ADDRESS_WIDTH() as int <= 64,
834;
835
836/// Per-config relationship between `LEADING_BITS_spec` and the sign-ext
837/// branch of `vaddr_range`: a non-zero `LEADING_BITS` requires both
838/// `VA_SIGN_EXT` and that bit `ADDRESS_WIDTH - 1` of `pt_va_range_start`
839/// is set. Equivalently (contrapositive), if either of those fails, then
840/// `LEADING_BITS == 0`.
841///
842/// For `UserPtConfig` (`LB=0`) the conclusion is vacuous; for
843/// `KernelPtConfig` (`LB=0xffff`, `idx.start=256`, `off=39`,
844/// `ADDRESS_WIDTH=48`), `2^47 / 2^47 % 2 == 1` and `VA_SIGN_EXT == true`
845/// — so the implication holds.
846pub axiom fn axiom_leading_bits_only_when_high_half<C: PageTableConfig>()
847    ensures
848        C::LEADING_BITS_spec() != 0usize ==> (C::VA_SIGN_EXT() && (((
849        C::TOP_LEVEL_INDEX_RANGE_spec().start as int) * (pow2(
850            pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat,
851        ) as int)) / (pow2((C::ADDRESS_WIDTH() - 1) as nat) as int)) % 2 == 1),
852;
853
854#[verifier::inline]
855pub open spec fn pte_index_bit_offset_spec<C: PagingConstsTrait>(level: PagingLevel) -> int {
856    (C::BASE_PAGE_SIZE().ilog2() as int) + (nr_pte_index_bits::<C>() as int) * (level as int - 1)
857}
858
859/// Spec for the managed virtual address range (exclusive end).
860/// For configs without VA_SIGN_EXT (e.g. UserPtConfig) or when the base range has sign bit 0.
861/// Configs with sign extension (e.g. KernelPtConfig) use a wrapped range in exec;
862/// we use an axiom to connect that case.
863#[verifier::inline]
864pub open spec fn vaddr_range_spec<C: PageTableConfig>() -> Range<Vaddr> {
865    let idx_range = C::TOP_LEVEL_INDEX_RANGE_spec();
866    let offset = pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat;
867    let start = (idx_range.start as int) * pow2(offset);
868    let end_inclusive = (idx_range.end as int) * pow2(offset) - 1;
869    (start as Vaddr)..((end_inclusive + 1) as Vaddr)
870}
871
872/// Spec for whether a range is within the page table's managed address space.
873#[verifier::inline]
874pub open spec fn is_valid_range_spec<C: PageTableConfig>(r: &Range<Vaddr>) -> bool {
875    let va_range = vaddr_range_spec::<C>();
876    (r.start == 0 && r.end == 0) || (va_range.start <= r.start && r.end > 0 && r.end - 1
877        <= va_range.end - 1)
878}
879
880/// Gets the managed virtual addresses range for the page table.
881///
882/// Returns a [`RangeInclusive`] because the end address, when the range
883/// reaches the top of the 64-bit address space (e.g. the canonical
884/// high-half kernel range ending at `usize::MAX`), would overflow the
885/// exclusive end of a [`Range<Vaddr>`].
886fn vaddr_range<C: PageTableConfig>() -> (ret: RangeInclusive<Vaddr>)
887    ensures
888        ret@.start == vaddr_range_bounds_spec::<C>().0,
889        ret@.end == vaddr_range_bounds_spec::<C>().1,
890        ret@.exhausted == false,
891{
892    let mut start = pt_va_range_start::<C>();
893    let mut end = pt_va_range_end::<C>();
894
895    proof {
896        lemma_vaddr_range_bounds_spec_unfold::<C>();
897        axiom_top_level_index_range_bounds::<C>();
898        crate::specs::mm::page_table::vaddr_range_proofs::lemma_idx_times_pow2_bound::<C>(
899            start,
900            end,
901        );
902    }
903    let pt_start = pt_va_range_start::<C>();
904    let va_sign_ext = C::VA_SIGN_EXT();
905    let sign_bit_set = sign_bit_of_va::<C>(pt_start);
906    if va_sign_ext && sign_bit_set {
907        start = apply_sign_ext::<C>(start);
908        end = apply_sign_ext::<C>(end);
909    } else {
910        proof {
911            // The if-condition was false, so either va_sign_ext is false
912            // or sign_bit_set is false. The contrapositive of
913            // `axiom_leading_bits_only_when_high_half` gives LEADING_BITS == 0.
914            axiom_leading_bits_only_when_high_half::<C>();
915            assert(!va_sign_ext || !sign_bit_set);
916            // Bridge exec bool to spec form. `va_sign_ext == C::VA_SIGN_EXT()`
917            // by `when_used_as_spec`; `sign_bit_set == ((pt_start as int /
918            // 2^(aw-1)) % 2 == 1)` by `sign_bit_of_va`'s ensures.
919            assert(va_sign_ext == C::VA_SIGN_EXT());
920            let off = pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat;
921            let aw_m1 = (C::ADDRESS_WIDTH() - 1) as nat;
922            let i_start = C::TOP_LEVEL_INDEX_RANGE_spec().start as int;
923            let p_off = pow2(off) as int;
924            let p_aw_m1 = pow2(aw_m1) as int;
925            assert(pt_start as int == i_start * p_off);
926            assert(sign_bit_set == ((pt_start as int / p_aw_m1) % 2 == 1));
927            assert(sign_bit_set == ((i_start * p_off / p_aw_m1) % 2 == 1));
928            // Now invoke the axiom's contrapositive form explicitly.
929            if C::LEADING_BITS_spec() != 0usize {
930                assert(C::VA_SIGN_EXT() && ((i_start * p_off / p_aw_m1) % 2 == 1));
931                assert(va_sign_ext);
932                assert(sign_bit_set);
933                assert(false);
934            }
935            assert(C::LEADING_BITS_spec() == 0usize);
936        }
937    }
938    proof {
939        // Both branches now establish the equation
940        //   start as int == lb * 2^48 + idx.start * 2^off
941        //   end as int   == lb * 2^48 + idx.end * 2^off - 1
942        // matching the unfolded `vaddr_range_bounds_spec`.
943        assert(start as int == (C::LEADING_BITS_spec() as int) * 0x1_0000_0000_0000int + (
944        C::TOP_LEVEL_INDEX_RANGE_spec().start as int) * (pow2(
945            pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat,
946        ) as int));
947        assert(end as int == (C::LEADING_BITS_spec() as int) * 0x1_0000_0000_0000int + (
948        C::TOP_LEVEL_INDEX_RANGE_spec().end as int) * (pow2(
949            pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat,
950        ) as int) - 1);
951    }
952    RangeInclusive::new(start, end)
953}
954
955/// Apply the sign-extension OR to a positional value.
956///
957/// For any value `va` in `[0, 2^ADDRESS_WIDTH)` with bit `ADDRESS_WIDTH - 1`
958/// set, the OR with `!0 ^ ((1 << ADDRESS_WIDTH) - 1)` is equivalent to
959/// adding `LEADING_BITS_spec() * 2^48`, because the mask's bits and `va`'s
960/// bits are disjoint.
961///
962/// The helper is `external_body` only so Verus doesn't need to verify the
963/// bit-level OR; the ensures states the arithmetic equivalent.
964#[verifier::external_body]
965fn apply_sign_ext<C: PageTableConfig>(va: Vaddr) -> (ret: Vaddr)
966    requires
967        (va as int) < pow2(C::ADDRESS_WIDTH() as nat) as int,
968    ensures
969        ret as int == va as int + C::LEADING_BITS_spec() as int * 0x1_0000_0000_0000int,
970{
971    let sign_ext_mask = !0 ^ ((1usize << C::ADDRESS_WIDTH()) - 1);
972    va | sign_ext_mask
973}
974
975/// Checks if the given range is covered by the valid range of the page table.
976#[verifier::external_body]
977fn is_valid_range<C: PageTableConfig>(r: &Range<Vaddr>) -> (ret: bool)
978    ensures
979        ret == is_valid_range_spec::<C>(r),
980{
981    let va_range = vaddr_range::<C>();
982    (r.start == 0 && r.end == 0) || (*va_range.start() <= r.start && r.end - 1 <= *va_range.end())
983}
984
985/// Sanity-check: for x86_64 kernel PT, `vaddr_range_spec` evaluates to the
986/// low-half `[2^47, 2^48)` window. The exec path (`vaddr_range`) applies
987/// sign extension on top of this and wraps at `usize::MAX + 1`, which is
988/// the "KNOWN BUG" referenced in `vm_space::unmap`. See
989/// `vaddr_range_bounds_spec` below for the overflow-free formulation.
990pub(crate) proof fn lemma_vaddr_range_spec_kernel()
991    ensures
992        vaddr_range_spec::<KernelPtConfig>() == (
993        0x0000_8000_0000_0000_usize..0x0001_0000_0000_0000_usize),
994{
995    use vstd::arithmetic::power2::{lemma2_to64, lemma2_to64_rest, lemma_pow2_adds};
996    lemma2_to64();
997    lemma2_to64_rest();
998    lemma_usize_pow2_ilog2(12);
999    assert(PagingConsts::BASE_PAGE_SIZE().ilog2() == 12u32);
1000    assert(nr_subpage_per_huge::<PagingConsts>() == 512_usize);
1001    lemma_usize_pow2_ilog2(9);
1002    assert(nr_pte_index_bits::<PagingConsts>() == 9_usize);
1003    assert(pte_index_bit_offset_spec::<PagingConsts>(4) == 39);
1004    lemma_pow2_adds(8, 39);
1005    lemma_pow2_adds(9, 39);
1006    assert((256 as int) * pow2(39) == pow2(47));
1007    assert((512 as int) * pow2(39) == pow2(48));
1008}
1009
1010/// Canonical bounds of the VA range managed by a page-table config,
1011/// returned as inclusive `(start, end_inclusive)`. `end_inclusive` may
1012/// equal `usize::MAX` for sign-extended kernel configs, which is why the
1013/// inclusive form is used — `Range<Vaddr>` cannot represent that.
1014///
1015/// Derived from `LEADING_BITS_spec` and `TOP_LEVEL_INDEX_RANGE_spec`. For
1016/// `UserPtConfig` `(LEADING_BITS=0, idx=0..256)` this is `(0, 2^47 - 1)`;
1017/// for `KernelPtConfig` `(LEADING_BITS=0xffff, idx=256..512)` this is
1018/// `(0xffff_8000_0000_0000, 0xffff_ffff_ffff_ffff)`.
1019pub closed spec fn vaddr_range_bounds_spec<C: PageTableConfig>() -> (Vaddr, Vaddr) {
1020    let idx = C::TOP_LEVEL_INDEX_RANGE_spec();
1021    let off = pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat;
1022    let lb = C::LEADING_BITS_spec() as int;
1023    let base = lb * 0x1_0000_0000_0000int;
1024    let start = (base + (idx.start as int) * pow2(off)) as usize;
1025    let end_inclusive = (base + (idx.end as int) * pow2(off) - 1) as usize;
1026    (start, end_inclusive)
1027}
1028
1029/// Reveal the body of `vaddr_range_bounds_spec` at a call site without
1030/// making the function itself open (which causes z3-context pollution in
1031/// cursor invariants).
1032pub proof fn lemma_vaddr_range_bounds_spec_unfold<C: PageTableConfig>()
1033    ensures
1034        vaddr_range_bounds_spec::<C>() == {
1035            let idx = C::TOP_LEVEL_INDEX_RANGE_spec();
1036            let off = pte_index_bit_offset_spec::<C::C>(C::NR_LEVELS()) as nat;
1037            let lb = C::LEADING_BITS_spec() as int;
1038            let base = lb * 0x1_0000_0000_0000int;
1039            let start = (base + (idx.start as int) * pow2(off)) as usize;
1040            let end_inclusive = (base + (idx.end as int) * pow2(off) - 1) as usize;
1041            (start, end_inclusive)
1042        },
1043{
1044}
1045
1046/// Sanity-check: for x86_64 user PT, the bounds are
1047/// `(0, 0x0000_7FFF_FFFF_FFFF)`, i.e. the low-half 47-bit user VA space.
1048pub(crate) proof fn lemma_vaddr_range_bounds_spec_user()
1049    ensures
1050        vaddr_range_bounds_spec::<crate::mm::vm_space::UserPtConfig>() == (
1051            0_usize,
1052            0x0000_7FFF_FFFF_FFFF_usize,
1053        ),
1054{
1055    use vstd::arithmetic::power2::{lemma2_to64, lemma2_to64_rest, lemma_pow2_adds};
1056    lemma2_to64();
1057    lemma2_to64_rest();
1058    lemma_usize_pow2_ilog2(12);
1059    lemma_usize_pow2_ilog2(9);
1060    lemma_pow2_adds(8, 39);
1061    assert(nr_subpage_per_huge::<PagingConsts>() == 512_usize);
1062    assert(nr_pte_index_bits::<PagingConsts>() == 9_usize);
1063    assert(pte_index_bit_offset_spec::<PagingConsts>(4) == 39);
1064    assert((0 as int) * pow2(39) == 0);
1065    assert((256 as int) * pow2(39) == pow2(47));
1066    assert(pow2(47) as int - 1 == 0x0000_7FFF_FFFF_FFFF_int);
1067    // UserPtConfig: LEADING_BITS = 0 via trait default.
1068    assert(<crate::mm::vm_space::UserPtConfig as PageTableConfig>::LEADING_BITS_spec() == 0_usize);
1069}
1070
1071/// Sanity-check: for x86_64 kernel PT, the bounds are the canonical
1072/// upper half `(0xFFFF_8000_0000_0000, 0xFFFF_FFFF_FFFF_FFFF)`.
1073pub(crate) proof fn lemma_vaddr_range_bounds_spec_kernel()
1074    ensures
1075        vaddr_range_bounds_spec::<KernelPtConfig>() == (
1076            0xFFFF_8000_0000_0000_usize,
1077            0xFFFF_FFFF_FFFF_FFFF_usize,
1078        ),
1079{
1080    use vstd::arithmetic::power2::{lemma2_to64, lemma2_to64_rest, lemma_pow2_adds};
1081    lemma2_to64();
1082    lemma2_to64_rest();
1083    lemma_usize_pow2_ilog2(12);
1084    lemma_usize_pow2_ilog2(9);
1085    lemma_pow2_adds(8, 39);
1086    lemma_pow2_adds(9, 39);
1087    assert(nr_subpage_per_huge::<PagingConsts>() == 512_usize);
1088    assert(nr_pte_index_bits::<PagingConsts>() == 9_usize);
1089    assert(PagingConsts::BASE_PAGE_SIZE().ilog2() == 12u32);
1090    assert(pte_index_bit_offset_spec::<PagingConsts>(4) == 39);
1091    assert((256 as int) * pow2(39) == pow2(47));
1092    assert((512 as int) * pow2(39) == pow2(48));
1093    assert(0xffff_int * 0x1_0000_0000_0000int + pow2(47) as int == 0xffff_8000_0000_0000int);
1094    assert(0xffff_int * 0x1_0000_0000_0000int + pow2(48) as int - 1 == 0xffff_ffff_ffff_ffffint);
1095}
1096
1097// Here are some const values that are determined by the paging constants.
1098/// The index of a VA's PTE in a page table node at the given level.
1099#[verifier::external_body]
1100fn pte_index<C: PagingConstsTrait>(va: Vaddr, level: PagingLevel) -> (res: usize)
1101    ensures
1102        res == AbstractVaddr::from_vaddr(va).index[level - 1],
1103{
1104    (va >> pte_index_bit_offset::<C>(level)) & (nr_subpage_per_huge::<C>() - 1)
1105}
1106
1107/// The bit offset of the entry offset part in a virtual address.
1108///
1109/// This function returns the bit offset of the least significant bit. Take
1110/// x86-64 as an example, the `pte_index_bit_offset(2)` should return 21, which
1111/// is 12 (the 4KiB in-page offset) plus 9 (index width in the level-1 table).
1112#[verifier::external_body]
1113fn pte_index_bit_offset<C: PagingConstsTrait>(level: PagingLevel) -> usize {
1114    C::BASE_PAGE_SIZE().ilog2() as usize + nr_pte_index_bits::<C>() * (level as usize - 1)
1115}
1116
1117/*
1118impl PageTable<UserPtConfig> {
1119    pub fn activate(&self) {
1120        // SAFETY: The user mode page table is safe to activate since the kernel
1121        // mappings are shared.
1122        unsafe {
1123            self.root.activate();
1124        }
1125    }
1126}*/
1127
1128impl PageTable<KernelPtConfig> {
1129    /// Panic condition for [`Self::create_user_page_table`]:
1130    /// Some kernel root entry at index `i` in `TOP_LEVEL_INDEX_RANGE` is
1131    /// not a page table node (i.e., is absent or maps a huge frame).
1132    pub open spec fn create_user_pt_panic_condition(root_owner: NodeOwner<KernelPtConfig>) -> bool {
1133        exists|i: usize|
1134            #![trigger root_owner.children_perm.value()[i as int]]
1135            KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= i
1136                < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end && {
1137                let pte = root_owner.children_perm.value()[i as int];
1138                ||| !pte.is_present()
1139                ||| pte.is_last(root_owner.level)
1140            }
1141    }
1142
1143    /// Create a new kernel page table.
1144    #[verifier::external_body]
1145    pub(crate) fn new_kernel_page_table() -> Self {
1146        unimplemented!()/*        let kpt = Self::empty();
1147
1148        // Make shared the page tables mapped by the root table in the kernel space.
1149        {
1150            let preempt_guard = disable_preempt();
1151            let mut root_node = kpt.root.borrow().lock(&preempt_guard);
1152
1153            for i in KernelPtConfig::TOP_LEVEL_INDEX_RANGE {
1154                let mut root_entry = root_node.entry(i);
1155                let _ = root_entry.alloc_if_none(&preempt_guard).unwrap();
1156            }
1157        }
1158
1159        kpt*/
1160
1161    }
1162
1163    /// Create a new user page table.
1164    ///
1165    /// This should be the only way to create the user page table, that is to
1166    /// duplicate the kernel page table with all the kernel mappings shared.
1167    #[verus_spec(r =>
1168        with Tracked(kernel_owner): Tracked<&PageTableOwner<KernelPtConfig>>,
1169            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1170            Tracked(guards): Tracked<&mut Guards<'rcu>>,
1171        requires
1172            kernel_owner.inv(),
1173            old(regions).inv(),
1174            kernel_owner.0.value.node is Some,
1175            !Self::create_user_pt_panic_condition(kernel_owner.0.value.node.unwrap()),
1176            // The kernel page table's root frame matches the tracked owner.
1177            self.root.ptr.addr() == kernel_owner.0.value.node.unwrap().meta_addr_self(),
1178            // The kernel root entry is sound with respect to the meta regions.
1179            kernel_owner.0.value.metaregion_sound(*old(regions)),
1180            // The whole kernel page-table tree is sound: every entry's metaregion
1181            // bookkeeping matches `old(regions)`. Needed to derive each child's
1182            // soundness inside the loop body.
1183            kernel_owner.metaregion_sound(*old(regions)),
1184            // The kernel root is not currently locked.
1185            old(guards).unlocked(kernel_owner.0.value.node.unwrap().meta_addr_self()),
1186        ensures
1187            final(regions).inv(),
1188    )]
1189    pub(in crate::mm) fn create_user_page_table<'rcu, G: InAtomicMode + 'static>(
1190        &'static self,
1191    ) -> PageTable<UserPtConfig> {
1192        let preempt_guard: &'rcu G = disable_preempt::<G>();
1193
1194        proof_decl! {
1195            let tracked mut new_pt_owner: Option<PageTableOwner<UserPtConfig>> = None;
1196        }
1197        let ghost regions_before_alloc = *regions;
1198        let new_pt: PageTable<UserPtConfig> = (
1199        #[verus_spec(with Tracked(&mut new_pt_owner), Tracked(regions), Tracked(guards))]
1200        PageTable::empty_with_owner());
1201        let new_root = new_pt.root;
1202        // Capture new_idx as a ghost BEFORE the tracked_take below empties new_pt_owner.
1203        let ghost new_idx_g: usize = crate::specs::mm::frame::mapping::frame_to_index(
1204            new_pt_owner@.unwrap().0.value.meta_slot_paddr().unwrap(),
1205        );
1206        let ghost new_pt_owner_snap = new_pt_owner@.unwrap();
1207        proof {
1208            let kern_idx = crate::specs::mm::frame::mapping::frame_to_index(
1209                kernel_owner.0.value.meta_slot_paddr().unwrap(),
1210            );
1211            let new_idx = new_idx_g;
1212            crate::specs::mm::page_table::node::entry_owners::EntryOwner::<
1213                KernelPtConfig,
1214            >::active_entry_not_in_free_pool(kernel_owner.0.value, regions_before_alloc, new_idx);
1215            assert(kern_idx != new_idx);
1216            assert(regions.slot_owners[kern_idx] == regions_before_alloc.slot_owners[kern_idx]);
1217            assert(kernel_owner.metaregion_sound(*regions));
1218            assert(!regions.slots.contains_key(new_idx));
1219        }
1220
1221        proof_decl! {
1222            let tracked root_owner: &NodeOwner<KernelPtConfig>
1223                = kernel_owner.0.borrow_value().node.tracked_borrow();
1224            let tracked mut new_pt_owner_val: PageTableOwner<UserPtConfig>
1225                = new_pt_owner.tracked_take();
1226            let tracked mut new_node_owner: NodeOwner<UserPtConfig>
1227                = new_pt_owner_val.0.value.node.tracked_take();
1228            let tracked mut entry_owner: &EntryOwner<KernelPtConfig>;
1229        }
1230
1231        // Discharge borrow/lock preconditions for the kernel root from
1232        // kernel_owner.inv() + metaregion_sound + guards unlocked.
1233        proof {
1234            assert(kernel_owner.0.value.is_node());
1235            assert(kernel_owner.0.value.metaregion_sound(*regions));
1236        }
1237        let ghost regions_before_self_borrow: MetaRegionOwners = *regions;
1238        let mut root_node = {
1239            #[verus_spec(with Tracked(regions))]
1240            let root_ref = self.root.borrow();
1241            #[verus_spec(with Tracked(root_owner), Tracked(guards))]
1242            root_ref.lock(preempt_guard)
1243        };
1244        let ghost regions_after_kroot_borrow: MetaRegionOwners = *regions;
1245        let mut new_node: PageTableGuard<'rcu, UserPtConfig> = {
1246            #[verus_spec(with Tracked(regions))]
1247            let new_ref = new_root.borrow();
1248            #[verus_spec(with Tracked(&new_node_owner), Tracked(guards))]
1249            new_ref.lock(preempt_guard)
1250        };
1251        proof {
1252            let kern_idx = crate::specs::mm::frame::mapping::frame_to_index(
1253                kernel_owner.0.value.meta_slot_paddr().unwrap(),
1254            );
1255            assert(regions_before_self_borrow.slot_owners
1256                =~= regions_after_kroot_borrow.slot_owners);
1257            assert forall|k: usize|
1258                regions_before_self_borrow.slots.contains_key(
1259                    k,
1260                ) implies regions_before_self_borrow.slots[k]
1261                == #[trigger] regions_after_kroot_borrow.slots[k] by {
1262                if k == kern_idx {
1263                    crate::specs::mm::page_table::node::entry_owners::EntryOwner::<
1264                        KernelPtConfig,
1265                    >::active_entry_not_in_free_pool(
1266                        kernel_owner.0.value,
1267                        regions_before_self_borrow,
1268                        k,
1269                    );
1270                }
1271            };
1272            kernel_owner.metaregion_sound_preserved_slot_owners_eq(
1273                regions_before_self_borrow,
1274                regions_after_kroot_borrow,
1275            );
1276
1277            let new_idx = new_idx_g;
1278            assert(regions_before_alloc.slots.contains_key(new_idx));
1279            assert(kern_idx != new_idx) by {
1280                crate::specs::mm::page_table::node::entry_owners::EntryOwner::<
1281                    KernelPtConfig,
1282                >::active_entry_not_in_free_pool(
1283                    kernel_owner.0.value,
1284                    regions_before_alloc,
1285                    new_idx,
1286                );
1287            };
1288
1289            assert(!regions_before_self_borrow.slots.contains_key(new_idx));
1290            assert(!regions_after_kroot_borrow.slots.contains_key(new_idx));
1291            assert forall|k: usize|
1292                regions_after_kroot_borrow.slots.contains_key(
1293                    k,
1294                ) implies regions_after_kroot_borrow.slots[k] == #[trigger] regions.slots[k] by {
1295                if k != new_idx {
1296                    // borrow preserves slots[k] at k != self.index() == new_idx
1297                }
1298            };
1299            assert(kernel_owner.metaregion_sound(regions_before_alloc));
1300
1301            kernel_owner.0.map_implies(
1302                kernel_owner.0.value.path,
1303                |
1304                    e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1305                    p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>,
1306                |
1307                    e.is_frame() && e.parent_level > 1 ==> {
1308                        let pa = e.frame.unwrap().mapped_pa;
1309                        let nr_pages = crate::mm::page_table::cursor::page_size_spec(e.parent_level)
1310                            / crate::specs::arch::mm::PAGE_SIZE;
1311                        forall|j: usize|
1312                            0 < j < nr_pages ==> {
1313                                let sub_idx =
1314                                    #[trigger] crate::specs::mm::frame::mapping::frame_to_index(
1315                                    (pa + j * crate::specs::arch::mm::PAGE_SIZE) as usize,
1316                                );
1317                                sub_idx != new_idx
1318                            }
1319                    },
1320                |
1321                    e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1322                    p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>,
1323                |
1324                    e.is_frame() && e.parent_level > 1 ==> {
1325                        let pa = e.frame.unwrap().mapped_pa;
1326                        let nr_pages = crate::mm::page_table::cursor::page_size_spec(e.parent_level)
1327                            / crate::specs::arch::mm::PAGE_SIZE;
1328                        forall|j: usize|
1329                            0 < j < nr_pages ==> {
1330                                let sub_idx =
1331                                    #[trigger] crate::specs::mm::frame::mapping::frame_to_index(
1332                                    (pa + j * crate::specs::arch::mm::PAGE_SIZE) as usize,
1333                                );
1334                                sub_idx != new_idx || (regions.slots.contains_key(sub_idx)
1335                                    && regions.slot_owners[sub_idx].inner_perms.ref_count.value()
1336                                    != crate::specs::mm::frame::meta_owners::REF_COUNT_UNUSED
1337                                    && regions.slot_owners[sub_idx].inner_perms.ref_count.value()
1338                                    > 0)
1339                            }
1340                    },
1341            );
1342            kernel_owner.metaregion_sound_preserved_one_slot_changed(
1343                regions_after_kroot_borrow,
1344                *regions,
1345                new_idx,
1346            );
1347        }
1348        let mut i: usize = KernelPtConfig::TOP_LEVEL_INDEX_RANGE().start;
1349        while i < KernelPtConfig::TOP_LEVEL_INDEX_RANGE().end
1350            invariant
1351                kernel_owner.inv(),
1352                kernel_owner.0.value.node is Some,
1353                regions.inv(),
1354                !Self::create_user_pt_panic_condition(kernel_owner.0.value.node.unwrap()),
1355                i <= KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end,
1356                KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= i,
1357                // Lock postcondition for the kernel root.
1358                *root_owner == kernel_owner.0.value.node.unwrap(),
1359                root_owner.relate_guard(root_node),
1360                // Tree-wide soundness of the kernel page table.
1361                kernel_owner.metaregion_sound(*regions),
1362                // The new node owner's invariants and guard relation.
1363                new_node_owner.inv(),
1364                new_node_owner.relate_guard(new_node),
1365                regions.slots.contains_key(new_node_owner.slot_index),
1366            decreases KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end - i,
1367        {
1368            proof {
1369                let kern_node = kernel_owner.0.value.node.unwrap();
1370                assert forall|j: usize|
1371                    #![trigger kern_node.children_perm.value()[j as int]]
1372                    KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= j
1373                        < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end implies {
1374                    let pte = kern_node.children_perm.value()[j as int];
1375                    pte.is_present() && !pte.is_last(kern_node.level)
1376                } by {
1377                    let pte = kern_node.children_perm.value()[j as int];
1378                    if !pte.is_present() || pte.is_last(kern_node.level) {
1379                        assert(Self::create_user_pt_panic_condition(kern_node));
1380                    }
1381                }
1382
1383                kernel_owner.pt_inv_unroll(i as int);
1384                let tracked child_opt: &Option<OwnerSubtree<KernelPtConfig>> =
1385                    kernel_owner.0.children.tracked_borrow(i as int);
1386                let tracked child_subtree: &OwnerSubtree<KernelPtConfig> =
1387                    child_opt.tracked_borrow();
1388                entry_owner = child_subtree.borrow_value();
1389                let kern_node = kernel_owner.0.value.node.unwrap();
1390                assert(entry_owner.match_pte(
1391                    kern_node.children_perm.value()[i as int],
1392                    entry_owner.parent_level,
1393                ));
1394                assert(entry_owner.parent_level == kern_node.level);
1395                assert(child_subtree.inv());
1396                assert(entry_owner.inv());
1397                assert(!entry_owner.in_scope);
1398                assert(root_owner.relate_guard(root_node));
1399
1400                kernel_owner.0.map_unroll_once(
1401                    kernel_owner.0.value.path,
1402                    PageTableOwner::<KernelPtConfig>::metaregion_sound_pred(*regions),
1403                    i as int,
1404                );
1405                assert(child_subtree.tree_predicate_map(
1406                    kernel_owner.0.value.path.push_tail(i as usize),
1407                    PageTableOwner::<KernelPtConfig>::metaregion_sound_pred(*regions),
1408                ));
1409                assert(entry_owner.metaregion_sound(*regions));
1410            }
1411
1412            #[verus_spec(with Tracked(root_owner), Tracked(entry_owner), Tracked(&*regions))]
1413            let root_entry = root_node.entry(i);
1414            let ghost pre_to_ref_regions: MetaRegionOwners = *regions;
1415            #[verus_spec(with Tracked(entry_owner), Tracked(root_owner), Tracked(regions))]
1416            let child = root_entry.to_ref();
1417
1418            proof {
1419                let kern_node = kernel_owner.0.value.node.unwrap();
1420                let pte = kern_node.children_perm.value()[i as int];
1421
1422                assert(pte.is_present() && !pte.is_last(kern_node.level)) by {
1423                    if !pte.is_present() || pte.is_last(kern_node.level) {
1424                        assert(KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= i
1425                            < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end);
1426                        assert(exists|j: usize|
1427                            KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= j
1428                                < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end && {
1429                                let p = #[trigger] kern_node.children_perm.value()[j as int];
1430                                ||| !p.is_present()
1431                                ||| p.is_last(kern_node.level)
1432                            });
1433                        assert(Self::create_user_pt_panic_condition(kern_node));
1434                    }
1435                }
1436                // entry_owner.match_pte(pte, parent_level) + (present && !is_last)
1437                // ⟹ entry_owner.is_node().
1438                assert(entry_owner.is_node());
1439                // ChildRef::invariants(entry_owner, regions) gives child.wf(entry_owner).
1440                // For the Frame and None variants, wf requires is_frame() or is_absent(),
1441                // contradicting is_node(). Hence child must be PageTable.
1442                assert(child is PageTable);
1443                // to_ref's borrow_paddr preserves slot_owners exactly and only
1444                // grows `slots` (existing keys preserved). Use the tree-wide
1445                // preservation lemma.
1446                kernel_owner.metaregion_sound_preserved_slot_owners_eq(
1447                    pre_to_ref_regions,
1448                    *regions,
1449                );
1450            }
1451            let pt = match child {
1452                ChildRef::PageTable(pt) => pt,
1453                _ => vstd::pervasive::unreached(),
1454            };
1455
1456            let ghost entry_node_slot_idx = entry_owner.node.tracked_borrow().slot_index;
1457            let tracked entry_node_slot_perm = regions.slots.tracked_borrow(entry_node_slot_idx);
1458            #[verus_spec(with Tracked(entry_node_slot_perm))]
1459            let pt_addr = pt.start_paddr();
1460            let pte = PageTableEntry::new_pt(pt_addr);
1461
1462            proof {
1463                assert(regions.slots.contains_key(new_node_owner.slot_index));
1464            }
1465            unsafe {
1466                #[verus_spec(with Tracked(&mut new_node_owner), Tracked(&*regions))]
1467                new_node.write_pte(i, pte)
1468            };
1469
1470            i = i + 1;
1471        }
1472
1473        PageTable::<UserPtConfig> { root: new_root }
1474    }/*
1475    /// Protect the given virtual address range in the kernel page table.
1476    ///
1477    /// This method flushes the TLB entries when doing protection.
1478    ///
1479    /// # Safety
1480    ///
1481    /// The caller must ensure that the protection operation does not affect
1482    /// the memory safety of the kernel.
1483    pub unsafe fn protect_flush_tlb(
1484        &self,
1485        vaddr: &Range<Vaddr>,
1486        mut op: impl FnMut(&mut PageProperty),
1487    ) -> Result<(), PageTableError> {
1488        let preempt_guard = disable_preempt();
1489        let mut cursor = CursorMut::new(self, &preempt_guard, vaddr)?;
1490        // SAFETY: The safety is upheld by the caller.
1491        while let Some(range) =
1492            unsafe { cursor.protect_next(vaddr.end - cursor.virt_addr(), &mut op) }
1493        {
1494            crate::arch::mm::tlb_flush_addr(range.start);
1495        }
1496        Ok(())
1497    }*/
1498
1499}
1500
1501#[verus_verify]
1502impl<C: PageTableConfig> PageTable<C> {
1503    pub uninterp spec fn root_paddr_spec(&self) -> Paddr;
1504
1505    /// Create a new empty page table.
1506    ///
1507    /// Useful for the IOMMU page tables only.
1508    #[verifier::external_body]
1509    pub fn empty() -> Self {
1510        unimplemented!()
1511    }
1512
1513    /// Create a new empty page table together with its tracked ownership.
1514    #[verifier::external_body]
1515    #[verus_spec(r =>
1516        with Tracked(owner): Tracked<&mut Option<PageTableOwner<C>>>,
1517            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1518            Tracked(guards): Tracked<&mut Guards<'rcu>>,
1519        requires
1520            old(regions).inv(),
1521        ensures
1522            final(owner)@ is Some,
1523            final(owner)@.unwrap().inv(),
1524            final(owner)@.unwrap().0.value.is_node(),
1525            final(owner)@.unwrap().0.value.node is Some,
1526            r.root.ptr.addr() == final(owner)@.unwrap().0.value.node.unwrap().meta_addr_self(),
1527            final(owner)@.unwrap().0.value.metaregion_sound(*final(regions)),
1528            final(regions).inv(),
1529            final(guards).unlocked(final(owner)@.unwrap().0.value.node.unwrap().meta_addr_self()),
1530            // Allocating a fresh node does not change the lock set, so any node
1531            // that was (un)locked before remains so.
1532            final(guards).guards == old(guards).guards,
1533            // The newly allocated slot was in the free pool before the call.
1534            old(regions).slots.contains_key(
1535                crate::specs::mm::frame::mapping::frame_to_index(
1536                    final(owner)@.unwrap().0.value.meta_slot_paddr().unwrap())),
1537            // After the alloc, the slot is removed from the free pool (now owned
1538            // by the new pt's NodeOwner).
1539            !final(regions).slots.contains_key(
1540                crate::specs::mm::frame::mapping::frame_to_index(
1541                    final(owner)@.unwrap().0.value.meta_slot_paddr().unwrap())),
1542            // Other slots and lock state are preserved.
1543            forall |i: usize| #![trigger final(regions).slot_owners[i]]
1544                i != crate::specs::mm::frame::mapping::frame_to_index(
1545                    final(owner)@.unwrap().0.value.meta_slot_paddr().unwrap())
1546                ==> final(regions).slot_owners[i] == old(regions).slot_owners[i],
1547            forall |a: usize| old(guards).lock_held(a) ==> final(guards).lock_held(a),
1548            forall |idx: usize| #![trigger final(regions).slot_owners[idx].paths_in_pt]
1549                final(regions).slot_owners[idx].paths_in_pt
1550                    == old(regions).slot_owners[idx].paths_in_pt,
1551            // Allocation preserves the soundness of the kernel page-table tree:
1552            // a fresh allocation cannot collide with any active node or frame entry
1553            // (the allocator returns a slot that wasn't in use). Stated as a
1554            // postcondition because deriving it requires a freshness axiom on the
1555            // underlying frame allocator.
1556            forall |kt: PageTableOwner<KernelPtConfig>|
1557                #![trigger kt.metaregion_sound(*final(regions))]
1558                kt.inv() && kt.metaregion_sound(*old(regions))
1559                ==> kt.metaregion_sound(*final(regions)),
1560            // Freshness: the new PT's slot index is not used (as a primary slot
1561            // or huge-frame sub-page slot) by any entry in any KernelPtConfig PT
1562            // tree that was sound before the alloc. Used to discharge the borrow
1563            // step that mutates `slot_owners[new_idx]`.
1564            forall |kt: PageTableOwner<KernelPtConfig>|
1565                #![trigger kt.metaregion_sound(*old(regions))]
1566                kt.inv() && kt.metaregion_sound(*old(regions)) ==>
1567                kt.0.tree_predicate_map(
1568                    kt.0.value.path,
1569                    |e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1570                     p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>|
1571                        e.meta_slot_paddr() is Some
1572                            ==> crate::specs::mm::frame::mapping::frame_to_index(
1573                                e.meta_slot_paddr().unwrap()) !=
1574                                crate::specs::mm::frame::mapping::frame_to_index(
1575                                    final(owner)@.unwrap().0.value.meta_slot_paddr().unwrap()),
1576                ),
1577            // Sub-page freshness: for any huge frame entry in any pre-existing
1578            // sound KernelPtConfig tree, the new PT's slot index isn't a sub-page
1579            // slot of the huge frame either. Same allocator-freshness rationale.
1580            forall |kt: PageTableOwner<KernelPtConfig>|
1581                #![trigger kt.metaregion_sound(*old(regions))]
1582                kt.inv() && kt.metaregion_sound(*old(regions)) ==>
1583                kt.0.tree_predicate_map(
1584                    kt.0.value.path,
1585                    |e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1586                     p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>|
1587                        e.is_frame() && e.parent_level > 1 ==> {
1588                            let pa = e.frame.unwrap().mapped_pa;
1589                            let nr_pages = crate::mm::page_table::cursor::page_size_spec(
1590                                e.parent_level) / crate::specs::arch::mm::PAGE_SIZE;
1591                            forall |j: usize| 0 < j < nr_pages ==> {
1592                                let sub_idx =
1593                                    #[trigger] crate::specs::mm::frame::mapping::frame_to_index(
1594                                        (pa + j * crate::specs::arch::mm::PAGE_SIZE) as usize);
1595                                sub_idx != crate::specs::mm::frame::mapping::frame_to_index(
1596                                    final(owner)@.unwrap().0.value.meta_slot_paddr().unwrap())
1597                            }
1598                        },
1599                ),
1600    )]
1601    pub fn empty_with_owner<'rcu>() -> Self {
1602        unimplemented!()
1603    }
1604
1605    #[verifier::external_body]
1606    pub(in crate::mm) unsafe fn first_activate_unchecked(&self) {
1607        unimplemented!()
1608        // SAFETY: The safety is upheld by the caller.
1609        //        unsafe { self.root.first_activate() };
1610
1611    }
1612
1613    /// The physical address of the root page table.
1614    ///
1615    /// Obtaining the physical address of the root page table is safe, however, using it or
1616    /// providing it to the hardware will be unsafe since the page table node may be dropped,
1617    /// resulting in UAF.
1618    #[verifier::external_body]
1619    #[verifier::when_used_as_spec(root_paddr_spec)]
1620    pub fn root_paddr(&self) -> (r: Paddr)
1621        returns
1622            self.root_paddr_spec(),
1623    {
1624        unimplemented!()
1625        //        self.root.start_paddr()
1626
1627    }
1628
1629    /// Query about the mapping of a single byte at the given virtual address.
1630    ///
1631    /// Note that this function may fail reflect an accurate result if there are
1632    /// cursors concurrently accessing the same virtual address range, just like what
1633    /// happens for the hardware MMU walk.
1634    #[cfg(ktest)]
1635    pub fn page_walk(&self, vaddr: Vaddr) -> Option<(Paddr, PageProperty)> {
1636        // SAFETY: The root node is a valid page table node so the address is valid.
1637        unsafe { page_walk::<C>(self.root_paddr(), vaddr) }
1638    }
1639
1640    /// Create a new cursor exclusively accessing the virtual address range for mapping.
1641    ///
1642    /// If another cursor is already accessing the range, the new cursor may wait until the
1643    /// previous cursor is dropped.
1644    #[verus_spec(r =>
1645        with Tracked(owner): Tracked<PageTableOwner<C>>,
1646            Ghost(root_guard): Ghost<PageTableGuard<'rcu, C>>,
1647            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1648            Tracked(guards): Tracked<&mut Guards<'rcu>>
1649        requires
1650            // Per-config tightening; see `Cursor::new`.
1651            va.end as int <= C::LOCKED_END_BOUND_spec(),
1652        ensures
1653            Cursor::<C, G>::cursor_new_success_conditions(va) ==> {
1654                &&& r is Ok
1655                &&& r.unwrap().0.0.invariants(*r.unwrap().1, *final(regions), *final(guards))
1656                &&& r.unwrap().1.in_locked_range()
1657                &&& r.unwrap().0.0.level < r.unwrap().0.0.guard_level
1658                &&& r.unwrap().0.0.guard_level == NR_LEVELS as PagingLevel
1659                &&& r.unwrap().0.0.va < r.unwrap().0.0.barrier_va.end
1660                &&& r.unwrap().0.0.va == va.start
1661                &&& r.unwrap().0.0.barrier_va == *va
1662            },
1663            !Cursor::<C, G>::cursor_new_success_conditions(va) ==> r is Err,
1664            forall |item: C::Item| #![trigger CursorMut::<'rcu, C, G>::item_not_mapped(item, *old(regions))]
1665                CursorMut::<'rcu, C, G>::item_not_mapped(item, *old(regions)) ==>
1666                CursorMut::<'rcu, C, G>::item_not_mapped(item, *final(regions)),
1667            // cursor_mut only locks page-table node slots; paths_in_pt is unchanged for all slots.
1668            forall |idx: usize| #![auto]
1669                (*final(regions)).slot_owners[idx].paths_in_pt == (*old(regions)).slot_owners[idx].paths_in_pt,
1670    )]
1671    #[verifier::external_body]
1672    pub fn cursor_mut<'rcu, G: InAtomicMode>(
1673        &'rcu self,
1674        guard: &'rcu G,
1675        va: &Range<Vaddr>,
1676    ) -> Result<(CursorMut<'rcu, C, G>, Tracked<CursorOwner<'rcu, C>>), PageTableError> {
1677        #[verus_spec(with Tracked(owner), Ghost(root_guard), Tracked(regions), Tracked(guards))]
1678        CursorMut::new(self, guard, va)
1679    }
1680
1681    /// Create a new cursor exclusively accessing the virtual address range for querying.
1682    ///
1683    /// If another cursor is already accessing the range, the new cursor may wait until the
1684    /// previous cursor is dropped. The modification to the mapping by the cursor may also
1685    /// block or be overridden by the mapping of another cursor.
1686    #[verus_spec(r =>
1687        with Tracked(owner): Tracked<PageTableOwner<C>>,
1688            Ghost(root_guard): Ghost<PageTableGuard<'rcu, C>>,
1689            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1690            Tracked(guards): Tracked<&mut Guards<'rcu>>
1691        requires
1692            owner.inv(),
1693            // Per-config tightening; see `Cursor::new`.
1694            va.end as int <= C::LOCKED_END_BOUND_spec(),
1695        ensures
1696            Cursor::<C, G>::cursor_new_success_conditions(va) ==> {
1697                &&& r is Ok
1698                &&& r.unwrap().0.invariants(*r.unwrap().1, *final(regions), *final(guards))
1699                &&& r.unwrap().1.in_locked_range()
1700                &&& r.unwrap().0.level == r.unwrap().0.guard_level
1701                &&& r.unwrap().0.va < r.unwrap().0.barrier_va.end
1702                &&& r.unwrap().0.va == va.start
1703                &&& r.unwrap().0.barrier_va == *va
1704                &&& r.unwrap().1@.as_page_table_owner() == owner
1705                &&& r.unwrap().1@.continuations[3].path() == owner.0.value.path
1706            },
1707            !Cursor::<C, G>::cursor_new_success_conditions(va) ==> r is Err,
1708            forall|idx: usize| #![trigger final(regions).slot_owners[idx].paths_in_pt]
1709                old(regions).slot_owners[idx].inner_perms.ref_count.value()
1710                    != crate::specs::mm::frame::meta_owners::REF_COUNT_UNUSED
1711                ==> final(regions).slot_owners[idx].paths_in_pt
1712                        == old(regions).slot_owners[idx].paths_in_pt,
1713            // Non-saturation preservation.
1714            (forall |i: usize| #![trigger old(regions).slot_owners[i]]
1715                old(regions).slot_owners.contains_key(i)
1716                && old(regions).slot_owners[i].inner_perms.ref_count.value()
1717                    != crate::specs::mm::frame::meta_owners::REF_COUNT_UNUSED
1718                ==> old(regions).slot_owners[i].inner_perms.ref_count.value() + 1
1719                    < crate::specs::mm::frame::meta_owners::REF_COUNT_MAX)
1720            ==>
1721            (forall |i: usize| #![trigger final(regions).slot_owners[i]]
1722                final(regions).slot_owners.contains_key(i)
1723                && final(regions).slot_owners[i].inner_perms.ref_count.value()
1724                    != crate::specs::mm::frame::meta_owners::REF_COUNT_UNUSED
1725                ==> final(regions).slot_owners[i].inner_perms.ref_count.value() + 1
1726                    < crate::specs::mm::frame::meta_owners::REF_COUNT_MAX),
1727            // Saturated-slot bridge (relayed from `Cursor::new`):
1728            // a slot at `>= REF_COUNT_MAX` before iff after, with the same
1729            // value. Used by `KVirtArea::query` to bridge inner-cursor
1730            // saturation back to the caller's snapshot.
1731            forall|idx: usize| #![trigger final(regions).slot_owners[idx].inner_perms.ref_count.value()]
1732                final(regions).slot_owners[idx].inner_perms.ref_count.value()
1733                    >= crate::specs::mm::frame::meta_owners::REF_COUNT_MAX
1734                ==> old(regions).slot_owners[idx].inner_perms.ref_count.value()
1735                        == final(regions).slot_owners[idx].inner_perms.ref_count.value(),
1736            forall|idx: usize| #![trigger old(regions).slot_owners[idx].inner_perms.ref_count.value()]
1737                old(regions).slot_owners[idx].inner_perms.ref_count.value()
1738                    >= crate::specs::mm::frame::meta_owners::REF_COUNT_MAX
1739                ==> final(regions).slot_owners[idx].inner_perms.ref_count.value()
1740                        == old(regions).slot_owners[idx].inner_perms.ref_count.value(),
1741    )]
1742    pub fn cursor<'rcu, G: InAtomicMode>(&'rcu self, guard: &'rcu G, va: &Range<Vaddr>) -> Result<
1743        (Cursor<'rcu, C, G>, Tracked<CursorOwner<'rcu, C>>),
1744        PageTableError,
1745    > {
1746        #[verus_spec(with Tracked(owner), Ghost(root_guard), Tracked(regions), Tracked(guards))]
1747        Cursor::new(self, guard, va)
1748    }/*
1749    /// Create a new reference to the same page table.
1750    /// The caller must ensure that the kernel page table is not copied.
1751    /// This is only useful for IOMMU page tables. Think twice before using it in other cases.
1752    pub unsafe fn shallow_copy(&self) -> Self {
1753        PageTable {
1754            root: self.root.clone(),
1755        }
1756    }
1757    */
1758
1759}
1760
1761/// A software emulation of the MMU address translation process.
1762///
1763/// This method returns the physical address of the given virtual address and
1764/// the page property if a valid mapping exists for the given virtual address.
1765///
1766/// # Safety
1767///
1768/// The caller must ensure that the `root_paddr` is a pointer to a valid root
1769/// page table node.
1770///
1771/// # Notes on the page table use-after-free problem
1772///
1773/// Neither the hardware MMU nor the software page walk method acquires the page
1774/// table locks while reading. They can enter a to-be-recycled page table node
1775/// and read the page table entries after the node is recycled and reused.
1776///
1777/// For the hardware MMU page walk, we mitigate this problem by dropping the page
1778/// table nodes only after the TLBs have been flushed on all the CPUs that
1779/// activate the page table.
1780///
1781/// For the software page walk, we only need to disable preemption at the beginning
1782/// since the page table nodes won't be recycled in the RCU critical section.
1783#[cfg(ktest)]
1784pub(super) unsafe fn page_walk<C: PageTableConfig>(root_paddr: Paddr, vaddr: Vaddr) -> Option<
1785    (Paddr, PageProperty),
1786> {
1787    use super::paddr_to_vaddr;
1788
1789    let _rcu_guard = disable_preempt();
1790
1791    let mut pt_addr = paddr_to_vaddr(root_paddr);
1792    #[verusfmt::skip]
1793    for cur_level in (1..= C::NR_LEVELS()).rev() {
1794        let offset = pte_index::<C>(vaddr, cur_level);
1795        // SAFETY:
1796        //  - The page table node is alive because (1) the root node is alive and
1797        //    (2) all child nodes cannot be recycled because we're in the RCU critical section.
1798        //  - The index is inside the bound, so the page table entry is valid.
1799        //  - All page table entries are aligned and accessed with atomic operations only.
1800        let cur_pte = unsafe { load_pte((pt_addr as *mut C::E).add(offset), Ordering::Acquire) };
1801
1802        if !cur_pte.is_present() {
1803            return None;
1804        }
1805        if cur_pte.is_last(cur_level) {
1806            debug_assert!(cur_level <= C::HIGHEST_TRANSLATION_LEVEL);
1807            return Some(
1808                (cur_pte.paddr() + (vaddr & (page_size::<C>(cur_level) - 1)), cur_pte.prop()),
1809            );
1810        }
1811        pt_addr = paddr_to_vaddr(cur_pte.paddr());
1812    }
1813
1814    unreachable!("All present PTEs at the level 1 must be last-level PTEs");
1815}
1816
1817/// Loads a page table entry with an atomic instruction.
1818///
1819/// # Verification Design
1820/// ## Preconditions
1821/// - The pointer must be a valid pointer to the array that represents the page table node.
1822/// - The array must be initialized at the target index.
1823/// ## Postconditions
1824/// - The value is loaded from the array at the given index.
1825/// ## Safety
1826/// - We require the caller to provide a permission token to ensure that this function is only called on a valid array
1827/// and the pointer is in bounds.
1828/// - Like an `AtomicUsize::load` in normal Rust, this function assumes that the value being loaded is an integer
1829/// (and therefore can be safely cloned). We model the PTE as an abstract type, but in all actual implementations it is an
1830/// integer. Importantly, it does not include any data that is unsafe to duplicate.
1831#[verifier::external_body]
1832#[verus_spec(
1833    with Tracked(perm): Tracked<&vstd_extra::array_ptr::PointsTo<E, NR_ENTRIES>>
1834    requires
1835        perm.is_init(ptr.index as int),
1836        perm.addr() == ptr.addr(),
1837        0 <= ptr.index < NR_ENTRIES,
1838    returns
1839        perm.value()[ptr.index as int],
1840)]
1841pub unsafe fn load_pte<E: PageTableEntryTrait>(
1842    ptr: vstd_extra::array_ptr::ArrayPtr<E, NR_ENTRIES>,
1843    ordering: Ordering,
1844) -> (pte: E) {
1845    unimplemented!()
1846}
1847
1848/// Stores a page table entry with an atomic instruction.
1849///
1850/// # Verification Design
1851/// We axiomatize this function as a store operation in the array that represents the page table node.
1852/// ## Preconditions
1853/// - The pointer must be a valid pointer to the array that represents the page table node.
1854/// - The array must be initialized so that the verifier knows that it remains initialized after the store.
1855/// ## Postconditions
1856/// - The new value is stored in the array at the given index.
1857/// ## Safety
1858/// - We require the caller to provide a permission token to ensure that this function is only called on a valid array
1859/// and the pointer is in bounds.
1860#[verifier::external_body]
1861#[verus_spec(
1862    with Tracked(perm): Tracked<&mut vstd_extra::array_ptr::PointsTo<E, NR_ENTRIES>>
1863    requires
1864        old(perm).addr() == ptr.addr(),
1865        0 <= ptr.index < NR_ENTRIES,
1866        old(perm).is_init_all(),
1867    ensures
1868        final(perm).value()[ptr.index as int] == new_val,
1869        final(perm).value() == old(perm).value().update(ptr.index as int, new_val),
1870        final(perm).addr() == old(perm).addr(),
1871        final(perm).is_init_all(),
1872)]
1873pub unsafe fn store_pte<E: PageTableEntryTrait>(
1874    ptr: vstd_extra::array_ptr::ArrayPtr<E, NR_ENTRIES>,
1875    new_val: E,
1876    ordering: Ordering,
1877);
1878
1879} // verus!