Skip to main content

ostd/mm/page_table/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2use vstd::arithmetic::power2::*;
3use vstd::prelude::*;
4use vstd::simple_pptr;
5use vstd::std_specs::clone::*;
6use vstd_extra::assert;
7use vstd_extra::panic::may_panic;
8use vstd_extra::prelude::*;
9
10use crate::specs::arch::*;
11use crate::specs::mm::page_table::{cursor::*, *};
12use crate::specs::task::InAtomicMode;
13
14use crate::mm::frame::meta::{REF_COUNT_MAX, REF_COUNT_UNIQUE, REF_COUNT_UNUSED};
15use crate::mm::kspace::kvirt_area::disable_preempt;
16use crate::specs::mm::frame::{
17    mapping::frame_to_index, meta_owners::MetaPerm, meta_region_owners::MetaRegionOwners,
18};
19
20use core::{
21    fmt::Debug,
22    intrinsics::transmute_unchecked,
23    ops::{Range, RangeInclusive},
24    sync::atomic::{AtomicUsize, Ordering},
25};
26
27use super::{
28    Paddr, PagingConstsTrait, PagingLevel, PodOnce, Vaddr,
29    kspace::KernelPtConfig,
30    nr_subpage_per_huge,
31    page_prop::{CachePolicy, PageProperty},
32    page_size,
33    vm_space::UserPtConfig,
34};
35
36use crate::{
37    //task::{atomic_mode::AsAtomicModeGuard, disable_preempt},
38    Pod,
39    arch::mm::{PageTableEntry, PagingConsts},
40};
41
42mod node;
43pub use node::*;
44mod cursor;
45
46pub(crate) use cursor::*;
47
48#[cfg(ktest)]
49mod test;
50
51//pub(crate) mod boot_pt;
52
53verus! {
54
55#[derive(Clone, Copy, PartialEq, Eq, Debug)]
56pub enum PageTableError {
57    /// The provided virtual address range is invalid.
58    InvalidVaddrRange(Vaddr, Vaddr),
59    /// The provided virtual address is invalid.
60    InvalidVaddr(Vaddr),
61    /// Using virtual address not aligned.
62    UnalignedVaddr,
63}
64
65pub trait RCClone: Sized {
66    spec fn clone_requires(self, perm: MetaRegionOwners) -> bool;
67
68    spec fn clone_ensures(
69        self,
70        old_perm: MetaRegionOwners,
71        new_perm: MetaRegionOwners,
72        res: Self,
73    ) -> bool;
74
75    fn clone(&self, Tracked(perm): Tracked<&mut MetaRegionOwners>) -> (res: Self)
76        requires
77            self.clone_requires(*old(perm)),
78        ensures
79    // RCClone::clone` doesn't mint/redeem segment obligations.
80    // The per-frame `frame_obligations` effect is left to each impl's `clone_ensures`
81
82            res == *self,
83            self.clone_ensures(*old(perm), *final(perm), res),
84            final(perm).inv(),
85            final(perm).slots == old(perm).slots,
86            final(perm).slot_owners.dom() == old(perm).slot_owners.dom(),
87    ;
88}
89
90/// The configurations of a page table.
91///
92/// It abstracts away both the usage and the architecture specifics from the
93/// general page table implementation. For examples:
94///  - the managed virtual address range;
95///  - the trackedness of physical mappings;
96///  - the PTE layout;
97///  - the number of page table levels, etc.
98///
99/// # Safety
100///
101/// The implementor must ensure that the `item_into_raw` and `item_from_raw`
102/// are implemented correctly so that:
103///  - `item_into_raw` consumes the ownership of the item;
104///  - if the provided raw form matches the item that was consumed by
105///    `item_into_raw`, `item_from_raw` restores the exact item that was
106///    consumed by `item_into_raw`.
107pub unsafe trait PageTableConfig: Clone + Debug + Send + Sync + 'static {
108    spec fn TOP_LEVEL_INDEX_RANGE_spec() -> Range<usize>;
109
110    /// The index range at the top level (`C::NR_LEVELS()`) page table.
111    ///
112    /// When configured with this value, the [`PageTable`] instance will only
113    /// be allowed to manage the virtual address range that is covered by
114    /// this range. The range can be smaller than the actual allowed range
115    /// specified by the hardware MMU (limited by `C::ADDRESS_WIDTH`).
116    #[verifier::when_used_as_spec(TOP_LEVEL_INDEX_RANGE_spec)]
117    fn TOP_LEVEL_INDEX_RANGE() -> Range<usize>
118        returns
119            Self::TOP_LEVEL_INDEX_RANGE(),
120    ;
121
122    /// VERIFICATION only: The leading bits `[48, 64)` of every virtual address managed by this
123    /// config.
124    ///
125    /// Concretely, a mapping `m` in this page table has
126    /// `m.va_range.start / 2^48 == LEADING_BITS_spec()`. For non-sign-extended
127    /// configurations (e.g. `UserPtConfig`) this is `0`. For x86-64 kernel
128    /// PT it is `0xffff` (sign-extended high half). The type is wide enough
129    /// to carry arbitrary bit patterns, so the model can accommodate future
130    /// configurations that place their managed range at a non-canonical
131    /// fixed offset.
132    ///
133    /// Combined with `TOP_LEVEL_INDEX_RANGE_spec`, this fully determines
134    /// the managed VA range, computed as
135    /// [`vaddr_range_bounds_spec::<Self>`]. Callers that previously used
136    /// `VADDR_RANGE_spec()` should use `vaddr_range_bounds_spec::<C>()`
137    /// directly — the inclusive `(start, end_inclusive)` form avoids the
138    /// `end == usize::MAX + 1` overflow that plagues `Range<Vaddr>` for
139    /// sign-extended kernel configurations.
140    open spec fn LEADING_BITS_spec() -> usize {
141        0
142    }
143
144    open spec fn TOP_LEVEL_CAN_UNMAP_spec() -> bool {
145        true
146    }
147
148    /// If we can remove the top-level page table entries.
149    ///
150    /// This is for the kernel page table, whose second-top-level page
151    /// tables need `'static` lifetime to be shared with user page tables.
152    /// Other page tables do not need to set this to `false`.
153    #[verifier::when_used_as_spec(TOP_LEVEL_CAN_UNMAP_spec)]
154    fn TOP_LEVEL_CAN_UNMAP() -> bool
155        returns
156            Self::TOP_LEVEL_CAN_UNMAP(),
157    ;
158
159    /// VERIFICATION only: Upper bound on `locked_range().end` for cursors of this config.
160    ///
161    /// May be tighter than the structural `vaddr_range_bounds_spec().1 + 1`
162    /// when the actual sources of cursor ranges (e.g. the kvirt allocator
163    /// for `KernelPtConfig`) draw from a sub-window of the configured VA
164    /// range. `KernelPtConfig` overrides this to `FRAME_METADATA_BASE_VADDR`,
165    /// which the `kvirt_alloc_range_bounds` axiom enforces. This bound is
166    /// what allows the cursor's `move_forward` proof to discharge
167    /// `prefix.idx[NR_LEVELS - 1] + 1 < NR_ENTRIES` at the top-level
168    /// boundary — the structural bound only gives `<= NR_ENTRIES` for
169    /// configurations whose `TOP_LEVEL_INDEX_RANGE.end == NR_ENTRIES`.
170    ///
171    /// Default: `usize::MAX + 1` (no tightening over the structural bound).
172    open spec fn LOCKED_END_BOUND_spec() -> int {
173        0x1_0000_0000_0000_0000int
174    }
175
176    /// The type of the page table entry.
177    type E: PageTableEntryTrait;
178
179    /// The paging constants.
180    type C: PagingConstsTrait;
181
182    /// The item that can be mapped into the virtual memory space using the
183    /// page table.
184    ///
185    /// Usually, this item is a [`crate::mm::Frame`], which we call a "tracked"
186    /// frame. The page table can also do "untracked" mappings that only maps
187    /// to certain physical addresses without tracking the ownership of the
188    /// mapped physical frame. The user of the page table APIs can choose by
189    /// defining this type and the corresponding methods [`item_into_raw`] and
190    /// [`item_from_raw`].
191    ///
192    /// [`item_from_raw`]: PageTableConfig::item_from_raw
193    /// [`item_into_raw`]: PageTableConfig::item_into_raw
194    type Item: RCClone;
195
196    /// Consumes the item and returns the physical address, the paging level,
197    /// and the page property.
198    ///
199    /// The ownership of the item will be consumed, i.e., the item will be
200    /// forgotten after this function is called.
201    spec fn item_into_raw_spec(item: Self::Item) -> (Paddr, PagingLevel, PageProperty);
202
203    #[verifier::when_used_as_spec(item_into_raw_spec)]
204    fn item_into_raw(item: Self::Item) -> (res: (Paddr, PagingLevel, PageProperty))
205        ensures
206            1 <= res.1 <= NR_LEVELS,
207            res == Self::item_into_raw_spec(item),
208            res.0 % PAGE_SIZE == 0,
209            res.0 < MAX_PADDR,
210            res.0 % page_size(res.1) == 0,
211            res.0 + page_size(res.1) <= MAX_PADDR,
212    ;
213
214    /// Restores the item from the physical address and the paging level.
215    ///
216    /// There could be transformations after [`PageTableConfig::item_into_raw`]
217    /// and before [`PageTableConfig::item_from_raw`], which include:
218    ///  - splitting and coalescing the items, for example, splitting one item
219    ///    into 512 `level - 1` items with and contiguous physical addresses;
220    ///  - protecting the items, for example, changing the page property.
221    ///
222    /// Splitting and coalescing maintains ownership rules, i.e., if one
223    /// physical address is within the range of one item, after splitting/
224    /// coalescing, there should be exactly one item that contains the address.
225    ///
226    /// # Safety
227    ///
228    /// The caller must ensure that:
229    ///  - the physical address and the paging level represent a page table
230    ///    item or part of it (as described above);
231    ///  - either the ownership of the item is properly transferred to the
232    ///    return value, or the return value is wrapped in a
233    ///    [`core::mem::ManuallyDrop`] that won't outlive the original item.
234    ///
235    /// A concrete trait implementation may require the caller to ensure that
236    ///  - the [`super::PageFlags::AVAIL1`] flag is the same as that returned
237    ///    from [`PageTableConfig::item_into_raw`].
238    spec fn item_from_raw_spec(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self::Item;
239
240    #[verifier::when_used_as_spec(item_from_raw_spec)]
241    unsafe fn item_from_raw(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self::Item
242        returns
243            Self::item_from_raw_spec(paddr, level, prop),
244    ;
245
246    /// Whether cloning this item bumps a slot's refcount. For ref-counted items
247    /// (e.g. `MappedItem::Tracked`), `true`; for items where clone is a no-op
248    /// (e.g. `MappedItem::Untracked` for kernel MMIO frames), `false`.
249    spec fn tracked(item: Self::Item) -> bool;
250
251    /// Per-config predicate that captures the structural well-formedness an item
252    /// reconstructed via `item_from_raw_spec` must satisfy. Typically: the
253    /// `Frame::inv()` of the tracked-frame component (if any).
254    ///
255    /// `KernelPtConfig` defines this as `match item { Tracked(f, _) => f.inv(),
256    /// Untracked => true }`. `UserPtConfig` defines it as `item.frame.inv()`.
257    spec fn item_well_formed(item: Self::Item) -> bool;
258
259    /// The item produced by `item_from_raw_spec` is structurally
260    /// well-formed (see `item_well_formed`).
261    proof fn item_from_raw_well_formed(pa: Paddr, level: PagingLevel, prop: PageProperty)
262        requires
263            has_safe_slot(pa),
264        ensures
265            Self::item_well_formed(Self::item_from_raw_spec(pa, level, prop)),
266    ;
267
268    /// Proves that `clone_ensures` for `Self::Item` implies concrete per-field
269    /// properties on `MetaRegionOwners`. Each `PageTableConfig` implementor proves
270    /// this by unfolding its `MappedItem::clone_ensures` → `Frame::clone_ensures`.
271    /// Proves that after `clone`, the slot at `frame_to_index(pa)` has the expected
272    /// per-field properties. Implementors unfold their `MappedItem::clone_ensures` to
273    /// `Frame::clone_ensures` and connect `pa` to the frame's internal pointer address.
274    proof fn clone_ensures_concrete(
275        item: Self::Item,
276        pa: Paddr,
277        old_regions: MetaRegionOwners,
278        new_regions: MetaRegionOwners,
279        res: Self::Item,
280    )
281        requires
282            item.clone_ensures(old_regions, new_regions, res),
283            Self::item_into_raw_spec(item).0 == pa,
284            res == item,
285            new_regions.inv(),
286            new_regions.slots =~= old_regions.slots,
287            new_regions.slot_owners.dom() =~= old_regions.slot_owners.dom(),
288        ensures
289    // Other slots always unchanged.
290
291            forall|i: usize|
292                i != frame_to_index(pa) ==> (#[trigger] new_regions.slot_owners[i]
293                    == old_regions.slot_owners[i]),
294            // The frame's slot: bumped if the item is ref-counted, otherwise unchanged.
295            Self::tracked(item) ==> {
296                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.value()
297                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.value() + 1
298                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.id()
299                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.ref_count.id()
300                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.storage
301                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.storage
302                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.vtable_ptr
303                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.vtable_ptr
304                &&& new_regions.slot_owners[frame_to_index(pa)].inner_perms.in_list
305                    == old_regions.slot_owners[frame_to_index(pa)].inner_perms.in_list
306                &&& new_regions.slot_owners[frame_to_index(pa)].paths_in_pt
307                    == old_regions.slot_owners[frame_to_index(pa)].paths_in_pt
308                &&& new_regions.slot_owners[frame_to_index(pa)].slot_vaddr
309                    == old_regions.slot_owners[frame_to_index(pa)].slot_vaddr
310                &&& new_regions.slot_owners[frame_to_index(pa)].usage
311                    == old_regions.slot_owners[frame_to_index(pa)].usage
312            },
313            !Self::tracked(item) ==> new_regions.slot_owners[frame_to_index(pa)]
314                == old_regions.slot_owners[frame_to_index(pa)],
315            // Canonical model: a tracked clone MINTS one per-frame obligation
316            // at the slot (`Frame::clone`); an untracked clone is net-zero.
317            Self::tracked(item) ==> new_regions.frame_obligations
318                == old_regions.frame_obligations.insert(frame_to_index(pa)),
319            !Self::tracked(item) ==> new_regions.frame_obligations == old_regions.frame_obligations,
320    ;
321
322    proof fn item_roundtrip(item: Self::Item, paddr: Paddr, level: PagingLevel, prop: PageProperty)
323        ensures
324            Self::item_into_raw_spec(item) == (paddr, level, prop) <==> Self::item_from_raw_spec(
325                paddr,
326                level,
327                prop,
328            ) == item,
329    ;
330
331    /// Proves `item.clone_requires(regions)` from the concrete frame-slot facts
332    /// delivered by `metaregion_sound` plus the non-saturation bound propagated
333    /// from `Cursor::query`. Implementors unfold their `MappedItem::clone_requires`
334    /// to `Frame::clone_requires` and connect `pa` to the frame's internal pointer
335    /// address.
336    proof fn clone_requires_concrete(
337        item: Self::Item,
338        pa: Paddr,
339        level: PagingLevel,
340        prop: PageProperty,
341        regions: MetaRegionOwners,
342    )
343        requires
344            regions.inv(),
345            Self::item_from_raw_spec(pa, level, prop) == item,
346            has_safe_slot(pa),
347            regions.slots.contains_key(frame_to_index(pa)),
348            regions.slot_owners.contains_key(frame_to_index(pa)),
349            Self::tracked(item) ==> regions.slot_owners[frame_to_index(
350                pa,
351            )].inner_perms.ref_count.value() > 0,
352            // `rc != UNUSED` is needed only for tracked frames (untracked clone is a no-op).
353            Self::tracked(item) ==> regions.slot_owners[frame_to_index(
354                pa,
355            )].inner_perms.ref_count.value() != REF_COUNT_UNUSED,
356            // Saturation aborts (Arc-style) via `inc_ref_count`'s diverging panic.
357            Self::tracked(item) ==> (regions.slot_owners[frame_to_index(
358                pa,
359            )].inner_perms.ref_count.value() < REF_COUNT_MAX || may_panic()),
360        ensures
361            item.clone_requires(regions),
362    ;
363
364    /// The requirements of the page table configuration constants so that the memory management system can work correctly.
365    ///
366    /// NOTE: The postcondition is designed to be minimal, to actually be used in proofs, call `lemma_page_table_config_constant_properties`
367    /// instead to get all the properties that are derived from the requirements.
368    ///
369    /// FIXME: General architecture support. Move properties only relevant to paging constants to `PagingConstsTrait`.
370    proof fn lemma_page_table_config_constant_requirements()
371        ensures
372            Self::TOP_LEVEL_INDEX_RANGE().start < Self::TOP_LEVEL_INDEX_RANGE().end,
373            Self::TOP_LEVEL_INDEX_RANGE().end <= pow2(
374                (Self::C::ADDRESS_WIDTH() - pte_index_bit_offset::<Self::C>(
375                    Self::C::NR_LEVELS(),
376                )) as nat,
377            ),
378            Self::TOP_LEVEL_INDEX_RANGE().end * pow2(
379                pte_index_bit_offset::<Self::C>(Self::C::NR_LEVELS()) as nat,
380            ) <= usize::MAX,
381            Self::LEADING_BITS_spec() != 0usize ==> (Self::C::VA_SIGN_EXT() && ((
382            Self::TOP_LEVEL_INDEX_RANGE().start * pow2(
383                pte_index_bit_offset::<Self::C>(Self::C::NR_LEVELS()) as nat,
384            )) / (pow2((Self::C::ADDRESS_WIDTH() - 1) as nat) as int)) % 2 == 1),
385            (Self::C::VA_SIGN_EXT() && (((Self::TOP_LEVEL_INDEX_RANGE().start * pow2(
386                pte_index_bit_offset::<Self::C>(Self::C::NR_LEVELS()) as nat,
387            )) / (pow2((Self::C::ADDRESS_WIDTH() - 1) as nat) as int)) % 2 == 1)) ==> {
388                &&& 48 <= Self::C::ADDRESS_WIDTH()
389                &&& Self::LEADING_BITS_spec() * 0x1_0000_0000_0000int == 0x1_0000_0000_0000_0000int
390                    - pow2(Self::C::ADDRESS_WIDTH() as nat)
391            },
392            Self::LEADING_BITS_spec() < 0x1_0000_usize,
393            pow2(
394                (Self::C::ADDRESS_WIDTH() - pte_index_bit_offset::<Self::C>(
395                    Self::C::NR_LEVELS(),
396                )) as nat,
397            ) == NR_ENTRIES,
398    ;
399
400    // The derived properties of the page table config constants.
401    ///
402    /// NOTE: Implementations of `PageTableConfig` do not need to implement this lemma, the proof is automatically inherited from the default implementation.
403    proof fn lemma_page_table_config_constant_properties()
404        ensures
405    // Derived properties.
406
407            Self::TOP_LEVEL_INDEX_RANGE().end <= NR_ENTRIES,
408            // Copied from the postcondition of `lemma_page_table_config_constant_requirements`
409            // so that we only need to call this lemma in proofs.
410            Self::TOP_LEVEL_INDEX_RANGE().start < Self::TOP_LEVEL_INDEX_RANGE().end,
411            Self::TOP_LEVEL_INDEX_RANGE().end <= pow2(
412                (Self::C::ADDRESS_WIDTH() - pte_index_bit_offset::<Self::C>(
413                    Self::C::NR_LEVELS(),
414                )) as nat,
415            ),
416            Self::TOP_LEVEL_INDEX_RANGE().end * pow2(
417                pte_index_bit_offset::<Self::C>(Self::C::NR_LEVELS()) as nat,
418            ) <= usize::MAX,
419            Self::LEADING_BITS_spec() != 0usize ==> (Self::C::VA_SIGN_EXT() && ((
420            Self::TOP_LEVEL_INDEX_RANGE().start * pow2(
421                pte_index_bit_offset::<Self::C>(Self::C::NR_LEVELS()) as nat,
422            )) / (pow2((Self::C::ADDRESS_WIDTH() - 1) as nat) as int)) % 2 == 1),
423            (Self::C::VA_SIGN_EXT() && (((Self::TOP_LEVEL_INDEX_RANGE().start * pow2(
424                pte_index_bit_offset::<Self::C>(Self::C::NR_LEVELS()) as nat,
425            )) / (pow2((Self::C::ADDRESS_WIDTH() - 1) as nat) as int)) % 2 == 1)) ==> {
426                &&& 48 <= Self::C::ADDRESS_WIDTH()
427                &&& Self::LEADING_BITS_spec() * 0x1_0000_0000_0000int == 0x1_0000_0000_0000_0000int
428                    - pow2(Self::C::ADDRESS_WIDTH() as nat)
429            },
430            Self::LEADING_BITS_spec() < 0x1_0000_usize,
431            pow2(
432                (Self::C::ADDRESS_WIDTH() - pte_index_bit_offset::<Self::C>(
433                    Self::C::NR_LEVELS(),
434                )) as nat,
435            ) == NR_ENTRIES,
436    {
437        Self::C::lemma_paging_consts_properties();
438        Self::lemma_page_table_config_constant_requirements();
439    }
440
441    /// Layout identity: the PTE type's Rust `size_of` matches the config's
442    /// `PTE_SIZE_spec`. Concrete impls satisfy this via their `global
443    /// layout` declaration. Exposed for generic code that calls
444    /// `core::mem::size_of::<Self::E>()`.
445    proof fn lemma_pte_size_eq_size_of()
446        ensures
447            core::mem::size_of::<Self::E>() == Self::C::PTE_SIZE_spec(),
448    ;
449
450    /// A full PT-node's worth of PTEs fills exactly one base page.
451    proof fn lemma_pte_walk_fills_page()
452        ensures
453            NR_ENTRIES * core::mem::size_of::<Self::E>() == PAGE_SIZE,
454    ;
455
456    /// `align_of::<E>()` divides `size_of::<E>()`. True for any sized Rust
457    /// type (the alignment divides the size by the layout rules), but
458    /// Verus's `size_of`/`align_of` are uninterpreted so we expose it as
459    /// a proof obligation. Used by PT-node `on_drop` to prove cursor alignment is
460    /// preserved across `read_once` iterations.
461    proof fn lemma_pte_align_divides_size()
462        ensures
463            core::mem::size_of::<Self::E>() % core::mem::align_of::<Self::E>() == 0,
464            core::mem::align_of::<Self::E>() > 0,
465    ;
466}
467
468// Implement it so that we can comfortably use low level functions
469// like `page_size::<C>` without typing `C::C` everywhere.
470impl<C: PageTableConfig> PagingConstsTrait for C {
471    open spec fn BASE_PAGE_SIZE_spec() -> usize {
472        C::C::BASE_PAGE_SIZE_spec()
473    }
474
475    fn BASE_PAGE_SIZE() -> usize {
476        C::C::BASE_PAGE_SIZE()
477    }
478
479    open spec fn NR_LEVELS_spec() -> PagingLevel {
480        C::C::NR_LEVELS_spec()
481    }
482
483    fn NR_LEVELS() -> PagingLevel {
484        C::C::NR_LEVELS()
485    }
486
487    open spec fn HIGHEST_TRANSLATION_LEVEL_spec() -> PagingLevel {
488        C::C::HIGHEST_TRANSLATION_LEVEL_spec()
489    }
490
491    fn HIGHEST_TRANSLATION_LEVEL() -> PagingLevel {
492        C::C::HIGHEST_TRANSLATION_LEVEL()
493    }
494
495    open spec fn PTE_SIZE_spec() -> usize {
496        C::C::PTE_SIZE_spec()
497    }
498
499    fn PTE_SIZE() -> usize {
500        C::C::PTE_SIZE()
501    }
502
503    open spec fn ADDRESS_WIDTH_spec() -> usize {
504        C::C::ADDRESS_WIDTH_spec()
505    }
506
507    fn ADDRESS_WIDTH() -> usize {
508        C::C::ADDRESS_WIDTH()
509    }
510
511    open spec fn VA_SIGN_EXT_spec() -> bool {
512        C::C::VA_SIGN_EXT_spec()
513    }
514
515    fn VA_SIGN_EXT() -> bool {
516        C::C::VA_SIGN_EXT()
517    }
518
519    proof fn lemma_paging_consts_requirements() {
520        C::C::lemma_paging_consts_requirements();
521    }
522}
523
524/// A handle to a page table.
525/// A page table can track the lifetime of the mapped physical pages.
526pub struct PageTable<C: PageTableConfig> {
527    pub root: PageTableNode<C>,
528}
529
530#[verifier::inline]
531pub open spec fn nr_pte_index_bits_spec<C: PagingConstsTrait>() -> usize {
532    nr_subpage_per_huge::<C>().ilog2() as usize
533}
534
535/// The number of virtual address bits used to index a PTE in a page.
536#[inline(always)]
537#[verifier::when_used_as_spec(nr_pte_index_bits_spec)]
538pub fn nr_pte_index_bits<C: PagingConstsTrait>() -> usize
539    returns
540        nr_pte_index_bits_spec::<C>(),
541{
542    proof {
543        C::lemma_paging_consts_properties();
544    }
545    nr_subpage_per_huge::<C>().ilog2() as usize
546}
547
548pub proof fn lemma_nr_pte_index_bits_bounded<C: PagingConstsTrait>()
549    ensures
550        0 <= nr_pte_index_bits::<C>() <= C::BASE_PAGE_SIZE().ilog2(),
551{
552    C::lemma_paging_consts_properties();
553    let nr = nr_subpage_per_huge::<C>();
554    assert(1 <= nr <= C::BASE_PAGE_SIZE());
555    let bits = nr.ilog2();
556    assert(0 <= bits) by {
557        lemma_usize_ilog2_ordered(1, nr);
558    }
559    assert(bits <= C::BASE_PAGE_SIZE().ilog2()) by {
560        lemma_usize_ilog2_ordered(nr, C::BASE_PAGE_SIZE());
561    }
562}
563
564/// Splits the address range into largest page table items.
565///
566/// Each of the returned items is a tuple of the physical address and the
567/// paging level. It is helpful when you want to map a physical address range
568/// into the provided virtual address.
569///
570/// For example, on x86-64, `C: PageTableConfig` may specify level 1 page as
571/// 4KiB, level 2 page as 2MiB, and level 3 page as 1GiB. Suppose that the
572/// supplied physical address range is from `0x3fdff000` to `0x80002000`,
573/// and the virtual address is also `0x3fdff000`, the following 5 items will
574/// be returned:
575///
576/// ```text
577/// 0x3fdff000                                                 0x80002000
578/// start                                                             end
579///   |----|----------------|--------------------------------|----|----|
580///    4KiB      2MiB                       1GiB              4KiB 4KiB
581/// ```
582///
583/// # Panics
584///
585/// Panics if:
586///  - any of `va`, `pa`, or `len` is not aligned to the base page size;
587///  - the range `va..(va + len)` is not valid for the page table.
588#[verifier::external_body]
589pub fn largest_pages<C: PageTableConfig>(
590    mut va: Vaddr,
591    mut pa: Paddr,
592    mut len: usize,
593) -> impl Iterator<Item = (Paddr, PagingLevel)> {
594    assert_eq!(va % C::BASE_PAGE_SIZE(), 0);
595    assert_eq!(pa % C::BASE_PAGE_SIZE(), 0);
596    assert_eq!(len % C::BASE_PAGE_SIZE(), 0);
597    assert!(is_valid_range::<C>(&(va..(va + len))));
598
599    core::iter::from_fn(
600        move ||
601            {
602                if len == 0 {
603                    return None;
604                }
605                let mut level = C::HIGHEST_TRANSLATION_LEVEL();
606                while page_size(level) > len || va % page_size(level) != 0 || pa % page_size(level)
607                    != 0 {
608                    level -= 1;
609                }
610
611                let item_start = pa;
612                va += page_size(level);
613                pa += page_size(level);
614                len -= page_size(level);
615
616                Some((item_start, level))
617            },
618    )
619}
620
621/// Gets the top-level index width, in bits, for the page table.
622fn top_level_index_width<C: PageTableConfig>() -> (ret: usize)
623    ensures
624        ret == C::ADDRESS_WIDTH() - pte_index_bit_offset::<C>(C::NR_LEVELS()),
625{
626    proof {
627        C::lemma_paging_consts_properties();
628        C::lemma_page_table_config_constant_properties();
629    }
630
631    C::ADDRESS_WIDTH() - pte_index_bit_offset::<C>(C::NR_LEVELS())
632}
633
634/// Concrete positional start of the VA range: `idx_range.start * 2^offset`.
635fn pt_va_range_start<C: PageTableConfig>() -> (ret: Vaddr)
636    ensures
637        ret == C::TOP_LEVEL_INDEX_RANGE_spec().start * pow2(
638            pte_index_bit_offset::<C>(C::NR_LEVELS()) as nat,
639        ),
640{
641    let idx_start = C::TOP_LEVEL_INDEX_RANGE().start;
642    proof {
643        C::lemma_paging_consts_properties();
644    }
645    let offset = pte_index_bit_offset::<C>(C::NR_LEVELS());
646
647    proof {
648        crate::specs::mm::page_table::vaddr_range_proofs::lemma_pt_va_range_start_shift_facts::<C>(
649            idx_start,
650            offset,
651        );
652        vstd::bits::lemma_usize_shl_is_mul(idx_start, offset);
653    }
654
655    let ret = idx_start << offset;
656
657    ret
658}
659
660/// Concrete positional end of the VA range (inclusive):
661/// `(idx_range.end * 2^offset) - 1`, stated modulo `2^64` to match
662/// the inclusive-end spec. The verified configs prove the pre-subtraction
663/// product fits in `usize`, so the executable path can use an ordinary
664/// left shift followed by `wrapping_sub(1)`.
665fn pt_va_range_end<C: PageTableConfig>() -> (ret: Vaddr)
666    ensures
667        ret == (C::TOP_LEVEL_INDEX_RANGE_spec().end * pow2(
668            pte_index_bit_offset::<C>(C::NR_LEVELS()) as nat,
669        ) - 1) % 0x1_0000_0000_0000_0000int,
670{
671    let idx_end = C::TOP_LEVEL_INDEX_RANGE().end;
672    proof {
673        C::lemma_paging_consts_properties();
674    }
675    let offset = pte_index_bit_offset::<C>(C::NR_LEVELS());
676
677    proof {
678        crate::specs::mm::page_table::vaddr_range_proofs::lemma_pt_va_range_end_shift_facts::<C>(
679            idx_end,
680            offset,
681        );
682        vstd::bits::lemma_usize_shl_is_mul(idx_end, offset);
683    }
684
685    let shifted = idx_end << offset;
686    let ret = shifted.wrapping_sub(1);
687
688    proof {
689        assert(shifted == idx_end * pow2(offset as nat));
690        crate::specs::mm::page_table::vaddr_range_proofs::lemma_pt_va_range_end_wrapping_sub::<C>(
691            idx_end,
692            offset,
693            shifted,
694            ret,
695        );
696    }
697    ret
698}
699
700/// Test whether bit `ADDRESS_WIDTH - 1` of `va` is set.
701fn sign_bit_of_va<C: PageTableConfig>(va: Vaddr) -> (ret: bool)
702    ensures
703        ret == ((va as int / pow2((C::ADDRESS_WIDTH() - 1) as nat) as int) % 2 == 1),
704{
705    let address_width = C::ADDRESS_WIDTH();
706    proof {
707        C::lemma_paging_consts_properties();
708        C::lemma_page_table_config_constant_properties();
709    }
710
711    let shift = address_width - 1;
712    let shifted = va >> shift;
713    let bit = shifted & 1;
714
715    proof {
716        crate::specs::mm::page_table::vaddr_range_proofs::lemma_sign_bit_facts::<C>(
717            va,
718            address_width,
719            shift,
720            shifted,
721            bit,
722        );
723    }
724    bit != 0
725}
726
727/// Spec for the managed virtual address range (exclusive end).
728/// For configs without VA_SIGN_EXT (e.g. UserPtConfig) or when the base range has sign bit 0.
729/// Configs with sign extension (e.g. KernelPtConfig) use
730/// `vaddr_range_bounds_spec` for their canonical high-half bounds.
731#[verifier::inline]
732pub open spec fn vaddr_range_spec<C: PageTableConfig>() -> Range<Vaddr> {
733    let idx_range = C::TOP_LEVEL_INDEX_RANGE_spec();
734    let offset = pte_index_bit_offset::<C::C>(C::NR_LEVELS()) as nat;
735    let start = idx_range.start * pow2(offset);
736    let end_inclusive = idx_range.end * pow2(offset) - 1;
737    (start as Vaddr)..((end_inclusive + 1) as Vaddr)
738}
739
740/// Spec for whether a range is within the page table's managed address space.
741#[verifier::inline]
742pub open spec fn is_valid_range_spec<C: PageTableConfig>(r: &Range<Vaddr>) -> bool {
743    let va_range = vaddr_range_bounds_spec::<C>();
744    (r.start == 0 && r.end == 0) || (va_range.0 <= r.start && r.end > 0 && r.end - 1 <= va_range.1)
745}
746
747/// Gets the inclusive bounds of the managed virtual-address range.
748fn vaddr_range_bounds<C: PageTableConfig>() -> (ret: (Vaddr, Vaddr))
749    ensures
750        ret.0 == vaddr_range_bounds_spec::<C>().0,
751        ret.1 == vaddr_range_bounds_spec::<C>().1,
752{
753    let mut start = pt_va_range_start::<C>();
754    let mut end = pt_va_range_end::<C>();
755
756    proof {
757        lemma_vaddr_range_bounds_spec_unfold::<C>();
758        C::lemma_paging_consts_properties();
759        C::lemma_page_table_config_constant_properties();
760        crate::specs::mm::page_table::vaddr_range_proofs::lemma_idx_times_pow2_bound::<C>(
761            start,
762            end,
763        );
764    }
765    let pt_start = pt_va_range_start::<C>();
766    let va_sign_ext = C::VA_SIGN_EXT();
767    let sign_bit_set = sign_bit_of_va::<C>(pt_start);
768    if va_sign_ext && sign_bit_set {
769        proof {
770            let off = pte_index_bit_offset::<C>(C::NR_LEVELS()) as nat;
771            let aw_m1 = (C::ADDRESS_WIDTH() - 1) as nat;
772            let i_start = C::TOP_LEVEL_INDEX_RANGE_spec().start as int;
773            let p_off = pow2(off) as int;
774            let p_aw_m1 = pow2(aw_m1) as int;
775        }
776        start = apply_sign_ext::<C>(start);
777        end = apply_sign_ext::<C>(end);
778    } else {
779        proof {
780            // The if-condition was false, so either va_sign_ext is false
781            // or sign_bit_set is false. The contrapositive of the
782            // leading-bits requirement gives LEADING_BITS == 0.
783            assert(!va_sign_ext || !sign_bit_set);
784            // Bridge exec bool to spec form. `va_sign_ext == C::VA_SIGN_EXT()`
785            // by `when_used_as_spec`; `sign_bit_set == ((pt_start as int /
786            // 2^(aw-1)) % 2 == 1)` by `sign_bit_of_va`'s ensures.
787            assert(va_sign_ext == C::VA_SIGN_EXT());
788            let off = pte_index_bit_offset::<C>(C::NR_LEVELS()) as nat;
789            let aw_m1 = (C::ADDRESS_WIDTH() - 1) as nat;
790            let i_start = C::TOP_LEVEL_INDEX_RANGE_spec().start as int;
791            let p_off = pow2(off) as int;
792            let p_aw_m1 = pow2(aw_m1) as int;
793            assert(pt_start == i_start * p_off);
794            assert(sign_bit_set == ((pt_start as int / p_aw_m1) % 2 == 1));
795            assert(sign_bit_set == ((i_start * p_off / p_aw_m1) % 2 == 1));
796        }
797    }
798    proof {
799        // Both branches now establish the equation
800        //   start == lb * 2^48 + idx.start * 2^off
801        //   end == lb * 2^48 + idx.end * 2^off - 1
802        // matching the unfolded `vaddr_range_bounds_spec`.
803        assert(start == (C::LEADING_BITS_spec()) * 0x1_0000_0000_0000int + (
804        C::TOP_LEVEL_INDEX_RANGE_spec().start) * (pow2(
805            pte_index_bit_offset::<C>(C::NR_LEVELS()) as nat,
806        )));
807        assert(end == (C::LEADING_BITS_spec()) * 0x1_0000_0000_0000int + (
808        C::TOP_LEVEL_INDEX_RANGE_spec().end) * (pow2(
809            pte_index_bit_offset::<C>(C::NR_LEVELS()) as nat,
810        )) - 1);
811    }
812    (start, end)
813}
814
815/// Gets the managed virtual addresses range for the page table.
816///
817/// Returns a [`RangeInclusive`] because the end address, when the range
818/// reaches the top of the 64-bit address space (e.g. the canonical
819/// high-half kernel range ending at `usize::MAX`), would overflow the
820/// exclusive end of a [`Range<Vaddr>`].
821fn vaddr_range<C: PageTableConfig>() -> (ret: RangeInclusive<Vaddr>)
822    ensures
823        ret@.start == vaddr_range_bounds_spec::<C>().0,
824        ret@.end == vaddr_range_bounds_spec::<C>().1,
825        ret@.exhausted == false,
826{
827    let (start, end) = vaddr_range_bounds::<C>();
828    RangeInclusive::new(start, end)
829}
830
831/// Apply the sign-extension OR to a positional value.
832///
833/// For any value `va` in `[0, 2^ADDRESS_WIDTH)`, the OR with
834/// `!0 ^ ((1 << ADDRESS_WIDTH) - 1)` is equivalent to adding
835/// `LEADING_BITS_spec() * 2^48`, because the mask's bits and `va`'s bits are
836/// disjoint.
837fn apply_sign_ext<C: PageTableConfig>(va: Vaddr) -> (ret: Vaddr)
838    requires
839        va < pow2(C::ADDRESS_WIDTH() as nat),
840        C::ADDRESS_WIDTH() < usize::BITS,
841        C::LEADING_BITS_spec() * 0x1_0000_0000_0000int == 0x1_0000_0000_0000_0000int - pow2(
842            C::ADDRESS_WIDTH() as nat,
843        ),
844    ensures
845        ret == va + C::LEADING_BITS_spec() * 0x1_0000_0000_0000int,
846{
847    let address_width = C::ADDRESS_WIDTH();
848    let low_bit = 1usize << address_width;
849    proof {
850        vstd::layout::unsigned_int_max_values();
851        vstd::bits::lemma_usize_pow2_no_overflow(address_width as nat);
852        vstd::bits::lemma_usize_shl_is_mul(1usize, address_width);
853    }
854    let low_mask = low_bit - 1;
855    let sign_ext_mask = !0 ^ low_mask;
856    let ret = va | sign_ext_mask;
857    proof {
858        assert(!0usize == 0xffff_ffff_ffff_ffffusize) by (compute_only);
859        assert(sign_ext_mask == usize::MAX - low_mask) by (bit_vector)
860            requires
861                sign_ext_mask == (!0usize ^ low_mask),
862                !0usize == 0xffff_ffff_ffff_ffffusize,
863                usize::MAX == 0xffff_ffff_ffff_ffffusize,
864        ;
865        assert(pow2(64) == 0x1_0000_0000_0000_0000nat) by {
866            vstd::arithmetic::power2::lemma2_to64();
867        };
868        assert(sign_ext_mask == 0x1_0000_0000_0000_0000int - pow2(address_width as nat));
869        assert(sign_ext_mask == C::LEADING_BITS_spec() * 0x1_0000_0000_0000int);
870
871        assert((va & sign_ext_mask) == 0usize) by (bit_vector)
872            requires
873                address_width < usize::BITS,
874                low_bit == 1usize << address_width,
875                low_mask == low_bit - 1,
876                sign_ext_mask == !0usize ^ low_mask,
877                va < low_bit,
878        ;
879        assert(ret == va + sign_ext_mask) by (bit_vector)
880            requires
881                ret == va | sign_ext_mask,
882                (va & sign_ext_mask) == 0usize,
883        ;
884    }
885    ret
886}
887
888/// Checks if the given range is covered by the valid range of the page table.
889fn is_valid_range<C: PageTableConfig>(r: &Range<Vaddr>) -> (ret: bool)
890    ensures
891        ret == is_valid_range_spec::<C>(r),
892{
893    let (va_start, va_end) = vaddr_range_bounds::<C>();
894    (r.start == 0 && r.end == 0) || (va_start <= r.start && r.end > 0 && r.end - 1 <= va_end)
895}
896
897/// Sanity-check: for x86_64 kernel PT, `vaddr_range_spec` evaluates to the
898/// low-half `[2^47, 2^48)` window. The exec path (`vaddr_range`) applies
899/// sign extension on top of this and wraps at `usize::MAX + 1`, which is
900/// the "KNOWN BUG" referenced in `vm_space::unmap`. See
901/// `vaddr_range_bounds_spec` below for the overflow-free formulation.
902pub(crate) proof fn lemma_vaddr_range_spec_kernel()
903    ensures
904        vaddr_range_spec::<KernelPtConfig>() == (
905        0x0000_8000_0000_0000_usize..0x0001_0000_0000_0000_usize),
906{
907    use vstd::arithmetic::power2::{lemma2_to64, lemma2_to64_rest, lemma_pow2_adds};
908    lemma2_to64();
909    lemma2_to64_rest();
910    lemma_usize_pow2_ilog2(12);
911    assert(PagingConsts::BASE_PAGE_SIZE().ilog2() == 12u32);
912    assert(nr_subpage_per_huge::<PagingConsts>() == 512_usize);
913    lemma_usize_pow2_ilog2(9);
914    assert(nr_pte_index_bits::<PagingConsts>() == 9_usize);
915    assert(pte_index_bit_offset::<PagingConsts>(4) == 39);
916    lemma_pow2_adds(8, 39);
917    lemma_pow2_adds(9, 39);
918    assert(256 * pow2(39) == pow2(47));
919    assert(512 * pow2(39) == pow2(48));
920}
921
922/// Canonical bounds of the VA range managed by a page-table config,
923/// returned as inclusive `(start, end_inclusive)`. `end_inclusive` may
924/// equal `usize::MAX` for sign-extended kernel configs, which is why the
925/// inclusive form is used — `Range<Vaddr>` cannot represent that.
926///
927/// Derived from `LEADING_BITS_spec` and `TOP_LEVEL_INDEX_RANGE_spec`. For
928/// `UserPtConfig` `(LEADING_BITS=0, idx=0..256)` this is `(0, 2^47 - 1)`;
929/// for `KernelPtConfig` `(LEADING_BITS=0xffff, idx=256..512)` this is
930/// `(0xffff_8000_0000_0000, 0xffff_ffff_ffff_ffff)`.
931pub closed spec fn vaddr_range_bounds_spec<C: PageTableConfig>() -> (Vaddr, Vaddr) {
932    let idx = C::TOP_LEVEL_INDEX_RANGE_spec();
933    let off = pte_index_bit_offset::<C::C>(C::NR_LEVELS()) as nat;
934    let lb = C::LEADING_BITS_spec() as int;
935    let base = lb * 0x1_0000_0000_0000int;
936    let start = (base + (idx.start) * pow2(off)) as usize;
937    let end_inclusive = (base + (idx.end) * pow2(off) - 1) as usize;
938    (start, end_inclusive)
939}
940
941/// Reveal the body of `vaddr_range_bounds_spec` at a call site without
942/// making the function itself open (which causes z3-context pollution in
943/// cursor invariants).
944pub proof fn lemma_vaddr_range_bounds_spec_unfold<C: PageTableConfig>()
945    ensures
946        vaddr_range_bounds_spec::<C>() == {
947            let idx = C::TOP_LEVEL_INDEX_RANGE_spec();
948            let off = pte_index_bit_offset::<C>(C::NR_LEVELS()) as nat;
949            let lb = C::LEADING_BITS_spec() as int;
950            let base = lb * 0x1_0000_0000_0000int;
951            let start = (base + (idx.start) * pow2(off)) as usize;
952            let end_inclusive = (base + (idx.end) * pow2(off) - 1) as usize;
953            (start, end_inclusive)
954        },
955{
956}
957
958/// Sanity-check: for x86_64 user PT, the bounds are
959/// `(0, 0x0000_7FFF_FFFF_FFFF)`, i.e. the low-half 47-bit user VA space.
960pub(crate) proof fn lemma_vaddr_range_bounds_spec_user()
961    ensures
962        vaddr_range_bounds_spec::<crate::mm::vm_space::UserPtConfig>() == (
963            0_usize,
964            0x0000_7FFF_FFFF_FFFF_usize,
965        ),
966{
967    use vstd::arithmetic::power2::{lemma2_to64, lemma2_to64_rest, lemma_pow2_adds};
968    lemma2_to64();
969    lemma2_to64_rest();
970    lemma_usize_pow2_ilog2(12);
971    lemma_usize_pow2_ilog2(9);
972    lemma_pow2_adds(8, 39);
973    assert(nr_subpage_per_huge::<PagingConsts>() == 512_usize);
974    assert(nr_pte_index_bits::<PagingConsts>() == 9_usize);
975    assert(pte_index_bit_offset::<PagingConsts>(4) == 39);
976    assert(0 * pow2(39) == 0);
977    assert(256 * pow2(39) == pow2(47));
978    assert(pow2(47) - 1 == 0x0000_7FFF_FFFF_FFFF_int);
979    // UserPtConfig: LEADING_BITS = 0 via trait default.
980    assert(<crate::mm::vm_space::UserPtConfig as PageTableConfig>::LEADING_BITS_spec() == 0_usize);
981}
982
983/// Sanity-check: for x86_64 kernel PT, the bounds are the canonical
984/// upper half `(0xFFFF_8000_0000_0000, 0xFFFF_FFFF_FFFF_FFFF)`.
985pub(crate) proof fn lemma_vaddr_range_bounds_spec_kernel()
986    ensures
987        vaddr_range_bounds_spec::<KernelPtConfig>() == (
988            0xFFFF_8000_0000_0000_usize,
989            0xFFFF_FFFF_FFFF_FFFF_usize,
990        ),
991{
992    use vstd::arithmetic::power2::{lemma2_to64, lemma2_to64_rest, lemma_pow2_adds};
993    lemma2_to64();
994    lemma2_to64_rest();
995    lemma_usize_pow2_ilog2(12);
996    lemma_usize_pow2_ilog2(9);
997    lemma_pow2_adds(8, 39);
998    lemma_pow2_adds(9, 39);
999    assert(nr_subpage_per_huge::<PagingConsts>() == 512_usize);
1000    assert(nr_pte_index_bits::<PagingConsts>() == 9_usize);
1001    assert(PagingConsts::BASE_PAGE_SIZE().ilog2() == 12u32);
1002    assert(pte_index_bit_offset::<PagingConsts>(4) == 39);
1003    assert(256 * pow2(39) == pow2(47));
1004    assert(512 * pow2(39) == pow2(48));
1005    assert(0xffff_int * 0x1_0000_0000_0000int + pow2(47) == 0xffff_8000_0000_0000int);
1006    assert(0xffff_int * 0x1_0000_0000_0000int + pow2(48) - 1 == 0xffff_ffff_ffff_ffffint);
1007}
1008
1009// Here are some const values that are determined by the paging constants.
1010pub(crate) proof fn lemma_pte_index_consts<C: PagingConstsTrait>()
1011    ensures
1012        usize::BITS == 64,
1013        0 < C::BASE_PAGE_SIZE(),
1014        C::BASE_PAGE_SIZE().ilog2() == 12u32,
1015        C::NR_LEVELS() == NR_LEVELS,
1016        nr_subpage_per_huge::<C>() == NR_ENTRIES,
1017        nr_pte_index_bits::<C>() == 9usize,
1018        pow2(9) as usize == NR_ENTRIES,
1019{
1020    C::lemma_paging_consts_properties();
1021    lemma2_to64();
1022    lemma_usize_pow2_ilog2(12);
1023    lemma_usize_pow2_ilog2(9);
1024}
1025
1026/// The index of a VA's PTE in a page table node at the given level.
1027fn pte_index<C: PagingConstsTrait>(va: Vaddr, level: PagingLevel) -> (res: usize)
1028    requires
1029        1 <= level <= NR_LEVELS,
1030    ensures
1031        res == AbstractVaddr::from_vaddr(va).index[level - 1],
1032{
1033    let offset = pte_index_bit_offset::<C>(level);
1034    proof {
1035        lemma_pte_index_consts::<C>();
1036        assert(offset == 12 + 9 * (level - 1));
1037        assert(0 <= offset < usize::BITS) by (nonlinear_arith)
1038            requires
1039                1 <= level <= NR_LEVELS,
1040                NR_LEVELS == 4,
1041                usize::BITS == 64,
1042                offset == 12 + 9 * (level - 1),
1043        ;
1044    }
1045
1046    let shifted = va >> offset;
1047    let nr_subpages = nr_subpage_per_huge::<C>();
1048    proof {
1049        assert(nr_subpages == NR_ENTRIES);
1050        assert(nr_subpages > 0);
1051    }
1052    let mask = nr_subpages - 1;
1053    proof {
1054        lemma2_to64();
1055        lemma2_to64_rest();
1056        vstd::bits::lemma_usize_shr_is_div(va, offset);
1057
1058        vstd::bits::lemma_low_bits_mask_values();
1059        vstd::bits::lemma_usize_low_bits_mask_is_mod(shifted, 9);
1060    }
1061    shifted & mask
1062}
1063
1064#[verifier::inline]
1065pub open spec fn pte_index_bit_offset_spec<C: PagingConstsTrait>(level: PagingLevel) -> usize {
1066    (C::BASE_PAGE_SIZE().ilog2() + nr_pte_index_bits::<C>() * (level - 1)) as usize
1067}
1068
1069/// The bit offset of the entry offset part in a virtual address.
1070///
1071/// This function returns the bit offset of the least significant bit. Take
1072/// x86-64 as an example, the `pte_index_bit_offset(2)` should return 21, which
1073/// is 12 (the 4KiB in-page offset) plus 9 (index width in the level-1 table).
1074#[verifier::when_used_as_spec(pte_index_bit_offset_spec)]
1075pub fn pte_index_bit_offset<C: PagingConstsTrait>(level: PagingLevel) -> usize
1076    requires
1077        1 <= level <= NR_LEVELS,
1078    returns
1079        pte_index_bit_offset_spec::<C>(level),
1080{
1081    proof {
1082        lemma_pte_index_consts::<C>();
1083        assert(12 + 9 * (level - 1) <= 39) by (nonlinear_arith)
1084            requires
1085                1 <= level <= NR_LEVELS,
1086                NR_LEVELS == 4,
1087        ;
1088    }
1089    C::BASE_PAGE_SIZE().ilog2() as usize + nr_pte_index_bits::<C>() * (level as usize - 1)
1090}
1091
1092/*
1093impl PageTable<UserPtConfig> {
1094    pub fn activate(&self) {
1095        // SAFETY: The user mode page table is safe to activate since the kernel
1096        // mappings are shared.
1097        unsafe {
1098            self.root.activate();
1099        }
1100    }
1101}*/
1102
1103impl PageTable<KernelPtConfig> {
1104    /// Panic condition for [`Self::create_user_page_table`]:
1105    /// Some kernel root entry at index `i` in `TOP_LEVEL_INDEX_RANGE` is
1106    /// not a page table node (i.e., is absent or maps a huge frame).
1107    pub open spec fn create_user_pt_panic_condition(root_owner: NodeOwner<KernelPtConfig>) -> bool {
1108        exists|i: usize|
1109            #![trigger root_owner.children_perm.value()[i as int]]
1110            KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= i
1111                < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end && {
1112                let pte = root_owner.children_perm.value()[i as int];
1113                ||| !pte.is_present()
1114                ||| pte.is_last(root_owner.level)
1115            }
1116    }
1117
1118    /// Create a new kernel page table.
1119    #[verifier::external_body]
1120    pub(crate) fn new_kernel_page_table() -> Self {
1121        unimplemented!()/*        let kpt = Self::empty();
1122
1123        // Make shared the page tables mapped by the root table in the kernel space.
1124        {
1125            let preempt_guard = disable_preempt();
1126            let mut root_node = kpt.root.borrow().lock(&preempt_guard);
1127
1128            for i in KernelPtConfig::TOP_LEVEL_INDEX_RANGE {
1129                let mut root_entry = root_node.entry(i);
1130                let _ = root_entry.alloc_if_none(&preempt_guard).unwrap();
1131            }
1132        }
1133
1134        kpt*/
1135
1136    }
1137
1138    /// Create a new user page table.
1139    ///
1140    /// This should be the only way to create the user page table, that is to
1141    /// duplicate the kernel page table with all the kernel mappings shared.
1142    #[verus_spec(r =>
1143        with Tracked(kernel_owner): Tracked<&PageTableOwner<KernelPtConfig>>,
1144            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1145            Tracked(guards): Tracked<&mut Guards<'rcu>>,
1146        requires
1147            kernel_owner.inv(),
1148            old(regions).inv(),
1149            kernel_owner.0.value.is_node(),
1150            !Self::create_user_pt_panic_condition(kernel_owner.0.value.node()),
1151            // The kernel page table's root frame matches the tracked owner.
1152            self.root.ptr.addr() == kernel_owner.0.value.node().meta_addr_self(),
1153            // The kernel root entry is sound with respect to the meta regions.
1154            kernel_owner.0.value.metaregion_sound(*old(regions)),
1155            // The whole kernel page-table tree is sound: every entry's metaregion
1156            // bookkeeping matches `old(regions)`. Needed to derive each child's
1157            // soundness inside the loop body.
1158            kernel_owner.metaregion_sound(*old(regions)),
1159            // The kernel root is not currently locked.
1160            old(guards).unlocked(kernel_owner.0.value.node().meta_addr_self()),
1161        ensures
1162            final(regions).inv(),
1163    )]
1164    pub(in crate::mm) fn create_user_page_table<'rcu, G: InAtomicMode + 'static>(
1165        &'static self,
1166    ) -> PageTable<UserPtConfig> {
1167        let preempt_guard: &'rcu G = disable_preempt::<G>();
1168
1169        proof_decl! {
1170            let tracked mut new_pt_owner: Option<PageTableOwner<UserPtConfig>> = None;
1171        }
1172        let ghost regions_before_alloc = *regions;
1173        let new_pt: PageTable<UserPtConfig> = (
1174        #[verus_spec(with Tracked(&mut new_pt_owner), Tracked(regions), Tracked(guards))]
1175        PageTable::empty_with_owner());
1176        let new_root = new_pt.root;
1177        // Capture new_idx as a ghost BEFORE the tracked_take below empties new_pt_owner.
1178        let ghost new_idx_g: usize = crate::specs::mm::frame::mapping::frame_to_index(
1179            new_pt_owner@.unwrap().0.value.meta_slot_paddr().unwrap(),
1180        );
1181        let ghost new_pt_owner_snap = new_pt_owner@.unwrap();
1182        proof {
1183            let kern_idx = crate::specs::mm::frame::mapping::frame_to_index(
1184                kernel_owner.0.value.meta_slot_paddr().unwrap(),
1185            );
1186            let new_idx = new_idx_g;
1187            crate::specs::mm::page_table::node::entry_owners::EntryOwner::<
1188                KernelPtConfig,
1189            >::active_entry_not_in_free_pool(kernel_owner.0.value, regions_before_alloc, new_idx);
1190            assert(kern_idx != new_idx);
1191            assert(regions.slot_owners[kern_idx] == regions_before_alloc.slot_owners[kern_idx]);
1192            assert(kernel_owner.metaregion_sound(*regions));
1193            assert(!regions.slots.contains_key(new_idx));
1194        }
1195
1196        proof_decl! {
1197            let tracked root_owner: &NodeOwner<KernelPtConfig>
1198                = kernel_owner.0.borrow_value().tracked_borrow_node();
1199            let tracked mut new_pt_owner_val: PageTableOwner<UserPtConfig>
1200                = new_pt_owner.tracked_take();
1201            let tracked mut new_node_owner: NodeOwner<UserPtConfig>
1202                = new_pt_owner_val.0.value.tracked_take_node();
1203            let tracked mut entry_owner: &EntryOwner<KernelPtConfig>;
1204        }
1205
1206        // Discharge borrow/lock preconditions for the kernel root from
1207        // kernel_owner.inv() + metaregion_sound + guards unlocked.
1208        proof {
1209            assert(kernel_owner.0.value.is_node());
1210            assert(kernel_owner.0.value.metaregion_sound(*regions));
1211        }
1212        let ghost regions_before_self_borrow: MetaRegionOwners = *regions;
1213        let mut root_node = {
1214            #[verus_spec(with Tracked(regions))]
1215            let root_ref = self.root.borrow();
1216            #[verus_spec(with Tracked(root_owner), Tracked(guards))]
1217            root_ref.lock(preempt_guard)
1218        };
1219        let ghost regions_after_kroot_borrow: MetaRegionOwners = *regions;
1220        let mut new_node: PageTableGuard<'rcu, UserPtConfig> = {
1221            #[verus_spec(with Tracked(regions))]
1222            let new_ref = new_root.borrow();
1223            #[verus_spec(with Tracked(&new_node_owner), Tracked(guards))]
1224            new_ref.lock(preempt_guard)
1225        };
1226        proof {
1227            let kern_idx = crate::specs::mm::frame::mapping::frame_to_index(
1228                kernel_owner.0.value.meta_slot_paddr().unwrap(),
1229            );
1230            assert(regions_before_self_borrow.slot_owners
1231                == regions_after_kroot_borrow.slot_owners);
1232            assert forall|k: usize|
1233                regions_before_self_borrow.slots.contains_key(
1234                    k,
1235                ) implies regions_before_self_borrow.slots[k]
1236                == #[trigger] regions_after_kroot_borrow.slots[k] by {
1237                if k == kern_idx {
1238                    crate::specs::mm::page_table::node::entry_owners::EntryOwner::<
1239                        KernelPtConfig,
1240                    >::active_entry_not_in_free_pool(
1241                        kernel_owner.0.value,
1242                        regions_before_self_borrow,
1243                        k,
1244                    );
1245                }
1246            };
1247            kernel_owner.metaregion_sound_preserved_slot_owners_eq(
1248                regions_before_self_borrow,
1249                regions_after_kroot_borrow,
1250            );
1251
1252            let new_idx = new_idx_g;
1253            assert(regions_before_alloc.slots.contains_key(new_idx));
1254            assert(kern_idx != new_idx) by {
1255                crate::specs::mm::page_table::node::entry_owners::EntryOwner::<
1256                    KernelPtConfig,
1257                >::active_entry_not_in_free_pool(
1258                    kernel_owner.0.value,
1259                    regions_before_alloc,
1260                    new_idx,
1261                );
1262            };
1263
1264            assert(!regions_before_self_borrow.slots.contains_key(new_idx));
1265            assert(!regions_after_kroot_borrow.slots.contains_key(new_idx));
1266            assert forall|k: usize|
1267                regions_after_kroot_borrow.slots.contains_key(
1268                    k,
1269                ) implies regions_after_kroot_borrow.slots[k] == #[trigger] regions.slots[k] by {
1270                if k != new_idx {
1271                    // borrow preserves slots[k] at k != self.index() == new_idx
1272                }
1273            };
1274            assert(kernel_owner.metaregion_sound(regions_before_alloc));
1275
1276            kernel_owner.0.map_implies(
1277                kernel_owner.0.value.path,
1278                |
1279                    e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1280                    p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>,
1281                |
1282                    e.is_frame() && e.parent_level > 1 ==> {
1283                        let pa = e.frame().mapped_pa;
1284                        let nr_pages = page_size(e.parent_level) / PAGE_SIZE;
1285                        forall|j: usize|
1286                            0 < j < nr_pages ==> {
1287                                let sub_idx =
1288                                    #[trigger] crate::specs::mm::frame::mapping::frame_to_index(
1289                                    (pa + j * PAGE_SIZE) as usize,
1290                                );
1291                                sub_idx != new_idx
1292                            }
1293                    },
1294                |
1295                    e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1296                    p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>,
1297                |
1298                    e.is_frame() && e.parent_level > 1 ==> {
1299                        let pa = e.frame().mapped_pa;
1300                        let nr_pages = page_size(e.parent_level) / PAGE_SIZE;
1301                        forall|j: usize|
1302                            0 < j < nr_pages ==> {
1303                                let sub_idx =
1304                                    #[trigger] crate::specs::mm::frame::mapping::frame_to_index(
1305                                    (pa + j * PAGE_SIZE) as usize,
1306                                );
1307                                sub_idx != new_idx || (regions.slots.contains_key(sub_idx)
1308                                    && regions.slot_owners[sub_idx].inner_perms.ref_count.value()
1309                                    != REF_COUNT_UNUSED
1310                                    && regions.slot_owners[sub_idx].inner_perms.ref_count.value()
1311                                    > 0
1312                                    && regions.slot_owners[sub_idx].inner_perms.ref_count.value()
1313                                    <= REF_COUNT_MAX)
1314                            }
1315                    },
1316            );
1317            kernel_owner.metaregion_sound_preserved_one_slot_changed(
1318                regions_after_kroot_borrow,
1319                *regions,
1320                new_idx,
1321            );
1322        }
1323        let mut i: usize = KernelPtConfig::TOP_LEVEL_INDEX_RANGE().start;
1324        while i < KernelPtConfig::TOP_LEVEL_INDEX_RANGE().end
1325            invariant
1326                kernel_owner.inv(),
1327                kernel_owner.0.value.is_node(),
1328                regions.inv(),
1329                !Self::create_user_pt_panic_condition(kernel_owner.0.value.node()),
1330                i <= KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end,
1331                KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= i,
1332                // Lock postcondition for the kernel root.
1333                *root_owner == kernel_owner.0.value.node(),
1334                root_owner.relate_guard(root_node),
1335                // Tree-wide soundness of the kernel page table.
1336                kernel_owner.metaregion_sound(*regions),
1337                // The new node owner's invariants and guard relation.
1338                new_node_owner.inv(),
1339                new_node_owner.relate_guard(new_node),
1340                regions.slots.contains_key(new_node_owner.slot_index),
1341            decreases KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end - i,
1342        {
1343            proof {
1344                let kern_node = kernel_owner.0.value.node();
1345                assert forall|j: usize|
1346                    #![trigger kern_node.children_perm.value()[j as int]]
1347                    KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= j
1348                        < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end implies {
1349                    let pte = kern_node.children_perm.value()[j as int];
1350                    pte.is_present() && !pte.is_last(kern_node.level)
1351                } by {
1352                    let pte = kern_node.children_perm.value()[j as int];
1353                    if !pte.is_present() || pte.is_last(kern_node.level) {
1354                        assert(Self::create_user_pt_panic_condition(kern_node));
1355                    }
1356                }
1357
1358                kernel_owner.pt_inv_unroll(i as int);
1359                let tracked child_opt: &Option<OwnerSubtree<KernelPtConfig>> =
1360                    kernel_owner.0.children.tracked_borrow(i as int);
1361                let tracked child_subtree: &OwnerSubtree<KernelPtConfig> =
1362                    child_opt.tracked_borrow();
1363                entry_owner = child_subtree.borrow_value();
1364                let kern_node = kernel_owner.0.value.node();
1365                assert(entry_owner.match_pte(
1366                    kern_node.children_perm.value()[i as int],
1367                    entry_owner.parent_level,
1368                ));
1369                assert(entry_owner.parent_level == kern_node.level);
1370                assert(child_subtree.inv());
1371                assert(entry_owner.inv());
1372                assert(root_owner.relate_guard(root_node));
1373
1374                kernel_owner.0.map_unroll_once(
1375                    kernel_owner.0.value.path,
1376                    PageTableOwner::<KernelPtConfig>::metaregion_sound_pred(*regions),
1377                    i as int,
1378                );
1379                assert(child_subtree.tree_predicate_map(
1380                    kernel_owner.0.value.path.push_tail(i as usize),
1381                    PageTableOwner::<KernelPtConfig>::metaregion_sound_pred(*regions),
1382                ));
1383                assert(entry_owner.metaregion_sound(*regions));
1384            }
1385
1386            #[verus_spec(with Tracked(root_owner), Tracked(entry_owner), Tracked(&*regions))]
1387            let root_entry = root_node.entry(i);
1388            let ghost pre_to_ref_regions: MetaRegionOwners = *regions;
1389            #[verus_spec(with Tracked(entry_owner), Tracked(root_owner), Tracked(regions))]
1390            let child = root_entry.to_ref();
1391
1392            proof {
1393                let kern_node = kernel_owner.0.value.node();
1394                let pte = kern_node.children_perm.value()[i as int];
1395
1396                assert(pte.is_present() && !pte.is_last(kern_node.level)) by {
1397                    if !pte.is_present() || pte.is_last(kern_node.level) {
1398                        assert(KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= i
1399                            < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end);
1400                        assert(exists|j: usize|
1401                            KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().start <= j
1402                                < KernelPtConfig::TOP_LEVEL_INDEX_RANGE_spec().end && {
1403                                let p = #[trigger] kern_node.children_perm.value()[j as int];
1404                                ||| !p.is_present()
1405                                ||| p.is_last(kern_node.level)
1406                            });
1407                        assert(Self::create_user_pt_panic_condition(kern_node));
1408                    }
1409                }
1410                // entry_owner.match_pte(pte, parent_level) + (present && !is_last)
1411                // ⟹ entry_owner.is_node().
1412                assert(entry_owner.is_node());
1413                // ChildRef::invariants(entry_owner, regions) gives child.wf(entry_owner).
1414                // For the Frame and None variants, wf requires is_frame() or is_absent(),
1415                // contradicting is_node(). Hence child must be PageTable.
1416                assert(child is PageTable);
1417                // to_ref's borrow_paddr preserves slot_owners exactly and only
1418                // grows `slots` (existing keys preserved). Use the tree-wide
1419                // preservation lemma.
1420                kernel_owner.metaregion_sound_preserved_slot_owners_eq(
1421                    pre_to_ref_regions,
1422                    *regions,
1423                );
1424            }
1425            let pt = match child {
1426                ChildRef::PageTable(pt) => pt,
1427                _ => vstd::pervasive::unreached(),
1428            };
1429
1430            let ghost entry_node_slot_idx = entry_owner.tracked_borrow_node().slot_index;
1431            let tracked entry_node_slot_perm = regions.slots.tracked_borrow(entry_node_slot_idx);
1432            #[verus_spec(with Tracked(entry_node_slot_perm))]
1433            let pt_addr = pt.start_paddr();
1434            let pte = PageTableEntry::new_pt(pt_addr);
1435
1436            proof {
1437                assert(regions.slots.contains_key(new_node_owner.slot_index));
1438            }
1439            unsafe {
1440                #[verus_spec(with Tracked(&mut new_node_owner), Tracked(&*regions))]
1441                new_node.write_pte(i, pte)
1442            };
1443
1444            i = i + 1;
1445        }
1446
1447        PageTable::<UserPtConfig> { root: new_root }
1448    }/*
1449    /// Protect the given virtual address range in the kernel page table.
1450    ///
1451    /// This method flushes the TLB entries when doing protection.
1452    ///
1453    /// # Safety
1454    ///
1455    /// The caller must ensure that the protection operation does not affect
1456    /// the memory safety of the kernel.
1457    pub unsafe fn protect_flush_tlb(
1458        &self,
1459        vaddr: &Range<Vaddr>,
1460        mut op: impl FnMut(&mut PageProperty),
1461    ) -> Result<(), PageTableError> {
1462        let preempt_guard = disable_preempt();
1463        let mut cursor = CursorMut::new(self, &preempt_guard, vaddr)?;
1464        // SAFETY: The safety is upheld by the caller.
1465        while let Some(range) =
1466            unsafe { cursor.protect_next(vaddr.end - cursor.virt_addr(), &mut op) }
1467        {
1468            crate::arch::mm::tlb_flush_addr(range.start);
1469        }
1470        Ok(())
1471    }*/
1472
1473}
1474
1475#[verus_verify]
1476impl<C: PageTableConfig> PageTable<C> {
1477    pub uninterp spec fn root_paddr_spec(&self) -> Paddr;
1478
1479    /// Create a new empty page table.
1480    ///
1481    /// Useful for the IOMMU page tables only.
1482    #[verifier::external_body]
1483    pub fn empty() -> Self {
1484        unimplemented!()
1485    }
1486
1487    /// Create a new empty page table together with its tracked ownership.
1488    #[verifier::external_body]
1489    #[verus_spec(r =>
1490        with Tracked(owner): Tracked<&mut Option<PageTableOwner<C>>>,
1491            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1492            Tracked(guards): Tracked<&mut Guards<'rcu>>,
1493        requires
1494            old(regions).inv(),
1495        ensures
1496            final(owner)@ is Some,
1497            final(owner)@->0.inv(),
1498            (final(owner)@->0).0.value.is_node(),
1499            (final(owner)@->0).0.value.is_node(),
1500            r.root.ptr.addr() == (final(owner)@->0).0.value.node().meta_addr_self(),
1501            (final(owner)@->0).0.value.metaregion_sound(*final(regions)),
1502            final(regions).inv(),
1503            final(guards).unlocked((final(owner)@->0).0.value.node().meta_addr_self()),
1504            // Allocating a fresh node does not change the lock set, so any node
1505            // that was (un)locked before remains so.
1506            final(guards).guards == old(guards).guards,
1507            // The newly allocated slot was in the free pool before the call.
1508            old(regions).slots.contains_key(
1509                crate::specs::mm::frame::mapping::frame_to_index(
1510                    (final(owner)@->0).0.value.meta_slot_paddr()->0)),
1511            // After the alloc, the slot is removed from the free pool (now owned
1512            // by the new pt's NodeOwner).
1513            !final(regions).slots.contains_key(
1514                crate::specs::mm::frame::mapping::frame_to_index(
1515                    (final(owner)@->0).0.value.meta_slot_paddr()->0)),
1516            // Other slots and lock state are preserved.
1517            forall |i: usize| #![trigger final(regions).slot_owners[i]]
1518                i != crate::specs::mm::frame::mapping::frame_to_index(
1519                    (final(owner)@->0).0.value.meta_slot_paddr()->0)
1520                ==> final(regions).slot_owners[i] == old(regions).slot_owners[i],
1521            forall |a: usize| old(guards).lock_held(a) ==> final(guards).lock_held(a),
1522            forall |idx: usize| #![trigger final(regions).slot_owners[idx].paths_in_pt]
1523                final(regions).slot_owners[idx].paths_in_pt
1524                    == old(regions).slot_owners[idx].paths_in_pt,
1525            // Allocation preserves the soundness of the kernel page-table tree:
1526            // a fresh allocation cannot collide with any active node or frame entry
1527            // (the allocator returns a slot that wasn't in use). Stated as a
1528            // postcondition because deriving it requires a freshness axiom on the
1529            // underlying frame allocator.
1530            forall |kt: PageTableOwner<KernelPtConfig>|
1531                #![trigger kt.metaregion_sound(*final(regions))]
1532                kt.inv() && kt.metaregion_sound(*old(regions))
1533                ==> kt.metaregion_sound(*final(regions)),
1534            // Freshness: the new PT's slot index is not used (as a primary slot
1535            // or huge-frame sub-page slot) by any entry in any KernelPtConfig PT
1536            // tree that was sound before the alloc. Used to discharge the borrow
1537            // step that mutates `slot_owners[new_idx]`.
1538            forall |kt: PageTableOwner<KernelPtConfig>|
1539                #![trigger kt.metaregion_sound(*old(regions))]
1540                kt.inv() && kt.metaregion_sound(*old(regions)) ==>
1541                kt.0.tree_predicate_map(
1542                    kt.0.value.path,
1543                    |e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1544                     p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>|
1545                        e.meta_slot_paddr() is Some
1546                            ==> crate::specs::mm::frame::mapping::frame_to_index(
1547                                e.meta_slot_paddr()->0) !=
1548                                crate::specs::mm::frame::mapping::frame_to_index(
1549                                    (final(owner)@->0).0.value.meta_slot_paddr()->0),
1550                ),
1551            // Sub-page freshness: for any huge frame entry in any pre-existing
1552            // sound KernelPtConfig tree, the new PT's slot index isn't a sub-page
1553            // slot of the huge frame either. Same allocator-freshness rationale.
1554            forall |kt: PageTableOwner<KernelPtConfig>|
1555                #![trigger kt.metaregion_sound(*old(regions))]
1556                kt.inv() && kt.metaregion_sound(*old(regions)) ==>
1557                kt.0.tree_predicate_map(
1558                    kt.0.value.path,
1559                    |e: crate::specs::mm::page_table::node::entry_owners::EntryOwner<KernelPtConfig>,
1560                     p: vstd_extra::ghost_tree::TreePath<NR_ENTRIES>|
1561                        e.is_frame() && e.parent_level > 1 ==> {
1562                            let pa = e.frame().mapped_pa;
1563                            let nr_pages = page_size(
1564                                e.parent_level) / PAGE_SIZE;
1565                            forall |j: usize| 0 < j < nr_pages ==> {
1566                                let sub_idx =
1567                                    #[trigger] crate::specs::mm::frame::mapping::frame_to_index(
1568                                        (pa + j * PAGE_SIZE) as usize);
1569                                sub_idx != crate::specs::mm::frame::mapping::frame_to_index(
1570                                    (final(owner)@->0).0.value.meta_slot_paddr()->0)
1571                            }
1572                        },
1573                ),
1574    )]
1575    pub fn empty_with_owner<'rcu>() -> Self {
1576        unimplemented!()
1577    }
1578
1579    #[verifier::external_body]
1580    pub(in crate::mm) unsafe fn first_activate_unchecked(&self) {
1581        unimplemented!()
1582        // SAFETY: The safety is upheld by the caller.
1583        //        unsafe { self.root.first_activate() };
1584
1585    }
1586
1587    /// The physical address of the root page table.
1588    ///
1589    /// Obtaining the physical address of the root page table is safe, however, using it or
1590    /// providing it to the hardware will be unsafe since the page table node may be dropped,
1591    /// resulting in UAF.
1592    #[verifier::external_body]
1593    #[verifier::when_used_as_spec(root_paddr_spec)]
1594    pub fn root_paddr(&self) -> (r: Paddr)
1595        returns
1596            self.root_paddr_spec(),
1597    {
1598        unimplemented!()
1599        //        self.root.start_paddr()
1600
1601    }
1602
1603    /// Query about the mapping of a single byte at the given virtual address.
1604    ///
1605    /// Note that this function may fail reflect an accurate result if there are
1606    /// cursors concurrently accessing the same virtual address range, just like what
1607    /// happens for the hardware MMU walk.
1608    #[cfg(ktest)]
1609    pub fn page_walk(&self, vaddr: Vaddr) -> Option<(Paddr, PageProperty)> {
1610        // SAFETY: The root node is a valid page table node so the address is valid.
1611        unsafe { page_walk::<C>(self.root_paddr(), vaddr) }
1612    }
1613
1614    /// Create a new cursor exclusively accessing the virtual address range for mapping.
1615    ///
1616    /// If another cursor is already accessing the range, the new cursor may wait until the
1617    /// previous cursor is dropped.
1618    #[verus_spec(r =>
1619        with Tracked(owner): Tracked<PageTableOwner<C>>,
1620            Ghost(root_guard): Ghost<PageTableGuard<'rcu, C>>,
1621            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1622            Tracked(guards): Tracked<&mut Guards<'rcu>>
1623        requires
1624            owner.inv(),
1625            // Per-config tightening; see `Cursor::new`.
1626            va.end <= C::LOCKED_END_BOUND_spec(),
1627        ensures
1628            Cursor::<C, G>::cursor_new_success_conditions(va) ==> {
1629                &&& r is Ok
1630                &&& r.unwrap().0.0.invariants(*r.unwrap().1, *final(regions), *final(guards))
1631                &&& r.unwrap().1.in_locked_range()
1632                &&& r.unwrap().0.0.level == r.unwrap().0.0.guard_level
1633                &&& r.unwrap().0.0.guard_level == NR_LEVELS as PagingLevel
1634                &&& r.unwrap().0.0.va < r.unwrap().0.0.barrier_va.end
1635                &&& r.unwrap().0.0.va == va.start
1636                &&& r.unwrap().0.0.barrier_va == *va
1637            },
1638            !Cursor::<C, G>::cursor_new_success_conditions(va) ==> r is Err,
1639            forall |item: C::Item| #![trigger CursorMut::<'rcu, C, G>::item_not_mapped(item, *old(regions))]
1640                CursorMut::<'rcu, C, G>::item_not_mapped(item, *old(regions)) ==>
1641                CursorMut::<'rcu, C, G>::item_not_mapped(item, *final(regions)),
1642            // CursorMut::new inherits Cursor::new's weakened preservation:
1643            // PT-node allocations come from UNUSED slots, so any slot that
1644            // was already in use keeps its paths_in_pt.
1645            forall |idx: usize| #![trigger final(regions).slot_owners[idx].paths_in_pt]
1646                old(regions).slot_owners[idx].inner_perms.ref_count.value()
1647                    != REF_COUNT_UNUSED
1648                ==> final(regions).slot_owners[idx].paths_in_pt
1649                        == old(regions).slot_owners[idx].paths_in_pt,
1650            forall|idx: usize| #![trigger final(regions).slot_owners[idx]]
1651                old(regions).slot_owners.contains_key(idx)
1652                && old(regions).slot_owners[idx].inner_perms.ref_count.value()
1653                    != REF_COUNT_UNUSED
1654                ==> final(regions).slot_owners[idx].inner_perms.ref_count.value()
1655                        == old(regions).slot_owners[idx].inner_perms.ref_count.value()
1656                    && final(regions).slot_owners[idx].usage
1657                        == old(regions).slot_owners[idx].usage,
1658    )]
1659    pub fn cursor_mut<'rcu, G: InAtomicMode>(
1660        &'rcu self,
1661        guard: &'rcu G,
1662        va: &Range<Vaddr>,
1663    ) -> Result<(CursorMut<'rcu, C, G>, Tracked<CursorOwner<'rcu, C>>), PageTableError> {
1664        #[verus_spec(with Tracked(owner), Ghost(root_guard), Tracked(regions), Tracked(guards))]
1665        CursorMut::new(self, guard, va)
1666    }
1667
1668    /// Create a new cursor exclusively accessing the virtual address range for querying.
1669    ///
1670    /// If another cursor is already accessing the range, the new cursor may wait until the
1671    /// previous cursor is dropped. The modification to the mapping by the cursor may also
1672    /// block or be overridden by the mapping of another cursor.
1673    #[verus_spec(r =>
1674        with Tracked(owner): Tracked<PageTableOwner<C>>,
1675            Ghost(root_guard): Ghost<PageTableGuard<'rcu, C>>,
1676            Tracked(regions): Tracked<&mut MetaRegionOwners>,
1677            Tracked(guards): Tracked<&mut Guards<'rcu>>
1678        requires
1679            owner.inv(),
1680            // Per-config tightening; see `Cursor::new`.
1681            va.end <= C::LOCKED_END_BOUND_spec(),
1682        ensures
1683            Cursor::<C, G>::cursor_new_success_conditions(va) ==> {
1684                &&& r is Ok
1685                &&& r.unwrap().0.invariants(*r.unwrap().1, *final(regions), *final(guards))
1686                &&& r.unwrap().1.in_locked_range()
1687                &&& r.unwrap().0.level == r.unwrap().0.guard_level
1688                &&& r.unwrap().0.va < r.unwrap().0.barrier_va.end
1689                &&& r.unwrap().0.va == va.start
1690                &&& r.unwrap().0.barrier_va == *va
1691                &&& r.unwrap().1@.as_page_table_owner() == owner
1692                &&& r.unwrap().1@.continuations[3].path() == owner.0.value.path
1693            },
1694            !Cursor::<C, G>::cursor_new_success_conditions(va) ==> r is Err,
1695            forall|idx: usize| #![trigger final(regions).slot_owners[idx].paths_in_pt]
1696                old(regions).slot_owners[idx].inner_perms.ref_count.value()
1697                    != REF_COUNT_UNUSED
1698                ==> final(regions).slot_owners[idx].paths_in_pt
1699                        == old(regions).slot_owners[idx].paths_in_pt,
1700            // Non-saturation preservation.
1701            (forall |i: usize| #![trigger old(regions).slot_owners[i]]
1702                old(regions).slot_owners.contains_key(i)
1703                && old(regions).slot_owners[i].inner_perms.ref_count.value()
1704                    != REF_COUNT_UNUSED
1705                ==> old(regions).slot_owners[i].inner_perms.ref_count.value() + 1
1706                    < REF_COUNT_MAX)
1707            ==>
1708            (forall |i: usize| #![trigger final(regions).slot_owners[i]]
1709                final(regions).slot_owners.contains_key(i)
1710                && final(regions).slot_owners[i].inner_perms.ref_count.value()
1711                    != REF_COUNT_UNUSED
1712                ==> final(regions).slot_owners[i].inner_perms.ref_count.value() + 1
1713                    < REF_COUNT_MAX),
1714            // Saturated-slot bridge (relayed from `Cursor::new`):
1715            // a slot at `>= REF_COUNT_MAX` before iff after, with the same
1716            // value. Used by `KVirtArea::query` to bridge inner-cursor
1717            // saturation back to the caller's snapshot.
1718            forall|idx: usize| #![trigger final(regions).slot_owners[idx].inner_perms.ref_count.value()]
1719                final(regions).slot_owners[idx].inner_perms.ref_count.value()
1720                    >= REF_COUNT_MAX
1721                ==> old(regions).slot_owners[idx].inner_perms.ref_count.value()
1722                        == final(regions).slot_owners[idx].inner_perms.ref_count.value(),
1723            forall|idx: usize| #![trigger old(regions).slot_owners[idx].inner_perms.ref_count.value()]
1724                old(regions).slot_owners[idx].inner_perms.ref_count.value()
1725                    >= REF_COUNT_MAX
1726                ==> final(regions).slot_owners[idx].inner_perms.ref_count.value()
1727                        == old(regions).slot_owners[idx].inner_perms.ref_count.value(),
1728    )]
1729    pub fn cursor<'rcu, G: InAtomicMode>(&'rcu self, guard: &'rcu G, va: &Range<Vaddr>) -> Result<
1730        (Cursor<'rcu, C, G>, Tracked<CursorOwner<'rcu, C>>),
1731        PageTableError,
1732    > {
1733        #[verus_spec(with Tracked(owner), Ghost(root_guard), Tracked(regions), Tracked(guards))]
1734        Cursor::new(self, guard, va)
1735    }/*
1736    /// Create a new reference to the same page table.
1737    /// The caller must ensure that the kernel page table is not copied.
1738    /// This is only useful for IOMMU page tables. Think twice before using it in other cases.
1739    pub unsafe fn shallow_copy(&self) -> Self {
1740        PageTable {
1741            root: self.root.clone(),
1742        }
1743    }
1744    */
1745
1746}
1747
1748/// A software emulation of the MMU address translation process.
1749///
1750/// This method returns the physical address of the given virtual address and
1751/// the page property if a valid mapping exists for the given virtual address.
1752///
1753/// # Safety
1754///
1755/// The caller must ensure that the `root_paddr` is a pointer to a valid root
1756/// page table node.
1757///
1758/// # Notes on the page table use-after-free problem
1759///
1760/// Neither the hardware MMU nor the software page walk method acquires the page
1761/// table locks while reading. They can enter a to-be-recycled page table node
1762/// and read the page table entries after the node is recycled and reused.
1763///
1764/// For the hardware MMU page walk, we mitigate this problem by dropping the page
1765/// table nodes only after the TLBs have been flushed on all the CPUs that
1766/// activate the page table.
1767///
1768/// For the software page walk, we only need to disable preemption at the beginning
1769/// since the page table nodes won't be recycled in the RCU critical section.
1770#[cfg(ktest)]
1771pub(super) unsafe fn page_walk<C: PageTableConfig>(root_paddr: Paddr, vaddr: Vaddr) -> Option<
1772    (Paddr, PageProperty),
1773> {
1774    use super::paddr_to_vaddr;
1775
1776    let _rcu_guard = disable_preempt();
1777
1778    let mut pt_addr = paddr_to_vaddr(root_paddr);
1779    #[verusfmt::skip]
1780    for cur_level in (1..= C::NR_LEVELS()).rev() {
1781        let offset = pte_index::<C>(vaddr, cur_level);
1782        // SAFETY:
1783        //  - The page table node is alive because (1) the root node is alive and
1784        //    (2) all child nodes cannot be recycled because we're in the RCU critical section.
1785        //  - The index is inside the bound, so the page table entry is valid.
1786        //  - All page table entries are aligned and accessed with atomic operations only.
1787        let cur_pte = unsafe { load_pte((pt_addr as *mut C::E).add(offset), Ordering::Acquire) };
1788
1789        if !cur_pte.is_present() {
1790            return None;
1791        }
1792        if cur_pte.is_last(cur_level) {
1793            debug_assert!(cur_level <= C::HIGHEST_TRANSLATION_LEVEL);
1794            return Some(
1795                (cur_pte.paddr() + (vaddr & (page_size::<C>(cur_level) - 1)), cur_pte.prop()),
1796            );
1797        }
1798        pt_addr = paddr_to_vaddr(cur_pte.paddr());
1799    }
1800
1801    unreachable!("All present PTEs at the level 1 must be last-level PTEs");
1802}
1803
1804/// A trait that abstracts architecture-specific page table entries (PTEs).
1805///
1806/// Note that a default PTE should be a PTE that points to nothing.
1807pub trait PageTableEntryTrait:
1808    Clone + Copy + Debug + Default + Sized + Pod + PodOnce + Send + Sync + 'static {
1809    spec fn new_absent_spec() -> Self;
1810
1811    /// Create a set of new invalid page table flags that indicates an absent page.
1812    ///
1813    /// Note that currently the implementation requires an all zero PTE to be an absent PTE.
1814    #[verifier(when_used_as_spec(new_absent_spec))]
1815    fn new_absent() -> (res: Self)
1816        ensures
1817            res.paddr() % PAGE_SIZE == 0,
1818            res.paddr() < MAX_PADDR,
1819            !res.is_present(),
1820        returns
1821            Self::new_absent(),
1822    ;
1823
1824    spec fn is_present_spec(&self) -> bool;
1825
1826    /// Returns if the PTE points to something.
1827    ///
1828    /// For PTEs created by [`Self::new_absent`], this method should return
1829    /// false. For PTEs created by [`Self::new_page`] or [`Self::new_pt`]
1830    /// and modified with [`Self::set_prop`], this method should return true.
1831    #[verifier::when_used_as_spec(is_present_spec)]
1832    fn is_present(&self) -> bool
1833        returns
1834            self.is_present_spec(),
1835    ;
1836
1837    spec fn new_page_spec(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> Self;
1838
1839    /// The preconditions for creating a new page-mapping PTE.
1840    spec fn new_page_req(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> bool;
1841
1842    /// Creates a new PTE that maps to a page.
1843    #[verifier::when_used_as_spec(new_page_spec)]
1844    fn new_page(paddr: Paddr, level: PagingLevel, prop: PageProperty) -> (res: Self)
1845        requires
1846            paddr < MAX_PADDR,
1847            Self::new_page_req(paddr, level, prop),
1848        ensures
1849            res.paddr() == paddr & !((PAGE_SIZE - 1) as usize),
1850            paddr % PAGE_SIZE == 0 ==> res.paddr() == paddr,
1851            res.paddr() % PAGE_SIZE == 0,
1852            res.paddr() < MAX_PADDR,
1853            res.is_present(),
1854            res.is_last(level),
1855            res.prop() == prop,
1856        returns
1857            Self::new_page(paddr, level, prop),
1858    ;
1859
1860    spec fn new_pt_spec(paddr: Paddr) -> Self;
1861
1862    /// Create a new PTE that map to a child page table.
1863    #[verifier::when_used_as_spec(new_pt_spec)]
1864    fn new_pt(paddr: Paddr) -> (res: Self)
1865        requires
1866            paddr < MAX_PADDR,
1867        ensures
1868            res.paddr() == paddr & !((PAGE_SIZE - 1) as usize),
1869            paddr % PAGE_SIZE == 0 ==> res.paddr() == paddr,
1870            res.paddr() % PAGE_SIZE == 0,
1871            res.paddr() < MAX_PADDR,
1872            res.is_present(),
1873            forall|level: PagingLevel| !res.is_last(level),
1874        returns
1875            Self::new_pt(paddr),
1876    ;
1877
1878    /// Returns the physical address from the PTE.
1879    ///
1880    /// The physical address recorded in the PTE is either:
1881    /// - the physical address of the next-level page table, or
1882    /// - the physical address of the page that the PTE maps to.
1883    spec fn paddr_spec(&self) -> Paddr;
1884
1885    #[verifier::when_used_as_spec(paddr_spec)]
1886    fn paddr(&self) -> (res: Paddr)
1887        ensures
1888            res % PAGE_SIZE == 0,
1889        returns
1890            self.paddr(),
1891    ;
1892
1893    spec fn prop_spec(&self) -> PageProperty;
1894
1895    #[verifier::when_used_as_spec(prop_spec)]
1896    fn prop(&self) -> PageProperty
1897        returns
1898            self.prop(),
1899    ;
1900
1901    /// The preconditions for setting the page property of a PTE.
1902    spec fn set_prop_req(self, prop: PageProperty) -> bool;
1903
1904    fn set_prop(&mut self, prop: PageProperty)
1905        requires
1906            old(self).set_prop_req(prop),
1907        ensures
1908            !old(self).is_present() ==> *old(self) == *final(self),
1909            old(self).is_present() ==> {
1910                &&& final(self).prop() == prop
1911                &&& final(self).paddr() == old(self).paddr()
1912                &&& final(self).is_present()
1913                &&& forall|level: PagingLevel|
1914                    #![trigger old(self).is_last(level)]
1915                    old(self).is_last(level) ==> final(self).is_last(level)
1916            },
1917    ;
1918
1919    spec fn is_last_spec(&self, level: PagingLevel) -> bool;
1920
1921    /// Returns if the PTE maps a page rather than a child page table.
1922    ///
1923    /// The method needs to know the level of the page table where the PTE resides,
1924    /// since architectures like x86-64 have a huge bit only in intermediate levels.
1925    #[verifier::when_used_as_spec(is_last_spec)]
1926    fn is_last(&self, level: PagingLevel) -> bool
1927        returns
1928            self.is_last_spec(level),
1929    ;
1930
1931    spec fn as_usize_spec(self) -> usize;
1932
1933    /// Converts the PTE into a raw `usize` value.
1934    #[verifier::external_body]
1935    #[verifier::when_used_as_spec(as_usize_spec)]
1936    fn as_usize(self) -> usize
1937        returns
1938            self.as_usize(),
1939    {
1940        unimplemented!()
1941        // const { assert!(size_of::<Self>() == size_of::<usize>()) };
1942        // SAFETY: `Self` is `Pod` and has the same memory representation as `usize`.
1943        // unsafe { transmute_unchecked(self) }
1944
1945    }
1946
1947    /// Converts the raw `usize` value into a PTE.
1948    #[verifier::external_body]
1949    fn from_usize(pte_raw: usize) -> Self {
1950        unimplemented!()
1951        // const { assert!(size_of::<Self>() == size_of::<usize>()) };
1952        // SAFETY: `Self` is `Pod` and has the same memory representation as `usize`.
1953        // unsafe { transmute_unchecked(pte_raw) }
1954
1955    }
1956
1957    /// Absent (zero) PTE has well-formed paddr for match_pte.
1958    proof fn lemma_page_table_entry_properties()
1959        ensures
1960            Self::new_absent().paddr() % PAGE_SIZE == 0,
1961            Self::new_absent().paddr() < MAX_PADDR,
1962            !Self::new_absent().is_present(),
1963            forall|level: PagingLevel|
1964                #![trigger Self::new_absent().is_last(level)]
1965                1 < level ==> !Self::new_absent().is_last(level),
1966            forall|paddr: Paddr, level: PagingLevel, prop: PageProperty|
1967                #![trigger Self::new_page(paddr, level, prop)]
1968                Self::new_page_req(paddr, level, prop) && (prop.cache is Writeback
1969                    || prop.cache is Writethrough || prop.cache is Uncacheable) ==> {
1970                    &&& Self::new_page(paddr, level, prop).is_present()
1971                    &&& (paddr < MAX_PADDR ==> Self::new_page(paddr, level, prop).paddr() == paddr
1972                        & !((PAGE_SIZE - 1) as usize))
1973                    &&& (paddr < MAX_PADDR && paddr % PAGE_SIZE == 0 ==> Self::new_page(
1974                        paddr,
1975                        level,
1976                        prop,
1977                    ).paddr() == paddr)
1978                    &&& Self::new_page(paddr, level, prop).prop() == prop
1979                    &&& Self::new_page(paddr, level, prop).is_last(level)
1980                },
1981            forall|paddr: Paddr|
1982                #![trigger Self::new_pt(paddr)]
1983                {
1984                    &&& Self::new_pt(paddr).is_present()
1985                    &&& (paddr < MAX_PADDR ==> Self::new_pt(paddr).paddr() == paddr & !((PAGE_SIZE
1986                        - 1) as usize))
1987                    &&& (paddr < MAX_PADDR && paddr % PAGE_SIZE == 0 ==> Self::new_pt(paddr).paddr()
1988                        == paddr)
1989                    &&& forall|level: PagingLevel| !Self::new_pt(paddr).is_last(level)
1990                },
1991    ;
1992
1993    proof fn lemma_paddr_is_page_aligned(self)
1994        ensures
1995            self.paddr() % PAGE_SIZE == 0,
1996    ;
1997}
1998
1999/// Loads a page table entry with an atomic instruction.
2000///
2001/// # Verification Design
2002/// ## Preconditions
2003/// - The pointer must be a valid pointer to the array that represents the page table node.
2004/// - The array must be initialized at the target index.
2005/// ## Postconditions
2006/// - The value is loaded from the array at the given index.
2007/// ## Safety
2008/// - We require the caller to provide a permission token to ensure that this function is only called on a valid array
2009/// and the pointer is in bounds.
2010/// - Like an `AtomicUsize::load` in normal Rust, this function assumes that the value being loaded is an integer
2011/// (and therefore can be safely cloned). We model the PTE as an abstract type, but in all actual implementations it is an
2012/// integer. Importantly, it does not include any data that is unsafe to duplicate.
2013#[verifier::external_body]
2014#[verus_spec(
2015    with Tracked(perm): Tracked<&vstd_extra::array_ptr::PointsTo<E, NR_ENTRIES>>
2016    requires
2017        perm.is_init(ptr.index as int),
2018        perm.addr() == ptr.addr(),
2019        0 <= ptr.index < NR_ENTRIES,
2020    returns
2021        perm.value()[ptr.index as int],
2022)]
2023pub unsafe fn load_pte<E: PageTableEntryTrait>(
2024    ptr: vstd_extra::array_ptr::ArrayPtr<E, NR_ENTRIES>,
2025    ordering: Ordering,
2026) -> (pte: E) {
2027    unimplemented!()
2028}
2029
2030/// Stores a page table entry with an atomic instruction.
2031///
2032/// # Verification Design
2033/// We axiomatize this function as a store operation in the array that represents the page table node.
2034/// ## Preconditions
2035/// - The pointer must be a valid pointer to the array that represents the page table node.
2036/// - The array must be initialized so that the verifier knows that it remains initialized after the store.
2037/// ## Postconditions
2038/// - The new value is stored in the array at the given index.
2039/// ## Safety
2040/// - We require the caller to provide a permission token to ensure that this function is only called on a valid array
2041/// and the pointer is in bounds.
2042#[verifier::external_body]
2043#[verus_spec(
2044    with Tracked(perm): Tracked<&mut vstd_extra::array_ptr::PointsTo<E, NR_ENTRIES>>
2045    requires
2046        old(perm).addr() == ptr.addr(),
2047        0 <= ptr.index < NR_ENTRIES,
2048        old(perm).is_init_all(),
2049    ensures
2050        final(perm).value()[ptr.index as int] == new_val,
2051        final(perm).value() == old(perm).value().update(ptr.index as int, new_val),
2052        final(perm).addr() == old(perm).addr(),
2053        final(perm).is_init_all(),
2054)]
2055pub unsafe fn store_pte<E: PageTableEntryTrait>(
2056    ptr: vstd_extra::array_ptr::ArrayPtr<E, NR_ENTRIES>,
2057    new_val: E,
2058    ordering: Ordering,
2059);
2060
2061} // verus!