// SPDX-License-Identifier: MPL-2.0

//! TLB flush operations.

use alloc::vec::Vec;
use core::{
    mem::MaybeUninit,
    ops::Range,
    sync::atomic::{AtomicBool, Ordering},
};

use super::{
    PAGE_SIZE, Vaddr,
    frame::{Frame, meta::AnyFrameMeta},
};
use crate::{
    arch::irq,
    const_assert,
    cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
    cpu_local,
    sync::{LocalIrqDisabled, SpinLock},
};

/// A TLB flusher that is aware of which CPUs need to be flushed.
///
/// The flusher needs to stick to the current CPU.
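///
/// # Examples
///
/// A minimal usage sketch (marked `ignore`, so it is not compiled as a
/// doctest). It assumes a pin-current-CPU guard obtained from a
/// `disable_preempt()`-style API, as well as `AtomicCpuSet::new` and
/// `CpuSet::new_full` constructors; these are assumptions for illustration
/// rather than APIs guaranteed by this module.
///
/// ```ignore
/// // Hypothetical guard that implements `PinCurrentCpu`.
/// let preempt_guard = disable_preempt();
/// // The CPUs that may have activated the page table (assumed constructors).
/// let target_cpus = AtomicCpuSet::new(CpuSet::new_full());
///
/// let mut flusher = TlbFlusher::new(&target_cpus, preempt_guard);
///
/// // Queue flush requests; nothing is flushed yet.
/// flusher.issue_tlb_flush(TlbFlushOp::for_single(0x1000));
/// flusher.issue_tlb_flush(TlbFlushOp::for_range(0x2000..0x6000));
///
/// // Send the requests to the target CPUs (asynchronous).
/// flusher.dispatch_tlb_flush();
///
/// // Wait until all dispatched requests have been performed.
/// flusher.sync_tlb_flush();
/// ```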
pub struct TlbFlusher<'a, G: PinCurrentCpu> {
    target_cpus: &'a AtomicCpuSet,
    have_unsynced_flush: CpuSet,
    ops_stack: OpsStack,
    _pin_current: G,
}

impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
    /// Creates a new TLB flusher with the specified CPUs to be flushed.
    ///
    /// The target CPUs should be a reference to an [`AtomicCpuSet`] that will
    /// be loaded upon [`Self::dispatch_tlb_flush`].
    ///
    /// The flusher needs to stick to the current CPU, so a guard that
    /// implements [`PinCurrentCpu`] must be provided.
    pub fn new(target_cpus: &'a AtomicCpuSet, pin_current_guard: G) -> Self {
        Self {
            target_cpus,
            have_unsynced_flush: CpuSet::new_empty(),
            ops_stack: OpsStack::new(),
            _pin_current: pin_current_guard,
        }
    }

    /// Issues a pending TLB flush request.
    ///
    /// This function does not guarantee that the TLB entries are flushed on
    /// either this CPU or remote CPUs. The flush requests are only performed
    /// when [`Self::dispatch_tlb_flush`] is called.
    pub fn issue_tlb_flush(&mut self, op: TlbFlushOp) {
        self.ops_stack.push(op, None);
    }

    /// Issues a TLB flush request that must happen before dropping the page.
    ///
    /// If we need to remove a mapped page from the page table, we can only
    /// recycle the page after the relevant TLB entries on all CPUs are
    /// flushed. Otherwise, if the page is recycled for other purposes, user
    /// space programs could still access it through the stale TLB entries.
    /// This method is designed to be used in such cases.
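    ///
    /// For example (a hedged sketch; `vaddr` and `frame` are placeholders,
    /// and how the unmapped `Frame<dyn AnyFrameMeta>` is obtained is outside
    /// the scope of this snippet):
    ///
    /// ```ignore
    /// // `frame` was just unmapped from the page table at `vaddr`.
    /// flusher.issue_tlb_flush_with(TlbFlushOp::for_single(vaddr), frame);
    /// flusher.dispatch_tlb_flush();
    /// // Only after the flush is performed on all target CPUs is the last
    /// // frame reference dropped, so the page cannot be recycled too early.
    /// flusher.sync_tlb_flush();
    /// ```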
    pub fn issue_tlb_flush_with(
        &mut self,
        op: TlbFlushOp,
        drop_after_flush: Frame<dyn AnyFrameMeta>,
    ) {
        self.ops_stack.push(op, Some(drop_after_flush));
    }

    /// Dispatches all the pending TLB flush requests.
    ///
    /// All pending requests previously issued by [`Self::issue_tlb_flush`] or
    /// [`Self::issue_tlb_flush_with`] start to be processed after this
    /// function is called, but the processing may not be synchronous: upon
    /// the return of this function, the TLB entries may not yet be coherent.
    pub fn dispatch_tlb_flush(&mut self) {
        let irq_guard = crate::irq::disable_local();

        if self.ops_stack.is_empty() {
            return;
        }

        // `Release` to make sure our modifications to the page table are
        // visible to CPUs that are going to activate it.
        let mut target_cpus = self.target_cpus.load(Ordering::Release);

        let cur_cpu = irq_guard.current_cpu();
        let mut need_flush_on_self = false;

        if target_cpus.contains(cur_cpu) {
            target_cpus.remove(cur_cpu);
            need_flush_on_self = true;
        }

        for cpu in target_cpus.iter() {
            {
                let mut flush_ops = FLUSH_OPS.get_on_cpu(cpu).lock();
                flush_ops.push_from(&self.ops_stack);

                // Clear ACK before dropping the lock to avoid false ACKs.
                ACK_REMOTE_FLUSH
                    .get_on_cpu(cpu)
                    .store(false, Ordering::Relaxed);
            }
            self.have_unsynced_flush.add(cpu);
        }

        crate::smp::inter_processor_call(&target_cpus, do_remote_flush);

        // Flush ourselves after sending all IPIs to save some time.
        if need_flush_on_self {
            self.ops_stack.flush_all();
        } else {
            self.ops_stack.clear_without_flush();
        }
    }

    /// Waits for all the previous TLB flush requests to be completed.
    ///
    /// After this function, all TLB entries corresponding to previously
    /// dispatched TLB flush requests are guaranteed to be coherent.
    ///
    /// The TLB flush requests are issued with [`Self::issue_tlb_flush`] and
    /// dispatched with [`Self::dispatch_tlb_flush`]. This method does not
    /// dispatch any issued requests, so it does not guarantee TLB coherence
    /// for requests that have not been dispatched.
    ///
    /// # Panics
    ///
    /// This method panics if IRQs are disabled. Since remote flushes are
    /// processed in IRQ context, two CPUs could deadlock if they waited for
    /// each other's TLB coherence with IRQs disabled.
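    ///
    /// For instance (a hedged sketch; `irq_guard` stands for whatever guard
    /// currently keeps local IRQs disabled):
    ///
    /// ```ignore
    /// drop(irq_guard);          // Local IRQs must be enabled again...
    /// flusher.sync_tlb_flush(); // ...before waiting, or this call panics.
    /// ```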
    pub fn sync_tlb_flush(&mut self) {
        assert!(
            irq::is_local_enabled(),
            "Waiting for remote flush with IRQs disabled"
        );

        for cpu in self.have_unsynced_flush.iter() {
            while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Relaxed) {
                core::hint::spin_loop();
            }
        }

        self.have_unsynced_flush = CpuSet::new_empty();
    }
}

/// The operation to flush TLB entries.
///
/// The variants of this structure are:
///  - Flushing all TLB entries except for the global entries;
///  - Flushing the TLB entry associated with an address;
///  - Flushing the TLB entries for a specific range of virtual addresses.
///
/// This is a `struct` instead of an `enum` because a straightforward `enum`
/// representation of the three variants would take 24 bytes. To minimize the
/// memory footprint, we encode all three variants into an 8-byte integer.
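///
/// The encoding stores the page-aligned start address in the high bits and
/// the page count in the low (in-page offset) bits, with `Vaddr::MAX` meaning
/// "flush all". A small sketch of what this implies for the public
/// constructors (not compiled as a doctest; it assumes 4-KiB base pages):
///
/// ```ignore
/// // A single-page flush is the same operation as a one-page range.
/// assert_eq!(
///     TlbFlushOp::for_single(0x5000),
///     TlbFlushOp::for_range(0x5000..0x5000 + PAGE_SIZE),
/// );
/// // A range covering enough pages degrades into a full flush.
/// assert_eq!(
///     TlbFlushOp::for_range(0x5000..0x5000 + 64 * PAGE_SIZE),
///     TlbFlushOp::for_all(),
/// );
/// ```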
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TlbFlushOp(Vaddr);

// We require the address to be page-aligned, so the in-page offset part of the
// address can be used to store the length. The following sanity check ensures
// that the length of a ranged flush operation always fits in those offset bits.
const_assert!(TlbFlushOp::FLUSH_RANGE_NPAGES_MASK | (PAGE_SIZE - 1) == PAGE_SIZE - 1);

impl TlbFlushOp {
    const FLUSH_ALL_VAL: Vaddr = Vaddr::MAX;
    const FLUSH_RANGE_NPAGES_MASK: Vaddr =
        (1 << (usize::BITS - FLUSH_ALL_PAGES_THRESHOLD.leading_zeros())) - 1;

    /// Performs the TLB flush operation on the current CPU.
    pub fn perform_on_current(&self) {
        use crate::arch::mm::{
            tlb_flush_addr, tlb_flush_addr_range, tlb_flush_all_excluding_global,
        };
        match self.0 {
            Self::FLUSH_ALL_VAL => tlb_flush_all_excluding_global(),
            addr => {
                let start = addr & !Self::FLUSH_RANGE_NPAGES_MASK;
                let num_pages = addr & Self::FLUSH_RANGE_NPAGES_MASK;

                debug_assert!((addr & (PAGE_SIZE - 1)) < FLUSH_ALL_PAGES_THRESHOLD);
                debug_assert!(num_pages != 0);

                if num_pages == 1 {
                    tlb_flush_addr(start);
                } else {
                    tlb_flush_addr_range(&(start..start + num_pages * PAGE_SIZE));
                }
            }
        }
    }

    /// Creates a new TLB flush operation that flushes all TLB entries except
    /// for the global entries.
    pub const fn for_all() -> Self {
        TlbFlushOp(Self::FLUSH_ALL_VAL)
    }

    /// Creates a new TLB flush operation that flushes the TLB entry associated
    /// with the provided virtual address.
    ///
    /// The provided address must be page-aligned, since the low bits of the
    /// encoding are reserved for the page count.
    pub const fn for_single(addr: Vaddr) -> Self {
        TlbFlushOp(addr | 1)
    }

    /// Creates a new TLB flush operation that flushes the TLB entries for the
    /// specified virtual address range.
    ///
    /// If the range is too large (i.e., it covers at least as many pages as
    /// the flush-all threshold), the resulting [`TlbFlushOp`] will flush all
    /// TLB entries instead.
    ///
    /// # Panics
    ///
    /// Panics if the range is not page-aligned or if the range is empty.
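    ///
    /// For example (a sketch, not compiled as a doctest; assumes 4-KiB base
    /// pages):
    ///
    /// ```ignore
    /// // Flushes 4 pages starting at 0x4000.
    /// let op = TlbFlushOp::for_range(0x4000..0x4000 + 4 * PAGE_SIZE);
    /// // TlbFlushOp::for_range(0x4001..0x5000); // would panic: not page-aligned
    /// ```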
    pub const fn for_range(range: Range<Vaddr>) -> Self {
        assert!(
            range.start.is_multiple_of(PAGE_SIZE),
            "Range start must be page-aligned"
        );
        assert!(
            range.end.is_multiple_of(PAGE_SIZE),
            "Range end must be page-aligned"
        );
        assert!(range.start < range.end, "Range must not be empty");
        let num_pages = (range.end - range.start) / PAGE_SIZE;
        if num_pages >= FLUSH_ALL_PAGES_THRESHOLD {
            return TlbFlushOp::for_all();
        }
        TlbFlushOp(range.start | (num_pages as Vaddr))
    }

    /// Returns the number of pages to flush.
    ///
    /// A return value of `u32::MAX` means that all entries should be flushed.
    /// Otherwise, the return value is non-zero and less than
    /// [`FLUSH_ALL_PAGES_THRESHOLD`].
    fn num_pages(&self) -> u32 {
        if self.0 == Self::FLUSH_ALL_VAL {
            u32::MAX
        } else {
            debug_assert!((self.0 & (PAGE_SIZE - 1)) < FLUSH_ALL_PAGES_THRESHOLD);
            let num_pages = (self.0 & Self::FLUSH_RANGE_NPAGES_MASK) as u32;
            debug_assert!(num_pages != 0);
            num_pages
        }
    }
}

// The queues of pending requests on each CPU.
cpu_local! {
    static FLUSH_OPS: SpinLock<OpsStack, LocalIrqDisabled> = SpinLock::new(OpsStack::new());
    /// Whether this CPU has finished the last remote flush request.
    static ACK_REMOTE_FLUSH: AtomicBool = AtomicBool::new(true);
}

fn do_remote_flush() {
    // No races because we are in IRQ context or have preemption disabled.
    let current_cpu = crate::cpu::CpuId::current_racy();

    let mut new_op_queue = OpsStack::new();
    {
        let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();

        core::mem::swap(&mut *op_queue, &mut new_op_queue);

        // ACK before dropping the lock so that we won't miss flush requests.
        ACK_REMOTE_FLUSH
            .get_on_cpu(current_cpu)
            .store(true, Ordering::Relaxed);
    }
    // Drop the lock quickly to avoid contention. ACKing before flushing is
    // fine since we cannot switch back to user space now.
    new_op_queue.flush_all();
}

/// If the number of pending pages to flush reaches this threshold, we flush all
/// the TLB entries instead of flushing them one by one.
const FLUSH_ALL_PAGES_THRESHOLD: usize = 32;

struct OpsStack {
    /// Entries at indices `0..num_ops` must be initialized.
    ops: [MaybeUninit<TlbFlushOp>; FLUSH_ALL_PAGES_THRESHOLD],
    num_ops: u32,
    /// If this is `u32::MAX`, we should flush all entries irrespective of the
    /// contents of `ops`, and `num_ops` must be zero in this case.
    ///
    /// Otherwise, it counts the number of pages to flush in `ops`.
    num_pages_to_flush: u32,
    page_keeper: Vec<Frame<dyn AnyFrameMeta>>,
}

impl OpsStack {
    const fn new() -> Self {
        Self {
            ops: [const { MaybeUninit::uninit() }; FLUSH_ALL_PAGES_THRESHOLD],
            num_ops: 0,
            num_pages_to_flush: 0,
            page_keeper: Vec::new(),
        }
    }

    fn is_empty(&self) -> bool {
        self.num_ops == 0 && self.num_pages_to_flush == 0
    }

    fn need_flush_all(&self) -> bool {
        self.num_pages_to_flush == u32::MAX
    }

    fn push(&mut self, op: TlbFlushOp, drop_after_flush: Option<Frame<dyn AnyFrameMeta>>) {
        if let Some(frame) = drop_after_flush {
            self.page_keeper.push(frame);
        }

        if self.need_flush_all() {
            return;
        }
        let op_num_pages = op.num_pages();
        if op == TlbFlushOp::for_all()
            || self.num_pages_to_flush + op_num_pages >= FLUSH_ALL_PAGES_THRESHOLD as u32
        {
            self.num_pages_to_flush = u32::MAX;
            self.num_ops = 0;
            return;
        }

        self.ops[self.num_ops as usize].write(op);
        self.num_ops += 1;
        self.num_pages_to_flush += op_num_pages;
    }

    fn push_from(&mut self, other: &OpsStack) {
        self.page_keeper.extend(other.page_keeper.iter().cloned());

        if self.need_flush_all() {
            return;
        }
        if other.need_flush_all()
            || self.num_pages_to_flush + other.num_pages_to_flush
                >= FLUSH_ALL_PAGES_THRESHOLD as u32
        {
            self.num_pages_to_flush = u32::MAX;
            self.num_ops = 0;
            return;
        }

        for other_op in other.ops_iter() {
            self.ops[self.num_ops as usize].write(other_op.clone());
            self.num_ops += 1;
        }
        self.num_pages_to_flush += other.num_pages_to_flush;
    }

    fn flush_all(&mut self) {
        if self.need_flush_all() {
            crate::arch::mm::tlb_flush_all_excluding_global();
        } else {
            self.ops_iter().for_each(|op| {
                op.perform_on_current();
            });
        }

        self.clear_without_flush();
    }

    fn clear_without_flush(&mut self) {
        self.num_pages_to_flush = 0;
        self.num_ops = 0;
        self.page_keeper.clear();
    }

379        self.ops.iter().take(self.num_ops as usize).map(|op| {
380            // SAFETY: From 0 to `num_ops`, the array entry must be initialized.
381            unsafe { op.assume_init_ref() }
382        })
383    }
384}