ostd/mm/
tlb.rs

1// SPDX-License-Identifier: MPL-2.0
2
3//! TLB flush operations.
4
5use alloc::vec::Vec;
6use core::{
7    mem::MaybeUninit,
8    ops::Range,
9    sync::atomic::{AtomicBool, Ordering},
10};
11
12use super::{
13    PAGE_SIZE, Vaddr,
14    frame::{Frame, meta::AnyFrameMeta},
15};
16use crate::{
17    arch::irq,
18    const_assert,
19    cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
20    cpu_local,
21    smp::IpiSender,
22    sync::{LocalIrqDisabled, RcuDrop, SpinLock},
23};
24
/// A TLB flusher that is aware of which CPUs are needed to be flushed.
///
/// The flusher needs to stick to the current CPU.
pub struct TlbFlusher<'a, G: PinCurrentCpu> {
    /// The set of CPUs that may hold stale TLB entries; loaded once per
    /// [`Self::dispatch_tlb_flush`].
    target_cpus: &'a AtomicCpuSet,
    /// CPUs that were sent flush requests but whose completion has not yet
    /// been observed by [`Self::sync_tlb_flush`].
    have_unsynced_flush: CpuSet,
    /// Locally accumulated flush requests awaiting dispatch.
    ops_stack: OpsStack,
    /// `None` when IPIs are not available (e.g., in the boot context).
    ipi_sender: Option<&'static IpiSender>,
    /// Pins the flusher to the current CPU for its whole lifetime.
    _pin_current: G,
}
35
impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
    /// Creates a new TLB flusher with the specified CPUs to be flushed.
    ///
    /// The target CPUs should be a reference to an [`AtomicCpuSet`] that will
    /// be loaded upon [`Self::dispatch_tlb_flush`].
    ///
    /// The flusher needs to stick to the current CPU. So please provide a
    /// guard that implements [`PinCurrentCpu`].
    pub fn new(target_cpus: &'a AtomicCpuSet, pin_current_guard: G) -> Self {
        Self {
            target_cpus,
            have_unsynced_flush: CpuSet::new_empty(),
            ops_stack: OpsStack::new(),
            // `None` if IPIs are not up yet (boot context); see
            // `sync_tlb_flush` for how that case is handled.
            ipi_sender: crate::smp::IPI_SENDER.get(),
            _pin_current: pin_current_guard,
        }
    }

    /// Issues a pending TLB flush request.
    ///
    /// This function does not guarantee to flush the TLB entries on either
    /// this CPU or remote CPUs. The flush requests are only performed when
    /// [`Self::dispatch_tlb_flush`] is called.
    pub fn issue_tlb_flush(&mut self, op: TlbFlushOp) {
        self.ops_stack.push(op, None);
    }

    /// Issues a TLB flush request that must happen before dropping the page.
    ///
    /// If we need to remove a mapped page from the page table, we can only
    /// recycle the page after all the relevant TLB entries in all CPUs are
    /// flushed. Otherwise if the page is recycled for other purposes, the user
    /// space program can still access the page through the TLB entries. This
    /// method is designed to be used in such cases.
    ///
    /// Furthermore, the frames will be dropped after the RCU grace period to
    /// ensure that no RCU references are held to the frames.
    pub fn issue_tlb_flush_with(
        &mut self,
        op: TlbFlushOp,
        drop_after_flush: RcuDrop<Frame<dyn AnyFrameMeta>>,
    ) {
        self.ops_stack.push(op, Some(drop_after_flush));
    }

    /// Dispatches all the pending TLB flush requests.
    ///
    /// All previous pending requests issued by [`Self::issue_tlb_flush`] or
    /// [`Self::issue_tlb_flush_with`] starts to be processed after this
    /// function. But it may not be synchronous. Upon the return of this
    /// function, the TLB entries may not be coherent.
    pub fn dispatch_tlb_flush(&mut self) {
        // Disabling IRQs also keeps the remote-flush IRQ handler from running
        // re-entrantly on this CPU while we manipulate the per-CPU queues.
        let irq_guard = crate::irq::disable_local();

        if self.ops_stack.is_empty() {
            return;
        }

        // `Release` to make sure our modification on the PT is visible to CPUs
        // that are going to activate the PT.
        //
        // NOTE(review): a release-ordered *load* is unusual — `core` atomics
        // reject `Ordering::Release` on loads. Confirm `AtomicCpuSet::load`
        // defines this, or whether `Acquire` was intended here.
        let mut target_cpus = self.target_cpus.load(Ordering::Release);

        let cur_cpu = irq_guard.current_cpu();
        let mut need_flush_on_self = false;

        // The current CPU is flushed directly (below), not via IPI.
        if target_cpus.contains(cur_cpu) {
            target_cpus.remove(cur_cpu);
            need_flush_on_self = true;
        }

        if let Some(ipi_sender) = self.ipi_sender {
            for cpu in target_cpus.iter() {
                // Remember to wait for this CPU in `sync_tlb_flush`.
                self.have_unsynced_flush.add(cpu);

                let mut flush_ops = FLUSH_OPS.get_on_cpu(cpu).lock();
                flush_ops.push_from(&self.ops_stack);
                // Clear ACK before dropping the lock to avoid false ACKs.
                ACK_REMOTE_FLUSH
                    .get_on_cpu(cpu)
                    .store(false, Ordering::Relaxed);
            }

            ipi_sender.inter_processor_call(&target_cpus, do_remote_flush);
        }

        // Flush ourselves after sending all IPIs to save some time.
        if need_flush_on_self {
            self.ops_stack.flush_all();
        } else {
            // Still clear the stack; any kept frames are handed to RCU.
            self.ops_stack.clear_without_flush();
        }
    }

    /// Waits for all the previous TLB flush requests to be completed.
    ///
    /// After this function, all TLB entries corresponding to previous
    /// dispatched TLB flush requests are guaranteed to be coherent.
    ///
    /// The TLB flush requests are issued with [`Self::issue_tlb_flush`] and
    /// dispatched with [`Self::dispatch_tlb_flush`]. This method will not
    /// dispatch any issued requests so it will not guarantee TLB coherence
    /// of requests that are not dispatched.
    ///
    /// # Panics
    ///
    /// This method panics if the IRQs are disabled. Since the remote flush are
    /// processed in IRQs, two CPUs may deadlock if they are waiting for each
    /// other's TLB coherence.
    pub fn sync_tlb_flush(&mut self) {
        if self.ipi_sender.is_none() {
            // We performed some TLB flushes in the boot context. The AP's boot
            // process should take care of them.
            return;
        }

        assert!(
            irq::is_local_enabled(),
            "Waiting for remote flush with IRQs disabled"
        );

        // Spin until every CPU we dispatched to has acknowledged the request.
        for cpu in self.have_unsynced_flush.iter() {
            while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Relaxed) {
                core::hint::spin_loop();
            }
        }

        self.have_unsynced_flush = CpuSet::new_empty();
    }
}
165
/// The operation to flush TLB entries.
///
/// The variants of this structure are:
///  - Flushing all TLB entries except for the global entries;
///  - Flushing the TLB entry associated with an address;
///  - Flushing the TLB entries for a specific range of virtual addresses;
///
/// This is a `struct` instead of an `enum` because if trivially representing
/// the three variants with an `enum`, it would be 24 bytes. To minimize the
/// memory footprint, we encode all three variants into an 8-byte integer.
///
/// Encoding: `Vaddr::MAX` means "flush all"; otherwise the page-aligned high
/// bits hold the start address and the in-page offset bits hold the number of
/// pages to flush (see `for_range` and `num_pages`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TlbFlushOp(Vaddr);

// We require the address to be page-aligned, so the in-page offset part of the
// address can be used to store the length. A sanity check to ensure that we
// don't allow ranged flush operations with a too long length.
const_assert!(TlbFlushOp::FLUSH_RANGE_NPAGES_MASK | (PAGE_SIZE - 1) == PAGE_SIZE - 1);
183
184impl TlbFlushOp {
185    const FLUSH_ALL_VAL: Vaddr = Vaddr::MAX;
186    const FLUSH_RANGE_NPAGES_MASK: Vaddr =
187        (1 << (usize::BITS - FLUSH_ALL_PAGES_THRESHOLD.leading_zeros())) - 1;
188
189    /// Performs the TLB flush operation on the current CPU.
190    pub fn perform_on_current(&self) {
191        use crate::arch::mm::{
192            tlb_flush_addr, tlb_flush_addr_range, tlb_flush_all_excluding_global,
193        };
194        match self.0 {
195            Self::FLUSH_ALL_VAL => tlb_flush_all_excluding_global(),
196            addr => {
197                let start = addr & !Self::FLUSH_RANGE_NPAGES_MASK;
198                let num_pages = addr & Self::FLUSH_RANGE_NPAGES_MASK;
199
200                debug_assert!((addr & (PAGE_SIZE - 1)) < FLUSH_ALL_PAGES_THRESHOLD);
201                debug_assert!(num_pages != 0);
202
203                if num_pages == 1 {
204                    tlb_flush_addr(start);
205                } else {
206                    tlb_flush_addr_range(&(start..start + num_pages * PAGE_SIZE));
207                }
208            }
209        }
210    }
211
212    /// Creates a new TLB flush operation that flushes all TLB entries except
213    /// for the global entries.
214    pub const fn for_all() -> Self {
215        TlbFlushOp(Self::FLUSH_ALL_VAL)
216    }
217
218    /// Creates a new TLB flush operation that flushes the TLB entry associated
219    /// with the provided virtual address.
220    pub const fn for_single(addr: Vaddr) -> Self {
221        TlbFlushOp(addr | 1)
222    }
223
224    /// Creates a new TLB flush operation that flushes the TLB entries for the
225    /// specified virtual address range.
226    ///
227    /// If the range is too large, the resulting [`TlbFlushOp`] will flush all
228    /// TLB entries instead.
229    ///
230    /// # Panics
231    ///
232    /// Panics if the range is not page-aligned or if the range is empty.
233    pub const fn for_range(range: Range<Vaddr>) -> Self {
234        assert!(
235            range.start.is_multiple_of(PAGE_SIZE),
236            "Range start must be page-aligned"
237        );
238        assert!(
239            range.end.is_multiple_of(PAGE_SIZE),
240            "Range end must be page-aligned"
241        );
242        assert!(range.start < range.end, "Range must not be empty");
243        let num_pages = (range.end - range.start) / PAGE_SIZE;
244        if num_pages >= FLUSH_ALL_PAGES_THRESHOLD {
245            return TlbFlushOp::for_all();
246        }
247        TlbFlushOp(range.start | (num_pages as Vaddr))
248    }
249
250    /// Returns the number of pages to flush.
251    ///
252    /// If it returns `u32::MAX`, it means to flush all the entries. Otherwise
253    /// the return value should be less than [`FLUSH_ALL_PAGES_THRESHOLD`] and
254    /// non-zero.
255    fn num_pages(&self) -> u32 {
256        if self.0 == Self::FLUSH_ALL_VAL {
257            u32::MAX
258        } else {
259            debug_assert!((self.0 & (PAGE_SIZE - 1)) < FLUSH_ALL_PAGES_THRESHOLD);
260            let num_pages = (self.0 & Self::FLUSH_RANGE_NPAGES_MASK) as u32;
261            debug_assert!(num_pages != 0);
262            num_pages
263        }
264    }
265}
266
// The queues of pending requests on each CPU.
cpu_local! {
    /// Flush operations pushed here by other CPUs' dispatchers; executed by
    /// this CPU in `do_remote_flush`.
    static FLUSH_OPS: SpinLock<OpsStack, LocalIrqDisabled> = SpinLock::new(OpsStack::new());
    /// Whether this CPU finishes the last remote flush request.
    static ACK_REMOTE_FLUSH: AtomicBool = AtomicBool::new(true);
}
273
274fn do_remote_flush() {
275    // No races because we are in IRQs or have disabled preemption.
276    let current_cpu = crate::cpu::CpuId::current_racy();
277
278    let mut new_op_queue = OpsStack::new();
279    {
280        let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();
281
282        core::mem::swap(&mut *op_queue, &mut new_op_queue);
283
284        // ACK before dropping the lock so that we won't miss flush requests.
285        ACK_REMOTE_FLUSH
286            .get_on_cpu(current_cpu)
287            .store(true, Ordering::Relaxed);
288    }
289    // Unlock the locks quickly to avoid contention. ACK before flushing is
290    // fine since we cannot switch back to userspace now.
291    new_op_queue.flush_all();
292}
293
/// If the number of pending pages to flush exceeds this threshold, we flush all the
/// TLB entries instead of flushing them one by one.
///
/// This also bounds the capacity of `OpsStack::ops`: every recorded op covers
/// at least one page, so at most this many ops can ever be pending.
const FLUSH_ALL_PAGES_THRESHOLD: usize = 32;
297
/// A fixed-capacity collection of pending TLB flush operations, along with
/// frames that must outlive both the flushes and an RCU grace period.
struct OpsStack {
    /// From 0 to `num_ops`, the array entry must be initialized.
    ops: [MaybeUninit<TlbFlushOp>; FLUSH_ALL_PAGES_THRESHOLD],
    /// The number of initialized entries in `ops`.
    num_ops: u32,
    /// If this is `u32::MAX`, we should flush all entries irrespective of the
    /// contents of `ops`. And in this case `num_ops` must be zero.
    ///
    /// Otherwise, it counts the number of pages to flush in `ops`.
    num_pages_to_flush: u32,
    /// Keeps all the to-be-dropped frames.
    ///
    /// The elements cannot be modified after being pushed. And they must be
    /// dropped after the RCU grace period and the TLB flushes.
    frame_keeper: Vec<Frame<dyn AnyFrameMeta>>,
}
313
impl OpsStack {
    /// Creates an empty stack: no pending operations, no kept frames.
    const fn new() -> Self {
        Self {
            ops: [const { MaybeUninit::uninit() }; FLUSH_ALL_PAGES_THRESHOLD],
            num_ops: 0,
            num_pages_to_flush: 0,
            frame_keeper: Vec::new(),
        }
    }

    /// Returns whether there is nothing to flush.
    ///
    /// In the "flush all" state `num_pages_to_flush` is `u32::MAX`, so this
    /// correctly returns `false` there.
    fn is_empty(&self) -> bool {
        self.num_ops == 0 && self.num_pages_to_flush == 0
    }

    /// Returns whether the stack is in the "flush all" state.
    fn need_flush_all(&self) -> bool {
        self.num_pages_to_flush == u32::MAX
    }

    /// Records a flush request, optionally keeping a frame alive until the
    /// flush and the RCU grace period have both completed.
    fn push(&mut self, op: TlbFlushOp, drop_after_flush: Option<RcuDrop<Frame<dyn AnyFrameMeta>>>) {
        if let Some(frame) = drop_after_flush {
            // SAFETY: By pushing into the `frame_keeper`, the frame will be
            // dropped after the RCU grace period.
            let (frame, panic_guard) = unsafe { RcuDrop::into_inner(frame) };
            self.frame_keeper.push(frame);
            panic_guard.forget();
        }

        // Already flushing everything; the new op is subsumed.
        if self.need_flush_all() {
            return;
        }
        let op_num_pages = op.num_pages();
        // The `for_all` comparison must stay first: its `num_pages()` is
        // `u32::MAX`, and the short circuit avoids the overflowing addition.
        if op == TlbFlushOp::for_all()
            || self.num_pages_to_flush + op_num_pages >= FLUSH_ALL_PAGES_THRESHOLD as u32
        {
            self.num_pages_to_flush = u32::MAX;
            self.num_ops = 0;
            return;
        }

        // In bounds: each op covers at least one page, so
        // `num_ops <= num_pages_to_flush < FLUSH_ALL_PAGES_THRESHOLD`.
        self.ops[self.num_ops as usize].write(op);
        self.num_ops += 1;
        self.num_pages_to_flush += op_num_pages;
    }

    /// Merges all requests from `other` into `self`, leaving `other` intact.
    fn push_from(&mut self, other: &OpsStack) {
        // NOTE(review): the kept frames are cloned, so both stacks keep the
        // pages alive — presumably `Frame` clones share the underlying page
        // (reference counting); confirm against `Frame`'s `Clone` impl.
        self.frame_keeper.extend(other.frame_keeper.iter().cloned());

        if self.need_flush_all() {
            return;
        }
        // `other.need_flush_all()` is checked first; otherwise both counts
        // are below the threshold and the addition cannot overflow.
        if other.need_flush_all()
            || self.num_pages_to_flush + other.num_pages_to_flush
                >= FLUSH_ALL_PAGES_THRESHOLD as u32
        {
            self.num_pages_to_flush = u32::MAX;
            self.num_ops = 0;
            return;
        }

        for other_op in other.ops_iter() {
            self.ops[self.num_ops as usize].write(other_op.clone());
            self.num_ops += 1;
        }
        self.num_pages_to_flush += other.num_pages_to_flush;
    }

    /// Performs all pending flushes on the current CPU and clears the stack.
    fn flush_all(&mut self) {
        if self.need_flush_all() {
            crate::arch::mm::tlb_flush_all_excluding_global();
        } else {
            self.ops_iter().for_each(|op| {
                op.perform_on_current();
            });
        }

        self.clear_without_flush();
    }

    /// Discards pending requests without flushing. Kept frames are still
    /// handed to RCU so they are dropped only after the grace period.
    fn clear_without_flush(&mut self) {
        self.num_pages_to_flush = 0;
        self.num_ops = 0;
        if !self.frame_keeper.is_empty() {
            let _ = RcuDrop::new(core::mem::take(&mut self.frame_keeper));
        }
    }

    /// Iterates over the initialized prefix of `ops`.
    fn ops_iter(&self) -> impl Iterator<Item = &TlbFlushOp> {
        self.ops.iter().take(self.num_ops as usize).map(|op| {
            // SAFETY: From 0 to `num_ops`, the array entry must be initialized.
            unsafe { op.assume_init_ref() }
        })
    }
}
407
408impl Drop for OpsStack {
409    fn drop(&mut self) {
410        if !self.frame_keeper.is_empty() {
411            let _ = RcuDrop::new(core::mem::take(&mut self.frame_keeper));
412        }
413    }
414}