ostd/mm/tlb.rs

// SPDX-License-Identifier: MPL-2.0

//! TLB flush operations.

use alloc::vec::Vec;
use core::{
    mem::MaybeUninit,
    ops::Range,
    sync::atomic::{AtomicBool, Ordering},
};

use super::{
    PAGE_SIZE, Vaddr,
    frame::{Frame, meta::AnyFrameMeta},
};
use crate::{
    arch::irq,
    const_assert,
    cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
    cpu_local,
    smp::IpiSender,
    sync::{LocalIrqDisabled, SpinLock},
};
/// A TLB flusher that is aware of which CPUs need to be flushed.
///
/// The flusher needs to stick to the current CPU.
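///
/// Below is a minimal usage sketch of the issue/dispatch/sync flow. The
/// `pin_guard`, `target_cpus`, and `vaddr` values are placeholders for
/// whatever pinning guard, [`AtomicCpuSet`], and virtual address the caller
/// already has; they are not defined in this module.
///
/// ```ignore
/// // `pin_guard` implements `PinCurrentCpu`; `target_cpus` tracks the CPUs
/// // that have activated the page table being modified.
/// let mut flusher = TlbFlusher::new(&target_cpus, pin_guard);
///
/// // Queue flush requests while editing the page table.
/// flusher.issue_tlb_flush(TlbFlushOp::for_single(vaddr));
///
/// // Start the flush on all target CPUs (asynchronous on remote CPUs).
/// flusher.dispatch_tlb_flush();
///
/// // Optionally wait until every remote CPU has acknowledged the flush.
/// flusher.sync_tlb_flush();
/// ```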
pub struct TlbFlusher<'a, G: PinCurrentCpu> {
    target_cpus: &'a AtomicCpuSet,
    have_unsynced_flush: CpuSet,
    ops_stack: OpsStack,
    ipi_sender: Option<&'static IpiSender>,
    _pin_current: G,
}

impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
    /// Creates a new TLB flusher with the specified CPUs to be flushed.
    ///
    /// The target CPUs should be a reference to an [`AtomicCpuSet`] that will
    /// be loaded upon [`Self::dispatch_tlb_flush`].
    ///
    /// The flusher needs to stick to the current CPU, so provide a guard that
    /// implements [`PinCurrentCpu`].
    pub fn new(target_cpus: &'a AtomicCpuSet, pin_current_guard: G) -> Self {
        Self {
            target_cpus,
            have_unsynced_flush: CpuSet::new_empty(),
            ops_stack: OpsStack::new(),
            ipi_sender: crate::smp::IPI_SENDER.get(),
            _pin_current: pin_current_guard,
        }
    }

    /// Issues a pending TLB flush request.
    ///
    /// This function does not guarantee that the TLB entries are flushed on
    /// either this CPU or remote CPUs. The flush requests are performed only
    /// when [`Self::dispatch_tlb_flush`] is called.
    pub fn issue_tlb_flush(&mut self, op: TlbFlushOp) {
        self.ops_stack.push(op, None);
    }

    /// Issues a TLB flush request that must happen before dropping the page.
    ///
    /// If we need to remove a mapped page from the page table, we can only
    /// recycle the page after the relevant TLB entries on all CPUs are
    /// flushed. Otherwise, if the page is recycled for other purposes, user
    /// space programs could still access it through stale TLB entries. This
    /// method is designed to be used in such cases.
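    ///
    /// A sketch of the intended unmap-then-recycle pattern follows. The
    /// `page_table.unmap(..)` call is only a placeholder for whatever page
    /// table API actually removes the mapping and hands back the frame; the
    /// flusher calls are the part defined in this module.
    ///
    /// ```ignore
    /// // Remove the mapping and take ownership of the frame (placeholder API).
    /// let frame: Frame<dyn AnyFrameMeta> = page_table.unmap(vaddr);
    ///
    /// // Keep the frame alive until the corresponding TLB entries are gone.
    /// flusher.issue_tlb_flush_with(TlbFlushOp::for_single(vaddr), frame);
    /// flusher.dispatch_tlb_flush();
    /// flusher.sync_tlb_flush();
    /// // The flusher keeps the frame alive internally and drops it only after
    /// // the relevant flushes, so it can be recycled safely afterwards.
    /// ```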
    pub fn issue_tlb_flush_with(
        &mut self,
        op: TlbFlushOp,
        drop_after_flush: Frame<dyn AnyFrameMeta>,
    ) {
        self.ops_stack.push(op, Some(drop_after_flush));
    }

    /// Dispatches all the pending TLB flush requests.
    ///
    /// All pending requests previously issued by [`Self::issue_tlb_flush`] or
    /// [`Self::issue_tlb_flush_with`] start to be processed after this
    /// function is called. However, the processing may not be synchronous:
    /// upon return, the TLB entries may not yet be coherent.
    pub fn dispatch_tlb_flush(&mut self) {
        let irq_guard = crate::irq::disable_local();

        if self.ops_stack.is_empty() {
            return;
        }

        // `Release` to make sure our modifications to the page table are
        // visible to CPUs that are going to activate the page table.
        let mut target_cpus = self.target_cpus.load(Ordering::Release);

        let cur_cpu = irq_guard.current_cpu();
        let mut need_flush_on_self = false;

        if target_cpus.contains(cur_cpu) {
            target_cpus.remove(cur_cpu);
            need_flush_on_self = true;
        }

        if let Some(ipi_sender) = self.ipi_sender {
            for cpu in target_cpus.iter() {
                self.have_unsynced_flush.add(cpu);

                let mut flush_ops = FLUSH_OPS.get_on_cpu(cpu).lock();
                flush_ops.push_from(&self.ops_stack);
                // Clear ACK before dropping the lock to avoid false ACKs.
                ACK_REMOTE_FLUSH
                    .get_on_cpu(cpu)
                    .store(false, Ordering::Relaxed);
            }

            ipi_sender.inter_processor_call(&target_cpus, do_remote_flush);
        }

        // Flush ourselves after sending all IPIs to save some time.
        if need_flush_on_self {
            self.ops_stack.flush_all();
        } else {
            self.ops_stack.clear_without_flush();
        }
    }

    /// Waits for all the previously dispatched TLB flush requests to complete.
    ///
    /// After this function returns, all TLB entries corresponding to
    /// previously dispatched TLB flush requests are guaranteed to be coherent.
    ///
    /// The TLB flush requests are issued with [`Self::issue_tlb_flush`] and
    /// dispatched with [`Self::dispatch_tlb_flush`]. This method does not
    /// dispatch any issued requests, so it does not guarantee TLB coherence
    /// for requests that have not been dispatched.
    ///
    /// # Panics
    ///
    /// This method panics if IRQs are disabled. Since remote flushes are
    /// processed in IRQ handlers, two CPUs may deadlock if they wait for each
    /// other's TLB coherence with IRQs disabled.
    pub fn sync_tlb_flush(&mut self) {
        if self.ipi_sender.is_none() {
            // We performed some TLB flushes in the boot context. The AP's boot
            // process should take care of them.
            return;
        }

        assert!(
            irq::is_local_enabled(),
            "Waiting for remote flush with IRQs disabled"
        );

        for cpu in self.have_unsynced_flush.iter() {
            while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Relaxed) {
                core::hint::spin_loop();
            }
        }

        self.have_unsynced_flush = CpuSet::new_empty();
    }
}

/// The operation to flush TLB entries.
///
/// Conceptually, there are three kinds of operations:
///  - flushing all TLB entries except for the global entries;
///  - flushing the TLB entry associated with a single address;
///  - flushing the TLB entries for a specific range of virtual addresses.
///
/// This is a `struct` instead of an `enum` because naively representing the
/// three variants with an `enum` would take 24 bytes. To minimize the memory
/// footprint, we encode all three variants into an 8-byte integer.
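///
/// As an illustration of the encoding (assuming a 4 KiB `PAGE_SIZE`): a
/// flush-all request is stored as `Vaddr::MAX`, while a ranged request stores
/// the page-aligned start address in the upper bits and the page count in the
/// low bits that a page-aligned address never uses.
///
/// ```ignore
/// // Flush a single page at 0x5000: encoded as 0x5000 | 1 = 0x5001.
/// let single = TlbFlushOp::for_single(0x5000);
/// // Flush three pages starting at 0x3000: encoded as 0x3000 | 3 = 0x3003.
/// let ranged = TlbFlushOp::for_range(0x3000..0x6000);
/// // Flush everything except global entries: encoded as `Vaddr::MAX`.
/// let all = TlbFlushOp::for_all();
/// ```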
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TlbFlushOp(Vaddr);

// We require the address to be page-aligned, so the in-page offset part of the
// address can be used to store the length. This sanity check ensures that the
// page-count mask fits entirely within the in-page offset bits, so a ranged
// flush operation can never be too long to encode.
const_assert!(TlbFlushOp::FLUSH_RANGE_NPAGES_MASK | (PAGE_SIZE - 1) == PAGE_SIZE - 1);

impl TlbFlushOp {
    const FLUSH_ALL_VAL: Vaddr = Vaddr::MAX;
    const FLUSH_RANGE_NPAGES_MASK: Vaddr =
        (1 << (usize::BITS - FLUSH_ALL_PAGES_THRESHOLD.leading_zeros())) - 1;

    /// Performs the TLB flush operation on the current CPU.
    pub fn perform_on_current(&self) {
        use crate::arch::mm::{
            tlb_flush_addr, tlb_flush_addr_range, tlb_flush_all_excluding_global,
        };
        match self.0 {
            Self::FLUSH_ALL_VAL => tlb_flush_all_excluding_global(),
            addr => {
                let start = addr & !Self::FLUSH_RANGE_NPAGES_MASK;
                let num_pages = addr & Self::FLUSH_RANGE_NPAGES_MASK;

                debug_assert!((addr & (PAGE_SIZE - 1)) < FLUSH_ALL_PAGES_THRESHOLD);
                debug_assert!(num_pages != 0);

                if num_pages == 1 {
                    tlb_flush_addr(start);
                } else {
                    tlb_flush_addr_range(&(start..start + num_pages * PAGE_SIZE));
                }
            }
        }
    }

    /// Creates a new TLB flush operation that flushes all TLB entries except
    /// for the global entries.
    pub const fn for_all() -> Self {
        TlbFlushOp(Self::FLUSH_ALL_VAL)
    }

    /// Creates a new TLB flush operation that flushes the TLB entry associated
    /// with the provided virtual address.
    pub const fn for_single(addr: Vaddr) -> Self {
        TlbFlushOp(addr | 1)
    }

    /// Creates a new TLB flush operation that flushes the TLB entries for the
    /// specified virtual address range.
    ///
    /// If the range is too large, the resulting [`TlbFlushOp`] will flush all
    /// TLB entries instead.
    ///
    /// # Panics
    ///
    /// Panics if the range is not page-aligned or if the range is empty.
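    ///
    /// For example (assuming a 4 KiB `PAGE_SIZE`), a 64-page range falls back
    /// to a flush-all operation because it exceeds
    /// [`FLUSH_ALL_PAGES_THRESHOLD`]:
    ///
    /// ```ignore
    /// let big = TlbFlushOp::for_range(0x10000..0x10000 + 64 * PAGE_SIZE);
    /// assert_eq!(big, TlbFlushOp::for_all());
    /// ```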
    pub const fn for_range(range: Range<Vaddr>) -> Self {
        assert!(
            range.start.is_multiple_of(PAGE_SIZE),
            "Range start must be page-aligned"
        );
        assert!(
            range.end.is_multiple_of(PAGE_SIZE),
            "Range end must be page-aligned"
        );
        assert!(range.start < range.end, "Range must not be empty");
        let num_pages = (range.end - range.start) / PAGE_SIZE;
        if num_pages >= FLUSH_ALL_PAGES_THRESHOLD {
            return TlbFlushOp::for_all();
        }
        TlbFlushOp(range.start | (num_pages as Vaddr))
    }

    /// Returns the number of pages to flush.
    ///
    /// A return value of `u32::MAX` means that all entries should be flushed.
    /// Otherwise, the return value is non-zero and less than
    /// [`FLUSH_ALL_PAGES_THRESHOLD`].
    fn num_pages(&self) -> u32 {
        if self.0 == Self::FLUSH_ALL_VAL {
            u32::MAX
        } else {
            debug_assert!((self.0 & (PAGE_SIZE - 1)) < FLUSH_ALL_PAGES_THRESHOLD);
            let num_pages = (self.0 & Self::FLUSH_RANGE_NPAGES_MASK) as u32;
            debug_assert!(num_pages != 0);
            num_pages
        }
    }
}

// The queues of pending requests on each CPU.
cpu_local! {
    static FLUSH_OPS: SpinLock<OpsStack, LocalIrqDisabled> = SpinLock::new(OpsStack::new());
    /// Whether this CPU has finished the last remote flush request.
    static ACK_REMOTE_FLUSH: AtomicBool = AtomicBool::new(true);
}

fn do_remote_flush() {
    // No races: we are either in an IRQ context or have preemption disabled.
    let current_cpu = crate::cpu::CpuId::current_racy();

    let mut new_op_queue = OpsStack::new();
    {
        let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();

        core::mem::swap(&mut *op_queue, &mut new_op_queue);

        // ACK before dropping the lock so that we won't miss flush requests.
        ACK_REMOTE_FLUSH
            .get_on_cpu(current_cpu)
            .store(true, Ordering::Relaxed);
    }
    // Release the lock quickly to avoid contention. Acknowledging before
    // flushing is fine since we cannot switch back to user space now.
    new_op_queue.flush_all();
}

/// If the number of pending pages to flush reaches this threshold, we flush
/// all the TLB entries instead of flushing them one by one.
const FLUSH_ALL_PAGES_THRESHOLD: usize = 32;

struct OpsStack {
    /// Entries at indices `0..num_ops` must be initialized.
    ops: [MaybeUninit<TlbFlushOp>; FLUSH_ALL_PAGES_THRESHOLD],
    num_ops: u32,
    /// If this is `u32::MAX`, we should flush all entries irrespective of the
    /// contents of `ops`; in this case, `num_ops` must be zero.
    ///
    /// Otherwise, it counts the number of pages to flush in `ops`.
    num_pages_to_flush: u32,
    page_keeper: Vec<Frame<dyn AnyFrameMeta>>,
}

impl OpsStack {
    const fn new() -> Self {
        Self {
            ops: [const { MaybeUninit::uninit() }; FLUSH_ALL_PAGES_THRESHOLD],
            num_ops: 0,
            num_pages_to_flush: 0,
            page_keeper: Vec::new(),
        }
    }

    fn is_empty(&self) -> bool {
        self.num_ops == 0 && self.num_pages_to_flush == 0
    }

    fn need_flush_all(&self) -> bool {
        self.num_pages_to_flush == u32::MAX
    }

    fn push(&mut self, op: TlbFlushOp, drop_after_flush: Option<Frame<dyn AnyFrameMeta>>) {
        if let Some(frame) = drop_after_flush {
            self.page_keeper.push(frame);
        }

        if self.need_flush_all() {
            return;
        }
        let op_num_pages = op.num_pages();
        if op == TlbFlushOp::for_all()
            || self.num_pages_to_flush + op_num_pages >= FLUSH_ALL_PAGES_THRESHOLD as u32
        {
            self.num_pages_to_flush = u32::MAX;
            self.num_ops = 0;
            return;
        }

        self.ops[self.num_ops as usize].write(op);
        self.num_ops += 1;
        self.num_pages_to_flush += op_num_pages;
    }

    fn push_from(&mut self, other: &OpsStack) {
        self.page_keeper.extend(other.page_keeper.iter().cloned());

        if self.need_flush_all() {
            return;
        }
        if other.need_flush_all()
            || self.num_pages_to_flush + other.num_pages_to_flush
                >= FLUSH_ALL_PAGES_THRESHOLD as u32
        {
            self.num_pages_to_flush = u32::MAX;
            self.num_ops = 0;
            return;
        }

        for other_op in other.ops_iter() {
            self.ops[self.num_ops as usize].write(other_op.clone());
            self.num_ops += 1;
        }
        self.num_pages_to_flush += other.num_pages_to_flush;
    }

    fn flush_all(&mut self) {
        if self.need_flush_all() {
            crate::arch::mm::tlb_flush_all_excluding_global();
        } else {
            self.ops_iter().for_each(|op| {
                op.perform_on_current();
            });
        }

        self.clear_without_flush();
    }

    fn clear_without_flush(&mut self) {
        self.num_pages_to_flush = 0;
        self.num_ops = 0;
        self.page_keeper.clear();
    }

    fn ops_iter(&self) -> impl Iterator<Item = &TlbFlushOp> {
        self.ops.iter().take(self.num_ops as usize).map(|op| {
            // SAFETY: Entries at indices `0..num_ops` are initialized.
            unsafe { op.assume_init_ref() }
        })
    }
}