ostd/mm/
io.rs

1// SPDX-License-Identifier: MPL-2.0
2
3//! Abstractions for reading and writing virtual memory (VM) objects.
4//!
5//! # Safety
6//!
7//! The core virtual memory (VM) access APIs provided by this module are [`VmReader`] and
8//! [`VmWriter`], which allow for writing to or reading from a region of memory _safely_.
9//! `VmReader` and `VmWriter` objects can be constructed from memory regions of either typed memory
//! (e.g., `&[u8]`) or untyped memory (e.g., [`UFrame`]). Behind the scenes, `VmReader` and `VmWriter`
11//! must be constructed via their [`from_user_space`] and [`from_kernel_space`] methods, whose
12//! safety depends on whether the given memory regions are _valid_ or not.
13//!
14//! [`UFrame`]: crate::mm::UFrame
15//! [`from_user_space`]: `VmReader::from_user_space`
16//! [`from_kernel_space`]: `VmReader::from_kernel_space`
17//!
18//! Here is a list of conditions for memory regions to be considered valid:
19//!
20//! - The memory region as a whole must be either typed or untyped memory, not both typed and
21//!   untyped.
22//!
23//! - If the memory region is typed, we require that:
24//!   - the [validity requirements] from the official Rust documentation must be met, and
25//!   - the type of the memory region (which must exist since the memory is typed) must be
26//!     plain-old-data, so that the writer can fill it with arbitrary data safely.
27//!
28//! [validity requirements]: core::ptr#safety
29//!
30//! - If the memory region is untyped, we require that:
31//!   - the underlying pages must remain alive while the validity requirements are in effect, and
32//!   - the kernel must access the memory region using only the APIs provided in this module, but
33//!     external accesses from hardware devices or user programs do not count.
34//!
35//! We have the last requirement for untyped memory to be valid because the safety interaction with
36//! other ways to access the memory region (e.g., atomic/volatile memory loads/stores) is not
//! currently specified. This may be relaxed in the future, if appropriate and necessary.
38//!
39//! Note that data races on untyped memory are explicitly allowed (since pages can be mapped to
40//! user space, making it impossible to avoid data races). However, they may produce erroneous
41//! results, such as unexpected bytes being copied, but do not cause soundness problems.
42
43use core::{marker::PhantomData, mem::MaybeUninit};
44
45use crate::{
46    Error, Pod,
47    arch::mm::{
48        __atomic_cmpxchg_fallible, __atomic_load_fallible, __memcpy_fallible, __memset_fallible,
49    },
50    mm::{
51        MAX_USERSPACE_VADDR,
52        kspace::{KERNEL_BASE_VADDR, KERNEL_END_VADDR},
53    },
54    prelude::*,
55};
56
/// A trait that enables reading/writing data from/to a VM object,
/// e.g., [`USegment`], [`Vec<UFrame>`] and [`UFrame`].
///
/// # Concurrency
///
/// The methods may be executed by multiple concurrent reader and writer
/// threads. In this case, if the results of concurrent reads or writes
/// desire predictability or atomicity, the users should add extra mechanism
/// for such properties.
///
/// [`USegment`]: crate::mm::USegment
/// [`UFrame`]: crate::mm::UFrame
pub trait VmIo {
    /// Reads requested data at a specified offset into a given `VmWriter`.
    ///
    /// # No short reads
    ///
    /// On success, the `writer` must be written with the requested data
    /// completely. If, for any reason, the requested data is only partially
    /// available, then the method shall return an error.
    fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()>;

    /// Reads a specified number of bytes at a specified offset into a given buffer.
    ///
    /// # No short reads
    ///
    /// Similar to [`read`].
    ///
    /// [`read`]: VmIo::read
    fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
        let mut writer = VmWriter::from(buf).to_fallible();
        self.read(offset, &mut writer)
    }

    /// Reads a value of a specified type at a specified offset.
    fn read_val<T: Pod>(&self, offset: usize) -> Result<T> {
        // Why not use `MaybeUninit` for a faster implementation?
        //
        // ```rust
        // let mut val: MaybeUninit<T> = MaybeUninit::uninit();
        // let writer = unsafe {
        //     VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>())
        // };
        // self.read(offset, &mut writer.to_fallible())?;
        // Ok(unsafe { val.assume_init() })
        // ```
        //
        // The above implementation avoids initializing `val` upfront,
        // so it is more efficient than our actual implementation.
        // Unfortunately, it is unsound.
        // This is because the `read` method,
        // which could be implemented outside OSTD and thus is untrusted,
        // may not really initialize the bits of `val` at all!

        let mut val = T::new_zeroed();
        self.read_bytes(offset, val.as_bytes_mut())?;
        Ok(val)
    }

    /// Reads a slice of a specified type at a specified offset.
    ///
    /// # No short reads
    ///
    /// Similar to [`read`].
    ///
    /// [`read`]: VmIo::read
    fn read_slice<T: Pod>(&self, offset: usize, slice: &mut [T]) -> Result<()> {
        let len_in_bytes = size_of_val(slice);
        let ptr = slice as *mut [T] as *mut u8;
        // SAFETY: the slice can be transmuted to a writable byte slice since the elements
        // are all Plain-Old-Data (Pod) types.
        let buf = unsafe { core::slice::from_raw_parts_mut(ptr, len_in_bytes) };
        self.read_bytes(offset, buf)
    }

    /// Writes all data from a given `VmReader` at a specified offset.
    ///
    /// # No short writes
    ///
    /// On success, the data from the `reader` must be read to the VM object entirely.
    /// If, for any reason, the input data can only be written partially,
    /// then the method shall return an error.
    fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()>;

    /// Writes a specified number of bytes from a given buffer at a specified offset.
    ///
    /// # No short writes
    ///
    /// Similar to [`write`].
    ///
    /// [`write`]: VmIo::write
    fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> {
        let mut reader = VmReader::from(buf).to_fallible();
        self.write(offset, &mut reader)
    }

    /// Writes a value of a specified type at a specified offset.
    fn write_val<T: Pod>(&self, offset: usize, new_val: &T) -> Result<()> {
        self.write_bytes(offset, new_val.as_bytes())?;
        Ok(())
    }

    /// Writes a slice of a specified type at a specified offset.
    ///
    /// # No short writes
    ///
    /// Similar to [`write`].
    ///
    /// [`write`]: VmIo::write
    fn write_slice<T: Pod>(&self, offset: usize, slice: &[T]) -> Result<()> {
        let len_in_bytes = size_of_val(slice);
        let ptr = slice as *const [T] as *const u8;
        // SAFETY: the slice can be transmuted to a readable byte slice since the elements
        // are all Plain-Old-Data (Pod) types.
        let buf = unsafe { core::slice::from_raw_parts(ptr, len_in_bytes) };
        self.write_bytes(offset, buf)
    }
}
175
/// A trait that enables filling bytes (e.g., filling zeros) to a VM object.
pub trait VmIoFill {
    /// Writes `len` zeros at a specified offset.
    ///
    /// Unlike the methods in [`VmIo`], this method allows for short writes because `len` can be
    /// effectively unbounded. However, if not all bytes can be written successfully, an `Err(_)`
    /// will be returned with the error and the number of zeros that have been written thus far.
    ///
    /// # A slow, general implementation
    ///
    /// Suppose that [`VmIo`] has already been implemented for the type;
    /// then this method can be implemented in the following general way.
    ///
    /// ```rust
    /// fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)> {
    ///     for i in 0..len {
    ///         match self.write_slice(offset + i, &[0u8]) {
    ///             Ok(()) => continue,
    ///             Err(err) => return Err((err, i)),
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    ///
    /// But we choose not to provide a general, default implementation
    /// because doing so would make it too easy for a concrete type of `VmIoFill`
    /// to settle with a slower implementation for such a performance-sensitive operation.
    fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)>;
}
206
/// A trait that enables reading/writing data from/to a VM object using one non-tearing memory
/// load/store.
///
/// See also [`VmIo`], which enables reading/writing data from/to a VM object without the guarantee
/// of using one non-tearing memory load/store.
pub trait VmIoOnce {
    /// Reads a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// load.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method are the same
    /// as [`VmReader::read_once`].
    fn read_once<T: PodOnce>(&self, offset: usize) -> Result<T>;

    /// Writes a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// store.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method are the same
    /// as [`VmWriter::write_once`].
    fn write_once<T: PodOnce>(&self, offset: usize, new_val: &T) -> Result<()>;
}
227
/// A marker type used for [`VmReader`] and [`VmWriter`],
/// indicating that reads or writes on the underlying memory region are fallible.
pub enum Fallible {}
231
/// A marker type used for [`VmReader`] and [`VmWriter`],
/// indicating that reads or writes on the underlying memory region are infallible.
pub enum Infallible {}
235
/// Copies `len` bytes from `src` to `dst`.
///
/// The copy is performed with volatile memory accesses; see the comments in the
/// body for the rationale.
///
/// # Safety
///
/// - `src` must be [valid] for reads of `len` bytes.
/// - `dst` must be [valid] for writes of `len` bytes.
///
/// [valid]: crate::mm::io#safety
unsafe fn memcpy(dst: *mut u8, src: *const u8, len: usize) {
    // This method is implemented by calling `volatile_copy_memory`. Note that even with the
    // "volatile" keyword, data races are still considered undefined behavior (UB) in both the Rust
    // documentation and the C/C++ standards. In general, UB makes the behavior of the entire
    // program unpredictable, usually due to compiler optimizations that assume the absence of UB.
    // However, in this particular case, considering that the Linux kernel uses the "volatile"
    // keyword to implement `READ_ONCE` and `WRITE_ONCE`, the compiler is extremely unlikely to
    // break our code unless it also breaks the Linux kernel.
    //
    // For more details and future possibilities, see
    // <https://github.com/asterinas/asterinas/pull/1001#discussion_r1667317406>.

    // SAFETY: The safety is guaranteed by the safety preconditions and the explanation above.
    unsafe { core::intrinsics::volatile_copy_memory(dst, src, len) };
}
259
/// Fills `len` bytes of memory at `dst` with the specified `value`.
///
/// Like [`memcpy`], the fill is performed with volatile memory accesses.
///
/// # Safety
///
/// - `dst` must be [valid] for writes of `len` bytes.
///
/// [valid]: crate::mm::io#safety
unsafe fn memset(dst: *mut u8, value: u8, len: usize) {
    // SAFETY: The safety is guaranteed by the safety preconditions. See the comments in `memcpy`
    // above for why volatile accesses are used here.
    unsafe {
        core::intrinsics::volatile_set_memory(dst, value, len);
    }
}
273
/// Copies `len` bytes from `src` to `dst`.
/// This function will stop copying early if it encounters an unresolvable page fault.
///
/// Returns the number of successfully copied bytes.
///
/// In the following cases, this method may cause unexpected bytes to be copied, but will not cause
/// safety problems as long as the safety requirements are met:
/// - The source and destination overlap.
/// - The current context is not associated with valid user space (e.g., in the kernel thread).
///
/// # Safety
///
/// - `src` must either be [valid] for reads of `len` bytes or be in user space for `len` bytes.
/// - `dst` must either be [valid] for writes of `len` bytes or be in user space for `len` bytes.
///
/// [valid]: crate::mm::io#safety
unsafe fn memcpy_fallible(dst: *mut u8, src: *const u8, len: usize) -> usize {
    // SAFETY: The safety is upheld by the caller.
    let failed_bytes = unsafe { __memcpy_fallible(dst, src, len) };
    // `__memcpy_fallible` reports the number of bytes that could NOT be copied.
    len - failed_bytes
}
295
/// Fills `len` bytes of memory at `dst` with the specified `value`.
/// This function will stop filling early if it encounters an unresolvable page fault.
///
/// Returns the number of successfully set bytes.
///
/// # Safety
///
/// - `dst` must either be [valid] for writes of `len` bytes or be in user space for `len` bytes.
///
/// [valid]: crate::mm::io#safety
unsafe fn memset_fallible(dst: *mut u8, value: u8, len: usize) -> usize {
    // SAFETY: The safety is upheld by the caller.
    let failed_bytes = unsafe { __memset_fallible(dst, value, len) };
    // `__memset_fallible` reports the number of bytes that could NOT be set.
    len - failed_bytes
}
311
/// Fallible memory read into a `VmWriter`.
pub trait FallibleVmRead<F> {
    /// Reads all data into the writer until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes read is returned;
    /// On error, both the error and the number of bytes read so far are returned.
    fn read_fallible(
        &mut self,
        writer: &mut VmWriter<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
326
/// Fallible memory write from a `VmReader`.
///
/// This is the mirror of [`FallibleVmRead`].
pub trait FallibleVmWrite<F> {
    /// Writes all data from the reader until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes written is returned;
    /// On error, both the error and the number of bytes written so far are returned.
    fn write_fallible(
        &mut self,
        reader: &mut VmReader<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
341
/// `VmReader` is a reader for reading data from a contiguous range of memory.
///
/// The memory range read by `VmReader` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory reads are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmReader` is active for the duration of `'a`,
/// and the corresponding memory reads are considered fallible.
///
/// When performing reads with a `VmWriter`, if one of them represents typed memory,
/// it can ensure that the reading range in this reader and writing range in the
/// writer are not overlapped.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmReader<'a, Fallibility = Fallible> {
    // The address of the next byte to read.
    cursor: *const u8,
    // One byte past the end of the readable range (`cursor <= end` always holds).
    end: *const u8,
    phantom: PhantomData<(&'a [u8], Fallibility)>,
}
364
365// `Clone` can be implemented for `VmReader`
366// because it either points to untyped memory or represents immutable references.
367// Note that we cannot implement `Clone` for `VmWriter`
368// because it can represent mutable references, which must remain exclusive.
369impl<Fallibility> Clone for VmReader<'_, Fallibility> {
370    fn clone(&self) -> Self {
371        Self {
372            cursor: self.cursor,
373            end: self.end,
374            phantom: PhantomData,
375        }
376    }
377}
378
// Implements `FallibleVmRead<$writer_fallibility>` for `VmReader<'_, $reader_fallibility>`.
//
// The copy is performed with `memcpy_fallible`; if fewer bytes than requested are copied,
// `Error::PageFault` is reported together with the number of bytes copied so far.
macro_rules! impl_read_fallible {
    ($reader_fallibility:ty, $writer_fallibility:ty) => {
        impl<'a> FallibleVmRead<$writer_fallibility> for VmReader<'a, $reader_fallibility> {
            fn read_fallible(
                &mut self,
                writer: &mut VmWriter<'_, $writer_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                // Copy no more than what the reader holds and the writer can accept.
                let copy_len = self.remain().min(writer.avail());
                if copy_len == 0 {
                    return Ok(0);
                }

                // SAFETY: The source and destination are subsets of memory ranges specified by
                // the reader and writer, so they are either valid for reading and writing or in
                // user space.
                let copied_len = unsafe { memcpy_fallible(writer.cursor, self.cursor, copy_len) };
                // Advance both cursors past the bytes that were actually copied.
                self.cursor = self.cursor.wrapping_add(copied_len);
                writer.cursor = writer.cursor.wrapping_add(copied_len);

                if copied_len < copy_len {
                    Err((Error::PageFault, copied_len))
                } else {
                    Ok(copied_len)
                }
            }
        }
    };
}
407
// Implements `FallibleVmWrite<$reader_fallibility>` for `VmWriter<'_, $writer_fallibility>`.
//
// Writing is reading with the roles of the reader and the writer swapped, so this simply
// delegates to `VmReader::read_fallible`.
macro_rules! impl_write_fallible {
    ($writer_fallibility:ty, $reader_fallibility:ty) => {
        impl<'a> FallibleVmWrite<$reader_fallibility> for VmWriter<'a, $writer_fallibility> {
            fn write_fallible(
                &mut self,
                reader: &mut VmReader<'_, $reader_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                reader.read_fallible(self)
            }
        }
    };
}
420
// Implement the fallible traits for every reader/writer fallibility pair in which at least one
// side is fallible. The `Infallible`/`Infallible` pair is intentionally absent: fully infallible
// copies go through the infallible `VmReader::read`/`VmWriter::write` methods instead.
impl_read_fallible!(Fallible, Infallible);
impl_read_fallible!(Fallible, Fallible);
impl_read_fallible!(Infallible, Fallible);
impl_write_fallible!(Fallible, Infallible);
impl_write_fallible!(Fallible, Fallible);
impl_write_fallible!(Infallible, Fallible);
427
impl<'a> VmReader<'a, Infallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in kernel space.
    ///
    /// # Safety
    ///
    /// `ptr` must be [valid] for reads of `len` bytes during the entire lifetime `'a`.
    ///
    /// [valid]: crate::mm::io#safety
    pub unsafe fn from_kernel_space(ptr: *const u8, len: usize) -> Self {
        // Rust is allowed to give the reference to a zero-sized object a very small address,
        // falling out of the kernel virtual address space range.
        // So when `len` is zero, we should not and need not to check `ptr`.
        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads all data into the writer until one of the two conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    ///
    /// Returns the number of bytes read.
    pub fn read(&mut self, writer: &mut VmWriter<'_, Infallible>) -> usize {
        // Copy no more than what the reader holds and the writer can accept.
        let copy_len = self.remain().min(writer.avail());
        if copy_len == 0 {
            return 0;
        }

        // SAFETY: The source and destination are subsets of memory ranges specified by the reader
        // and writer, so they are valid for reading and writing.
        unsafe { memcpy(writer.cursor, self.cursor, copy_len) };
        self.cursor = self.cursor.wrapping_add(copy_len);
        writer.cursor = writer.cursor.wrapping_add(copy_len);

        copy_len
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// this method will return `Err`.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        self.read(&mut writer);
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Reads a value of the `PodOnce` type using one non-tearing memory load.
    ///
    /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
    ///
    /// This method will not compile if the `PodOnce` type is too large for the current
    /// architecture and the operation would have to be torn into multiple memory loads.
    ///
    /// # Panics
    ///
    /// This method will panic if the current position of the reader does not meet the alignment
    /// requirements of type `T`.
    pub fn read_once<T: PodOnce>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        const { assert!(pod_once_impls::is_non_tearing::<T>()) };

        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
        // and that the cursor is properly aligned with respect to the type `T`. All other safety
        // requirements are the same as for `Self::read`.
        let val = unsafe { cursor.read_volatile() };
        self.cursor = self.cursor.wrapping_add(size_of::<T>());

        Ok(val)
    }

    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
    // not provide an infallible implementation of `VmReader::atomic_load`.

    /// Converts to a fallible reader.
    pub fn to_fallible(self) -> VmReader<'a, Fallible> {
        // It is safe to construct a fallible reader since an infallible reader covers the
        // capabilities of a fallible reader.
        VmReader {
            cursor: self.cursor,
            end: self.end,
            phantom: PhantomData,
        }
    }
}
544
impl VmReader<'_, Fallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// # Safety
    ///
    /// The virtual address range `ptr..ptr + len` must be in user space.
    pub unsafe fn from_user_space(ptr: *const u8, len: usize) -> Self {
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// or the value cannot be read completely,
    /// this method will return `Err`.
    ///
    /// If the memory read fails, this method will return `Err`
    /// and the current reader's cursor remains pointing to
    /// the original starting position.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        self.read_fallible(&mut writer)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes read so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read_fallible` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Atomically loads a `PodAtomic` value.
    ///
    /// Regardless of whether it is successful, the cursor of the reader will not move.
    ///
    /// This method only guarantees the atomicity of the specific operation. There are no
    /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
    /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
    /// specified in the C++11 memory model.
    ///
    /// This method will fail with errors if
    ///  1. the remaining space of the reader is less than `size_of::<T>()` bytes, or
    ///  2. the memory operation fails due to an unresolvable page fault.
    ///
    /// # Panics
    ///
    /// This method will panic if the memory location is not aligned on an `align_of::<T>()`-byte
    /// boundary.
    pub fn atomic_load<T: PodAtomic>(&self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // SAFETY:
        // 1. The cursor is either valid for reading or in user space for `size_of::<T>()` bytes.
        // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
        unsafe { T::atomic_load_fallible(cursor) }
    }
}
633
634impl<Fallibility> VmReader<'_, Fallibility> {
635    /// Returns the number of bytes for the remaining data.
636    pub fn remain(&self) -> usize {
637        self.end.addr() - self.cursor.addr()
638    }
639
640    /// Returns the cursor pointer, which refers to the address of the next byte to read.
641    pub fn cursor(&self) -> *const u8 {
642        self.cursor
643    }
644
645    /// Returns if it has remaining data to read.
646    pub fn has_remain(&self) -> bool {
647        self.remain() > 0
648    }
649
650    /// Limits the length of remaining data.
651    ///
652    /// This method ensures the post condition of `self.remain() <= max_remain`.
653    pub fn limit(&mut self, max_remain: usize) -> &mut Self {
654        if max_remain < self.remain() {
655            self.end = self.cursor.wrapping_add(max_remain);
656        }
657
658        self
659    }
660
661    /// Skips the first `nbytes` bytes of data.
662    /// The length of remaining data is decreased accordingly.
663    ///
664    /// # Panics
665    ///
666    /// If `nbytes` is greater than `self.remain()`, then the method panics.
667    pub fn skip(&mut self, nbytes: usize) -> &mut Self {
668        assert!(nbytes <= self.remain());
669        self.cursor = self.cursor.wrapping_add(nbytes);
670
671        self
672    }
673}
674
675impl<'a> From<&'a [u8]> for VmReader<'a, Infallible> {
676    fn from(slice: &'a [u8]) -> Self {
677        // SAFETY:
678        // - The memory range points to typed memory.
679        // - The validity requirements for read accesses are met because the pointer is converted
680        //   from an immutable reference that outlives the lifetime `'a`.
681        // - The type, i.e., the `u8` slice, is plain-old-data.
682        unsafe { Self::from_kernel_space(slice.as_ptr(), slice.len()) }
683    }
684}
685
/// `VmWriter` is a writer for writing data to a contiguous range of memory.
///
/// The memory range written by `VmWriter` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory writes are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmWriter` is active for the duration of `'a`,
/// and the corresponding memory writes are considered fallible.
///
/// When performing writes with a `VmReader`, if one of them represents typed memory,
/// it can ensure that the writing range in this writer and reading range in the
/// reader are not overlapped.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmWriter<'a, Fallibility = Fallible> {
    // The address of the next byte to write.
    cursor: *mut u8,
    // One byte past the end of the writable range (`cursor <= end` always holds).
    end: *mut u8,
    phantom: PhantomData<(&'a mut [u8], Fallibility)>,
}
708
709impl<'a> VmWriter<'a, Infallible> {
    /// Constructs a `VmWriter` from a pointer and a length, which represents
    /// a memory range in kernel space.
    ///
    /// # Safety
    ///
    /// `ptr` must be [valid] for writes of `len` bytes during the entire lifetime `'a`.
    ///
    /// [valid]: crate::mm::io#safety
    pub unsafe fn from_kernel_space(ptr: *mut u8, len: usize) -> Self {
        // If casting a zero-sized slice to a pointer, the pointer may be null
        // and does not reside in our kernel space range.
        // So when `len` is zero, we should not and need not to check `ptr`.
        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }
730
    /// Writes all data from the reader until one of the two conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    ///
    /// Returns the number of bytes written.
    ///
    /// This simply delegates to [`VmReader::read`] with the roles reversed.
    pub fn write(&mut self, reader: &mut VmReader<'_, Infallible>) -> usize {
        reader.read(self)
    }
739
740    /// Writes a value of `Pod` type.
741    ///
742    /// If the length of the `Pod` type exceeds `self.avail()`,
743    /// this method will return `Err`.
744    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
745        if self.avail() < size_of::<T>() {
746            return Err(Error::InvalidArgs);
747        }
748
749        let mut reader = VmReader::from(new_val.as_bytes());
750        self.write(&mut reader);
751        Ok(())
752    }
753
754    /// Writes a value of the `PodOnce` type using one non-tearing memory store.
755    ///
756    /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
757    ///
758    /// # Panics
759    ///
760    /// This method will panic if the current position of the writer does not meet the alignment
761    /// requirements of type `T`.
762    pub fn write_once<T: PodOnce>(&mut self, new_val: &T) -> Result<()> {
763        if self.avail() < size_of::<T>() {
764            return Err(Error::InvalidArgs);
765        }
766
767        let cursor = self.cursor.cast::<T>();
768        assert!(cursor.is_aligned());
769
770        const { assert!(pod_once_impls::is_non_tearing::<T>()) };
771
772        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
773        // and that the cursor is properly aligned with respect to the type `T`. All other safety
774        // requirements are the same as for `Self::write`.
775        unsafe { cursor.write_volatile(*new_val) };
776        self.cursor = self.cursor.wrapping_add(size_of::<T>());
777
778        Ok(())
779    }
780
781    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
782    // not provide an infallible implementation of `VmWriter::atomic_compare_exchange`.
783
784    /// Writes `len` zeros to the target memory.
785    ///
786    /// This method attempts to fill up to `len` bytes with zeros. If the available
787    /// memory from the current cursor position is less than `len`, it will only fill
788    /// the available space.
789    pub fn fill_zeros(&mut self, len: usize) -> usize {
790        let len_to_set = self.avail().min(len);
791        if len_to_set == 0 {
792            return 0;
793        }
794
795        // SAFETY: The destination is a subset of the memory range specified by
796        // the current writer, so it is valid for writing.
797        unsafe { memset(self.cursor, 0u8, len_to_set) };
798        self.cursor = self.cursor.wrapping_add(len_to_set);
799
800        len_to_set
801    }
802
803    /// Converts to a fallible writer.
804    pub fn to_fallible(self) -> VmWriter<'a, Fallible> {
805        // It is safe to construct a fallible reader since an infallible reader covers the
806        // capabilities of a fallible reader.
807        VmWriter {
808            cursor: self.cursor,
809            end: self.end,
810            phantom: PhantomData,
811        }
812    }
813}
814
impl VmWriter<'_, Fallible> {
    /// Constructs a `VmWriter` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// The current context should be consistently associated with valid user space during the
    /// entire lifetime `'a`. This is for correct semantics and is not a safety requirement.
    ///
    /// # Safety
    ///
    /// `ptr` must be in user space for `len` bytes.
    pub unsafe fn from_user_space(ptr: *mut u8, len: usize) -> Self {
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Writes a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.avail()`,
    /// or the value cannot be written completely,
    /// this method will return `Err`.
    ///
    /// If the memory write failed, this method will return `Err`
    /// and the current writer's cursor remains pointing to
    /// the original starting position.
    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
        if self.avail() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut reader = VmReader::from(new_val.as_bytes());
        self.write_fallible(&mut reader)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes written so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        Ok(())
    }

    /// Atomically compares and exchanges a `PodAtomic` value.
    ///
    /// This method compares `old_val` with the value pointed by `self` and, if they are equal,
    /// updates it with `new_val`.
    ///
    /// The value that was previously in memory will be returned, along with a boolean denoting
    /// whether the compare-and-exchange succeeds. The caller usually wants to retry if this
    /// flag is false, passing the most recent value that was returned by this method.
    ///
    /// The caller is required to provide a reader which points to the exact same memory location
    /// to ensure that reading from the memory is allowed.
    ///
    /// Regardless of whether it is successful, the cursors of the reader and writer will not move.
    ///
    /// This method only guarantees the atomicity of the specific operation. There are no
    /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
    /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
    /// specified in the C++11 memory model.
    ///
    /// Since the operation does not involve memory locks, it can't prevent the [ABA
    /// problem](https://en.wikipedia.org/wiki/ABA_problem).
    ///
    /// This method will fail with errors if:
    ///  1. the remaining space of the reader or the available space of the writer are less than
    ///     `size_of::<T>()` bytes, or
    ///  2. the memory operation fails due to an unresolvable page fault.
    ///
    /// # Panics
    ///
    /// This method will panic if:
    ///  1. the reader and the writer do not point to the same memory location, or
    ///  2. the memory location is not aligned on an `align_of::<T>()`-byte boundary.
    pub fn atomic_compare_exchange<T>(
        &self,
        reader: &VmReader,
        old_val: T,
        new_val: T,
    ) -> Result<(T, bool)>
    where
        T: PodAtomic + Eq,
    {
        if self.avail() < size_of::<T>() || reader.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        // The reader proves read permission; it must alias the writer's cursor.
        assert_eq!(self.cursor.cast_const(), reader.cursor);

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // SAFETY:
        // 1. The cursor is either valid for reading and writing or in user space for
        //    `size_of::<T>()` bytes.
        // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
        let cur_val = unsafe { T::atomic_cmpxchg_fallible(cursor, old_val, new_val)? };

        // The exchange succeeded exactly when the observed value equals `old_val`.
        Ok((cur_val, old_val == cur_val))
    }

    /// Writes `len` zeros to the target memory.
    ///
    /// This method attempts to fill up to `len` bytes with zeros. If the available
    /// memory from the current cursor position is less than `len`, it will only fill
    /// the available space.
    ///
    /// If the memory write failed due to an unresolvable page fault, this method
    /// will return `Err` with the length set so far.
    pub fn fill_zeros(&mut self, len: usize) -> core::result::Result<usize, (Error, usize)> {
        let len_to_set = self.avail().min(len);
        if len_to_set == 0 {
            return Ok(0);
        }

        // SAFETY: The destination is a subset of the memory range specified by
        // the current writer, so it is either valid for writing or in user space.
        let set_len = unsafe { memset_fallible(self.cursor, 0u8, len_to_set) };
        // Advance the cursor past the bytes that were actually zeroed, even on
        // a partial (page-faulting) fill.
        self.cursor = self.cursor.wrapping_add(set_len);

        if set_len < len_to_set {
            Err((Error::PageFault, set_len))
        } else {
            Ok(len_to_set)
        }
    }
}
945
946impl<Fallibility> VmWriter<'_, Fallibility> {
947    /// Returns the number of bytes for the available space.
948    pub fn avail(&self) -> usize {
949        self.end.addr() - self.cursor.addr()
950    }
951
952    /// Returns the cursor pointer, which refers to the address of the next byte to write.
953    pub fn cursor(&self) -> *mut u8 {
954        self.cursor
955    }
956
957    /// Returns if it has available space to write.
958    pub fn has_avail(&self) -> bool {
959        self.avail() > 0
960    }
961
962    /// Limits the length of available space.
963    ///
964    /// This method ensures the post condition of `self.avail() <= max_avail`.
965    pub fn limit(&mut self, max_avail: usize) -> &mut Self {
966        if max_avail < self.avail() {
967            self.end = self.cursor.wrapping_add(max_avail);
968        }
969
970        self
971    }
972
973    /// Skips the first `nbytes` bytes of data.
974    /// The length of available space is decreased accordingly.
975    ///
976    /// # Panics
977    ///
978    /// If `nbytes` is greater than `self.avail()`, then the method panics.
979    pub fn skip(&mut self, nbytes: usize) -> &mut Self {
980        assert!(nbytes <= self.avail());
981        self.cursor = self.cursor.wrapping_add(nbytes);
982
983        self
984    }
985}
986
987impl<'a> From<&'a mut [u8]> for VmWriter<'a, Infallible> {
988    fn from(slice: &'a mut [u8]) -> Self {
989        // SAFETY:
990        // - The memory range points to typed memory.
991        // - The validity requirements for write accesses are met because the pointer is converted
992        //   from a mutable reference that outlives the lifetime `'a`.
993        // - The type, i.e., the `u8` slice, is plain-old-data.
994        unsafe { Self::from_kernel_space(slice.as_mut_ptr(), slice.len()) }
995    }
996}
997
/// A marker trait for POD types that can be read or written with one instruction.
///
/// This trait is mostly a hint, since it's safe and can be implemented for _any_ POD type. If it
/// is implemented for a type that cannot be read or written with a single instruction, calling
/// `read_once`/`write_once` will lead to a failed compile-time assertion.
///
/// Implementations for the primitive integer types are provided below in
/// `pod_once_impls`, gated on the supported target architectures.
pub trait PodOnce: Pod {}
1004
1005#[cfg(any(
1006    target_arch = "x86_64",
1007    target_arch = "riscv64",
1008    target_arch = "loongarch64"
1009))]
1010mod pod_once_impls {
1011    use super::PodOnce;
1012
1013    impl PodOnce for u8 {}
1014    impl PodOnce for u16 {}
1015    impl PodOnce for u32 {}
1016    impl PodOnce for u64 {}
1017    impl PodOnce for usize {}
1018    impl PodOnce for i8 {}
1019    impl PodOnce for i16 {}
1020    impl PodOnce for i32 {}
1021    impl PodOnce for i64 {}
1022    impl PodOnce for isize {}
1023
1024    /// Checks whether the memory operation created by `ptr::read_volatile` and
1025    /// `ptr::write_volatile` doesn't tear.
1026    ///
1027    /// Note that the Rust documentation makes no such guarantee, and even the wording in the LLVM
1028    /// LangRef is ambiguous. But this is unlikely to break in practice because the Linux kernel
1029    /// also uses "volatile" semantics to implement `READ_ONCE`/`WRITE_ONCE`.
1030    pub(super) const fn is_non_tearing<T>() -> bool {
1031        let size = size_of::<T>();
1032
1033        size == 1 || size == 2 || size == 4 || size == 8
1034    }
1035}
1036
/// A marker trait for POD types that can be read or written atomically.
pub trait PodAtomic: Pod {
    /// Atomically loads a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the loaded value.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
    ///   space for `size_of::<T>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
    ///
    /// [valid]: crate::mm::io#safety
    // NOTE(review): requiring write validity for a pure load looks conservative
    // (it mirrors the cmpxchg contract below); confirm whether read validity
    // would suffice for the underlying implementation.
    #[doc(hidden)]
    unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self>;

    /// Atomically compares and exchanges a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the previous value.
    /// `new_val` will be written if and only if the previous value is equal to `old_val`.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
    ///   space for `size_of::<T>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
    ///
    /// [valid]: crate::mm::io#safety
    #[doc(hidden)]
    unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self)
    -> Result<Self>;
}
1071
1072impl PodAtomic for u32 {
1073    unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self> {
1074        // SAFETY: The safety is upheld by the caller.
1075        let result = unsafe { __atomic_load_fallible(ptr) };
1076        if result == !0 {
1077            Err(Error::PageFault)
1078        } else {
1079            Ok(result as Self)
1080        }
1081    }
1082
1083    unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self) -> Result<u32> {
1084        // SAFETY: The safety is upheld by the caller.
1085        let result = unsafe { __atomic_cmpxchg_fallible(ptr, old_val, new_val) };
1086        if result == !0 {
1087            Err(Error::PageFault)
1088        } else {
1089            Ok(result as Self)
1090        }
1091    }
1092}