ostd/mm/io/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2
3//! Abstractions for reading and writing virtual memory (VM) objects.
4//!
5//! # Safety
6//!
7//! The core virtual memory (VM) access APIs provided by this module are [`VmReader`] and
8//! [`VmWriter`], which allow for writing to or reading from a region of memory _safely_.
9//! `VmReader` and `VmWriter` objects can be constructed from memory regions of either typed memory
//! (e.g., `&[u8]`) or untyped memory (e.g., [`UFrame`]). Behind the scene, `VmReader` and `VmWriter`
11//! must be constructed via their [`from_user_space`] and [`from_kernel_space`] methods, whose
12//! safety depends on whether the given memory regions are _valid_ or not.
13//!
14//! [`UFrame`]: crate::mm::UFrame
15//! [`from_user_space`]: `VmReader::from_user_space`
16//! [`from_kernel_space`]: `VmReader::from_kernel_space`
17//!
18//! Here is a list of conditions for memory regions to be considered valid:
19//!
20//! - The memory region as a whole must be either typed or untyped memory, not both typed and
21//!   untyped.
22//!
23//! - If the memory region is typed, we require that:
24//!   - the [validity requirements] from the official Rust documentation must be met, and
25//!   - the type of the memory region (which must exist since the memory is typed) must be
26//!     plain-old-data, so that the writer can fill it with arbitrary data safely.
27//!
28//! [validity requirements]: core::ptr#safety
29//!
30//! - If the memory region is untyped, we require that:
31//!   - the underlying pages must remain alive while the validity requirements are in effect, and
32//!   - the kernel must access the memory region using only the APIs provided in this module, but
33//!     external accesses from hardware devices or user programs do not count.
34//!
35//! We have the last requirement for untyped memory to be valid because the safety interaction with
36//! other ways to access the memory region (e.g., atomic/volatile memory loads/stores) is not
//! currently specified. This may be relaxed in the future, if appropriate and necessary.
38//!
39//! Note that data races on untyped memory are explicitly allowed (since pages can be mapped to
40//! user space, making it impossible to avoid data races). However, they may produce erroneous
41//! results, such as unexpected bytes being copied, but do not cause soundness problems.
42
43pub(crate) mod copy;
44pub mod util;
45
46use core::{marker::PhantomData, mem::MaybeUninit};
47
48use ostd_pod::Pod;
49
50use self::copy::{memcpy, memset};
51use crate::{
52    Error,
53    arch::mm::{__atomic_cmpxchg_fallible, __atomic_load_fallible},
54    mm::{
55        MAX_USERSPACE_VADDR,
56        kspace::{KERNEL_BASE_VADDR, KERNEL_END_VADDR},
57    },
58    prelude::*,
59};
60
/// A trait that enables reading/writing data from/to a VM object,
/// e.g., [`USegment`], [`Vec<UFrame>`] and [`UFrame`].
///
/// # Concurrency
///
/// The methods may be executed by multiple concurrent reader and writer
/// threads. In this case, if the results of concurrent reads or writes
/// desire predictability or atomicity, the users should add extra mechanism
/// for such properties.
///
/// [`USegment`]: crate::mm::USegment
/// [`UFrame`]: crate::mm::UFrame
pub trait VmIo {
    /// Reads requested data at a specified offset into a given `VmWriter`.
    ///
    /// # No short reads
    ///
    /// On success, the `writer` must be written with the requested data
    /// completely. If, for any reason, the requested data is only partially
    /// available, then the method shall return an error.
    fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()>;

    /// Reads a specified number of bytes at a specified offset into a given buffer.
    ///
    /// # No short reads
    ///
    /// Similar to [`read`].
    ///
    /// [`read`]: VmIo::read
    fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
        // The buffer is typed kernel memory, so the writer is infallible;
        // it is converted to a fallible writer to match `read`'s signature.
        let mut writer = VmWriter::from(buf).to_fallible();
        self.read(offset, &mut writer)
    }

    /// Reads a value of a specified type at a specified offset.
    fn read_val<T: Pod>(&self, offset: usize) -> Result<T> {
        // Why not use `MaybeUninit` for a faster implementation?
        //
        // ```rust
        // let mut val: MaybeUninit<T> = MaybeUninit::uninit();
        // let writer = unsafe {
        //     VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>())
        // };
        // self.read(offset, &mut writer.to_fallible())?;
        // Ok(unsafe { val.assume_init() })
        // ```
        //
        // The above implementation avoids initializing `val` upfront,
        // so it is more efficient than our actual implementation.
        // Unfortunately, it is unsound.
        // This is because the `read` method,
        // which could be implemented outside OSTD and thus is untrusted,
        // may not really initialize the bits of `val` at all!

        let mut val = T::new_zeroed();
        self.read_bytes(offset, val.as_mut_bytes())?;
        Ok(val)
    }

    /// Reads a slice of a specified type at a specified offset.
    ///
    /// # No short reads
    ///
    /// Similar to [`read`].
    ///
    /// [`read`]: VmIo::read
    fn read_slice<T: Pod>(&self, offset: usize, slice: &mut [T]) -> Result<()> {
        let len_in_bytes = size_of_val(slice);
        let ptr = slice as *mut [T] as *mut u8;
        // SAFETY: the slice can be transmuted to a writable byte slice since the elements
        // are all Plain-Old-Data (Pod) types.
        let buf = unsafe { core::slice::from_raw_parts_mut(ptr, len_in_bytes) };
        self.read_bytes(offset, buf)
    }

    /// Writes all data from a given `VmReader` at a specified offset.
    ///
    /// # No short writes
    ///
    /// On success, the data from the `reader` must be read to the VM object entirely.
    /// If, for any reason, the input data can only be written partially,
    /// then the method shall return an error.
    fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()>;

    /// Writes a specified number of bytes from a given buffer at a specified offset.
    ///
    /// # No short writes
    ///
    /// Similar to [`write`].
    ///
    /// [`write`]: VmIo::write
    fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> {
        // The buffer is typed kernel memory, so the reader is infallible;
        // it is converted to a fallible reader to match `write`'s signature.
        let mut reader = VmReader::from(buf).to_fallible();
        self.write(offset, &mut reader)
    }

    /// Writes a value of a specified type at a specified offset.
    fn write_val<T: Pod>(&self, offset: usize, new_val: &T) -> Result<()> {
        self.write_bytes(offset, new_val.as_bytes())?;
        Ok(())
    }

    /// Writes a slice of a specified type at a specified offset.
    ///
    /// # No short writes
    ///
    /// Similar to [`write`].
    ///
    /// [`write`]: VmIo::write
    fn write_slice<T: Pod>(&self, offset: usize, slice: &[T]) -> Result<()> {
        let len_in_bytes = size_of_val(slice);
        let ptr = slice as *const [T] as *const u8;
        // SAFETY: the slice can be transmuted to a readable byte slice since the elements
        // are all Plain-Old-Data (Pod) types.
        let buf = unsafe { core::slice::from_raw_parts(ptr, len_in_bytes) };
        self.write_bytes(offset, buf)
    }
}
179
/// A trait that enables filling bytes (e.g., filling zeros) to a VM object.
pub trait VmIoFill {
    /// Writes `len` zeros at a specified offset.
    ///
    /// Unlike the methods in [`VmIo`], this method allows for short writes because `len` can be
    /// effectively unbounded. However, if not all bytes can be written successfully, an `Err(_)`
    /// will be returned with the error and the number of zeros that have been written thus far.
    ///
    /// # A slow, general implementation
    ///
    /// Suppose that [`VmIo`] has already been implemented for the type,
    /// this method can be implemented in the following general way.
    ///
    /// ```rust
    /// fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)> {
    ///     for i in 0..len {
    ///         match self.write_slice(offset + i, &[0u8]) {
    ///             Ok(()) => continue,
    ///             Err(err) => return Err((err, i)),
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    ///
    /// But we choose not to provide a general, default implementation
    /// because doing so would make it too easy for a concrete type of `VmIoFill`
    /// to settle with a slower implementation for such a performance-sensitive operation.
    fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)>;
}
210
/// A trait that enables reading/writing data from/to a VM object using one non-tearing memory
/// load/store.
///
/// See also [`VmIo`], which enables reading/writing data from/to a VM object without the guarantee
/// of using one non-tearing memory load/store.
pub trait VmIoOnce {
    /// Reads a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// load.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method is the same as
    /// [`VmReader::read_once`].
    fn read_once<T: PodOnce>(&self, offset: usize) -> Result<T>;

    /// Writes a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// store.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method is the same as
    /// [`VmWriter::write_once`].
    fn write_once<T: PodOnce>(&self, offset: usize, new_val: &T) -> Result<()>;
}
231
/// A marker type used for _fallible_ memory,
/// where memory access _might_ trigger page faults.
///
/// The most prominent example of fallible memory is user virtual memory.
///
/// By definition, infallible memory is a subset of fallible memory.
/// As a consequence, any code that intends to work with fallible memory
/// should work for both user virtual memory and kernel virtual memory.
///
/// [`VmReader`] and [`VmWriter`] types use this marker type
/// to indicate the property of the underlying memory.
// An uninhabited enum: used purely at the type level, never instantiated.
pub enum Fallible {}
244
/// A marker type used for _infallible_ memory,
/// where memory access is valid and won't trigger page faults.
///
/// The most prominent example of infallible memory is kernel virtual memory
/// (at least for the part where Rust code and data reside).
///
/// [`VmReader`] and [`VmWriter`] types use this marker type
/// to indicate the property of the underlying memory.
// An uninhabited enum: used purely at the type level, never instantiated.
pub enum Infallible {}
254
/// A marker type for I/O memory regions.
///
/// This marker is used by [`memcpy`] and [`memset`]
/// to indicate that a source or destination operand
/// resides in I/O memory (MMIO).
///
/// Unlike [`Fallible`] and [`Infallible`],
/// `Io` cannot statically determine
/// whether a memory access will fault:
/// MMIO fallibility is platform-dependent.
/// For example, on Intel TDX
/// every MMIO access triggers a #VE exception,
/// whereas on a non-CVM x86 host
/// the same access completes without faulting.
// An uninhabited enum: used purely at the type level, never instantiated.
pub(crate) enum Io {}
270
/// Fallible memory read from a `VmWriter`.
pub trait FallibleVmRead<F> {
    /// Reads all data into the writer until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes read is returned;
    /// On error, both the error and the number of bytes read so far are returned.
    fn read_fallible(
        &mut self,
        writer: &mut VmWriter<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
285
/// Fallible memory write from a `VmReader`.
pub trait FallibleVmWrite<F> {
    /// Writes all data from the reader until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes written is returned;
    /// On error, both the error and the number of bytes written so far are returned.
    fn write_fallible(
        &mut self,
        reader: &mut VmReader<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
300
/// `VmReader` is a reader for reading data from a contiguous range of memory.
///
/// The memory range read by `VmReader` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory reads are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmReader` is active for the duration of `'a`,
/// and the corresponding memory reads are considered fallible.
///
/// When performing reads with a `VmWriter`, if one of them represents typed memory,
/// it can ensure that the reading range in this reader and the writing range in the
/// writer are not overlapped.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and the physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmReader<'a, Fallibility = Fallible> {
    // Invariant: `cursor <= end`; the readable range is `cursor..end`.
    cursor: *const u8,
    end: *const u8,
    phantom: PhantomData<(&'a [u8], Fallibility)>,
}
323
324// `Clone` can be implemented for `VmReader`
325// because it either points to untyped memory or represents immutable references.
326// Note that we cannot implement `Clone` for `VmWriter`
327// because it can represent mutable references, which must remain exclusive.
328impl<Fallibility> Clone for VmReader<'_, Fallibility> {
329    fn clone(&self) -> Self {
330        Self {
331            cursor: self.cursor,
332            end: self.end,
333            phantom: PhantomData,
334        }
335    }
336}
337
// Generates a `FallibleVmRead` implementation for `VmReader<$reader_fallibility>`
// writing into `VmWriter<$writer_fallibility>`, where at least one side is fallible.
macro_rules! impl_read_fallible {
    ($reader_fallibility:ty, $writer_fallibility:ty) => {
        impl<'a> FallibleVmRead<$writer_fallibility> for VmReader<'a, $reader_fallibility> {
            fn read_fallible(
                &mut self,
                writer: &mut VmWriter<'_, $writer_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                let copy_len = self.remain().min(writer.avail());
                if copy_len == 0 {
                    return Ok(0);
                }

                // SAFETY: The source and destination are subsets of memory ranges specified by
                // the reader and writer, so they are either valid for reading and writing or in
                // user space.
                let copied_len = unsafe {
                    memcpy::<$writer_fallibility, $reader_fallibility>(
                        writer.cursor,
                        self.cursor,
                        copy_len,
                    )
                };
                // Advance both cursors by the number of bytes actually copied,
                // which may fall short of `copy_len` if a page fault occurred.
                self.cursor = self.cursor.wrapping_add(copied_len);
                writer.cursor = writer.cursor.wrapping_add(copied_len);

                if copied_len < copy_len {
                    Err((Error::PageFault, copied_len))
                } else {
                    Ok(copied_len)
                }
            }
        }
    };
}
372
// Generates a `FallibleVmWrite` implementation for `VmWriter<$writer_fallibility>`
// reading from `VmReader<$reader_fallibility>`. The write is implemented by
// delegating to the reader's `read_fallible`, which performs the actual copy.
macro_rules! impl_write_fallible {
    ($writer_fallibility:ty, $reader_fallibility:ty) => {
        impl<'a> FallibleVmWrite<$reader_fallibility> for VmWriter<'a, $writer_fallibility> {
            fn write_fallible(
                &mut self,
                reader: &mut VmReader<'_, $reader_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                reader.read_fallible(self)
            }
        }
    };
}
385
// Fallible transfers exist whenever at least one side is fallible.
// An `Infallible`-to-`Infallible` copy is provided by the infallible
// `VmReader::read`/`VmWriter::write` methods instead.
impl_read_fallible!(Fallible, Infallible);
impl_read_fallible!(Fallible, Fallible);
impl_read_fallible!(Infallible, Fallible);
impl_write_fallible!(Fallible, Infallible);
impl_write_fallible!(Fallible, Fallible);
impl_write_fallible!(Infallible, Fallible);
392
impl<'a> VmReader<'a, Infallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in kernel space.
    ///
    /// # Safety
    ///
    /// `ptr` must be [valid] for reads of `len` bytes during the entire lifetime `a`.
    ///
    /// [valid]: crate::mm::io#safety
    pub unsafe fn from_kernel_space(ptr: *const u8, len: usize) -> Self {
        // Rust is allowed to give the reference to a zero-sized object a very small address,
        // falling out of the kernel virtual address space range.
        // So when `len` is zero, we should not and need not to check `ptr`.
        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);

        Self {
            cursor: ptr,
            // `wrapping_add` avoids requiring the one-past-the-end address
            // to be reachable via in-bounds pointer arithmetic.
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads all data into the writer until one of the two conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    ///
    /// Returns the number of bytes read.
    pub fn read(&mut self, writer: &mut VmWriter<'_, Infallible>) -> usize {
        let copy_len = self.remain().min(writer.avail());
        if copy_len == 0 {
            return 0;
        }

        // SAFETY: The source and destination are subsets of memory ranges specified by the reader
        // and writer, so they are valid for reading and writing.
        unsafe { memcpy::<Infallible, Infallible>(writer.cursor, self.cursor, copy_len) };
        self.cursor = self.cursor.wrapping_add(copy_len);
        writer.cursor = writer.cursor.wrapping_add(copy_len);

        copy_len
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// this method will return `Err`.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        self.read(&mut writer);
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Reads a value of the `PodOnce` type using one non-tearing memory load.
    ///
    /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
    ///
    /// This method will not compile if the `Pod` type is too large for the current architecture
    /// and the operation must be torn into multiple memory loads.
    ///
    /// # Panics
    ///
    /// This method will panic if the current position of the reader does not meet the alignment
    /// requirements of type `T`.
    pub fn read_once<T: PodOnce>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // Compile-time check: a `T` larger than the architecture word cannot be
        // loaded with a single non-tearing access.
        const { assert!(pod_once_impls::is_non_tearing::<T>()) };

        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
        // and that the cursor is properly aligned with respect to the type `T`. All other safety
        // requirements are the same as for `Self::read`.
        let val = unsafe { cursor.read_volatile() };
        self.cursor = self.cursor.wrapping_add(size_of::<T>());

        Ok(val)
    }

    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
    // not provide an infallible implementation of `VmReader::atomic_load`.

    /// Converts to a fallible reader.
    pub fn to_fallible(self) -> VmReader<'a, Fallible> {
        // It is safe to construct a fallible reader since an infallible reader covers the
        // capabilities of a fallible reader.
        VmReader {
            cursor: self.cursor,
            end: self.end,
            phantom: PhantomData,
        }
    }
}
509
impl VmReader<'_, Fallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// # Safety
    ///
    /// The virtual address range `ptr..ptr + len` must be in user space.
    pub unsafe fn from_user_space(ptr: *const u8, len: usize) -> Self {
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// or the value can not be read completely,
    /// this method will return `Err`.
    ///
    /// If the memory read failed, this method will return `Err`
    /// and the current reader's cursor remains pointing to
    /// the original starting position.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        self.read_fallible(&mut writer)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes read so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read_fallible` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Atomically loads a `PodAtomic` value.
    ///
    /// Regardless of whether it is successful, the cursor of the reader will not move.
    ///
    /// This method only guarantees the atomicity of the specific operation. There are no
    /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
    /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
    /// specified in the C++11 memory model.
    ///
    /// This method will fail with errors if
    ///  1. the remaining space of the reader is less than `size_of::<T>()` bytes, or
    ///  2. the memory operation fails due to an unresolvable page fault.
    ///
    /// # Panics
    ///
    /// This method will panic if the memory location is not aligned on an `align_of::<T>()`-byte
    /// boundary.
    pub fn atomic_load<T: PodAtomic>(&self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // SAFETY:
        // 1. The cursor is either valid for reading or in user space for `size_of::<T>()` bytes.
        // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
        unsafe { T::atomic_load_fallible(cursor) }
    }
}
598
599impl<Fallibility> VmReader<'_, Fallibility> {
600    /// Returns the number of bytes for the remaining data.
601    pub fn remain(&self) -> usize {
602        self.end.addr() - self.cursor.addr()
603    }
604
605    /// Returns the cursor pointer, which refers to the address of the next byte to read.
606    pub fn cursor(&self) -> *const u8 {
607        self.cursor
608    }
609
610    /// Returns if it has remaining data to read.
611    pub fn has_remain(&self) -> bool {
612        self.remain() > 0
613    }
614
615    /// Limits the length of remaining data.
616    ///
617    /// This method ensures the post condition of `self.remain() <= max_remain`.
618    pub fn limit(&mut self, max_remain: usize) -> &mut Self {
619        if max_remain < self.remain() {
620            self.end = self.cursor.wrapping_add(max_remain);
621        }
622
623        self
624    }
625
626    /// Skips the first `nbytes` bytes of data.
627    /// The length of remaining data is decreased accordingly.
628    ///
629    /// # Panics
630    ///
631    /// If `nbytes` is greater than `self.remain()`, then the method panics.
632    pub fn skip(&mut self, nbytes: usize) -> &mut Self {
633        assert!(nbytes <= self.remain());
634        self.cursor = self.cursor.wrapping_add(nbytes);
635
636        self
637    }
638}
639
impl<'a> From<&'a [u8]> for VmReader<'a, Infallible> {
    fn from(slice: &'a [u8]) -> Self {
        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for read accesses are met because the pointer is converted
        //   from an immutable reference that outlives the lifetime `'a`.
        // - The type, i.e., the `u8` slice, is plain-old-data.
        unsafe { Self::from_kernel_space(slice.as_ptr(), slice.len()) }
    }
}
650
/// `VmWriter` is a writer for writing data to a contiguous range of memory.
///
/// The memory range written by `VmWriter` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory writes are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmWriter` is active for the duration of `'a`,
/// and the corresponding memory writes are considered fallible.
///
/// When performing writes with a `VmReader`, if one of them represents typed memory,
/// it can ensure that the writing range in this writer and the reading range in the
/// reader are not overlapped.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and the physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmWriter<'a, Fallibility = Fallible> {
    // Invariant: `cursor <= end`; the writable range is `cursor..end`.
    cursor: *mut u8,
    end: *mut u8,
    phantom: PhantomData<(&'a mut [u8], Fallibility)>,
}
673
674impl<'a> VmWriter<'a, Infallible> {
675    /// Constructs a `VmWriter` from a pointer and a length, which represents
676    /// a memory range in kernel space.
677    ///
678    /// # Safety
679    ///
680    /// `ptr` must be [valid] for writes of `len` bytes during the entire lifetime `a`.
681    ///
682    /// [valid]: crate::mm::io#safety
683    pub unsafe fn from_kernel_space(ptr: *mut u8, len: usize) -> Self {
684        // If casting a zero sized slice to a pointer, the pointer may be null
685        // and does not reside in our kernel space range.
686        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
687        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);
688
689        Self {
690            cursor: ptr,
691            end: ptr.wrapping_add(len),
692            phantom: PhantomData,
693        }
694    }
695
696    /// Writes all data from the reader until one of the two conditions is met:
697    /// 1. The reader has no remaining data.
698    /// 2. The writer has no available space.
699    ///
700    /// Returns the number of bytes written.
701    pub fn write(&mut self, reader: &mut VmReader<'_, Infallible>) -> usize {
702        reader.read(self)
703    }
704
705    /// Writes a value of `Pod` type.
706    ///
707    /// If the length of the `Pod` type exceeds `self.avail()`,
708    /// this method will return `Err`.
709    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
710        if self.avail() < size_of::<T>() {
711            return Err(Error::InvalidArgs);
712        }
713
714        let mut reader = VmReader::from(new_val.as_bytes());
715        self.write(&mut reader);
716        Ok(())
717    }
718
719    /// Writes a value of the `PodOnce` type using one non-tearing memory store.
720    ///
721    /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
722    ///
723    /// # Panics
724    ///
725    /// This method will panic if the current position of the writer does not meet the alignment
726    /// requirements of type `T`.
727    pub fn write_once<T: PodOnce>(&mut self, new_val: &T) -> Result<()> {
728        if self.avail() < size_of::<T>() {
729            return Err(Error::InvalidArgs);
730        }
731
732        let cursor = self.cursor.cast::<T>();
733        assert!(cursor.is_aligned());
734
735        const { assert!(pod_once_impls::is_non_tearing::<T>()) };
736
737        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
738        // and that the cursor is properly aligned with respect to the type `T`. All other safety
739        // requirements are the same as for `Self::write`.
740        unsafe { cursor.write_volatile(*new_val) };
741        self.cursor = self.cursor.wrapping_add(size_of::<T>());
742
743        Ok(())
744    }
745
746    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
747    // not provide an infallible implementation of `VmWriter::atomic_compare_exchange`.
748
749    /// Writes `len` zeros to the target memory.
750    ///
751    /// This method attempts to fill up to `len` bytes with zeros. If the available
752    /// memory from the current cursor position is less than `len`, it will only fill
753    /// the available space.
754    pub fn fill_zeros(&mut self, len: usize) -> usize {
755        let len_to_set = self.avail().min(len);
756        if len_to_set == 0 {
757            return 0;
758        }
759
760        // SAFETY: The destination is a subset of the memory range specified by
761        // the current writer, so it is valid for writing.
762        unsafe { memset::<Infallible>(self.cursor, 0u8, len_to_set) };
763        self.cursor = self.cursor.wrapping_add(len_to_set);
764
765        len_to_set
766    }
767
    /// Converts to a fallible writer.
    pub fn to_fallible(self) -> VmWriter<'a, Fallible> {
        // It is safe to construct a fallible writer since an infallible writer covers the
        // capabilities of a fallible writer.
        VmWriter {
            cursor: self.cursor,
            end: self.end,
            phantom: PhantomData,
        }
    }
778}
779
impl VmWriter<'_, Fallible> {
    /// Constructs a `VmWriter` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// The current context should be consistently associated with valid user space during the
    /// entire lifetime `'a`. This is for correct semantics and is not a safety requirement.
    ///
    /// # Safety
    ///
    /// `ptr` must be in user space for `len` bytes.
    pub unsafe fn from_user_space(ptr: *mut u8, len: usize) -> Self {
        // Sanity check (debug builds only): the whole range must lie below the
        // user/kernel address-space boundary.
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Writes a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.avail()`,
    /// or the value cannot be written completely,
    /// this method will return `Err`.
    ///
    /// If the memory write fails, this method will return `Err`
    /// and the current writer's cursor remains pointing to
    /// the original starting position.
    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
        if self.avail() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut reader = VmReader::from(new_val.as_bytes());
        self.write_fallible(&mut reader)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes written so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        Ok(())
    }

    /// Atomically compares and exchanges a `PodAtomic` value.
    ///
    /// This method compares `old_val` with the value pointed by `self` and, if they are equal,
    /// updates it with `new_val`.
    ///
    /// The value that was previously in memory will be returned, along with a boolean denoting
    /// whether the compare-and-exchange succeeds. The caller usually wants to retry if this
    /// flag is false, passing the most recent value that was returned by this method.
    ///
    /// The caller is required to provide a reader which points to the exact same memory location
    /// to ensure that reading from the memory is allowed.
    ///
    /// Regardless of whether it is successful, the cursors of the reader and writer will not move.
    ///
    /// This method only guarantees the atomicity of the specific operation. There are no
    /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
    /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
    /// specified in the C++11 memory model.
    ///
    /// Since the operation does not involve memory locks, it can't prevent the [ABA
    /// problem](https://en.wikipedia.org/wiki/ABA_problem).
    ///
    /// This method will fail with errors if:
    ///  1. the remaining space of the reader or the available space of the writer are less than
    ///     `size_of::<T>()` bytes, or
    ///  2. the memory operation fails due to an unresolvable page fault.
    ///
    /// # Panics
    ///
    /// This method will panic if:
    ///  1. the reader and the writer do not point to the same memory location, or
    ///  2. the memory location is not aligned on an `align_of::<T>()`-byte boundary.
    pub fn atomic_compare_exchange<T>(
        &self,
        reader: &VmReader,
        old_val: T,
        new_val: T,
    ) -> Result<(T, bool)>
    where
        T: PodAtomic + Eq,
    {
        if self.avail() < size_of::<T>() || reader.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        // The reader proves read permission; it must alias the writer's cursor exactly.
        assert_eq!(self.cursor.cast_const(), reader.cursor);

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // SAFETY:
        // 1. The cursor is either valid for reading and writing or in user space for
        //    `size_of::<T>()` bytes.
        // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
        let cur_val = unsafe { T::atomic_cmpxchg_fallible(cursor, old_val, new_val)? };

        Ok((cur_val, old_val == cur_val))
    }

    /// Writes `len` zeros to the target memory.
    ///
    /// This method attempts to fill up to `len` bytes with zeros. If the available
    /// memory from the current cursor position is less than `len`, it will only fill
    /// the available space.
    ///
    /// If the memory write fails due to an unresolvable page fault, this method
    /// will return `Err` with the length set so far.
    pub fn fill_zeros(&mut self, len: usize) -> core::result::Result<usize, (Error, usize)> {
        let len_to_set = self.avail().min(len);
        if len_to_set == 0 {
            return Ok(0);
        }

        // SAFETY: The destination is a subset of the memory range specified by
        // the current writer, so it is either valid for writing or in user space.
        let set_len = unsafe { memset::<Fallible>(self.cursor, 0u8, len_to_set) };
        // Advance past the bytes actually zeroed, even on a partial (faulting) fill.
        self.cursor = self.cursor.wrapping_add(set_len);

        if set_len < len_to_set {
            Err((Error::PageFault, set_len))
        } else {
            Ok(len_to_set)
        }
    }
}
910
911impl<Fallibility> VmWriter<'_, Fallibility> {
912    /// Returns the number of bytes for the available space.
913    pub fn avail(&self) -> usize {
914        self.end.addr() - self.cursor.addr()
915    }
916
917    /// Returns the cursor pointer, which refers to the address of the next byte to write.
918    pub fn cursor(&self) -> *mut u8 {
919        self.cursor
920    }
921
922    /// Returns if it has available space to write.
923    pub fn has_avail(&self) -> bool {
924        self.avail() > 0
925    }
926
927    /// Limits the length of available space.
928    ///
929    /// This method ensures the post condition of `self.avail() <= max_avail`.
930    pub fn limit(&mut self, max_avail: usize) -> &mut Self {
931        if max_avail < self.avail() {
932            self.end = self.cursor.wrapping_add(max_avail);
933        }
934
935        self
936    }
937
938    /// Skips the first `nbytes` bytes of data.
939    /// The length of available space is decreased accordingly.
940    ///
941    /// # Panics
942    ///
943    /// If `nbytes` is greater than `self.avail()`, then the method panics.
944    pub fn skip(&mut self, nbytes: usize) -> &mut Self {
945        assert!(nbytes <= self.avail());
946        self.cursor = self.cursor.wrapping_add(nbytes);
947
948        self
949    }
950
951    /// Creates a clone of this writer, requiring exclusive access.
952    ///
953    /// This method is analogous to [`Clone::clone`], but takes `&mut self`
954    /// instead of `&self`. The `&mut self` receiver is necessary because
955    /// `VmWriter` cannot safely implement `Clone`:
956    /// the underlying buffer may be a mutable slice,
957    /// and two concurrent writers would violate Rust's aliasing rules.
958    ///
959    /// The returned writer has the same cursor position and limit as `self`.
960    /// Because it borrows `self` mutably,
961    /// the original writer cannot be used until the returned writer is dropped.
962    ///
963    /// Note that writes through the returned writer
964    /// do **not** advance the cursor of the original writer.
965    pub fn clone_exclusive(&mut self) -> VmWriter<'_, Fallibility> {
966        VmWriter {
967            cursor: self.cursor,
968            end: self.end,
969            phantom: PhantomData,
970        }
971    }
972}
973
974impl<'a> From<&'a mut [u8]> for VmWriter<'a, Infallible> {
975    fn from(slice: &'a mut [u8]) -> Self {
976        // SAFETY:
977        // - The memory range points to typed memory.
978        // - The validity requirements for write accesses are met because the pointer is converted
979        //   from a mutable reference that outlives the lifetime `'a`.
980        // - The type, i.e., the `u8` slice, is plain-old-data.
981        unsafe { Self::from_kernel_space(slice.as_mut_ptr(), slice.len()) }
982    }
983}
984
/// A marker trait for POD types that can be read or written with one instruction.
///
/// This trait is mostly a hint, since it's safe and can be implemented for _any_ POD type. If it
/// is implemented for a type that cannot be read or written with a single instruction, calling
/// `read_once`/`write_once` will lead to a failed compile-time assertion.
///
/// On the supported architectures, implementations are provided below for the fixed-width
/// integer types up to 64 bits (as well as `usize`/`isize`).
pub trait PodOnce: Pod {}
991
992#[cfg(any(
993    target_arch = "x86_64",
994    target_arch = "riscv64",
995    target_arch = "loongarch64"
996))]
997mod pod_once_impls {
998    use super::PodOnce;
999
1000    impl PodOnce for u8 {}
1001    impl PodOnce for u16 {}
1002    impl PodOnce for u32 {}
1003    impl PodOnce for u64 {}
1004    impl PodOnce for usize {}
1005    impl PodOnce for i8 {}
1006    impl PodOnce for i16 {}
1007    impl PodOnce for i32 {}
1008    impl PodOnce for i64 {}
1009    impl PodOnce for isize {}
1010
1011    /// Checks whether the memory operation created by `ptr::read_volatile` and
1012    /// `ptr::write_volatile` doesn't tear.
1013    ///
1014    /// Note that the Rust documentation makes no such guarantee, and even the wording in the LLVM
1015    /// LangRef is ambiguous. But this is unlikely to break in practice because the Linux kernel
1016    /// also uses "volatile" semantics to implement `READ_ONCE`/`WRITE_ONCE`.
1017    pub(super) const fn is_non_tearing<T>() -> bool {
1018        let size = size_of::<T>();
1019
1020        size == 1 || size == 2 || size == 4 || size == 8
1021    }
1022}
1023
/// A marker trait for POD types that can be read or written atomically.
pub trait PodAtomic: Pod {
    /// Atomically loads a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the loaded value.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for reads of `size_of::<Self>()` bytes or be in user
    ///   space for `size_of::<Self>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<Self>()`-byte boundary.
    ///
    /// [valid]: crate::mm::io#safety
    #[doc(hidden)]
    unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self>;

    /// Atomically compares and exchanges a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the previous value.
    /// `new_val` will be written if and only if the previous value is equal to `old_val`.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for writes of `size_of::<Self>()` bytes or be in user
    ///   space for `size_of::<Self>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<Self>()`-byte boundary.
    ///
    /// [valid]: crate::mm::io#safety
    #[doc(hidden)]
    unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self)
    -> Result<Self>;
}
1058
1059impl PodAtomic for u32 {
1060    unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self> {
1061        // SAFETY: The safety is upheld by the caller.
1062        let result = unsafe { __atomic_load_fallible(ptr) };
1063        if result == !0 {
1064            Err(Error::PageFault)
1065        } else {
1066            Ok(result as Self)
1067        }
1068    }
1069
1070    unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self) -> Result<u32> {
1071        // SAFETY: The safety is upheld by the caller.
1072        let result = unsafe { __atomic_cmpxchg_fallible(ptr, old_val, new_val) };
1073        if result == !0 {
1074            Err(Error::PageFault)
1075        } else {
1076            Ok(result as Self)
1077        }
1078    }
1079}