ostd/mm/io/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2
3//! Abstractions for reading and writing virtual memory (VM) objects.
4//!
5//! # Safety
6//!
7//! The core virtual memory (VM) access APIs provided by this module are [`VmReader`] and
8//! [`VmWriter`], which allow for writing to or reading from a region of memory _safely_.
9//! `VmReader` and `VmWriter` objects can be constructed from memory regions of either typed memory
//! (e.g., `&[u8]`) or untyped memory (e.g., [`UFrame`]). Behind the scenes, `VmReader` and `VmWriter`
11//! must be constructed via their [`from_user_space`] and [`from_kernel_space`] methods, whose
12//! safety depends on whether the given memory regions are _valid_ or not.
13//!
14//! [`UFrame`]: crate::mm::UFrame
15//! [`from_user_space`]: `VmReader::from_user_space`
16//! [`from_kernel_space`]: `VmReader::from_kernel_space`
17//!
18//! Here is a list of conditions for memory regions to be considered valid:
19//!
20//! - The memory region as a whole must be either typed or untyped memory, not both typed and
21//!   untyped.
22//!
23//! - If the memory region is typed, we require that:
24//!   - the [validity requirements] from the official Rust documentation must be met, and
25//!   - the type of the memory region (which must exist since the memory is typed) must be
26//!     plain-old-data, so that the writer can fill it with arbitrary data safely.
27//!
28//! [validity requirements]: core::ptr#safety
29//!
30//! - If the memory region is untyped, we require that:
31//!   - the underlying pages must remain alive while the validity requirements are in effect, and
32//!   - the kernel must access the memory region using only the APIs provided in this module, but
33//!     external accesses from hardware devices or user programs do not count.
34//!
35//! We have the last requirement for untyped memory to be valid because the safety interaction with
36//! other ways to access the memory region (e.g., atomic/volatile memory loads/stores) is not
//! currently specified. This may be relaxed in the future, if appropriate and necessary.
38//!
39//! Note that data races on untyped memory are explicitly allowed (since pages can be mapped to
40//! user space, making it impossible to avoid data races). However, they may produce erroneous
41//! results, such as unexpected bytes being copied, but do not cause soundness problems.
42
43pub(crate) mod copy;
44pub mod util;
45
46use core::{marker::PhantomData, mem::MaybeUninit};
47
48use ostd_pod::Pod;
49
50use self::copy::{memcpy, memset};
51use crate::{
52    Error,
53    arch::mm::{__atomic_cmpxchg_fallible, __atomic_load_fallible},
54    mm::{
55        MAX_USERSPACE_VADDR,
56        kspace::{KERNEL_BASE_VADDR, KERNEL_END_VADDR},
57    },
58    prelude::*,
59};
60
61/// A trait that enables reading/writing data from/to a VM object,
62/// e.g., [`USegment`], [`Vec<UFrame>`] and [`UFrame`].
63///
64/// # Concurrency
65///
66/// The methods may be executed by multiple concurrent reader and writer
67/// threads. In this case, if the results of concurrent reads or writes
68/// desire predictability or atomicity, the users should add extra mechanism
69/// for such properties.
70///
71/// [`USegment`]: crate::mm::USegment
72/// [`UFrame`]: crate::mm::UFrame
73pub trait VmIo {
74    /// Reads requested data at a specified offset into a given `VmWriter`.
75    ///
76    /// # No short reads
77    ///
78    /// On success, the `writer` must be written with the requested data
79    /// completely. If, for any reason, the requested data is only partially
80    /// available, then the method shall return an error.
81    fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()>;
82
83    /// Reads a specified number of bytes at a specified offset into a given buffer.
84    ///
85    /// # No short reads
86    ///
87    /// Similar to [`read`].
88    ///
89    /// [`read`]: VmIo::read
90    fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
91        let mut writer = VmWriter::from(buf).to_fallible();
92        self.read(offset, &mut writer)
93    }
94
95    /// Reads a value of a specified type at a specified offset.
96    fn read_val<T: Pod>(&self, offset: usize) -> Result<T> {
97        // Why not use `MaybeUninit` for a faster implementation?
98        //
99        // ```rust
100        // let mut val: MaybeUninit<T> = MaybeUninit::uninit();
101        // let writer = unsafe {
102        //     VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>())
103        // };
104        // self.read(offset, &mut writer.to_fallible())?;
105        // Ok(unsafe { val.assume_init() })
106        // ```
107        //
108        // The above implementation avoids initializing `val` upfront,
109        // so it is more efficient than our actual implementation.
110        // Unfortunately, it is unsound.
111        // This is because the `read` method,
112        // which could be implemented outside OSTD and thus is untrusted,
113        // may not really initialize the bits of `val` at all!
114
115        let mut val = T::new_zeroed();
116        self.read_bytes(offset, val.as_mut_bytes())?;
117        Ok(val)
118    }
119
120    /// Reads a slice of a specified type at a specified offset.
121    ///
122    /// # No short reads
123    ///
124    /// Similar to [`read`].
125    ///
126    /// [`read`]: VmIo::read
127    fn read_slice<T: Pod>(&self, offset: usize, slice: &mut [T]) -> Result<()> {
128        let len_in_bytes = size_of_val(slice);
129        let ptr = slice as *mut [T] as *mut u8;
130        // SAFETY: the slice can be transmuted to a writable byte slice since the elements
131        // are all Plain-Old-Data (Pod) types.
132        let buf = unsafe { core::slice::from_raw_parts_mut(ptr, len_in_bytes) };
133        self.read_bytes(offset, buf)
134    }
135
136    /// Writes all data from a given `VmReader` at a specified offset.
137    ///
138    /// # No short writes
139    ///
140    /// On success, the data from the `reader` must be read to the VM object entirely.
141    /// If, for any reason, the input data can only be written partially,
142    /// then the method shall return an error.
143    fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()>;
144
145    /// Writes a specified number of bytes from a given buffer at a specified offset.
146    ///
147    /// # No short writes
148    ///
149    /// Similar to [`write`].
150    ///
151    /// [`write`]: VmIo::write
152    fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> {
153        let mut reader = VmReader::from(buf).to_fallible();
154        self.write(offset, &mut reader)
155    }
156
157    /// Writes a value of a specified type at a specified offset.
158    fn write_val<T: Pod>(&self, offset: usize, new_val: &T) -> Result<()> {
159        self.write_bytes(offset, new_val.as_bytes())?;
160        Ok(())
161    }
162
163    /// Writes a slice of a specified type at a specified offset.
164    ///
165    /// # No short write
166    ///
167    /// Similar to [`write`].
168    ///
169    /// [`write`]: VmIo::write
170    fn write_slice<T: Pod>(&self, offset: usize, slice: &[T]) -> Result<()> {
171        let len_in_bytes = size_of_val(slice);
172        let ptr = slice as *const [T] as *const u8;
173        // SAFETY: the slice can be transmuted to a readable byte slice since the elements
174        // are all Plain-Old-Data (Pod) types.
175        let buf = unsafe { core::slice::from_raw_parts(ptr, len_in_bytes) };
176        self.write_bytes(offset, buf)
177    }
178}
179
/// A trait that enables filling bytes (e.g., filling zeros) to a VM object.
pub trait VmIoFill {
    /// Writes `len` zeros at a specified offset.
    ///
    /// Unlike the methods in [`VmIo`], this method allows for short writes because `len` can be
    /// effectively unbounded. However, if not all bytes can be written successfully, an `Err(_)`
    /// will be returned with the error and the number of zeros that have been written thus far.
    ///
    /// # A slow, general implementation
    ///
    /// Suppose that [`VmIo`] has already been implemented for the type,
    /// this method can be implemented in the following general way.
    ///
    /// ```rust
    /// fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)> {
    ///     for i in 0..len {
    ///         match self.write_slice(offset + i, &[0u8]) {
    ///             Ok(()) => continue,
    ///             Err(err) => return Err((err, i)),
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    ///
    /// But we choose not to provide a general, default implementation
    /// because doing so would make it too easy for a concrete type of `VmIoFill`
    /// to settle with a slower implementation for such a performance-sensitive operation.
    //
    // NOTE: On failure, the `usize` in the error tuple reports how many zeros
    // were written before the error occurred, enabling callers to resume.
    fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)>;
}
210
/// A trait that enables reading/writing data from/to a VM object using one non-tearing memory
/// load/store.
///
/// The `PodOnce` bound restricts values to those that can be transferred with a single memory
/// access on the current architecture (see [`VmReader::read_once`]).
///
/// See also [`VmIo`], which enables reading/writing data from/to a VM object without the guarantee
/// of using one non-tearing memory load/store.
pub trait VmIoOnce {
    /// Reads a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// load.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method is the same as
    /// [`VmReader::read_once`].
    fn read_once<T: PodOnce>(&self, offset: usize) -> Result<T>;

    /// Writes a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// store.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method is the same as
    /// [`VmWriter::write_once`].
    fn write_once<T: PodOnce>(&self, offset: usize, new_val: &T) -> Result<()>;
}
231
/// A marker type used for _fallible_ memory,
/// where memory access _might_ trigger page faults.
///
/// The most prominent example of fallible memory is user virtual memory.
///
/// By definition, infallible memory is a subset of fallible memory.
/// As a consequence, any code that intends to work with fallible memory
/// should work for both user virtual memory and kernel virtual memory.
///
/// [`VmReader`] and [`VmWriter`] types use this marker type
/// to indicate the property of the underlying memory.
// An uninhabited enum: it can never be instantiated and exists purely as a
// type-level marker.
pub enum Fallible {}
244
/// A marker type used for _infallible_ memory,
/// where memory access is valid and won't trigger page faults.
///
/// The most prominent example of infallible memory is kernel virtual memory
/// (at least for the part where Rust code and data reside).
///
/// [`VmReader`] and [`VmWriter`] types use this marker type
/// to indicate the property of the underlying memory.
// An uninhabited enum: it can never be instantiated and exists purely as a
// type-level marker.
pub enum Infallible {}
254
/// Fallible memory read from a `VmWriter`.
///
/// The type parameter `F` is the fallibility marker of the *writer* side
/// (see the `impl_read_fallible!` invocations below).
pub trait FallibleVmRead<F> {
    /// Reads all data into the writer until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes read is returned;
    /// On error, both the error and the number of bytes read so far are returned.
    fn read_fallible(
        &mut self,
        writer: &mut VmWriter<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
269
/// Fallible memory write from a `VmReader`.
///
/// The type parameter `F` is the fallibility marker of the *reader* side
/// (see the `impl_write_fallible!` invocations below).
pub trait FallibleVmWrite<F> {
    /// Writes all data from the reader until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes written is returned;
    /// On error, both the error and the number of bytes written so far are returned.
    fn write_fallible(
        &mut self,
        reader: &mut VmReader<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
284
/// `VmReader` is a reader for reading data from a contiguous range of memory.
///
/// The memory range read by `VmReader` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory reads are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmReader` is active for the duration of `'a`,
/// and the corresponding memory reads are considered fallible.
///
/// When performing reads with a `VmWriter`, if one of them represents typed memory,
/// it can ensure that the reading range in this reader and writing range in the
/// writer do not overlap.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmReader<'a, Fallibility = Fallible> {
    // The address of the next byte to read; always `cursor <= end`.
    cursor: *const u8,
    // One byte past the last readable byte.
    end: *const u8,
    // Carries the borrow of the underlying memory and the fallibility marker.
    phantom: PhantomData<(&'a [u8], Fallibility)>,
}
307
308// `Clone` can be implemented for `VmReader`
309// because it either points to untyped memory or represents immutable references.
310// Note that we cannot implement `Clone` for `VmWriter`
311// because it can represent mutable references, which must remain exclusive.
312impl<Fallibility> Clone for VmReader<'_, Fallibility> {
313    fn clone(&self) -> Self {
314        Self {
315            cursor: self.cursor,
316            end: self.end,
317            phantom: PhantomData,
318        }
319    }
320}
321
// Implements `FallibleVmRead<$writer_fallibility>` for
// `VmReader<'_, $reader_fallibility>` by copying with the fallible `memcpy`,
// which reports how many bytes were actually copied before any fault.
macro_rules! impl_read_fallible {
    ($reader_fallibility:ty, $writer_fallibility:ty) => {
        impl<'a> FallibleVmRead<$writer_fallibility> for VmReader<'a, $reader_fallibility> {
            fn read_fallible(
                &mut self,
                writer: &mut VmWriter<'_, $writer_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                // The copy length is bounded by both endpoints.
                let copy_len = self.remain().min(writer.avail());
                if copy_len == 0 {
                    return Ok(0);
                }

                // SAFETY: The source and destination are subsets of memory ranges specified by
                // the reader and writer, so they are either valid for reading and writing or in
                // user space.
                let copied_len = unsafe {
                    memcpy::<$writer_fallibility, $reader_fallibility>(
                        writer.cursor,
                        self.cursor,
                        copy_len,
                    )
                };
                // Advance both cursors by the number of bytes actually copied,
                // which may be less than `copy_len` if a page fault occurred.
                self.cursor = self.cursor.wrapping_add(copied_len);
                writer.cursor = writer.cursor.wrapping_add(copied_len);

                // A short copy means a page fault stopped the transfer; report
                // the progress made so far alongside the error.
                if copied_len < copy_len {
                    Err((Error::PageFault, copied_len))
                } else {
                    Ok(copied_len)
                }
            }
        }
    };
}
356
// Implements `FallibleVmWrite` by delegating to the corresponding
// `FallibleVmRead` implementation: writing from a reader and reading into a
// writer are the same copy operation viewed from opposite ends.
macro_rules! impl_write_fallible {
    ($writer_fallibility:ty, $reader_fallibility:ty) => {
        impl<'a> FallibleVmWrite<$reader_fallibility> for VmWriter<'a, $writer_fallibility> {
            fn write_fallible(
                &mut self,
                reader: &mut VmReader<'_, $reader_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                // The copy logic lives in `read_fallible`.
                reader.read_fallible(self)
            }
        }
    };
}
369
// Instantiate the fallible read/write traits for every (reader, writer)
// fallibility pairing that involves at least one `Fallible` side. The
// `Infallible`-to-`Infallible` case is intentionally omitted: it is covered
// by the infallible `VmReader::read`/`VmWriter::write` methods instead.
impl_read_fallible!(Fallible, Infallible);
impl_read_fallible!(Fallible, Fallible);
impl_read_fallible!(Infallible, Fallible);
impl_write_fallible!(Fallible, Infallible);
impl_write_fallible!(Fallible, Fallible);
impl_write_fallible!(Infallible, Fallible);
376
impl<'a> VmReader<'a, Infallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in kernel space.
    ///
    /// # Safety
    ///
    /// `ptr` must be [valid] for reads of `len` bytes during the entire lifetime `a`.
    ///
    /// [valid]: crate::mm::io#safety
    pub unsafe fn from_kernel_space(ptr: *const u8, len: usize) -> Self {
        // Rust is allowed to give the reference to a zero-sized object a very small address,
        // falling out of the kernel virtual address space range.
        // So when `len` is zero, we should not and need not to check `ptr`.
        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);

        Self {
            cursor: ptr,
            // `wrapping_add` keeps the arithmetic well-defined even for the
            // one-past-the-end position of a zero-length range.
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads all data into the writer until one of the two conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    ///
    /// Returns the number of bytes read.
    pub fn read(&mut self, writer: &mut VmWriter<'_, Infallible>) -> usize {
        // The copy length is bounded by both endpoints.
        let copy_len = self.remain().min(writer.avail());
        if copy_len == 0 {
            return 0;
        }

        // SAFETY: The source and destination are subsets of memory ranges specified by the reader
        // and writer, so they are valid for reading and writing.
        unsafe { memcpy::<Infallible, Infallible>(writer.cursor, self.cursor, copy_len) };
        self.cursor = self.cursor.wrapping_add(copy_len);
        writer.cursor = writer.cursor.wrapping_add(copy_len);

        copy_len
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// this method will return `Err`.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        // Since `self.remain() >= size_of::<T>()` was checked above, the
        // infallible `read` must fill the writer completely.
        self.read(&mut writer);
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Reads a value of the `PodOnce` type using one non-tearing memory load.
    ///
    /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
    ///
    /// This method will not compile if the `Pod` type is too large for the current architecture
    /// and the operation must be torn into multiple memory loads.
    ///
    /// # Panics
    ///
    /// This method will panic if the current position of the reader does not meet the alignment
    /// requirements of type `T`.
    pub fn read_once<T: PodOnce>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // Rejects, at compile time, types that cannot be loaded with a single
        // memory access on this architecture.
        const { assert!(pod_once_impls::is_non_tearing::<T>()) };

        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
        // and that the cursor is properly aligned with respect to the type `T`. All other safety
        // requirements are the same as for `Self::read`.
        let val = unsafe { cursor.read_volatile() };
        self.cursor = self.cursor.wrapping_add(size_of::<T>());

        Ok(val)
    }

    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
    // not provide an infallible implementation of `VmReader::atomic_load`.

    /// Converts to a fallible reader.
    pub fn to_fallible(self) -> VmReader<'a, Fallible> {
        // It is safe to construct a fallible reader since an infallible reader covers the
        // capabilities of a fallible reader.
        VmReader {
            cursor: self.cursor,
            end: self.end,
            phantom: PhantomData,
        }
    }
}
493
impl VmReader<'_, Fallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// # Safety
    ///
    /// The virtual address range `ptr..ptr + len` must be in user space.
    pub unsafe fn from_user_space(ptr: *const u8, len: usize) -> Self {
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// or the value can not be read completely,
    /// this method will return `Err`.
    ///
    /// If the memory read failed, this method will return `Err`
    /// and the current reader's cursor remains pointing to
    /// the original starting position.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        self.read_fallible(&mut writer)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes read so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read_fallible` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Atomically loads a `PodAtomic` value.
    ///
    /// Regardless of whether it is successful, the cursor of the reader will not move.
    ///
    /// This method only guarantees the atomicity of the specific operation. There are no
    /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
    /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
    /// specified in the C++11 memory model.
    ///
    /// This method will fail with errors if
    ///  1. the remaining space of the reader is less than `size_of::<T>()` bytes, or
    ///  2. the memory operation fails due to an unresolvable page fault.
    ///
    /// # Panics
    ///
    /// This method will panic if the memory location is not aligned on an `align_of::<T>()`-byte
    /// boundary.
    pub fn atomic_load<T: PodAtomic>(&self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // Note: this method takes `&self`, so the cursor is never advanced.
        //
        // SAFETY:
        // 1. The cursor is either valid for reading or in user space for `size_of::<T>()` bytes.
        // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
        unsafe { T::atomic_load_fallible(cursor) }
    }
}
582
583impl<Fallibility> VmReader<'_, Fallibility> {
584    /// Returns the number of bytes for the remaining data.
585    pub fn remain(&self) -> usize {
586        self.end.addr() - self.cursor.addr()
587    }
588
589    /// Returns the cursor pointer, which refers to the address of the next byte to read.
590    pub fn cursor(&self) -> *const u8 {
591        self.cursor
592    }
593
594    /// Returns if it has remaining data to read.
595    pub fn has_remain(&self) -> bool {
596        self.remain() > 0
597    }
598
599    /// Limits the length of remaining data.
600    ///
601    /// This method ensures the post condition of `self.remain() <= max_remain`.
602    pub fn limit(&mut self, max_remain: usize) -> &mut Self {
603        if max_remain < self.remain() {
604            self.end = self.cursor.wrapping_add(max_remain);
605        }
606
607        self
608    }
609
610    /// Skips the first `nbytes` bytes of data.
611    /// The length of remaining data is decreased accordingly.
612    ///
613    /// # Panics
614    ///
615    /// If `nbytes` is greater than `self.remain()`, then the method panics.
616    pub fn skip(&mut self, nbytes: usize) -> &mut Self {
617        assert!(nbytes <= self.remain());
618        self.cursor = self.cursor.wrapping_add(nbytes);
619
620        self
621    }
622}
623
624impl<'a> From<&'a [u8]> for VmReader<'a, Infallible> {
625    fn from(slice: &'a [u8]) -> Self {
626        // SAFETY:
627        // - The memory range points to typed memory.
628        // - The validity requirements for read accesses are met because the pointer is converted
629        //   from an immutable reference that outlives the lifetime `'a`.
630        // - The type, i.e., the `u8` slice, is plain-old-data.
631        unsafe { Self::from_kernel_space(slice.as_ptr(), slice.len()) }
632    }
633}
634
/// `VmWriter` is a writer for writing data to a contiguous range of memory.
///
/// The memory range written by `VmWriter` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory writes are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmWriter` is active for the duration of `'a`,
/// and the corresponding memory writes are considered fallible.
///
/// When performing writes with a `VmReader`, if one of them represents typed memory,
/// it can ensure that the writing range in this writer and reading range in the
/// reader do not overlap.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmWriter<'a, Fallibility = Fallible> {
    // The address of the next byte to write; always `cursor <= end`.
    cursor: *mut u8,
    // One byte past the last writable byte.
    end: *mut u8,
    // Carries the (mutable) borrow of the underlying memory and the
    // fallibility marker.
    phantom: PhantomData<(&'a mut [u8], Fallibility)>,
}
657
658impl<'a> VmWriter<'a, Infallible> {
659    /// Constructs a `VmWriter` from a pointer and a length, which represents
660    /// a memory range in kernel space.
661    ///
662    /// # Safety
663    ///
664    /// `ptr` must be [valid] for writes of `len` bytes during the entire lifetime `a`.
665    ///
666    /// [valid]: crate::mm::io#safety
667    pub unsafe fn from_kernel_space(ptr: *mut u8, len: usize) -> Self {
668        // If casting a zero sized slice to a pointer, the pointer may be null
669        // and does not reside in our kernel space range.
670        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
671        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);
672
673        Self {
674            cursor: ptr,
675            end: ptr.wrapping_add(len),
676            phantom: PhantomData,
677        }
678    }
679
680    /// Writes all data from the reader until one of the two conditions is met:
681    /// 1. The reader has no remaining data.
682    /// 2. The writer has no available space.
683    ///
684    /// Returns the number of bytes written.
685    pub fn write(&mut self, reader: &mut VmReader<'_, Infallible>) -> usize {
686        reader.read(self)
687    }
688
689    /// Writes a value of `Pod` type.
690    ///
691    /// If the length of the `Pod` type exceeds `self.avail()`,
692    /// this method will return `Err`.
693    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
694        if self.avail() < size_of::<T>() {
695            return Err(Error::InvalidArgs);
696        }
697
698        let mut reader = VmReader::from(new_val.as_bytes());
699        self.write(&mut reader);
700        Ok(())
701    }
702
703    /// Writes a value of the `PodOnce` type using one non-tearing memory store.
704    ///
705    /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
706    ///
707    /// # Panics
708    ///
709    /// This method will panic if the current position of the writer does not meet the alignment
710    /// requirements of type `T`.
711    pub fn write_once<T: PodOnce>(&mut self, new_val: &T) -> Result<()> {
712        if self.avail() < size_of::<T>() {
713            return Err(Error::InvalidArgs);
714        }
715
716        let cursor = self.cursor.cast::<T>();
717        assert!(cursor.is_aligned());
718
719        const { assert!(pod_once_impls::is_non_tearing::<T>()) };
720
721        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
722        // and that the cursor is properly aligned with respect to the type `T`. All other safety
723        // requirements are the same as for `Self::write`.
724        unsafe { cursor.write_volatile(*new_val) };
725        self.cursor = self.cursor.wrapping_add(size_of::<T>());
726
727        Ok(())
728    }
729
730    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
731    // not provide an infallible implementation of `VmWriter::atomic_compare_exchange`.
732
733    /// Writes `len` zeros to the target memory.
734    ///
735    /// This method attempts to fill up to `len` bytes with zeros. If the available
736    /// memory from the current cursor position is less than `len`, it will only fill
737    /// the available space.
738    pub fn fill_zeros(&mut self, len: usize) -> usize {
739        let len_to_set = self.avail().min(len);
740        if len_to_set == 0 {
741            return 0;
742        }
743
744        // SAFETY: The destination is a subset of the memory range specified by
745        // the current writer, so it is valid for writing.
746        unsafe { memset::<Infallible>(self.cursor, 0u8, len_to_set) };
747        self.cursor = self.cursor.wrapping_add(len_to_set);
748
749        len_to_set
750    }
751
    /// Converts to a fallible writer.
    pub fn to_fallible(self) -> VmWriter<'a, Fallible> {
        // It is safe to construct a fallible writer since an infallible writer covers the
        // capabilities of a fallible writer.
        VmWriter {
            cursor: self.cursor,
            end: self.end,
            phantom: PhantomData,
        }
    }
762}
763
impl VmWriter<'_, Fallible> {
    /// Constructs a `VmWriter` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// The current context should be consistently associated with valid user space during the
    /// entire lifetime `'a`. This is for correct semantics and is not a safety requirement.
    ///
    /// # Safety
    ///
    /// `ptr` must be in user space for `len` bytes.
    pub unsafe fn from_user_space(ptr: *mut u8, len: usize) -> Self {
        // `checked_add` also catches address-space wrap-around, not just ends
        // beyond `MAX_USERSPACE_VADDR`.
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Writes a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.avail()`,
    /// or the value cannot be written completely,
    /// this method will return `Err`.
    ///
    /// If the memory write failed, this method will return `Err`
    /// and the current writer's cursor remains pointing to
    /// the original starting position.
    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
        if self.avail() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut reader = VmReader::from(new_val.as_bytes());
        self.write_fallible(&mut reader)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes written so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        Ok(())
    }

    /// Atomically compares and exchanges a `PodAtomic` value.
    ///
    /// This method compares `old_val` with the value pointed by `self` and, if they are equal,
    /// updates it with `new_val`.
    ///
    /// The value that was previously in memory will be returned, along with a boolean denoting
    /// whether the compare-and-exchange succeeds. The caller usually wants to retry if this
    /// flag is false, passing the most recent value that was returned by this method.
    ///
    /// The caller is required to provide a reader which points to the exact same memory location
    /// to ensure that reading from the memory is allowed.
    ///
    /// Regardless of whether it is successful, the cursors of the reader and writer will not move.
    ///
    /// This method only guarantees the atomicity of the specific operation. There are no
    /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
    /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
    /// specified in the C++11 memory model.
    ///
    /// Since the operation does not involve memory locks, it can't prevent the [ABA
    /// problem](https://en.wikipedia.org/wiki/ABA_problem).
    ///
    /// This method will fail with errors if:
    ///  1. the remaining space of the reader or the available space of the writer are less than
    ///     `size_of::<T>()` bytes, or
    ///  2. the memory operation fails due to an unresolvable page fault.
    ///
    /// # Panics
    ///
    /// This method will panic if:
    ///  1. the reader and the writer do not point to the same memory location, or
    ///  2. the memory location is not aligned on an `align_of::<T>()`-byte boundary.
    pub fn atomic_compare_exchange<T>(
        &self,
        reader: &VmReader,
        old_val: T,
        new_val: T,
    ) -> Result<(T, bool)>
    where
        T: PodAtomic + Eq,
    {
        if self.avail() < size_of::<T>() || reader.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        // The reader proves read permission; it must alias the writer exactly.
        assert_eq!(self.cursor.cast_const(), reader.cursor);

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // SAFETY:
        // 1. The cursor is either valid for reading and writing or in user space for
        //    `size_of::<T>()` bytes.
        // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
        let cur_val = unsafe { T::atomic_cmpxchg_fallible(cursor, old_val, new_val)? };

        // The exchange succeeded iff the value found in memory matched `old_val`.
        Ok((cur_val, old_val == cur_val))
    }

    /// Writes `len` zeros to the target memory.
    ///
    /// This method attempts to fill up to `len` bytes with zeros. If the available
    /// memory from the current cursor position is less than `len`, it will only fill
    /// the available space.
    ///
    /// If the memory write failed due to an unresolvable page fault, this method
    /// will return `Err` with the length set so far.
    pub fn fill_zeros(&mut self, len: usize) -> core::result::Result<usize, (Error, usize)> {
        let len_to_set = self.avail().min(len);
        if len_to_set == 0 {
            return Ok(0);
        }

        // SAFETY: The destination is a subset of the memory range specified by
        // the current writer, so it is either valid for writing or in user space.
        let set_len = unsafe { memset::<Fallible>(self.cursor, 0u8, len_to_set) };
        // Advance past whatever was actually written, even on partial failure.
        self.cursor = self.cursor.wrapping_add(set_len);

        if set_len < len_to_set {
            Err((Error::PageFault, set_len))
        } else {
            Ok(len_to_set)
        }
    }
}
894
895impl<Fallibility> VmWriter<'_, Fallibility> {
896    /// Returns the number of bytes for the available space.
897    pub fn avail(&self) -> usize {
898        self.end.addr() - self.cursor.addr()
899    }
900
901    /// Returns the cursor pointer, which refers to the address of the next byte to write.
902    pub fn cursor(&self) -> *mut u8 {
903        self.cursor
904    }
905
906    /// Returns if it has available space to write.
907    pub fn has_avail(&self) -> bool {
908        self.avail() > 0
909    }
910
911    /// Limits the length of available space.
912    ///
913    /// This method ensures the post condition of `self.avail() <= max_avail`.
914    pub fn limit(&mut self, max_avail: usize) -> &mut Self {
915        if max_avail < self.avail() {
916            self.end = self.cursor.wrapping_add(max_avail);
917        }
918
919        self
920    }
921
922    /// Skips the first `nbytes` bytes of data.
923    /// The length of available space is decreased accordingly.
924    ///
925    /// # Panics
926    ///
927    /// If `nbytes` is greater than `self.avail()`, then the method panics.
928    pub fn skip(&mut self, nbytes: usize) -> &mut Self {
929        assert!(nbytes <= self.avail());
930        self.cursor = self.cursor.wrapping_add(nbytes);
931
932        self
933    }
934}
935
936impl<'a> From<&'a mut [u8]> for VmWriter<'a, Infallible> {
937    fn from(slice: &'a mut [u8]) -> Self {
938        // SAFETY:
939        // - The memory range points to typed memory.
940        // - The validity requirements for write accesses are met because the pointer is converted
941        //   from a mutable reference that outlives the lifetime `'a`.
942        // - The type, i.e., the `u8` slice, is plain-old-data.
943        unsafe { Self::from_kernel_space(slice.as_mut_ptr(), slice.len()) }
944    }
945}
946
/// A marker trait for POD types that can be read or written with one instruction.
///
/// This trait is mostly a hint, since it's safe and can be implemented for _any_ POD type. If it
/// is implemented for a type that cannot be read or written with a single instruction, calling
/// `read_once`/`write_once` will lead to a failed compile-time assertion (see
/// `pod_once_impls::is_non_tearing`, which admits only 1-, 2-, 4-, and 8-byte types).
pub trait PodOnce: Pod {}
953
954#[cfg(any(
955    target_arch = "x86_64",
956    target_arch = "riscv64",
957    target_arch = "loongarch64"
958))]
959mod pod_once_impls {
960    use super::PodOnce;
961
962    impl PodOnce for u8 {}
963    impl PodOnce for u16 {}
964    impl PodOnce for u32 {}
965    impl PodOnce for u64 {}
966    impl PodOnce for usize {}
967    impl PodOnce for i8 {}
968    impl PodOnce for i16 {}
969    impl PodOnce for i32 {}
970    impl PodOnce for i64 {}
971    impl PodOnce for isize {}
972
973    /// Checks whether the memory operation created by `ptr::read_volatile` and
974    /// `ptr::write_volatile` doesn't tear.
975    ///
976    /// Note that the Rust documentation makes no such guarantee, and even the wording in the LLVM
977    /// LangRef is ambiguous. But this is unlikely to break in practice because the Linux kernel
978    /// also uses "volatile" semantics to implement `READ_ONCE`/`WRITE_ONCE`.
979    pub(super) const fn is_non_tearing<T>() -> bool {
980        let size = size_of::<T>();
981
982        size == 1 || size == 2 || size == 4 || size == 8
983    }
984}
985
/// A marker trait for POD types that can be read or written atomically.
pub trait PodAtomic: Pod {
    /// Atomically loads a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the loaded value.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
    ///   space for `size_of::<T>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
    ///
    /// [valid]: crate::mm::io#safety
    // NOTE(review): requiring *write* validity for a load may be intentional
    // (some architectures implement atomic loads via read-modify-write), but
    // this is not evident from the code here — confirm with the implementers.
    #[doc(hidden)]
    unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self>;

    /// Atomically compares and exchanges a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the previous value.
    /// `new_val` will be written if and only if the previous value is equal to `old_val`.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
    ///   space for `size_of::<T>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
    ///
    /// [valid]: crate::mm::io#safety
    #[doc(hidden)]
    unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self)
    -> Result<Self>;
}
1020
1021impl PodAtomic for u32 {
1022    unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self> {
1023        // SAFETY: The safety is upheld by the caller.
1024        let result = unsafe { __atomic_load_fallible(ptr) };
1025        if result == !0 {
1026            Err(Error::PageFault)
1027        } else {
1028            Ok(result as Self)
1029        }
1030    }
1031
1032    unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self) -> Result<u32> {
1033        // SAFETY: The safety is upheld by the caller.
1034        let result = unsafe { __atomic_cmpxchg_fallible(ptr, old_val, new_val) };
1035        if result == !0 {
1036            Err(Error::PageFault)
1037        } else {
1038            Ok(result as Self)
1039        }
1040    }
1041}