ostd/mm/io.rs
1// SPDX-License-Identifier: MPL-2.0
2
3//! Abstractions for reading and writing virtual memory (VM) objects.
4//!
5//! # Safety
6//!
7//! The core virtual memory (VM) access APIs provided by this module are [`VmReader`] and
8//! [`VmWriter`], which allow for writing to or reading from a region of memory _safely_.
9//! `VmReader` and `VmWriter` objects can be constructed from memory regions of either typed memory
10//! (e.g., `&[u8]`) or untyped memory (e.g., [`UFrame`]). Behind the scenes, `VmReader` and `VmWriter`
11//! must be constructed via their [`from_user_space`] and [`from_kernel_space`] methods, whose
12//! safety depends on whether the given memory regions are _valid_ or not.
13//!
14//! [`UFrame`]: crate::mm::UFrame
15//! [`from_user_space`]: `VmReader::from_user_space`
16//! [`from_kernel_space`]: `VmReader::from_kernel_space`
17//!
18//! Here is a list of conditions for memory regions to be considered valid:
19//!
20//! - The memory region as a whole must be either typed or untyped memory, not both typed and
21//! untyped.
22//!
23//! - If the memory region is typed, we require that:
24//! - the [validity requirements] from the official Rust documentation must be met, and
25//! - the type of the memory region (which must exist since the memory is typed) must be
26//! plain-old-data, so that the writer can fill it with arbitrary data safely.
27//!
28//! [validity requirements]: core::ptr#safety
29//!
30//! - If the memory region is untyped, we require that:
31//! - the underlying pages must remain alive while the validity requirements are in effect, and
32//! - the kernel must access the memory region using only the APIs provided in this module, but
33//! external accesses from hardware devices or user programs do not count.
34//!
35//! We have the last requirement for untyped memory to be valid because the safety interaction with
36//! other ways to access the memory region (e.g., atomic/volatile memory loads/stores) is not
37//! currently specified. This may be relaxed in the future, if appropriate and necessary.
38//!
39//! Note that data races on untyped memory are explicitly allowed (since pages can be mapped to
40//! user space, making it impossible to avoid data races). However, they may produce erroneous
41//! results, such as unexpected bytes being copied, but do not cause soundness problems.
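//!
//! # Examples
//!
//! As a minimal, illustrative sketch (not compiled as a doctest), typed kernel memory such as
//! byte slices can be accessed through infallible readers and writers:
//!
//! ```ignore
//! // With `VmReader` and `VmWriter` in scope:
//! let src = 0x1234_5678u32.to_ne_bytes();
//! let mut dst = [0u8; 4];
//!
//! let mut reader = VmReader::from(&src[..]);
//! let mut writer = VmWriter::from(&mut dst[..]);
//!
//! // Copy as many bytes as both sides allow (here, all four bytes).
//! let copied = writer.write(&mut reader);
//! assert_eq!(copied, 4);
//! assert_eq!(dst, src);
//! ```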
42
43use core::{marker::PhantomData, mem::MaybeUninit};
44
45use ostd_pod::Pod;
46
47use crate::{
48 Error,
49 arch::mm::{
50 __atomic_cmpxchg_fallible, __atomic_load_fallible, __memcpy_fallible, __memset_fallible,
51 },
52 mm::{
53 MAX_USERSPACE_VADDR,
54 kspace::{KERNEL_BASE_VADDR, KERNEL_END_VADDR},
55 },
56 prelude::*,
57};
58
59/// A trait that enables reading/writing data from/to a VM object,
60/// e.g., [`USegment`], [`Vec<UFrame>`] and [`UFrame`].
61///
62/// # Concurrency
63///
64/// The methods may be executed concurrently by multiple reader and writer
65/// threads. In this case, if the results of concurrent reads or writes need
66/// to be predictable or atomic, users should add extra synchronization
67/// mechanisms to ensure such properties.
68///
69/// [`USegment`]: crate::mm::USegment
70/// [`UFrame`]: crate::mm::UFrame
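///
/// # Examples
///
/// A rough sketch of writing and then reading back a value through a `VmIo` implementor such as
/// [`UFrame`] (illustrative only; not compiled as a doctest):
///
/// ```ignore
/// fn roundtrip(frame: &UFrame) -> Result<()> {
///     // Write a `u64` at byte offset 16, then read it back.
///     frame.write_val::<u64>(16, &0xdead_beef)?;
///     let val: u64 = frame.read_val(16)?;
///     assert_eq!(val, 0xdead_beef);
///     Ok(())
/// }
/// ```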
71pub trait VmIo {
72 /// Reads requested data at a specified offset into a given `VmWriter`.
73 ///
74 /// # No short reads
75 ///
76 /// On success, the `writer` must be written with the requested data
77 /// completely. If, for any reason, the requested data is only partially
78 /// available, then the method shall return an error.
79 fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()>;
80
81 /// Reads a specified number of bytes at a specified offset into a given buffer.
82 ///
83 /// # No short reads
84 ///
85 /// Similar to [`read`].
86 ///
87 /// [`read`]: VmIo::read
88 fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
89 let mut writer = VmWriter::from(buf).to_fallible();
90 self.read(offset, &mut writer)
91 }
92
93 /// Reads a value of a specified type at a specified offset.
94 fn read_val<T: Pod>(&self, offset: usize) -> Result<T> {
95 // Why not use `MaybeUninit` for a faster implementation?
96 //
97 // ```rust
98 // let mut val: MaybeUninit<T> = MaybeUninit::uninit();
99 // let writer = unsafe {
100 // VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>())
101 // };
102 // self.read(offset, &mut writer.to_fallible())?;
103 // Ok(unsafe { val.assume_init() })
104 // ```
105 //
106 // The above implementation avoids initializing `val` upfront,
107 // so it is more efficient than our actual implementation.
108 // Unfortunately, it is unsound.
109 // This is because the `read` method,
110 // which could be implemented outside OSTD and thus is untrusted,
111 // may not really initialize the bits of `val` at all!
112
113 let mut val = T::new_zeroed();
114 self.read_bytes(offset, val.as_mut_bytes())?;
115 Ok(val)
116 }
117
118 /// Reads a slice of a specified type at a specified offset.
119 ///
120 /// # No short reads
121 ///
122 /// Similar to [`read`].
123 ///
124 /// [`read`]: VmIo::read
125 fn read_slice<T: Pod>(&self, offset: usize, slice: &mut [T]) -> Result<()> {
126 let len_in_bytes = size_of_val(slice);
127 let ptr = slice as *mut [T] as *mut u8;
128 // SAFETY: the slice can be transmuted to a writable byte slice since the elements
129 // are all Plain-Old-Data (Pod) types.
130 let buf = unsafe { core::slice::from_raw_parts_mut(ptr, len_in_bytes) };
131 self.read_bytes(offset, buf)
132 }
133
134 /// Writes all data from a given `VmReader` at a specified offset.
135 ///
136 /// # No short writes
137 ///
138 /// On success, the data from the `reader` must be written to the VM object entirely.
139 /// If, for any reason, the input data can only be written partially,
140 /// then the method shall return an error.
141 fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()>;
142
143 /// Writes a specified number of bytes from a given buffer at a specified offset.
144 ///
145 /// # No short writes
146 ///
147 /// Similar to [`write`].
148 ///
149 /// [`write`]: VmIo::write
150 fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> {
151 let mut reader = VmReader::from(buf).to_fallible();
152 self.write(offset, &mut reader)
153 }
154
155 /// Writes a value of a specified type at a specified offset.
156 fn write_val<T: Pod>(&self, offset: usize, new_val: &T) -> Result<()> {
157 self.write_bytes(offset, new_val.as_bytes())?;
158 Ok(())
159 }
160
161 /// Writes a slice of a specified type at a specified offset.
162 ///
163 /// # No short writes
164 ///
165 /// Similar to [`write`].
166 ///
167 /// [`write`]: VmIo::write
168 fn write_slice<T: Pod>(&self, offset: usize, slice: &[T]) -> Result<()> {
169 let len_in_bytes = size_of_val(slice);
170 let ptr = slice as *const [T] as *const u8;
171 // SAFETY: the slice can be transmuted to a readable byte slice since the elements
172 // are all Plain-Old-Data (Pod) types.
173 let buf = unsafe { core::slice::from_raw_parts(ptr, len_in_bytes) };
174 self.write_bytes(offset, buf)
175 }
176}
177
178/// A trait that enables filling bytes (e.g., filling zeros) to a VM object.
179pub trait VmIoFill {
180 /// Writes `len` zeros at a specified offset.
181 ///
182 /// Unlike the methods in [`VmIo`], this method allows for short writes because `len` can be
183 /// effectively unbounded. However, if not all bytes can be written successfully, an `Err(_)`
184 /// will be returned with the error and the number of zeros that have been written thus far.
185 ///
186 /// # A slow, general implementation
187 ///
188 /// Assuming that [`VmIo`] has already been implemented for the type,
189 /// this method can be implemented in the following general way.
190 ///
191 /// ```rust
192 /// fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)> {
193 /// for i in 0..len {
194 /// match self.write_slice(offset + i, &[0u8]) {
195 /// Ok(()) => continue,
196 /// Err(err) => return Err((err, i)),
197 /// }
198 /// }
199 /// Ok(())
200 /// }
201 /// ```
202 ///
203 /// But we choose not to provide a general, default implementation
204 /// because doing so would make it too easy for a concrete type of `VmIoFill`
205 /// to settle with a slower implementation for such a performance-sensitive operation.
206 fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)>;
207}
208
209/// A trait that enables reading/writing data from/to a VM object using one non-tearing memory
210/// load/store.
211///
212/// See also [`VmIo`], which enables reading/writing data from/to a VM object without the guarantee
213/// of using one non-tearing memory load/store.
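///
/// # Examples
///
/// A rough sketch of polling a 4-byte status field shared with a device, where `obj` (a
/// `VmIoOnce` implementor), `STATUS_OFFSET`, `STATUS_BUSY`, and `DOORBELL_OFFSET` are
/// placeholders (illustrative only; not compiled as a doctest):
///
/// ```ignore
/// // Each poll is exactly one 4-byte load, so the device never observes a torn access.
/// while obj.read_once::<u32>(STATUS_OFFSET)? == STATUS_BUSY {
///     core::hint::spin_loop();
/// }
/// obj.write_once::<u32>(DOORBELL_OFFSET, &1)?;
/// ```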
214pub trait VmIoOnce {
215 /// Reads a value of the `PodOnce` type at the specified offset using one non-tearing memory
216 /// load.
217 ///
218 /// Except that the offset is specified explicitly, the semantics of this method are the same as
219 /// [`VmReader::read_once`].
220 fn read_once<T: PodOnce>(&self, offset: usize) -> Result<T>;
221
222 /// Writes a value of the `PodOnce` type at the specified offset using one non-tearing memory
223 /// store.
224 ///
225 /// Except that the offset is specified explicitly, the semantics of this method are the same as
226 /// [`VmWriter::write_once`].
227 fn write_once<T: PodOnce>(&self, offset: usize, new_val: &T) -> Result<()>;
228}
229
230/// A marker type used for [`VmReader`] and [`VmWriter`],
231/// representing whether reads or writes on the underlying memory region are fallible.
232pub enum Fallible {}
233
234/// A marker type used for [`VmReader`] and [`VmWriter`],
235/// representing whether reads or writes on the underlying memory region are infallible.
236pub enum Infallible {}
237
238/// Copies `len` bytes from `src` to `dst`.
239///
240/// # Safety
241///
242/// - `src` must be [valid] for reads of `len` bytes.
243/// - `dst` must be [valid] for writes of `len` bytes.
244///
245/// [valid]: crate::mm::io#safety
246unsafe fn memcpy(dst: *mut u8, src: *const u8, len: usize) {
247 // This method is implemented by calling `volatile_copy_memory`. Note that even with the
248 // "volatile" keyword, data races are still considered undefined behavior (UB) in both the Rust
249 // documentation and the C/C++ standards. In general, UB makes the behavior of the entire
250 // program unpredictable, usually due to compiler optimizations that assume the absence of UB.
251 // However, in this particular case, considering that the Linux kernel uses the "volatile"
252 // keyword to implement `READ_ONCE` and `WRITE_ONCE`, the compiler is extremely unlikely to
253 // break our code unless it also breaks the Linux kernel.
254 //
255 // For more details and future possibilities, see
256 // <https://github.com/asterinas/asterinas/pull/1001#discussion_r1667317406>.
257
258 // SAFETY: The safety is guaranteed by the safety preconditions and the explanation above.
259 unsafe { core::intrinsics::volatile_copy_memory(dst, src, len) };
260}
261
262/// Fills `len` bytes of memory at `dst` with the specified `value`.
263///
264/// # Safety
265///
266/// - `dst` must be [valid] for writes of `len` bytes.
267///
268/// [valid]: crate::mm::io#safety
269unsafe fn memset(dst: *mut u8, value: u8, len: usize) {
270 // SAFETY: The safety is guaranteed by the safety preconditions and the explanation above.
271 unsafe {
272 core::intrinsics::volatile_set_memory(dst, value, len);
273 }
274}
275
276/// Copies `len` bytes from `src` to `dst`.
277/// This function will stop copying early if it encounters an unresolvable page fault.
278///
279/// Returns the number of successfully copied bytes.
280///
281/// In the following cases, this method may cause unexpected bytes to be copied, but will not cause
282/// safety problems as long as the safety requirements are met:
283/// - The source and destination overlap.
284/// - The current context is not associated with valid user space (e.g., in a kernel thread).
285///
286/// # Safety
287///
288/// - `src` must either be [valid] for reads of `len` bytes or be in user space for `len` bytes.
289/// - `dst` must either be [valid] for writes of `len` bytes or be in user space for `len` bytes.
290///
291/// [valid]: crate::mm::io#safety
292unsafe fn memcpy_fallible(dst: *mut u8, src: *const u8, len: usize) -> usize {
293 // SAFETY: The safety is upheld by the caller.
294 let failed_bytes = unsafe { __memcpy_fallible(dst, src, len) };
295 len - failed_bytes
296}
297
298/// Fills `len` bytes of memory at `dst` with the specified `value`.
299/// This function will stop filling early if it encounters an unresolvable page fault.
300///
301/// Returns the number of successfully set bytes.
302///
303/// # Safety
304///
305/// - `dst` must either be [valid] for writes of `len` bytes or be in user space for `len` bytes.
306///
307/// [valid]: crate::mm::io#safety
308unsafe fn memset_fallible(dst: *mut u8, value: u8, len: usize) -> usize {
309 // SAFETY: The safety is upheld by the caller.
310 let failed_bytes = unsafe { __memset_fallible(dst, value, len) };
311 len - failed_bytes
312}
313
314/// Fallible memory read into a `VmWriter`.
315pub trait FallibleVmRead<F> {
316 /// Reads all data into the writer until one of the three conditions is met:
317 /// 1. The reader has no remaining data.
318 /// 2. The writer has no available space.
319 /// 3. The reader/writer encounters some error.
320 ///
321 /// On success, the number of bytes read is returned;
322 /// On error, both the error and the number of bytes read so far are returned.
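    ///
    /// # Examples
    ///
    /// A rough sketch of copying from a possibly faulting user-space reader into a kernel
    /// buffer, where `user_reader` is assumed to be a mutable `VmReader<'_, Fallible>`
    /// obtained elsewhere (not compiled as a doctest):
    ///
    /// ```ignore
    /// let mut buf = [0u8; 128];
    /// let mut writer = VmWriter::from(&mut buf[..]);
    /// match user_reader.read_fallible(&mut writer) {
    ///     Ok(copied) => { /* all `copied` bytes are now in `buf` */ }
    ///     Err((err, copied)) => { /* only the first `copied` bytes of `buf` are valid */ }
    /// }
    /// ```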
323 fn read_fallible(
324 &mut self,
325 writer: &mut VmWriter<'_, F>,
326 ) -> core::result::Result<usize, (Error, usize)>;
327}
328
329/// Fallible memory write from a `VmReader`.
330pub trait FallibleVmWrite<F> {
331 /// Writes all data from the reader until one of the three conditions is met:
332 /// 1. The reader has no remaining data.
333 /// 2. The writer has no available space.
334 /// 3. The reader/writer encounters some error.
335 ///
336 /// On success, the number of bytes written is returned;
337 /// On error, both the error and the number of bytes written so far are returned.
338 fn write_fallible(
339 &mut self,
340 reader: &mut VmReader<'_, F>,
341 ) -> core::result::Result<usize, (Error, usize)>;
342}
343
344/// `VmReader` is a reader for reading data from a contiguous range of memory.
345///
346/// The memory range read by `VmReader` can be in either kernel space or user space.
347/// When the operating range is in kernel space, the memory within that range
348/// is guaranteed to be valid, and the corresponding memory reads are infallible.
349/// When the operating range is in user space, it is ensured that the page table of
350/// the process creating the `VmReader` is active for the duration of `'a`,
351/// and the corresponding memory reads are considered fallible.
352///
353/// When reading together with a `VmWriter`, if one of them represents typed memory,
354/// it is guaranteed that the reading range of this reader and the writing range of
355/// the writer do not overlap.
356///
357/// NOTE: The overlap mentioned above is at both the virtual address level
358/// and the physical address level. There is no guarantee about the results of
359/// operations by `VmReader` and `VmWriter` on overlapping untyped addresses; it is
360/// the user's responsibility to handle this situation.
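///
/// # Examples
///
/// A rough sketch of reading a `u64` from a user-space address `uaddr: usize`, assuming that
/// the current task's page table maps that range (not compiled as a doctest):
///
/// ```ignore
/// // SAFETY: `uaddr..uaddr + 8` is in user space.
/// let mut reader = unsafe { VmReader::from_user_space(uaddr as *const u8, 8) };
/// let val: u64 = reader.read_val()?;
/// ```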
361pub struct VmReader<'a, Fallibility = Fallible> {
362 cursor: *const u8,
363 end: *const u8,
364 phantom: PhantomData<(&'a [u8], Fallibility)>,
365}
366
367// `Clone` can be implemented for `VmReader`
368// because it either points to untyped memory or represents immutable references.
369// Note that we cannot implement `Clone` for `VmWriter`
370// because it can represent mutable references, which must remain exclusive.
371impl<Fallibility> Clone for VmReader<'_, Fallibility> {
372 fn clone(&self) -> Self {
373 Self {
374 cursor: self.cursor,
375 end: self.end,
376 phantom: PhantomData,
377 }
378 }
379}
380
381macro_rules! impl_read_fallible {
382 ($reader_fallibility:ty, $writer_fallibility:ty) => {
383 impl<'a> FallibleVmRead<$writer_fallibility> for VmReader<'a, $reader_fallibility> {
384 fn read_fallible(
385 &mut self,
386 writer: &mut VmWriter<'_, $writer_fallibility>,
387 ) -> core::result::Result<usize, (Error, usize)> {
388 let copy_len = self.remain().min(writer.avail());
389 if copy_len == 0 {
390 return Ok(0);
391 }
392
393 // SAFETY: The source and destination are subsets of memory ranges specified by
394 // the reader and writer, so they are either valid for reading and writing or in
395 // user space.
396 let copied_len = unsafe { memcpy_fallible(writer.cursor, self.cursor, copy_len) };
397 self.cursor = self.cursor.wrapping_add(copied_len);
398 writer.cursor = writer.cursor.wrapping_add(copied_len);
399
400 if copied_len < copy_len {
401 Err((Error::PageFault, copied_len))
402 } else {
403 Ok(copied_len)
404 }
405 }
406 }
407 };
408}
409
410macro_rules! impl_write_fallible {
411 ($writer_fallibility:ty, $reader_fallibility:ty) => {
412 impl<'a> FallibleVmWrite<$reader_fallibility> for VmWriter<'a, $writer_fallibility> {
413 fn write_fallible(
414 &mut self,
415 reader: &mut VmReader<'_, $reader_fallibility>,
416 ) -> core::result::Result<usize, (Error, usize)> {
417 reader.read_fallible(self)
418 }
419 }
420 };
421}
422
423impl_read_fallible!(Fallible, Infallible);
424impl_read_fallible!(Fallible, Fallible);
425impl_read_fallible!(Infallible, Fallible);
426impl_write_fallible!(Fallible, Infallible);
427impl_write_fallible!(Fallible, Fallible);
428impl_write_fallible!(Infallible, Fallible);
429
430impl<'a> VmReader<'a, Infallible> {
431 /// Constructs a `VmReader` from a pointer and a length, which represents
432 /// a memory range in kernel space.
433 ///
434 /// # Safety
435 ///
436 /// `ptr` must be [valid] for reads of `len` bytes during the entire lifetime `'a`.
437 ///
438 /// [valid]: crate::mm::io#safety
439 pub unsafe fn from_kernel_space(ptr: *const u8, len: usize) -> Self {
440 // Rust is allowed to give the reference to a zero-sized object a very small address,
441 // falling out of the kernel virtual address space range.
442 // So when `len` is zero, we should not and need not check `ptr`.
443 debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
444 debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);
445
446 Self {
447 cursor: ptr,
448 end: ptr.wrapping_add(len),
449 phantom: PhantomData,
450 }
451 }
452
453 /// Reads all data into the writer until one of the two conditions is met:
454 /// 1. The reader has no remaining data.
455 /// 2. The writer has no available space.
456 ///
457 /// Returns the number of bytes read.
458 pub fn read(&mut self, writer: &mut VmWriter<'_, Infallible>) -> usize {
459 let copy_len = self.remain().min(writer.avail());
460 if copy_len == 0 {
461 return 0;
462 }
463
464 // SAFETY: The source and destination are subsets of memory ranges specified by the reader
465 // and writer, so they are valid for reading and writing.
466 unsafe { memcpy(writer.cursor, self.cursor, copy_len) };
467 self.cursor = self.cursor.wrapping_add(copy_len);
468 writer.cursor = writer.cursor.wrapping_add(copy_len);
469
470 copy_len
471 }
472
473 /// Reads a value of `Pod` type.
474 ///
475 /// If the length of the `Pod` type exceeds `self.remain()`,
476 /// this method will return `Err`.
477 pub fn read_val<T: Pod>(&mut self) -> Result<T> {
478 if self.remain() < size_of::<T>() {
479 return Err(Error::InvalidArgs);
480 }
481
482 let mut val = MaybeUninit::<T>::uninit();
483
484 // SAFETY:
485 // - The memory range points to typed memory.
486 // - The validity requirements for write accesses are met because the pointer is converted
487 // from a mutable pointer where the underlying storage outlives the temporary lifetime
488 // and no other Rust references to the same storage exist during the lifetime.
489 // - The type, i.e., `T`, is plain-old-data.
490 let mut writer =
491 unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
492 self.read(&mut writer);
493 debug_assert!(!writer.has_avail());
494
495 // SAFETY:
496 // - `self.read` has initialized all the bytes in `val`.
497 // - The type is plain-old-data.
498 let val_inited = unsafe { val.assume_init() };
499 Ok(val_inited)
500 }
501
502 /// Reads a value of the `PodOnce` type using one non-tearing memory load.
503 ///
504 /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
505 ///
506 /// This method will fail to compile if the `PodOnce` type is too large for the current
507 /// architecture and the operation would have to be torn into multiple memory loads.
508 ///
509 /// # Panics
510 ///
511 /// This method will panic if the current position of the reader does not meet the alignment
512 /// requirements of type `T`.
513 pub fn read_once<T: PodOnce>(&mut self) -> Result<T> {
514 if self.remain() < size_of::<T>() {
515 return Err(Error::InvalidArgs);
516 }
517
518 let cursor = self.cursor.cast::<T>();
519 assert!(cursor.is_aligned());
520
521 const { assert!(pod_once_impls::is_non_tearing::<T>()) };
522
523 // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
524 // and that the cursor is properly aligned with respect to the type `T`. All other safety
525 // requirements are the same as for `Self::read`.
526 let val = unsafe { cursor.read_volatile() };
527 self.cursor = self.cursor.wrapping_add(size_of::<T>());
528
529 Ok(val)
530 }
531
532 // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
533 // not provide an infallible implementation of `VmReader::atomic_load`.
534
535 /// Converts to a fallible reader.
536 pub fn to_fallible(self) -> VmReader<'a, Fallible> {
537 // It is safe to construct a fallible reader since an infallible reader covers the
538 // capabilities of a fallible reader.
539 VmReader {
540 cursor: self.cursor,
541 end: self.end,
542 phantom: PhantomData,
543 }
544 }
545}
546
547impl VmReader<'_, Fallible> {
548 /// Constructs a `VmReader` from a pointer and a length, which represents
549 /// a memory range in user space.
550 ///
551 /// # Safety
552 ///
553 /// The virtual address range `ptr..ptr + len` must be in user space.
554 pub unsafe fn from_user_space(ptr: *const u8, len: usize) -> Self {
555 debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);
556
557 Self {
558 cursor: ptr,
559 end: ptr.wrapping_add(len),
560 phantom: PhantomData,
561 }
562 }
563
564 /// Reads a value of `Pod` type.
565 ///
566 /// If the length of the `Pod` type exceeds `self.remain()`,
567 /// or the value cannot be read completely,
568 /// this method will return `Err`.
569 ///
570 /// If the memory read fails, this method will return `Err`,
571 /// and the current reader's cursor will remain pointing to
572 /// the original starting position.
573 pub fn read_val<T: Pod>(&mut self) -> Result<T> {
574 if self.remain() < size_of::<T>() {
575 return Err(Error::InvalidArgs);
576 }
577
578 let mut val = MaybeUninit::<T>::uninit();
579
580 // SAFETY:
581 // - The memory range points to typed memory.
582 // - The validity requirements for write accesses are met because the pointer is converted
583 // from a mutable pointer where the underlying storage outlives the temporary lifetime
584 // and no other Rust references to the same storage exist during the lifetime.
585 // - The type, i.e., `T`, is plain-old-data.
586 let mut writer =
587 unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
588 self.read_fallible(&mut writer)
589 .map_err(|(err, copied_len)| {
590 // The `copied_len` is the number of bytes read so far.
591 // So the `cursor` can be moved back to the original position.
592 self.cursor = self.cursor.wrapping_sub(copied_len);
593 err
594 })?;
595 debug_assert!(!writer.has_avail());
596
597 // SAFETY:
598 // - `self.read_fallible` has initialized all the bytes in `val`.
599 // - The type is plain-old-data.
600 let val_inited = unsafe { val.assume_init() };
601 Ok(val_inited)
602 }
603
604 /// Atomically loads a `PodAtomic` value.
605 ///
606 /// Regardless of whether it is successful, the cursor of the reader will not move.
607 ///
608 /// This method only guarantees the atomicity of the specific operation. There are no
609 /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
610 /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
611 /// specified in the C++11 memory model.
612 ///
613 /// This method will fail with errors if
614 /// 1. the remaining space of the reader is less than `size_of::<T>()` bytes, or
615 /// 2. the memory operation fails due to an unresolvable page fault.
616 ///
617 /// # Panics
618 ///
619 /// This method will panic if the memory location is not aligned on an `align_of::<T>()`-byte
620 /// boundary.
621 pub fn atomic_load<T: PodAtomic>(&self) -> Result<T> {
622 if self.remain() < size_of::<T>() {
623 return Err(Error::InvalidArgs);
624 }
625
626 let cursor = self.cursor.cast::<T>();
627 assert!(cursor.is_aligned());
628
629 // SAFETY:
630 // 1. The cursor is either valid for reading or in user space for `size_of::<T>()` bytes.
631 // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
632 unsafe { T::atomic_load_fallible(cursor) }
633 }
634}
635
636impl<Fallibility> VmReader<'_, Fallibility> {
637 /// Returns the number of bytes for the remaining data.
638 pub fn remain(&self) -> usize {
639 self.end.addr() - self.cursor.addr()
640 }
641
642 /// Returns the cursor pointer, which refers to the address of the next byte to read.
643 pub fn cursor(&self) -> *const u8 {
644 self.cursor
645 }
646
647 /// Returns if it has remaining data to read.
648 pub fn has_remain(&self) -> bool {
649 self.remain() > 0
650 }
651
652 /// Limits the length of remaining data.
653 ///
654 /// This method ensures the post condition of `self.remain() <= max_remain`.
655 pub fn limit(&mut self, max_remain: usize) -> &mut Self {
656 if max_remain < self.remain() {
657 self.end = self.cursor.wrapping_add(max_remain);
658 }
659
660 self
661 }
662
663 /// Skips the first `nbytes` bytes of data.
664 /// The length of remaining data is decreased accordingly.
665 ///
666 /// # Panics
667 ///
668 /// If `nbytes` is greater than `self.remain()`, then the method panics.
669 pub fn skip(&mut self, nbytes: usize) -> &mut Self {
670 assert!(nbytes <= self.remain());
671 self.cursor = self.cursor.wrapping_add(nbytes);
672
673 self
674 }
675}
676
677impl<'a> From<&'a [u8]> for VmReader<'a, Infallible> {
678 fn from(slice: &'a [u8]) -> Self {
679 // SAFETY:
680 // - The memory range points to typed memory.
681 // - The validity requirements for read accesses are met because the pointer is converted
682 // from an immutable reference that outlives the lifetime `'a`.
683 // - The type, i.e., the `u8` slice, is plain-old-data.
684 unsafe { Self::from_kernel_space(slice.as_ptr(), slice.len()) }
685 }
686}
687
688/// `VmWriter` is a writer for writing data to a contiguous range of memory.
689///
690/// The memory range written by `VmWriter` can be in either kernel space or user space.
691/// When the operating range is in kernel space, the memory within that range
692/// is guaranteed to be valid, and the corresponding memory writes are infallible.
693/// When the operating range is in user space, it is ensured that the page table of
694/// the process creating the `VmWriter` is active for the duration of `'a`,
695/// and the corresponding memory writes are considered fallible.
696///
697/// When writing together with a `VmReader`, if one of them represents typed memory,
698/// it is guaranteed that the writing range of this writer and the reading range of
699/// the reader do not overlap.
700///
701/// NOTE: The overlap mentioned above is at both the virtual address level
702/// and the physical address level. There is no guarantee about the results of
703/// operations by `VmReader` and `VmWriter` on overlapping untyped addresses; it is
704/// the user's responsibility to handle this situation.
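///
/// # Examples
///
/// A rough sketch of zeroing `len` bytes at a user-space address `uaddr: usize`, assuming that
/// the current task's page table maps that range (not compiled as a doctest):
///
/// ```ignore
/// // SAFETY: `uaddr..uaddr + len` is in user space.
/// let mut writer = unsafe { VmWriter::from_user_space(uaddr as *mut u8, len) };
/// match writer.fill_zeros(len) {
///     Ok(n) => { /* all `n` bytes were zeroed */ }
///     Err((err, n)) => { /* only the first `n` bytes were zeroed */ }
/// }
/// ```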
705pub struct VmWriter<'a, Fallibility = Fallible> {
706 cursor: *mut u8,
707 end: *mut u8,
708 phantom: PhantomData<(&'a mut [u8], Fallibility)>,
709}
710
711impl<'a> VmWriter<'a, Infallible> {
712 /// Constructs a `VmWriter` from a pointer and a length, which represents
713 /// a memory range in kernel space.
714 ///
715 /// # Safety
716 ///
717 /// `ptr` must be [valid] for writes of `len` bytes during the entire lifetime `'a`.
718 ///
719 /// [valid]: crate::mm::io#safety
720 pub unsafe fn from_kernel_space(ptr: *mut u8, len: usize) -> Self {
721 // When a zero-sized slice is cast to a pointer, the pointer may be null
722 // and may not reside in our kernel space range.
723 debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
724 debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);
725
726 Self {
727 cursor: ptr,
728 end: ptr.wrapping_add(len),
729 phantom: PhantomData,
730 }
731 }
732
733 /// Writes all data from the reader until one of the two conditions is met:
734 /// 1. The reader has no remaining data.
735 /// 2. The writer has no available space.
736 ///
737 /// Returns the number of bytes written.
738 pub fn write(&mut self, reader: &mut VmReader<'_, Infallible>) -> usize {
739 reader.read(self)
740 }
741
742 /// Writes a value of `Pod` type.
743 ///
744 /// If the length of the `Pod` type exceeds `self.avail()`,
745 /// this method will return `Err`.
746 pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
747 if self.avail() < size_of::<T>() {
748 return Err(Error::InvalidArgs);
749 }
750
751 let mut reader = VmReader::from(new_val.as_bytes());
752 self.write(&mut reader);
753 Ok(())
754 }
755
756 /// Writes a value of the `PodOnce` type using one non-tearing memory store.
757 ///
758 /// If the length of the `PodOnce` type exceeds `self.avail()`, this method will return `Err`.
759 ///
760 /// # Panics
761 ///
762 /// This method will panic if the current position of the writer does not meet the alignment
763 /// requirements of type `T`.
764 pub fn write_once<T: PodOnce>(&mut self, new_val: &T) -> Result<()> {
765 if self.avail() < size_of::<T>() {
766 return Err(Error::InvalidArgs);
767 }
768
769 let cursor = self.cursor.cast::<T>();
770 assert!(cursor.is_aligned());
771
772 const { assert!(pod_once_impls::is_non_tearing::<T>()) };
773
774 // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
775 // and that the cursor is properly aligned with respect to the type `T`. All other safety
776 // requirements are the same as for `Self::write`.
777 unsafe { cursor.write_volatile(*new_val) };
778 self.cursor = self.cursor.wrapping_add(size_of::<T>());
779
780 Ok(())
781 }
782
783 // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
784 // not provide an infallible implementation of `VmWriter::atomic_compare_exchange`.
785
786 /// Writes `len` zeros to the target memory.
787 ///
788 /// This method attempts to fill up to `len` bytes with zeros. If the available
789 /// memory from the current cursor position is less than `len`, it will only fill
790 /// the available space.
791 pub fn fill_zeros(&mut self, len: usize) -> usize {
792 let len_to_set = self.avail().min(len);
793 if len_to_set == 0 {
794 return 0;
795 }
796
797 // SAFETY: The destination is a subset of the memory range specified by
798 // the current writer, so it is valid for writing.
799 unsafe { memset(self.cursor, 0u8, len_to_set) };
800 self.cursor = self.cursor.wrapping_add(len_to_set);
801
802 len_to_set
803 }
804
805 /// Converts to a fallible writer.
806 pub fn to_fallible(self) -> VmWriter<'a, Fallible> {
807 // It is safe to construct a fallible writer since an infallible writer covers the
808 // capabilities of a fallible writer.
809 VmWriter {
810 cursor: self.cursor,
811 end: self.end,
812 phantom: PhantomData,
813 }
814 }
815}
816
817impl VmWriter<'_, Fallible> {
818 /// Constructs a `VmWriter` from a pointer and a length, which represents
819 /// a memory range in user space.
820 ///
821 /// The current context should be consistently associated with valid user space during the
822 /// entire lifetime `'a`. This is for correct semantics and is not a safety requirement.
823 ///
824 /// # Safety
825 ///
826 /// `ptr` must be in user space for `len` bytes.
827 pub unsafe fn from_user_space(ptr: *mut u8, len: usize) -> Self {
828 debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);
829
830 Self {
831 cursor: ptr,
832 end: ptr.wrapping_add(len),
833 phantom: PhantomData,
834 }
835 }
836
837 /// Writes a value of `Pod` type.
838 ///
839 /// If the length of the `Pod` type exceeds `self.avail()`,
840 /// or the value cannot be written completely,
841 /// this method will return `Err`.
842 ///
843 /// If the memory write fails, this method will return `Err`,
844 /// and the current writer's cursor will remain pointing to
845 /// the original starting position.
846 pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
847 if self.avail() < size_of::<T>() {
848 return Err(Error::InvalidArgs);
849 }
850
851 let mut reader = VmReader::from(new_val.as_bytes());
852 self.write_fallible(&mut reader)
853 .map_err(|(err, copied_len)| {
854 // The `copied_len` is the number of bytes written so far.
855 // So the `cursor` can be moved back to the original position.
856 self.cursor = self.cursor.wrapping_sub(copied_len);
857 err
858 })?;
859 Ok(())
860 }
861
862 /// Atomically compares and exchanges a `PodAtomic` value.
863 ///
864 /// This method compares `old_val` with the value pointed by `self` and, if they are equal,
865 /// updates it with `new_val`.
866 ///
867 /// The value that was previously in memory will be returned, along with a boolean denoting
868 /// whether the compare-and-exchange succeeds. The caller usually wants to retry if this
869 /// flag is false, passing the most recent value that was returned by this method.
870 ///
871 /// The caller is required to provide a reader which points to the exact same memory location
872 /// to ensure that reading from the memory is allowed.
873 ///
874 /// Regardless of whether it is successful, the cursors of the reader and writer will not move.
875 ///
876 /// This method only guarantees the atomicity of the specific operation. There are no
877 /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
878 /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
879 /// specified in the C++11 memory model.
880 ///
881 /// Since the operation does not involve memory locks, it can't prevent the [ABA
882 /// problem](https://en.wikipedia.org/wiki/ABA_problem).
883 ///
884 /// This method will fail with errors if:
885 /// 1. the remaining space of the reader or the available space of the writer are less than
886 /// `size_of::<T>()` bytes, or
887 /// 2. the memory operation fails due to an unresolvable page fault.
888 ///
889 /// # Panics
890 ///
891 /// This method will panic if:
892 /// 1. the reader and the writer do not point to the same memory location, or
893 /// 2. the memory location is not aligned on an `align_of::<T>()`-byte boundary.
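    ///
    /// # Examples
    ///
    /// A rough sketch of a typical retry loop that atomically increments a user-space `u32`,
    /// where `reader` and `writer` are assumed to point to the same user-space location (not
    /// compiled as a doctest):
    ///
    /// ```ignore
    /// let mut old = reader.atomic_load::<u32>()?;
    /// loop {
    ///     let (cur, ok) = writer.atomic_compare_exchange(&reader, old, old.wrapping_add(1))?;
    ///     if ok {
    ///         break;
    ///     }
    ///     // Someone else changed the value; retry with the value we just observed.
    ///     old = cur;
    /// }
    /// ```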
894 pub fn atomic_compare_exchange<T>(
895 &self,
896 reader: &VmReader,
897 old_val: T,
898 new_val: T,
899 ) -> Result<(T, bool)>
900 where
901 T: PodAtomic + Eq,
902 {
903 if self.avail() < size_of::<T>() || reader.remain() < size_of::<T>() {
904 return Err(Error::InvalidArgs);
905 }
906
907 assert_eq!(self.cursor.cast_const(), reader.cursor);
908
909 let cursor = self.cursor.cast::<T>();
910 assert!(cursor.is_aligned());
911
912 // SAFETY:
913 // 1. The cursor is either valid for reading and writing or in user space for
914 // `size_of::<T>()` bytes.
915 // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
916 let cur_val = unsafe { T::atomic_cmpxchg_fallible(cursor, old_val, new_val)? };
917
918 Ok((cur_val, old_val == cur_val))
919 }
920
921 /// Writes `len` zeros to the target memory.
922 ///
923 /// This method attempts to fill up to `len` bytes with zeros. If the available
924 /// memory from the current cursor position is less than `len`, it will only fill
925 /// the available space.
926 ///
927 /// If the memory write fails due to an unresolvable page fault, this method
928 /// will return `Err` with the error and the number of bytes set so far.
929 pub fn fill_zeros(&mut self, len: usize) -> core::result::Result<usize, (Error, usize)> {
930 let len_to_set = self.avail().min(len);
931 if len_to_set == 0 {
932 return Ok(0);
933 }
934
935 // SAFETY: The destination is a subset of the memory range specified by
936 // the current writer, so it is either valid for writing or in user space.
937 let set_len = unsafe { memset_fallible(self.cursor, 0u8, len_to_set) };
938 self.cursor = self.cursor.wrapping_add(set_len);
939
940 if set_len < len_to_set {
941 Err((Error::PageFault, set_len))
942 } else {
943 Ok(len_to_set)
944 }
945 }
946}
947
948impl<Fallibility> VmWriter<'_, Fallibility> {
949 /// Returns the number of bytes for the available space.
950 pub fn avail(&self) -> usize {
951 self.end.addr() - self.cursor.addr()
952 }
953
954 /// Returns the cursor pointer, which refers to the address of the next byte to write.
955 pub fn cursor(&self) -> *mut u8 {
956 self.cursor
957 }
958
959 /// Returns if it has available space to write.
960 pub fn has_avail(&self) -> bool {
961 self.avail() > 0
962 }
963
964 /// Limits the length of available space.
965 ///
966 /// This method ensures the post condition of `self.avail() <= max_avail`.
967 pub fn limit(&mut self, max_avail: usize) -> &mut Self {
968 if max_avail < self.avail() {
969 self.end = self.cursor.wrapping_add(max_avail);
970 }
971
972 self
973 }
974
975 /// Skips the first `nbytes` bytes of data.
976 /// The length of available space is decreased accordingly.
977 ///
978 /// # Panics
979 ///
980 /// If `nbytes` is greater than `self.avail()`, then the method panics.
981 pub fn skip(&mut self, nbytes: usize) -> &mut Self {
982 assert!(nbytes <= self.avail());
983 self.cursor = self.cursor.wrapping_add(nbytes);
984
985 self
986 }
987}
988
989impl<'a> From<&'a mut [u8]> for VmWriter<'a, Infallible> {
990 fn from(slice: &'a mut [u8]) -> Self {
991 // SAFETY:
992 // - The memory range points to typed memory.
993 // - The validity requirements for write accesses are met because the pointer is converted
994 // from a mutable reference that outlives the lifetime `'a`.
995 // - The type, i.e., the `u8` slice, is plain-old-data.
996 unsafe { Self::from_kernel_space(slice.as_mut_ptr(), slice.len()) }
997 }
998}
999
1000/// A marker trait for POD types that can be read or written with one instruction.
1001///
1002/// This trait is mostly a hint, since it's safe and can be implemented for _any_ POD type. If it
1003/// is implemented for a type that cannot be read or written with a single instruction, calling
1004/// `read_once`/`write_once` will lead to a failed compile-time assertion.
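///
/// # Examples
///
/// A rough sketch of opting a custom, fixed-size POD type into `PodOnce` (the type and its
/// `Pod` implementation are hypothetical; not compiled as a doctest):
///
/// ```ignore
/// #[repr(transparent)]
/// #[derive(Clone, Copy, Pod)]
/// struct MmioStatus(u32);
///
/// // A 4-byte value fits in a single load/store on the supported architectures,
/// // so the marker impl is appropriate.
/// impl PodOnce for MmioStatus {}
/// ```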
1005pub trait PodOnce: Pod {}
1006
1007#[cfg(any(
1008 target_arch = "x86_64",
1009 target_arch = "riscv64",
1010 target_arch = "loongarch64"
1011))]
1012mod pod_once_impls {
1013 use super::PodOnce;
1014
1015 impl PodOnce for u8 {}
1016 impl PodOnce for u16 {}
1017 impl PodOnce for u32 {}
1018 impl PodOnce for u64 {}
1019 impl PodOnce for usize {}
1020 impl PodOnce for i8 {}
1021 impl PodOnce for i16 {}
1022 impl PodOnce for i32 {}
1023 impl PodOnce for i64 {}
1024 impl PodOnce for isize {}
1025
1026 /// Checks whether the memory operations created by `ptr::read_volatile` and
1027 /// `ptr::write_volatile` do not tear.
1028 ///
1029 /// Note that the Rust documentation makes no such guarantee, and even the wording in the LLVM
1030 /// LangRef is ambiguous. But this is unlikely to break in practice because the Linux kernel
1031 /// also uses "volatile" semantics to implement `READ_ONCE`/`WRITE_ONCE`.
1032 pub(super) const fn is_non_tearing<T>() -> bool {
1033 let size = size_of::<T>();
1034
1035 size == 1 || size == 2 || size == 4 || size == 8
1036 }
1037}
1038
1039/// A marker trait for POD types that can be read or written atomically.
1040pub trait PodAtomic: Pod {
1041 /// Atomically loads a value.
1042 /// This function will return an error if it encounters an unresolvable page fault.
1043 ///
1044 /// Returns the loaded value.
1045 ///
1046 /// # Safety
1047 ///
1048 /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
1049 /// space for `size_of::<T>()` bytes.
1050 /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
1051 ///
1052 /// [valid]: crate::mm::io#safety
1053 #[doc(hidden)]
1054 unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self>;
1055
1056 /// Atomically compares and exchanges a value.
1057 /// This function will return an error if it encounters an unresolvable page fault.
1058 ///
1059 /// Returns the previous value.
1060 /// `new_val` will be written if and only if the previous value is equal to `old_val`.
1061 ///
1062 /// # Safety
1063 ///
1064 /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
1065 /// space for `size_of::<T>()` bytes.
1066 /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
1067 ///
1068 /// [valid]: crate::mm::io#safety
1069 #[doc(hidden)]
1070 unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self)
1071 -> Result<Self>;
1072}
1073
1074impl PodAtomic for u32 {
1075 unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self> {
1076 // SAFETY: The safety is upheld by the caller.
1077 let result = unsafe { __atomic_load_fallible(ptr) };
1078 if result == !0 {
1079 Err(Error::PageFault)
1080 } else {
1081 Ok(result as Self)
1082 }
1083 }
1084
1085 unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self) -> Result<Self> {
1086 // SAFETY: The safety is upheld by the caller.
1087 let result = unsafe { __atomic_cmpxchg_fallible(ptr, old_val, new_val) };
1088 if result == !0 {
1089 Err(Error::PageFault)
1090 } else {
1091 Ok(result as Self)
1092 }
1093 }
1094}