ostd/mm/io/mod.rs
1// SPDX-License-Identifier: MPL-2.0
2
3//! Abstractions for reading and writing virtual memory (VM) objects.
4//!
5//! # Safety
6//!
7//! The core virtual memory (VM) access APIs provided by this module are [`VmReader`] and
8//! [`VmWriter`], which allow for writing to or reading from a region of memory _safely_.
9//! `VmReader` and `VmWriter` objects can be constructed from memory regions of either typed memory
//! (e.g., `&[u8]`) or untyped memory (e.g., [`UFrame`]). Behind the scenes, `VmReader` and `VmWriter`
11//! must be constructed via their [`from_user_space`] and [`from_kernel_space`] methods, whose
12//! safety depends on whether the given memory regions are _valid_ or not.
13//!
14//! [`UFrame`]: crate::mm::UFrame
15//! [`from_user_space`]: `VmReader::from_user_space`
16//! [`from_kernel_space`]: `VmReader::from_kernel_space`
17//!
18//! Here is a list of conditions for memory regions to be considered valid:
19//!
20//! - The memory region as a whole must be either typed or untyped memory, not both typed and
21//! untyped.
22//!
23//! - If the memory region is typed, we require that:
24//! - the [validity requirements] from the official Rust documentation must be met, and
25//! - the type of the memory region (which must exist since the memory is typed) must be
26//! plain-old-data, so that the writer can fill it with arbitrary data safely.
27//!
28//! [validity requirements]: core::ptr#safety
29//!
30//! - If the memory region is untyped, we require that:
31//! - the underlying pages must remain alive while the validity requirements are in effect, and
32//! - the kernel must access the memory region using only the APIs provided in this module, but
33//! external accesses from hardware devices or user programs do not count.
34//!
35//! We have the last requirement for untyped memory to be valid because the safety interaction with
36//! other ways to access the memory region (e.g., atomic/volatile memory loads/stores) is not
//! currently specified. This may be relaxed in the future, if appropriate and necessary.
38//!
39//! Note that data races on untyped memory are explicitly allowed (since pages can be mapped to
40//! user space, making it impossible to avoid data races). However, they may produce erroneous
41//! results, such as unexpected bytes being copied, but do not cause soundness problems.
42
43pub(crate) mod copy;
44pub mod util;
45
46use core::{marker::PhantomData, mem::MaybeUninit};
47
48use ostd_pod::Pod;
49
50use self::copy::{memcpy, memset};
51use crate::{
52 Error,
53 arch::mm::{__atomic_cmpxchg_fallible, __atomic_load_fallible},
54 mm::{
55 MAX_USERSPACE_VADDR,
56 kspace::{KERNEL_BASE_VADDR, KERNEL_END_VADDR},
57 },
58 prelude::*,
59};
60
61/// A trait that enables reading/writing data from/to a VM object,
62/// e.g., [`USegment`], [`Vec<UFrame>`] and [`UFrame`].
63///
64/// # Concurrency
65///
66/// The methods may be executed by multiple concurrent reader and writer
67/// threads. In this case, if the results of concurrent reads or writes
68/// desire predictability or atomicity, the users should add extra mechanism
69/// for such properties.
70///
71/// [`USegment`]: crate::mm::USegment
72/// [`UFrame`]: crate::mm::UFrame
73pub trait VmIo {
74 /// Reads requested data at a specified offset into a given `VmWriter`.
75 ///
76 /// # No short reads
77 ///
78 /// On success, the `writer` must be written with the requested data
79 /// completely. If, for any reason, the requested data is only partially
80 /// available, then the method shall return an error.
81 fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()>;
82
83 /// Reads a specified number of bytes at a specified offset into a given buffer.
84 ///
85 /// # No short reads
86 ///
87 /// Similar to [`read`].
88 ///
89 /// [`read`]: VmIo::read
90 fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
91 let mut writer = VmWriter::from(buf).to_fallible();
92 self.read(offset, &mut writer)
93 }
94
95 /// Reads a value of a specified type at a specified offset.
96 fn read_val<T: Pod>(&self, offset: usize) -> Result<T> {
97 // Why not use `MaybeUninit` for a faster implementation?
98 //
99 // ```rust
100 // let mut val: MaybeUninit<T> = MaybeUninit::uninit();
101 // let writer = unsafe {
102 // VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>())
103 // };
104 // self.read(offset, &mut writer.to_fallible())?;
105 // Ok(unsafe { val.assume_init() })
106 // ```
107 //
108 // The above implementation avoids initializing `val` upfront,
109 // so it is more efficient than our actual implementation.
110 // Unfortunately, it is unsound.
111 // This is because the `read` method,
112 // which could be implemented outside OSTD and thus is untrusted,
113 // may not really initialize the bits of `val` at all!
114
115 let mut val = T::new_zeroed();
116 self.read_bytes(offset, val.as_mut_bytes())?;
117 Ok(val)
118 }
119
120 /// Reads a slice of a specified type at a specified offset.
121 ///
122 /// # No short reads
123 ///
124 /// Similar to [`read`].
125 ///
126 /// [`read`]: VmIo::read
127 fn read_slice<T: Pod>(&self, offset: usize, slice: &mut [T]) -> Result<()> {
128 let len_in_bytes = size_of_val(slice);
129 let ptr = slice as *mut [T] as *mut u8;
130 // SAFETY: the slice can be transmuted to a writable byte slice since the elements
131 // are all Plain-Old-Data (Pod) types.
132 let buf = unsafe { core::slice::from_raw_parts_mut(ptr, len_in_bytes) };
133 self.read_bytes(offset, buf)
134 }
135
136 /// Writes all data from a given `VmReader` at a specified offset.
137 ///
138 /// # No short writes
139 ///
140 /// On success, the data from the `reader` must be read to the VM object entirely.
141 /// If, for any reason, the input data can only be written partially,
142 /// then the method shall return an error.
143 fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()>;
144
145 /// Writes a specified number of bytes from a given buffer at a specified offset.
146 ///
147 /// # No short writes
148 ///
149 /// Similar to [`write`].
150 ///
151 /// [`write`]: VmIo::write
152 fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> {
153 let mut reader = VmReader::from(buf).to_fallible();
154 self.write(offset, &mut reader)
155 }
156
157 /// Writes a value of a specified type at a specified offset.
158 fn write_val<T: Pod>(&self, offset: usize, new_val: &T) -> Result<()> {
159 self.write_bytes(offset, new_val.as_bytes())?;
160 Ok(())
161 }
162
163 /// Writes a slice of a specified type at a specified offset.
164 ///
165 /// # No short write
166 ///
167 /// Similar to [`write`].
168 ///
169 /// [`write`]: VmIo::write
170 fn write_slice<T: Pod>(&self, offset: usize, slice: &[T]) -> Result<()> {
171 let len_in_bytes = size_of_val(slice);
172 let ptr = slice as *const [T] as *const u8;
173 // SAFETY: the slice can be transmuted to a readable byte slice since the elements
174 // are all Plain-Old-Data (Pod) types.
175 let buf = unsafe { core::slice::from_raw_parts(ptr, len_in_bytes) };
176 self.write_bytes(offset, buf)
177 }
178}
179
/// A trait that enables filling bytes (e.g., filling zeros) to a VM object.
pub trait VmIoFill {
    /// Writes `len` zeros at a specified offset.
    ///
    /// Unlike the methods in [`VmIo`], this method allows for short writes because `len` can be
    /// effectively unbounded. However, if not all bytes can be written successfully, an `Err(_)`
    /// will be returned carrying both the error and the number of zeros that have been written
    /// thus far.
    ///
    /// # A slow, general implementation
    ///
    /// Suppose that [`VmIo`] has already been implemented for the type;
    /// this method can then be implemented in the following general way.
    ///
    /// ```rust
    /// fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)> {
    ///     for i in 0..len {
    ///         match self.write_slice(offset + i, &[0u8]) {
    ///             Ok(()) => continue,
    ///             Err(err) => return Err((err, i)),
    ///         }
    ///     }
    ///     Ok(())
    /// }
    /// ```
    ///
    /// But we choose not to provide a general, default implementation
    /// because doing so would make it too easy for a concrete type of `VmIoFill`
    /// to settle with a slower implementation for such a performance-sensitive operation.
    fn fill_zeros(&self, offset: usize, len: usize) -> core::result::Result<(), (Error, usize)>;
}
210
/// A trait that enables reading/writing data from/to a VM object using one non-tearing memory
/// load/store.
///
/// See also [`VmIo`], which enables reading/writing data from/to a VM object without the guarantee
/// of using one non-tearing memory load/store.
pub trait VmIoOnce {
    /// Reads a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// load.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method are the same
    /// as [`VmReader::read_once`].
    fn read_once<T: PodOnce>(&self, offset: usize) -> Result<T>;

    /// Writes a value of the `PodOnce` type at the specified offset using one non-tearing memory
    /// store.
    ///
    /// Except that the offset is specified explicitly, the semantics of this method are the same
    /// as [`VmWriter::write_once`].
    fn write_once<T: PodOnce>(&self, offset: usize, new_val: &T) -> Result<()>;
}
231
/// A marker type used for _fallible_ memory,
/// where memory access _might_ trigger page faults.
///
/// The most prominent example of fallible memory is user virtual memory.
///
/// By definition, infallible memory is a subset of fallible memory.
/// As a consequence, any code that intends to work with fallible memory
/// should work for both user virtual memory and kernel virtual memory.
///
/// [`VmReader`] and [`VmWriter`] types use this marker type
/// to indicate the property of the underlying memory.
///
/// This enum has no variants, so it is uninhabited and exists purely at the type level.
pub enum Fallible {}
244
/// A marker type used for _infallible_ memory,
/// where memory access is valid and won't trigger page faults.
///
/// The most prominent example of infallible memory is kernel virtual memory
/// (at least for the part where Rust code and data reside).
///
/// [`VmReader`] and [`VmWriter`] types use this marker type
/// to indicate the property of the underlying memory.
///
/// This enum has no variants, so it is uninhabited and exists purely at the type level.
pub enum Infallible {}
254
/// A marker type for I/O memory regions.
///
/// This marker is used by [`memcpy`] and [`memset`]
/// to indicate that a source or destination operand
/// resides in I/O memory (MMIO).
///
/// Unlike [`Fallible`] and [`Infallible`],
/// `Io` cannot statically determine
/// whether a memory access will fault:
/// MMIO fallibility is platform-dependent.
/// For example, on Intel TDX
/// every MMIO access triggers a #VE exception,
/// whereas on a non-CVM x86 host
/// the same access completes without faulting.
///
/// This enum has no variants, so it is uninhabited and exists purely at the type level.
pub(crate) enum Io {}
270
/// Fallible memory read from a `VmWriter`.
///
/// The type parameter `F` is the fallibility marker (e.g., [`Fallible`] or [`Infallible`])
/// of the destination [`VmWriter`].
pub trait FallibleVmRead<F> {
    /// Reads all data into the writer until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes read is returned;
    /// On error, both the error and the number of bytes read so far are returned.
    fn read_fallible(
        &mut self,
        writer: &mut VmWriter<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
285
/// Fallible memory write from a `VmReader`.
///
/// The type parameter `F` is the fallibility marker (e.g., [`Fallible`] or [`Infallible`])
/// of the source [`VmReader`].
pub trait FallibleVmWrite<F> {
    /// Writes all data from the reader until one of the three conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    /// 3. The reader/writer encounters some error.
    ///
    /// On success, the number of bytes written is returned;
    /// On error, both the error and the number of bytes written so far are returned.
    fn write_fallible(
        &mut self,
        reader: &mut VmReader<'_, F>,
    ) -> core::result::Result<usize, (Error, usize)>;
}
300
/// `VmReader` is a reader for reading data from a contiguous range of memory.
///
/// The memory range read by `VmReader` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory reads are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmReader` is active for the duration of `'a`,
/// and the corresponding memory reads are considered fallible.
///
/// When performing reads with a `VmWriter`, if one of them represents typed memory,
/// it can be ensured that the reading range of this reader and the writing range of
/// the writer do not overlap.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and the physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmReader<'a, Fallibility = Fallible> {
    // The address of the next byte to read.
    cursor: *const u8,
    // The end (exclusive) of the readable range.
    end: *const u8,
    phantom: PhantomData<(&'a [u8], Fallibility)>,
}
323
324// `Clone` can be implemented for `VmReader`
325// because it either points to untyped memory or represents immutable references.
326// Note that we cannot implement `Clone` for `VmWriter`
327// because it can represent mutable references, which must remain exclusive.
328impl<Fallibility> Clone for VmReader<'_, Fallibility> {
329 fn clone(&self) -> Self {
330 Self {
331 cursor: self.cursor,
332 end: self.end,
333 phantom: PhantomData,
334 }
335 }
336}
337
// Implements `FallibleVmRead<$writer_fallibility>` for `VmReader<'_, $reader_fallibility>`.
//
// - `$reader_fallibility`: the fallibility marker of the reader being implemented.
// - `$writer_fallibility`: the fallibility marker of the destination writer.
macro_rules! impl_read_fallible {
    ($reader_fallibility:ty, $writer_fallibility:ty) => {
        impl<'a> FallibleVmRead<$writer_fallibility> for VmReader<'a, $reader_fallibility> {
            fn read_fallible(
                &mut self,
                writer: &mut VmWriter<'_, $writer_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                // Copy at most as many bytes as the reader can supply and the
                // writer can accept.
                let copy_len = self.remain().min(writer.avail());
                if copy_len == 0 {
                    return Ok(0);
                }

                // SAFETY: The source and destination are subsets of memory ranges specified by
                // the reader and writer, so they are either valid for reading and writing or in
                // user space.
                let copied_len = unsafe {
                    memcpy::<$writer_fallibility, $reader_fallibility>(
                        writer.cursor,
                        self.cursor,
                        copy_len,
                    )
                };
                // Advance both cursors past the bytes that were actually copied,
                // which may be fewer than requested if the copy stopped early.
                self.cursor = self.cursor.wrapping_add(copied_len);
                writer.cursor = writer.cursor.wrapping_add(copied_len);

                if copied_len < copy_len {
                    // A short copy is reported as a page fault, along with the
                    // number of bytes copied before the fault.
                    Err((Error::PageFault, copied_len))
                } else {
                    Ok(copied_len)
                }
            }
        }
    };
}
372
// Implements `FallibleVmWrite<$reader_fallibility>` for `VmWriter<'_, $writer_fallibility>`.
//
// - `$writer_fallibility`: the fallibility marker of the writer being implemented.
// - `$reader_fallibility`: the fallibility marker of the source reader.
macro_rules! impl_write_fallible {
    ($writer_fallibility:ty, $reader_fallibility:ty) => {
        impl<'a> FallibleVmWrite<$reader_fallibility> for VmWriter<'a, $writer_fallibility> {
            fn write_fallible(
                &mut self,
                reader: &mut VmReader<'_, $reader_fallibility>,
            ) -> core::result::Result<usize, (Error, usize)> {
                // Writing from a reader mirrors reading into a writer,
                // so simply delegate to `FallibleVmRead::read_fallible`.
                reader.read_fallible(self)
            }
        }
    };
}
385
// Implement the fallible read/write traits for every reader/writer fallibility
// combination in which at least one side is `Fallible`. The all-`Infallible`
// combination is intentionally absent: it is served by the infallible
// `VmReader::read` and `VmWriter::write` methods instead.
impl_read_fallible!(Fallible, Infallible);
impl_read_fallible!(Fallible, Fallible);
impl_read_fallible!(Infallible, Fallible);
impl_write_fallible!(Fallible, Infallible);
impl_write_fallible!(Fallible, Fallible);
impl_write_fallible!(Infallible, Fallible);
392
impl<'a> VmReader<'a, Infallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in kernel space.
    ///
    /// # Safety
    ///
    /// `ptr` must be [valid] for reads of `len` bytes during the entire lifetime `'a`.
    ///
    /// [valid]: crate::mm::io#safety
    pub unsafe fn from_kernel_space(ptr: *const u8, len: usize) -> Self {
        // Rust is allowed to give the reference to a zero-sized object a very small address,
        // falling out of the kernel virtual address space range.
        // So when `len` is zero, we should not and need not check `ptr`.
        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads all data into the writer until one of the two conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    ///
    /// Returns the number of bytes read.
    pub fn read(&mut self, writer: &mut VmWriter<'_, Infallible>) -> usize {
        let copy_len = self.remain().min(writer.avail());
        if copy_len == 0 {
            return 0;
        }

        // SAFETY: The source and destination are subsets of memory ranges specified by the reader
        // and writer, so they are valid for reading and writing.
        unsafe { memcpy::<Infallible, Infallible>(writer.cursor, self.cursor, copy_len) };
        // Both sides are infallible, so the full `copy_len` bytes were copied.
        self.cursor = self.cursor.wrapping_add(copy_len);
        writer.cursor = writer.cursor.wrapping_add(copy_len);

        copy_len
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// this method will return `Err`.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        self.read(&mut writer);
        // The remaining-length check above guarantees the writer is filled completely.
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Reads a value of the `PodOnce` type using one non-tearing memory load.
    ///
    /// If the length of the `PodOnce` type exceeds `self.remain()`, this method will return `Err`.
    ///
    /// This method will not compile if the `Pod` type is too large for the current architecture
    /// and the load would have to be torn into multiple memory loads.
    ///
    /// # Panics
    ///
    /// This method will panic if the current position of the reader does not meet the alignment
    /// requirements of type `T`.
    pub fn read_once<T: PodOnce>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // Checked at compile time: a `T` this size can be loaded without tearing.
        const { assert!(pod_once_impls::is_non_tearing::<T>()) };

        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
        // and that the cursor is properly aligned with respect to the type `T`. All other safety
        // requirements are the same as for `Self::read`.
        let val = unsafe { cursor.read_volatile() };
        self.cursor = self.cursor.wrapping_add(size_of::<T>());

        Ok(val)
    }

    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
    // not provide an infallible implementation of `VmReader::atomic_load`.

    /// Converts to a fallible reader.
    pub fn to_fallible(self) -> VmReader<'a, Fallible> {
        // It is safe to construct a fallible reader since an infallible reader covers the
        // capabilities of a fallible reader.
        VmReader {
            cursor: self.cursor,
            end: self.end,
            phantom: PhantomData,
        }
    }
}
509
impl VmReader<'_, Fallible> {
    /// Constructs a `VmReader` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// # Safety
    ///
    /// The virtual address range `ptr..ptr + len` must be in user space.
    pub unsafe fn from_user_space(ptr: *const u8, len: usize) -> Self {
        // In debug builds, catch ranges that extend beyond the user address space early.
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Reads a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.remain()`,
    /// or the value cannot be read completely,
    /// this method will return `Err`.
    ///
    /// If the memory read failed, this method will return `Err`
    /// and the current reader's cursor remains pointing to
    /// the original starting position.
    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let mut val = MaybeUninit::<T>::uninit();

        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for write accesses are met because the pointer is converted
        //   from a mutable pointer where the underlying storage outlives the temporary lifetime
        //   and no other Rust references to the same storage exist during the lifetime.
        // - The type, i.e., `T`, is plain-old-data.
        let mut writer =
            unsafe { VmWriter::from_kernel_space(val.as_mut_ptr().cast(), size_of::<T>()) };
        self.read_fallible(&mut writer)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes read so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        // On success, the writer must have been filled completely.
        debug_assert!(!writer.has_avail());

        // SAFETY:
        // - `self.read_fallible` has initialized all the bytes in `val`.
        // - The type is plain-old-data.
        let val_inited = unsafe { val.assume_init() };
        Ok(val_inited)
    }

    /// Atomically loads a `PodAtomic` value.
    ///
    /// Regardless of whether it is successful, the cursor of the reader will not move.
    ///
    /// This method only guarantees the atomicity of the specific operation. There are no
    /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
    /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
    /// specified in the C++11 memory model.
    ///
    /// This method will fail with errors if
    /// 1. the remaining space of the reader is less than `size_of::<T>()` bytes, or
    /// 2. the memory operation fails due to an unresolvable page fault.
    ///
    /// # Panics
    ///
    /// This method will panic if the memory location is not aligned on an `align_of::<T>()`-byte
    /// boundary.
    pub fn atomic_load<T: PodAtomic>(&self) -> Result<T> {
        if self.remain() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // SAFETY:
        // 1. The cursor is either valid for reading or in user space for `size_of::<T>()` bytes.
        // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
        unsafe { T::atomic_load_fallible(cursor) }
    }
}
598
599impl<Fallibility> VmReader<'_, Fallibility> {
600 /// Returns the number of bytes for the remaining data.
601 pub fn remain(&self) -> usize {
602 self.end.addr() - self.cursor.addr()
603 }
604
605 /// Returns the cursor pointer, which refers to the address of the next byte to read.
606 pub fn cursor(&self) -> *const u8 {
607 self.cursor
608 }
609
610 /// Returns if it has remaining data to read.
611 pub fn has_remain(&self) -> bool {
612 self.remain() > 0
613 }
614
615 /// Limits the length of remaining data.
616 ///
617 /// This method ensures the post condition of `self.remain() <= max_remain`.
618 pub fn limit(&mut self, max_remain: usize) -> &mut Self {
619 if max_remain < self.remain() {
620 self.end = self.cursor.wrapping_add(max_remain);
621 }
622
623 self
624 }
625
626 /// Skips the first `nbytes` bytes of data.
627 /// The length of remaining data is decreased accordingly.
628 ///
629 /// # Panics
630 ///
631 /// If `nbytes` is greater than `self.remain()`, then the method panics.
632 pub fn skip(&mut self, nbytes: usize) -> &mut Self {
633 assert!(nbytes <= self.remain());
634 self.cursor = self.cursor.wrapping_add(nbytes);
635
636 self
637 }
638}
639
impl<'a> From<&'a [u8]> for VmReader<'a, Infallible> {
    // Wraps an immutable byte slice in an infallible reader over its storage.
    fn from(slice: &'a [u8]) -> Self {
        // SAFETY:
        // - The memory range points to typed memory.
        // - The validity requirements for read accesses are met because the pointer is converted
        //   from an immutable reference that outlives the lifetime `'a`.
        // - The type, i.e., the `u8` slice, is plain-old-data.
        unsafe { Self::from_kernel_space(slice.as_ptr(), slice.len()) }
    }
}
650
/// `VmWriter` is a writer for writing data to a contiguous range of memory.
///
/// The memory range written by `VmWriter` can be in either kernel space or user space.
/// When the operating range is in kernel space, the memory within that range
/// is guaranteed to be valid, and the corresponding memory writes are infallible.
/// When the operating range is in user space, it is ensured that the page table of
/// the process creating the `VmWriter` is active for the duration of `'a`,
/// and the corresponding memory writes are considered fallible.
///
/// When performing writes with a `VmReader`, if one of them represents typed memory,
/// it can be ensured that the writing range of this writer and the reading range of
/// the reader do not overlap.
///
/// NOTE: The overlap mentioned above is at both the virtual address level
/// and the physical address level. There is no guarantee for the operation results
/// of `VmReader` and `VmWriter` in overlapping untyped addresses, and it is
/// the user's responsibility to handle this situation.
pub struct VmWriter<'a, Fallibility = Fallible> {
    // The address of the next byte to write.
    cursor: *mut u8,
    // The end (exclusive) of the writable range.
    end: *mut u8,
    phantom: PhantomData<(&'a mut [u8], Fallibility)>,
}
673
impl<'a> VmWriter<'a, Infallible> {
    /// Constructs a `VmWriter` from a pointer and a length, which represents
    /// a memory range in kernel space.
    ///
    /// # Safety
    ///
    /// `ptr` must be [valid] for writes of `len` bytes during the entire lifetime `'a`.
    ///
    /// [valid]: crate::mm::io#safety
    pub unsafe fn from_kernel_space(ptr: *mut u8, len: usize) -> Self {
        // If casting a zero sized slice to a pointer, the pointer may be null
        // and does not reside in our kernel space range.
        // So when `len` is zero, we should not and need not check `ptr`.
        debug_assert!(len == 0 || KERNEL_BASE_VADDR <= ptr.addr());
        debug_assert!(len == 0 || ptr.addr().checked_add(len).unwrap() <= KERNEL_END_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }

    /// Writes all data from the reader until one of the two conditions is met:
    /// 1. The reader has no remaining data.
    /// 2. The writer has no available space.
    ///
    /// Returns the number of bytes written.
    pub fn write(&mut self, reader: &mut VmReader<'_, Infallible>) -> usize {
        // Writing is the mirror image of reading; delegate to the reader.
        reader.read(self)
    }

    /// Writes a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.avail()`,
    /// this method will return `Err`.
    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
        if self.avail() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        // View the value's bytes through an infallible reader and copy them in.
        let mut reader = VmReader::from(new_val.as_bytes());
        self.write(&mut reader);
        Ok(())
    }

    /// Writes a value of the `PodOnce` type using one non-tearing memory store.
    ///
    /// If the length of the `PodOnce` type exceeds `self.avail()`, this method will return `Err`.
    ///
    /// This method will not compile if the `Pod` type is too large for the current architecture
    /// and the store would have to be torn into multiple memory stores.
    ///
    /// # Panics
    ///
    /// This method will panic if the current position of the writer does not meet the alignment
    /// requirements of type `T`.
    pub fn write_once<T: PodOnce>(&mut self, new_val: &T) -> Result<()> {
        if self.avail() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        let cursor = self.cursor.cast::<T>();
        assert!(cursor.is_aligned());

        // Checked at compile time: a `T` this size can be stored without tearing.
        const { assert!(pod_once_impls::is_non_tearing::<T>()) };

        // SAFETY: We have checked that the number of bytes remaining is at least the size of `T`
        // and that the cursor is properly aligned with respect to the type `T`. All other safety
        // requirements are the same as for `Self::write`.
        unsafe { cursor.write_volatile(*new_val) };
        self.cursor = self.cursor.wrapping_add(size_of::<T>());

        Ok(())
    }

    // Currently, there are no volatile atomic operations in `core::intrinsics`. Therefore, we do
    // not provide an infallible implementation of `VmWriter::atomic_compare_exchange`.

    /// Writes `len` zeros to the target memory.
    ///
    /// This method attempts to fill up to `len` bytes with zeros. If the available
    /// memory from the current cursor position is less than `len`, it will only fill
    /// the available space.
    pub fn fill_zeros(&mut self, len: usize) -> usize {
        let len_to_set = self.avail().min(len);
        if len_to_set == 0 {
            return 0;
        }

        // SAFETY: The destination is a subset of the memory range specified by
        // the current writer, so it is valid for writing.
        unsafe { memset::<Infallible>(self.cursor, 0u8, len_to_set) };
        self.cursor = self.cursor.wrapping_add(len_to_set);

        len_to_set
    }

    /// Converts to a fallible writer.
    pub fn to_fallible(self) -> VmWriter<'a, Fallible> {
        // It is safe to construct a fallible writer since an infallible writer covers the
        // capabilities of a fallible writer.
        VmWriter {
            cursor: self.cursor,
            end: self.end,
            phantom: PhantomData,
        }
    }
}
779
780impl VmWriter<'_, Fallible> {
    /// Constructs a `VmWriter` from a pointer and a length, which represents
    /// a memory range in user space.
    ///
    /// The current context should be consistently associated with valid user space during the
    /// entire lifetime `'a`. This is for correct semantics and is not a safety requirement.
    ///
    /// # Safety
    ///
    /// `ptr` must be in user space for `len` bytes.
    pub unsafe fn from_user_space(ptr: *mut u8, len: usize) -> Self {
        // In debug builds, catch ranges that extend beyond the user address space early.
        debug_assert!(ptr.addr().checked_add(len).unwrap() <= MAX_USERSPACE_VADDR);

        Self {
            cursor: ptr,
            end: ptr.wrapping_add(len),
            phantom: PhantomData,
        }
    }
799
    /// Writes a value of `Pod` type.
    ///
    /// If the length of the `Pod` type exceeds `self.avail()`,
    /// or the value cannot be written completely,
    /// this method will return `Err`.
    ///
    /// If the memory write failed, this method will return `Err`
    /// and the current writer's cursor remains pointing to
    /// the original starting position.
    pub fn write_val<T: Pod>(&mut self, new_val: &T) -> Result<()> {
        if self.avail() < size_of::<T>() {
            return Err(Error::InvalidArgs);
        }

        // View the value's bytes through an infallible reader and copy them in.
        let mut reader = VmReader::from(new_val.as_bytes());
        self.write_fallible(&mut reader)
            .map_err(|(err, copied_len)| {
                // The `copied_len` is the number of bytes written so far.
                // So the `cursor` can be moved back to the original position.
                self.cursor = self.cursor.wrapping_sub(copied_len);
                err
            })?;
        Ok(())
    }
824
825 /// Atomically compares and exchanges a `PodAtomic` value.
826 ///
827 /// This method compares `old_val` with the value pointed by `self` and, if they are equal,
828 /// updates it with `new_val`.
829 ///
830 /// The value that was previously in memory will be returned, along with a boolean denoting
831 /// whether the compare-and-exchange succeeds. The caller usually wants to retry if this
832 /// flag is false, passing the most recent value that was returned by this method.
833 ///
834 /// The caller is required to provide a reader which points to the exact same memory location
835 /// to ensure that reading from the memory is allowed.
836 ///
837 /// Regardless of whether it is successful, the cursors of the reader and writer will not move.
838 ///
839 /// This method only guarantees the atomicity of the specific operation. There are no
840 /// synchronization constraints on other memory accesses. This aligns with the [Relaxed
841 /// ordering](https://en.cppreference.com/w/cpp/atomic/memory_order.html#Relaxed_ordering)
842 /// specified in the C++11 memory model.
843 ///
844 /// Since the operation does not involve memory locks, it can't prevent the [ABA
845 /// problem](https://en.wikipedia.org/wiki/ABA_problem).
846 ///
847 /// This method will fail with errors if:
848 /// 1. the remaining space of the reader or the available space of the writer are less than
849 /// `size_of::<T>()` bytes, or
850 /// 2. the memory operation fails due to an unresolvable page fault.
851 ///
852 /// # Panics
853 ///
854 /// This method will panic if:
855 /// 1. the reader and the writer does not point to the same memory location, or
856 /// 2. the memory location is not aligned on an `align_of::<T>()`-byte boundary.
857 pub fn atomic_compare_exchange<T>(
858 &self,
859 reader: &VmReader,
860 old_val: T,
861 new_val: T,
862 ) -> Result<(T, bool)>
863 where
864 T: PodAtomic + Eq,
865 {
866 if self.avail() < size_of::<T>() || reader.remain() < size_of::<T>() {
867 return Err(Error::InvalidArgs);
868 }
869
870 assert_eq!(self.cursor.cast_const(), reader.cursor);
871
872 let cursor = self.cursor.cast::<T>();
873 assert!(cursor.is_aligned());
874
875 // SAFETY:
876 // 1. The cursor is either valid for reading and writing or in user space for
877 // `size_of::<T>()` bytes.
878 // 2. The cursor is aligned on an `align_of::<T>()`-byte boundary.
879 let cur_val = unsafe { T::atomic_cmpxchg_fallible(cursor, old_val, new_val)? };
880
881 Ok((cur_val, old_val == cur_val))
882 }
883
884 /// Writes `len` zeros to the target memory.
885 ///
886 /// This method attempts to fill up to `len` bytes with zeros. If the available
887 /// memory from the current cursor position is less than `len`, it will only fill
888 /// the available space.
889 ///
890 /// If the memory write failed due to an unresolvable page fault, this method
891 /// will return `Err` with the length set so far.
892 pub fn fill_zeros(&mut self, len: usize) -> core::result::Result<usize, (Error, usize)> {
893 let len_to_set = self.avail().min(len);
894 if len_to_set == 0 {
895 return Ok(0);
896 }
897
898 // SAFETY: The destination is a subset of the memory range specified by
899 // the current writer, so it is either valid for writing or in user space.
900 let set_len = unsafe { memset::<Fallible>(self.cursor, 0u8, len_to_set) };
901 self.cursor = self.cursor.wrapping_add(set_len);
902
903 if set_len < len_to_set {
904 Err((Error::PageFault, set_len))
905 } else {
906 Ok(len_to_set)
907 }
908 }
909}
910
911impl<Fallibility> VmWriter<'_, Fallibility> {
912 /// Returns the number of bytes for the available space.
913 pub fn avail(&self) -> usize {
914 self.end.addr() - self.cursor.addr()
915 }
916
917 /// Returns the cursor pointer, which refers to the address of the next byte to write.
918 pub fn cursor(&self) -> *mut u8 {
919 self.cursor
920 }
921
922 /// Returns if it has available space to write.
923 pub fn has_avail(&self) -> bool {
924 self.avail() > 0
925 }
926
927 /// Limits the length of available space.
928 ///
929 /// This method ensures the post condition of `self.avail() <= max_avail`.
930 pub fn limit(&mut self, max_avail: usize) -> &mut Self {
931 if max_avail < self.avail() {
932 self.end = self.cursor.wrapping_add(max_avail);
933 }
934
935 self
936 }
937
938 /// Skips the first `nbytes` bytes of data.
939 /// The length of available space is decreased accordingly.
940 ///
941 /// # Panics
942 ///
943 /// If `nbytes` is greater than `self.avail()`, then the method panics.
944 pub fn skip(&mut self, nbytes: usize) -> &mut Self {
945 assert!(nbytes <= self.avail());
946 self.cursor = self.cursor.wrapping_add(nbytes);
947
948 self
949 }
950
951 /// Creates a clone of this writer, requiring exclusive access.
952 ///
953 /// This method is analogous to [`Clone::clone`], but takes `&mut self`
954 /// instead of `&self`. The `&mut self` receiver is necessary because
955 /// `VmWriter` cannot safely implement `Clone`:
956 /// the underlying buffer may be a mutable slice,
957 /// and two concurrent writers would violate Rust's aliasing rules.
958 ///
959 /// The returned writer has the same cursor position and limit as `self`.
960 /// Because it borrows `self` mutably,
961 /// the original writer cannot be used until the returned writer is dropped.
962 ///
963 /// Note that writes through the returned writer
964 /// do **not** advance the cursor of the original writer.
965 pub fn clone_exclusive(&mut self) -> VmWriter<'_, Fallibility> {
966 VmWriter {
967 cursor: self.cursor,
968 end: self.end,
969 phantom: PhantomData,
970 }
971 }
972}
973
974impl<'a> From<&'a mut [u8]> for VmWriter<'a, Infallible> {
975 fn from(slice: &'a mut [u8]) -> Self {
976 // SAFETY:
977 // - The memory range points to typed memory.
978 // - The validity requirements for write accesses are met because the pointer is converted
979 // from a mutable reference that outlives the lifetime `'a`.
980 // - The type, i.e., the `u8` slice, is plain-old-data.
981 unsafe { Self::from_kernel_space(slice.as_mut_ptr(), slice.len()) }
982 }
983}
984
/// A marker trait for POD types that can be read or written with one instruction.
///
/// This trait is mostly a hint, since it's safe and can be implemented for _any_ POD type. If it
/// is implemented for a type that cannot be read or written with a single instruction (on the
/// supported architectures, a type whose size is not 1, 2, 4, or 8 bytes), calling
/// `read_once`/`write_once` will lead to a failed compile-time assertion.
pub trait PodOnce: Pod {}
991
992#[cfg(any(
993 target_arch = "x86_64",
994 target_arch = "riscv64",
995 target_arch = "loongarch64"
996))]
997mod pod_once_impls {
998 use super::PodOnce;
999
1000 impl PodOnce for u8 {}
1001 impl PodOnce for u16 {}
1002 impl PodOnce for u32 {}
1003 impl PodOnce for u64 {}
1004 impl PodOnce for usize {}
1005 impl PodOnce for i8 {}
1006 impl PodOnce for i16 {}
1007 impl PodOnce for i32 {}
1008 impl PodOnce for i64 {}
1009 impl PodOnce for isize {}
1010
1011 /// Checks whether the memory operation created by `ptr::read_volatile` and
1012 /// `ptr::write_volatile` doesn't tear.
1013 ///
1014 /// Note that the Rust documentation makes no such guarantee, and even the wording in the LLVM
1015 /// LangRef is ambiguous. But this is unlikely to break in practice because the Linux kernel
1016 /// also uses "volatile" semantics to implement `READ_ONCE`/`WRITE_ONCE`.
1017 pub(super) const fn is_non_tearing<T>() -> bool {
1018 let size = size_of::<T>();
1019
1020 size == 1 || size == 2 || size == 4 || size == 8
1021 }
1022}
1023
/// A marker trait for POD types that can be read or written atomically.
pub trait PodAtomic: Pod {
    /// Atomically loads a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the loaded value.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
    ///   space for `size_of::<T>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
    ///
    /// NOTE(review): write (not merely read) validity is required even though this is a load;
    /// presumably the arch-specific implementation may use a read-modify-write instruction —
    /// confirm against the `__atomic_load_fallible` implementation.
    ///
    /// [valid]: crate::mm::io#safety
    #[doc(hidden)]
    unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self>;

    /// Atomically compares and exchanges a value.
    /// This function will return errors if encountering an unresolvable page fault.
    ///
    /// Returns the previous value.
    /// `new_val` will be written if and only if the previous value is equal to `old_val`.
    ///
    /// # Safety
    ///
    /// - `ptr` must either be [valid] for writes of `size_of::<T>()` bytes or be in user
    ///   space for `size_of::<T>()` bytes.
    /// - `ptr` must be aligned on an `align_of::<T>()`-byte boundary.
    ///
    /// [valid]: crate::mm::io#safety
    #[doc(hidden)]
    unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self)
        -> Result<Self>;
}
1058
1059impl PodAtomic for u32 {
1060 unsafe fn atomic_load_fallible(ptr: *const Self) -> Result<Self> {
1061 // SAFETY: The safety is upheld by the caller.
1062 let result = unsafe { __atomic_load_fallible(ptr) };
1063 if result == !0 {
1064 Err(Error::PageFault)
1065 } else {
1066 Ok(result as Self)
1067 }
1068 }
1069
1070 unsafe fn atomic_cmpxchg_fallible(ptr: *mut Self, old_val: Self, new_val: Self) -> Result<u32> {
1071 // SAFETY: The safety is upheld by the caller.
1072 let result = unsafe { __atomic_cmpxchg_fallible(ptr, old_val, new_val) };
1073 if result == !0 {
1074 Err(Error::PageFault)
1075 } else {
1076 Ok(result as Self)
1077 }
1078 }
1079}