Skip to main content

ostd/io/io_mem/
mod.rs

1// SPDX-License-Identifier: MPL-2.0
2
//! I/O memory and its allocator, which hands out memory-mapped I/O (MMIO) regions to device drivers.
4
5mod allocator;
6pub(crate) mod util;
7
8use core::{
9    marker::PhantomData,
10    ops::{Deref, Range},
11};
12
13use align_ext::AlignExt;
14use inherit_methods_macro::inherit_methods;
15
16pub(crate) use self::allocator::IoMemAllocatorBuilder;
17pub(super) use self::allocator::init;
18#[cfg(all(target_arch = "x86_64", feature = "cvm_guest"))]
19use crate::arch::{if_tdx_enabled, tdx_guest::unprotect_gpa_tdvm_call};
20use crate::{
21    Error,
22    arch::io::io_mem::{read_once, write_once},
23    cpu::{AtomicCpuSet, CpuSet},
24    mm::{
25        Fallible, Infallible, PAGE_SIZE, PodOnce, VmIo, VmIoFill, VmIoOnce, VmReader, VmWriter,
26        io::{
27            Io,
28            copy::{memcpy, memset},
29        },
30        kspace::kvirt_area::KVirtArea,
31        page_prop::{CachePolicy, PageFlags, PageProperty, PrivilegedPageFlags},
32        tlb::{TlbFlushOp, TlbFlusher},
33    },
34    prelude::*,
35    task::disable_preempt,
36};
37
/// Type-level tag for [`IoMem`] whose MMIO region is used for security-sensitive operations.
///
/// The enum has no variants, so it can only appear as a type parameter.
#[derive(Clone, Debug)]
pub(crate) enum Sensitive {}
42
/// Type-level tag for [`IoMem`] whose MMIO region is used for security-insensitive operations.
///
/// The enum has no variants, so it can only appear as a type parameter.
#[derive(Clone, Debug)]
pub enum Insensitive {}
47
48/// I/O memory.
49#[derive(Clone, Debug)]
50pub struct IoMem<SecuritySensitivity = Insensitive> {
51    kvirt_area: Arc<KVirtArea>,
52    // The actually used range for MMIO is `kvirt_area.start + offset..kvirt_area.start + offset + limit`
53    offset: usize,
54    limit: usize,
55    pa: Paddr,
56    cache_policy: CachePolicy,
57    phantom: PhantomData<SecuritySensitivity>,
58}
59
60impl<SecuritySensitivity> IoMem<SecuritySensitivity> {
61    /// Slices the `IoMem`, returning another `IoMem` representing the subslice.
62    ///
63    /// # Panics
64    ///
65    /// This method will panic if the range is empty or out of bounds.
66    pub fn slice(&self, range: Range<usize>) -> Self {
67        // This ensures `range.start < range.end` and `range.end <= limit`.
68        assert!(!range.is_empty() && range.end <= self.limit);
69
70        // We've checked the range is in bounds, so we can construct the new `IoMem` safely.
71        Self {
72            kvirt_area: self.kvirt_area.clone(),
73            offset: self.offset + range.start,
74            limit: range.len(),
75            pa: self.pa + range.start,
76            cache_policy: self.cache_policy,
77            phantom: PhantomData,
78        }
79    }
80
81    /// Creates a new `IoMem`.
82    ///
83    /// # Safety
84    ///
85    /// 1. This function must be called after the kernel page table is activated.
86    /// 2. The given physical address range must be in the I/O memory region.
87    /// 3. Reading from or writing to I/O memory regions may have side effects.
88    ///    If `SecuritySensitivity` is `Insensitive`, those side effects must
89    ///    not cause soundness problems (e.g., they must not corrupt the kernel
90    ///    memory).
91    pub(crate) unsafe fn new(range: Range<Paddr>, flags: PageFlags, cache: CachePolicy) -> Self {
92        let first_page_start = range.start.align_down(PAGE_SIZE);
93        let last_page_end = range.end.align_up(PAGE_SIZE);
94
95        let frames_range = first_page_start..last_page_end;
96        let area_size = frames_range.len();
97
98        #[cfg(target_arch = "x86_64")]
99        let priv_flags = if_tdx_enabled!({
100            assert!(
101                first_page_start == range.start && last_page_end == range.end,
102                "I/O memory is not page aligned, which cannot be unprotected in TDX: {:#x?}..{:#x?}",
103                range.start,
104                range.end,
105            );
106
107            // SAFETY:
108            //  - The range `first_page_start..last_page_end` is always page aligned.
109            //  - FIXME: We currently do not limit the I/O memory allocator with the maximum GPA,
110            //    so the address range may not fall in the GPA limit.
111            //  - The caller guarantees that operations on the I/O memory do not have any side
112            //    effects that may cause soundness problems, so the pages can safely be viewed as
113            //    untyped memory.
114            unsafe { unprotect_gpa_tdvm_call(first_page_start, area_size).unwrap() };
115
116            PrivilegedPageFlags::SHARED
117        } else {
118            PrivilegedPageFlags::empty()
119        });
120        #[cfg(not(target_arch = "x86_64"))]
121        let priv_flags = PrivilegedPageFlags::empty();
122
123        let prop = PageProperty {
124            flags,
125            cache,
126            priv_flags,
127        };
128
129        let kva = {
130            // SAFETY: The caller of `IoMem::new()` ensures that the given
131            // physical address range is I/O memory, so it is safe to map.
132            let kva = unsafe { KVirtArea::map_untracked_frames(area_size, 0, frames_range, prop) };
133
134            let target_cpus = AtomicCpuSet::new(CpuSet::new_full());
135            let mut flusher = TlbFlusher::new(&target_cpus, disable_preempt());
136            flusher.issue_tlb_flush(TlbFlushOp::for_range(kva.range()));
137            flusher.dispatch_tlb_flush();
138            flusher.sync_tlb_flush();
139
140            kva
141        };
142
143        Self {
144            kvirt_area: Arc::new(kva),
145            offset: range.start - first_page_start,
146            limit: range.len(),
147            pa: range.start,
148            cache_policy: cache,
149            phantom: PhantomData,
150        }
151    }
152
153    /// Returns the cache policy of this `IoMem`.
154    pub fn cache_policy(&self) -> CachePolicy {
155        self.cache_policy
156    }
157
158    /// Returns the base virtual address of the MMIO range.
159    fn base(&self) -> usize {
160        self.kvirt_area.deref().start() + self.offset
161    }
162
163    /// Validates that the offset range lies within the MMIO window.
164    fn check_range(&self, offset: usize, len: usize) -> Result<()> {
165        if offset.checked_add(len).is_none_or(|end| end > self.limit) {
166            return Err(Error::InvalidArgs);
167        }
168        Ok(())
169    }
170}
171
172#[cfg_attr(target_arch = "loongarch64", expect(unused))]
173impl IoMem<Sensitive> {
174    /// Reads a value of the `PodOnce` type at the specified offset using one
175    /// non-tearing memory load.
176    ///
177    /// Except that the offset is specified explicitly, the semantics of this
178    /// method is the same as [`VmReader::read_once`].
179    ///
180    /// # Safety
181    ///
182    /// The caller must ensure that the offset and the read operation is valid,
183    /// e.g., follows the specification when used for implementing drivers, does
184    /// not cause any out-of-bounds access, and does not cause unsound side
185    /// effects (e.g., corrupting the kernel memory).
186    pub(crate) unsafe fn read_once<T: PodOnce>(&self, offset: usize) -> T {
187        debug_assert!(offset + size_of::<T>() <= self.limit);
188        let ptr = (self.kvirt_area.deref().start() + self.offset + offset) as *const T;
189        // SAFETY: The safety of the read operation's semantics is upheld by the caller.
190        unsafe { read_once(ptr) }
191    }
192
193    /// Writes a value of the `PodOnce` type at the specified offset using one
194    /// non-tearing memory store.
195    ///
196    /// Except that the offset is specified explicitly, the semantics of this
197    /// method is the same as [`VmWriter::write_once`].
198    ///
199    /// # Safety
200    ///
201    /// The caller must ensure that the offset and the write operation is valid,
202    /// e.g., follows the specification when used for implementing drivers, does
203    /// not cause any out-of-bounds access, and does not cause unsound side
204    /// effects (e.g., corrupting the kernel memory).
205    pub(crate) unsafe fn write_once<T: PodOnce>(&self, offset: usize, value: &T) {
206        debug_assert!(offset + size_of::<T>() <= self.limit);
207        let ptr = (self.kvirt_area.deref().start() + self.offset + offset) as *mut T;
208        // SAFETY: The safety of the write operation's semantics is upheld by the caller.
209        unsafe { write_once(ptr, *value) };
210    }
211}
212
213impl IoMem<Insensitive> {
214    /// Acquires an `IoMem` instance for the given range.
215    ///
216    /// The I/O memory cache policy is set to uncacheable by default.
217    pub fn acquire(range: Range<Paddr>) -> Result<IoMem<Insensitive>> {
218        Self::acquire_with_cache_policy(range, CachePolicy::Uncacheable)
219    }
220
221    /// Acquires an `IoMem` instance for the given range with the specified cache policy.
222    pub fn acquire_with_cache_policy(
223        range: Range<Paddr>,
224        cache_policy: CachePolicy,
225    ) -> Result<IoMem<Insensitive>> {
226        allocator::IO_MEM_ALLOCATOR
227            .get()
228            .unwrap()
229            .acquire(range, cache_policy)
230            .ok_or(Error::AccessDenied)
231    }
232
233    /// Reads from MMIO into fallible memory and returns the copied length.
234    ///
235    /// This method performs the same low-level copy primitive as [`VmIo::read`],
236    /// but exposes partial progress instead of enforcing no-short-read semantics.
237    pub fn read_fallible(
238        &self,
239        offset: usize,
240        writer: &mut VmWriter,
241    ) -> Result<usize, (Error, usize)> {
242        let len = writer.avail();
243        self.check_range(offset, len).map_err(|err| (err, 0))?;
244
245        let src = (self.base() + offset) as *const u8;
246        // SAFETY: `src` points to a validated MMIO range and `writer.cursor()` points to
247        // fallible destination memory tracked by `writer`.
248        let copied = unsafe { memcpy::<Fallible, Io>(writer.cursor(), src, len) };
249        writer.skip(copied);
250
251        if copied < len {
252            Err((Error::PageFault, copied))
253        } else {
254            Ok(copied)
255        }
256    }
257
258    /// Writes from fallible memory to MMIO and returns the copied length.
259    ///
260    /// This method performs the same low-level copy primitive as [`VmIo::write`],
261    /// but exposes partial progress instead of enforcing no-short-write semantics.
262    pub fn write_fallible(
263        &self,
264        offset: usize,
265        reader: &mut VmReader,
266    ) -> Result<usize, (Error, usize)> {
267        let len = reader.remain();
268        self.check_range(offset, len).map_err(|err| (err, 0))?;
269
270        let dst = (self.base() + offset) as *mut u8;
271        // SAFETY: `dst` points to a validated MMIO range and `reader.cursor()` points to
272        // fallible source memory tracked by `reader`.
273        let copied = unsafe { memcpy::<Io, Fallible>(dst, reader.cursor(), len) };
274        reader.skip(copied);
275
276        if copied < len {
277            Err((Error::PageFault, copied))
278        } else {
279            Ok(copied)
280        }
281    }
282}
283
284impl VmIoOnce for IoMem<Insensitive> {
285    fn read_once<T: PodOnce>(&self, offset: usize) -> Result<T> {
286        self.check_range(offset, size_of::<T>())?;
287        let ptr = (self.base() + offset) as *const T;
288        if !ptr.is_aligned() {
289            return Err(Error::InvalidArgs);
290        }
291
292        // SAFETY: The pointer is properly aligned and within the validated range.
293        let val = unsafe { read_once(ptr) };
294        Ok(val)
295    }
296
297    fn write_once<T: PodOnce>(&self, offset: usize, value: &T) -> Result<()> {
298        self.check_range(offset, size_of::<T>())?;
299        let ptr = (self.base() + offset) as *mut T;
300        if !ptr.is_aligned() {
301            return Err(Error::InvalidArgs);
302        }
303
304        // SAFETY: The pointer is properly aligned and within the validated range.
305        unsafe { write_once(ptr, *value) };
306        Ok(())
307    }
308}
309
310impl VmIo for IoMem<Insensitive> {
311    fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()> {
312        let len = writer.avail();
313        self.check_range(offset, len)?;
314
315        let src = (self.base() + offset) as *const u8;
316        // SAFETY: `src` points to a validated MMIO range and `writer.cursor()` points to
317        // fallible destination memory tracked by `writer`.
318        let copied = unsafe { memcpy::<Fallible, Io>(writer.cursor(), src, len) };
319        if copied < len {
320            return Err(Error::PageFault);
321        }
322
323        writer.skip(copied);
324        Ok(())
325    }
326
327    fn read_bytes(&self, offset: usize, buf: &mut [u8]) -> Result<()> {
328        let len = buf.len();
329        self.check_range(offset, len)?;
330        let src = (self.base() + offset) as *const u8;
331        let dst = buf.as_mut_ptr();
332
333        // SAFETY: The `dst` and `src` buffers are valid to write and read for `len` bytes.
334        unsafe { memcpy::<Infallible, Io>(dst, src, len) };
335        Ok(())
336    }
337
338    fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()> {
339        let len = reader.remain();
340        self.check_range(offset, len)?;
341
342        let dst = (self.base() + offset) as *mut u8;
343        // SAFETY: `dst` points to a validated MMIO range and `reader.cursor()` points to
344        // fallible source memory tracked by `reader`.
345        let copied = unsafe { memcpy::<Io, Fallible>(dst, reader.cursor(), len) };
346        if copied < len {
347            return Err(Error::PageFault);
348        }
349
350        reader.skip(copied);
351        Ok(())
352    }
353
354    fn write_bytes(&self, offset: usize, buf: &[u8]) -> Result<()> {
355        let len = buf.len();
356        self.check_range(offset, len)?;
357        let src = buf.as_ptr();
358        let dst = (self.base() + offset) as *mut u8;
359
360        // SAFETY: The `dst` and `src` buffers are valid to write and read for `len` bytes.
361        unsafe { memcpy::<Io, Infallible>(dst, src, len) };
362        Ok(())
363    }
364}
365
366impl VmIoFill for IoMem<Insensitive> {
367    fn fill_zeros(&self, offset: usize, len: usize) -> Result<(), (Error, usize)> {
368        if len == 0 {
369            return Ok(());
370        }
371
372        if offset > self.limit {
373            return Err((Error::InvalidArgs, 0));
374        }
375
376        let available = self.limit - offset;
377        let write_len = core::cmp::min(len, available);
378        if write_len == 0 {
379            return Err((Error::InvalidArgs, 0));
380        }
381
382        let dst = (self.base() + offset) as *mut u8;
383        // SAFETY: `dst` points to the validated MMIO subrange of `write_len` bytes.
384        unsafe { memset::<Io>(dst, 0u8, write_len) };
385
386        if write_len == len {
387            Ok(())
388        } else {
389            Err((Error::InvalidArgs, write_len))
390        }
391    }
392}
393
394macro_rules! impl_vm_io_pointer {
395    ($ty:ty, $from:tt) => {
396        #[inherit_methods(from = $from)]
397        impl VmIo for $ty {
398            fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()>;
399            fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()>;
400        }
401
402        #[inherit_methods(from = $from)]
403        impl VmIoOnce for $ty {
404            fn read_once<T: PodOnce>(&self, offset: usize) -> Result<T>;
405            fn write_once<T: PodOnce>(&self, offset: usize, value: &T) -> Result<()>;
406        }
407
408        #[inherit_methods(from = $from)]
409        impl VmIoFill for $ty {
410            fn fill_zeros(&self, offset: usize, len: usize) -> Result<(), (Error, usize)>;
411        }
412    };
413}
414
415impl_vm_io_pointer!(&IoMem<Insensitive>, "(**self)");
416impl_vm_io_pointer!(&mut IoMem<Insensitive>, "(**self)");
417
418impl<SecuritySensitivity> HasPaddr for IoMem<SecuritySensitivity> {
419    fn paddr(&self) -> Paddr {
420        self.pa
421    }
422}
423
424impl<SecuritySensitivity> HasSize for IoMem<SecuritySensitivity> {
425    fn size(&self) -> usize {
426        self.limit
427    }
428}
429
430impl<SecuritySensitivity> Drop for IoMem<SecuritySensitivity> {
431    fn drop(&mut self) {
432        // TODO: Multiple `IoMem` instances should not overlap, we should refactor the driver code and
433        // remove the `Clone` and `IoMem::slice`. After refactoring, the `Drop` can be implemented to recycle
434        // the `IoMem`.
435    }
436}
437
#[cfg(ktest)]
mod test {
    use crate::{
        arch::io::io_mem::{copy_from_mmio, copy_to_mmio, read_once, write_once},
        prelude::ktest,
    };

    /// Size of the scratch buffers used by the copy tests.
    const BUF_LEN: usize = 64;

    /// A `u8` written with `write_once` is read back unchanged by `read_once`.
    #[ktest]
    fn read_write_u8() {
        let mut cell: u8 = 0;
        // SAFETY: `cell` is valid for a single MMIO read/write.
        unsafe {
            write_once(&mut cell, 42u8);
            assert_eq!(read_once(&cell), 42u8);
        }
    }

    /// A `u16` written with `write_once` is read back unchanged by `read_once`.
    #[ktest]
    fn read_write_u16() {
        let expected: u16 = 0x1234;
        let mut cell: u16 = 0;
        // SAFETY: `cell` is valid for a single MMIO read/write.
        unsafe {
            write_once(&mut cell, expected);
            assert_eq!(read_once(&cell), expected);
        }
    }

    /// A `u32` written with `write_once` is read back unchanged by `read_once`.
    #[ktest]
    fn read_write_u32() {
        let expected: u32 = 0x12345678;
        let mut cell: u32 = 0;
        // SAFETY: `cell` is valid for a single MMIO read/write.
        unsafe {
            write_once(&mut cell, expected);
            assert_eq!(read_once(&cell), expected);
        }
    }

    /// A `u64` written with `write_once` is read back unchanged by `read_once`.
    #[ktest]
    fn read_write_u64() {
        let expected: u64 = 0xDEADBEEFCAFEBABE;
        let mut cell: u64 = 0;
        // SAFETY: `cell` is valid for a single MMIO read/write.
        unsafe {
            write_once(&mut cell, expected);
            assert_eq!(read_once(&cell), expected);
        }
    }

    /// A one-byte `write_once` must leave the neighbouring byte untouched.
    #[ktest]
    fn boundary_overlap() {
        let mut pair: [u8; 2] = [0xAA, 0xBB];
        // SAFETY: `pair[0]` is valid for a single MMIO write.
        unsafe {
            write_once(&mut pair[0], 0x11u8);
        }
        assert_eq!(pair[0], 0x11);
        assert_eq!(pair[1], 0xBB);
    }

    /// Fills `buf` with the byte pattern `3 * index + 1` (wrapping in `u8`).
    fn fill_pattern(buf: &mut [u8]) {
        buf.iter_mut()
            .enumerate()
            .for_each(|(idx, byte)| *byte = (idx as u8).wrapping_mul(3).wrapping_add(1));
    }

    /// Copies `len` bytes with `copy_from_mmio` from `src_offset` in a patterned buffer to
    /// `dst_offset` in a zeroed buffer and checks that the copied bytes match.
    fn run_copy_from_case(src_offset: usize, dst_offset: usize, len: usize) {
        let mut source = [0u8; BUF_LEN];
        let mut target = [0u8; BUF_LEN];
        fill_pattern(&mut source);

        // SAFETY: Offsets are validated by callers before this helper is invoked.
        let from = unsafe { source.as_ptr().add(src_offset) };
        // SAFETY: Offsets are validated by callers before this helper is invoked.
        let to = unsafe { target.as_mut_ptr().add(dst_offset) };

        // SAFETY: The test buffers are valid for the requested range.
        unsafe { copy_from_mmio(to, from, len) };

        assert_eq!(
            &target[dst_offset..dst_offset + len],
            &source[src_offset..src_offset + len]
        );
    }

    /// Copies `len` bytes with `copy_to_mmio` from `src_offset` in a patterned buffer to
    /// `dst_offset` in a zeroed buffer and checks that the copied bytes match.
    fn run_copy_to_case(src_offset: usize, dst_offset: usize, len: usize) {
        let mut source = [0u8; BUF_LEN];
        let mut target = [0u8; BUF_LEN];
        fill_pattern(&mut source);

        // SAFETY: Offsets are validated by callers before this helper is invoked.
        let from = unsafe { source.as_ptr().add(src_offset) };
        // SAFETY: Offsets are validated by callers before this helper is invoked.
        let to = unsafe { target.as_mut_ptr().add(dst_offset) };

        // SAFETY: The test buffers are valid for the requested range.
        unsafe { copy_to_mmio(from, to, len) };

        assert_eq!(
            &target[dst_offset..dst_offset + len],
            &source[src_offset..src_offset + len]
        );
    }

    /// Runs `run_case(src_offset, dst_offset, len)` for every combination of the tested
    /// lengths (around the machine word size) and small starting offsets that fits in the
    /// scratch buffers.
    fn sweep_alignment_and_sizes(run_case: fn(usize, usize, usize)) {
        let word_size = size_of::<usize>();
        let sizes = [
            0,
            1,
            word_size.saturating_sub(1),
            word_size,
            word_size + 1,
            word_size * 2 + 3,
        ];
        let offsets = [0, 1, 2];

        for &len in &sizes {
            for &src_offset in &offsets {
                for &dst_offset in &offsets {
                    let fits = src_offset + len <= BUF_LEN && dst_offset + len <= BUF_LEN;
                    if fits {
                        run_case(src_offset, dst_offset, len);
                    }
                }
            }
        }
    }

    /// Exercises `copy_from_mmio` across the length/offset sweep.
    #[ktest]
    fn copy_from_alignment_and_sizes() {
        sweep_alignment_and_sizes(run_copy_from_case);
    }

    /// Exercises `copy_to_mmio` across the length/offset sweep.
    #[ktest]
    fn copy_to_alignment_and_sizes() {
        sweep_alignment_and_sizes(run_copy_to_case);
    }
}