lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240514131711.379322-28-wedsonaf@gmail.com>
Date: Tue, 14 May 2024 10:17:08 -0300
From: Wedson Almeida Filho <wedsonaf@...il.com>
To: Alexander Viro <viro@...iv.linux.org.uk>,
	Christian Brauner <brauner@...nel.org>,
	Matthew Wilcox <willy@...radead.org>,
	Dave Chinner <david@...morbit.com>
Cc: Kent Overstreet <kent.overstreet@...il.com>,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	linux-fsdevel@...r.kernel.org,
	rust-for-linux@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	Wedson Almeida Filho <walmeida@...rosoft.com>
Subject: [RFC PATCH v2 27/30] rust: fs: add `iomap` module

From: Wedson Almeida Filho <walmeida@...rosoft.com>

Allow file systems to implement their address space operations via
iomap, which delegates a lot of the complexity to common code.

Signed-off-by: Wedson Almeida Filho <walmeida@...rosoft.com>
---
 rust/bindings/bindings_helper.h |   1 +
 rust/kernel/fs.rs               |   1 +
 rust/kernel/fs/iomap.rs         | 281 ++++++++++++++++++++++++++++++++
 3 files changed, 283 insertions(+)
 create mode 100644 rust/kernel/fs/iomap.rs

diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index f4c7c3951dbe..629fce394dbe 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/fs_context.h>
+#include <linux/iomap.h>
 #include <linux/jiffies.h>
 #include <linux/mdio.h>
 #include <linux/pagemap.h>
diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs
index 4d90b23735bc..7a1c4884c370 100644
--- a/rust/kernel/fs.rs
+++ b/rust/kernel/fs.rs
@@ -19,6 +19,7 @@
 pub mod dentry;
 pub mod file;
 pub mod inode;
+pub mod iomap;
 pub mod sb;
 
 /// The offset of a file in a file system.
diff --git a/rust/kernel/fs/iomap.rs b/rust/kernel/fs/iomap.rs
new file mode 100644
index 000000000000..e48e200e555e
--- /dev/null
+++ b/rust/kernel/fs/iomap.rs
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! File system io maps.
+//!
+//! This module allows Rust code to use iomaps to implement filesystems.
+//!
+//! C headers: [`include/linux/iomap.h`](srctree/include/linux/iomap.h)
+
+use super::{address_space, FileSystem, INode, Offset};
+use crate::error::{from_result, Result};
+use crate::{bindings, block};
+use core::marker::PhantomData;
+
+/// The type of mapping.
+///
+/// This is used in [`Map`].
+#[repr(u16)]
+pub enum Type {
+    /// No blocks allocated, need allocation.
+    Hole = bindings::IOMAP_HOLE as u16,
+
+    /// Delayed allocation blocks.
+    DelAlloc = bindings::IOMAP_DELALLOC as u16,
+
+    /// Blocks allocated at the given address.
+    Mapped = bindings::IOMAP_MAPPED as u16,
+
+    /// Blocks allocated at the given address in unwritten state.
+    Unwritten = bindings::IOMAP_UNWRITTEN as u16,
+
+    /// Data inline in the inode.
+    Inline = bindings::IOMAP_INLINE as u16,
+}
+
+/// Flags usable in [`Map`], in [`Map::set_flags`] in particular.
+pub mod map_flags {
+    /// Indicates that the blocks have been newly allocated and need zeroing for areas that no data
+    /// is copied to.
+    pub const NEW: u16 = bindings::IOMAP_F_NEW as u16;
+
+    /// Indicates that the inode has uncommitted metadata needed to access written data and
+    /// requires fdatasync to commit them to persistent storage. This needs to take into account
+    /// metadata changes that *may* be made at IO completion, such as file size updates from direct
+    /// IO.
+    pub const DIRTY: u16 = bindings::IOMAP_F_DIRTY as u16;
+
+    /// Indicates that the blocks are shared, and will need to be unshared as part a write.
+    pub const SHARED: u16 = bindings::IOMAP_F_SHARED as u16;
+
+    /// Indicates that the iomap contains the merge of multiple block mappings.
+    pub const MERGED: u16 = bindings::IOMAP_F_MERGED as u16;
+
+    /// Indicates that the file system requires the use of buffer heads for this mapping.
+    pub const BUFFER_HEAD: u16 = bindings::IOMAP_F_BUFFER_HEAD as u16;
+
+    /// Indicates that the iomap is for an extended attribute extent rather than a file data
+    /// extent.
+    pub const XATTR: u16 = bindings::IOMAP_F_XATTR as u16;
+
+    /// Indicates to the iomap_end method that the file size has changed as the result of this
+    /// write operation.
+    pub const SIZE_CHANGED: u16 = bindings::IOMAP_F_SIZE_CHANGED as u16;
+
+    /// Indicates that the iomap is not valid any longer and the file range it covers needs to be
+    /// remapped by the high level before the operation can proceed.
+    pub const STALE: u16 = bindings::IOMAP_F_STALE as u16;
+
+    /// Flags from 0x1000 up are for file system specific usage.
+    pub const PRIVATE: u16 = bindings::IOMAP_F_PRIVATE as u16;
+}
+
+/// A map from address space to block device.
+#[repr(transparent)]
+pub struct Map<'a>(pub bindings::iomap, PhantomData<&'a ()>);
+
+impl<'a> Map<'a> {
+    /// Sets the map type.
+    pub fn set_type(&mut self, t: Type) -> &mut Self {
+        self.0.type_ = t as u16;
+        self
+    }
+
+    /// Sets the file offset, in bytes.
+    pub fn set_offset(&mut self, v: Offset) -> &mut Self {
+        self.0.offset = v;
+        self
+    }
+
+    /// Sets the length of the mapping, in bytes.
+    pub fn set_length(&mut self, len: u64) -> &mut Self {
+        self.0.length = len;
+        self
+    }
+
+    /// Sets the mapping flags.
+    ///
+    /// Values come from the [`map_flags`] module.
+    pub fn set_flags(&mut self, flags: u16) -> &mut Self {
+        self.0.flags = flags;
+        self
+    }
+
+    /// Sets the disk offset of the mapping, in bytes.
+    pub fn set_addr(&mut self, addr: u64) -> &mut Self {
+        self.0.addr = addr;
+        self
+    }
+
+    /// Sets the block device of the mapping.
+    pub fn set_bdev(&mut self, bdev: Option<&'a block::Device>) -> &mut Self {
+        self.0.bdev = if let Some(b) = bdev {
+            b.0.get()
+        } else {
+            core::ptr::null_mut()
+        };
+        self
+    }
+}
+
+/// Flags passed to [`Operations::begin`] and [`Operations::end`].
+pub mod flags {
+    /// Writing, must allocate block.
+    pub const WRITE: u32 = bindings::IOMAP_WRITE;
+
+    /// Zeroing operation, may skip holes.
+    pub const ZERO: u32 = bindings::IOMAP_ZERO;
+
+    /// Report extent status, e.g. FIEMAP.
+    pub const REPORT: u32 = bindings::IOMAP_REPORT;
+
+    /// Mapping for page fault.
+    pub const FAULT: u32 = bindings::IOMAP_FAULT;
+
+    /// Direct I/O.
+    pub const DIRECT: u32 = bindings::IOMAP_DIRECT;
+
+    /// Do not block.
+    pub const NOWAIT: u32 = bindings::IOMAP_NOWAIT;
+
+    /// Only pure overwrites allowed.
+    pub const OVERWRITE_ONLY: u32 = bindings::IOMAP_OVERWRITE_ONLY;
+
+    /// `unshare_file_range`.
+    pub const UNSHARE: u32 = bindings::IOMAP_UNSHARE;
+
+    /// DAX mapping.
+    pub const DAX: u32 = bindings::IOMAP_DAX;
+}
+
+/// Operations implemented by iomap users.
+pub trait Operations {
+    /// File system that these operations are compatible with.
+    type FileSystem: FileSystem + ?Sized;
+
+    /// Returns the existing mapping at `pos`, or reserves space starting at `pos` for up to
+    /// `length`, as long as it can be done as a single mapping. The actual length is returned in
+    /// `iomap`.
+    ///
+    /// The values of `flags` come from the [`flags`] module.
+    fn begin<'a>(
+        inode: &'a INode<Self::FileSystem>,
+        pos: Offset,
+        length: Offset,
+        flags: u32,
+        map: &mut Map<'a>,
+        srcmap: &mut Map<'a>,
+    ) -> Result;
+
+    /// Commits and/or unreserves space previously allocated using [`Operations::begin`]. `writte`n
+    /// indicates the length of the successful write operation which needs to be commited, while
+    /// the rest needs to be unreserved. `written` might be zero if no data was written.
+    ///
+    /// The values of `flags` come from the [`flags`] module.
+    fn end<'a>(
+        _inode: &'a INode<Self::FileSystem>,
+        _pos: Offset,
+        _length: Offset,
+        _written: isize,
+        _flags: u32,
+        _map: &Map<'a>,
+    ) -> Result {
+        Ok(())
+    }
+}
+
+/// Returns address space oprerations backed by iomaps.
+pub const fn ro_aops<T: Operations + ?Sized>() -> address_space::Ops<T::FileSystem> {
+    struct Table<T: Operations + ?Sized>(PhantomData<T>);
+    impl<T: Operations + ?Sized> Table<T> {
+        const MAP_TABLE: bindings::iomap_ops = bindings::iomap_ops {
+            iomap_begin: Some(Self::iomap_begin_callback),
+            iomap_end: Some(Self::iomap_end_callback),
+        };
+
+        extern "C" fn iomap_begin_callback(
+            inode_ptr: *mut bindings::inode,
+            pos: Offset,
+            length: Offset,
+            flags: u32,
+            map: *mut bindings::iomap,
+            srcmap: *mut bindings::iomap,
+        ) -> i32 {
+            from_result(|| {
+                // SAFETY: The C API guarantees that `inode_ptr` is a valid inode.
+                let inode = unsafe { INode::from_raw(inode_ptr) };
+                T::begin(
+                    inode,
+                    pos,
+                    length,
+                    flags,
+                    // SAFETY: The C API guarantees that `map` is valid for write.
+                    unsafe { &mut *map.cast::<Map<'_>>() },
+                    // SAFETY: The C API guarantees that `srcmap` is valid for write.
+                    unsafe { &mut *srcmap.cast::<Map<'_>>() },
+                )?;
+                Ok(0)
+            })
+        }
+
+        extern "C" fn iomap_end_callback(
+            inode_ptr: *mut bindings::inode,
+            pos: Offset,
+            length: Offset,
+            written: isize,
+            flags: u32,
+            map: *mut bindings::iomap,
+        ) -> i32 {
+            from_result(|| {
+                // SAFETY: The C API guarantees that `inode_ptr` is a valid inode.
+                let inode = unsafe { INode::from_raw(inode_ptr) };
+                // SAFETY: The C API guarantees that `map` is valid for read.
+                T::end(inode, pos, length, written, flags, unsafe {
+                    &*map.cast::<Map<'_>>()
+                })?;
+                Ok(0)
+            })
+        }
+
+        const TABLE: bindings::address_space_operations = bindings::address_space_operations {
+            writepage: None,
+            read_folio: Some(Self::read_folio_callback),
+            writepages: None,
+            dirty_folio: None,
+            readahead: Some(Self::readahead_callback),
+            write_begin: None,
+            write_end: None,
+            bmap: Some(Self::bmap_callback),
+            invalidate_folio: Some(bindings::iomap_invalidate_folio),
+            release_folio: Some(bindings::iomap_release_folio),
+            free_folio: None,
+            direct_IO: Some(bindings::noop_direct_IO),
+            migrate_folio: None,
+            launder_folio: None,
+            is_partially_uptodate: None,
+            is_dirty_writeback: None,
+            error_remove_folio: None,
+            swap_activate: None,
+            swap_deactivate: None,
+            swap_rw: None,
+        };
+
+        extern "C" fn read_folio_callback(
+            _file: *mut bindings::file,
+            folio: *mut bindings::folio,
+        ) -> i32 {
+            // SAFETY: `folio` is just forwarded from C and `Self::MAP_TABLE` is always valid.
+            unsafe { bindings::iomap_read_folio(folio, &Self::MAP_TABLE) }
+        }
+
+        extern "C" fn readahead_callback(rac: *mut bindings::readahead_control) {
+            // SAFETY: `rac` is just forwarded from C and `Self::MAP_TABLE` is always valid.
+            unsafe { bindings::iomap_readahead(rac, &Self::MAP_TABLE) }
+        }
+
+        extern "C" fn bmap_callback(mapping: *mut bindings::address_space, block: u64) -> u64 {
+            // SAFETY: `mapping` is just forwarded from C and `Self::MAP_TABLE` is always valid.
+            unsafe { bindings::iomap_bmap(mapping, block, &Self::MAP_TABLE) }
+        }
+    }
+    address_space::Ops(&Table::<T>::TABLE, PhantomData)
+}
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ