[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240514131711.379322-24-wedsonaf@gmail.com>
Date: Tue, 14 May 2024 10:17:04 -0300
From: Wedson Almeida Filho <wedsonaf@...il.com>
To: Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>,
Matthew Wilcox <willy@...radead.org>,
Dave Chinner <david@...morbit.com>
Cc: Kent Overstreet <kent.overstreet@...il.com>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
linux-fsdevel@...r.kernel.org,
rust-for-linux@...r.kernel.org,
linux-kernel@...r.kernel.org,
Wedson Almeida Filho <walmeida@...rosoft.com>
Subject: [RFC PATCH v2 23/30] rust: fs: allow file systems backed by a block device
From: Wedson Almeida Filho <walmeida@...rosoft.com>
Allow Rust file systems that are backed by block devices (in addition to
in-memory ones).
Signed-off-by: Wedson Almeida Filho <walmeida@...rosoft.com>
---
rust/helpers.c | 14 +++
rust/kernel/block.rs | 1 -
rust/kernel/fs.rs | 60 ++++++++---
rust/kernel/fs/inode.rs | 221 +++++++++++++++++++++++++++++++++++++-
rust/kernel/fs/sb.rs | 49 ++++++++-
samples/rust/rust_rofs.rs | 2 +-
6 files changed, 328 insertions(+), 19 deletions(-)
diff --git a/rust/helpers.c b/rust/helpers.c
index 360a1d38ac19..6c6d18df055f 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -21,6 +21,7 @@
*/
#include <kunit/test-bug.h>
+#include <linux/blkdev.h>
#include <linux/bug.h>
#include <linux/build_bug.h>
#include <linux/cacheflush.h>
@@ -258,6 +259,13 @@ void rust_helper_kunmap_local(const void *vaddr)
}
EXPORT_SYMBOL_GPL(rust_helper_kunmap_local);
+struct folio *rust_helper_read_mapping_folio(struct address_space *mapping,
+ pgoff_t index, struct file *file)
+{
+ return read_mapping_folio(mapping, index, file);
+}
+EXPORT_SYMBOL_GPL(rust_helper_read_mapping_folio);
+
void rust_helper_i_uid_write(struct inode *inode, uid_t uid)
{
i_uid_write(inode, uid);
@@ -294,6 +302,12 @@ unsigned int rust_helper_MKDEV(unsigned int major, unsigned int minor)
}
EXPORT_SYMBOL_GPL(rust_helper_MKDEV);
+sector_t rust_helper_bdev_nr_sectors(struct block_device *bdev)
+{
+ return bdev_nr_sectors(bdev);
+}
+EXPORT_SYMBOL_GPL(rust_helper_bdev_nr_sectors);
+
unsigned long rust_helper_copy_to_user(void __user *to, const void *from,
unsigned long n)
{
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
index 868623d7c873..4d669bd5dce9 100644
--- a/rust/kernel/block.rs
+++ b/rust/kernel/block.rs
@@ -31,7 +31,6 @@ impl Device {
///
/// Callers must ensure that `ptr` is valid and remains so for the lifetime of the returned
/// object.
- #[allow(dead_code)]
pub(crate) unsafe fn from_raw<'a>(ptr: *mut bindings::block_device) -> &'a Self {
// SAFETY: The safety requirements guarantee that the cast below is ok.
unsafe { &*ptr.cast::<Self>() }
diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs
index 387e87e3edaf..864aca24d12c 100644
--- a/rust/kernel/fs.rs
+++ b/rust/kernel/fs.rs
@@ -26,6 +26,11 @@
/// This is C's `loff_t`.
pub type Offset = i64;
+/// An index into the page cache.
+///
+/// This is C's `pgoff_t`.
+pub type PageOffset = usize;
+
/// Maximum size of an inode.
pub const MAX_LFS_FILESIZE: Offset = bindings::MAX_LFS_FILESIZE;
@@ -37,6 +42,9 @@ pub trait FileSystem {
/// The name of the file system type.
const NAME: &'static CStr;
+ /// Determines how superblocks for this file system type are keyed.
+ const SUPER_TYPE: sb::Type = sb::Type::Independent;
+
/// Determines if an implementation doesn't specify the required types.
///
/// This is meant for internal use only.
@@ -44,7 +52,10 @@ pub trait FileSystem {
const IS_UNSPECIFIED: bool = false;
/// Initialises the new superblock and returns the data to attach to it.
- fn fill_super(sb: &mut SuperBlock<Self, sb::New>) -> Result<Self::Data>;
+ fn fill_super(
+ sb: &mut SuperBlock<Self, sb::New>,
+ mapper: Option<inode::Mapper>,
+ ) -> Result<Self::Data>;
/// Initialises and returns the root inode of the given superblock.
///
@@ -100,7 +111,7 @@ impl FileSystem for UnspecifiedFS {
type Data = ();
const NAME: &'static CStr = crate::c_str!("unspecified");
const IS_UNSPECIFIED: bool = true;
- fn fill_super(_: &mut SuperBlock<Self, sb::New>) -> Result {
+ fn fill_super(_: &mut SuperBlock<Self, sb::New>, _: Option<inode::Mapper>) -> Result {
Err(ENOTSUPP)
}
@@ -139,7 +150,9 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit<
fs.name = T::NAME.as_char_ptr();
fs.init_fs_context = Some(Self::init_fs_context_callback::<T>);
fs.kill_sb = Some(Self::kill_sb_callback::<T>);
- fs.fs_flags = 0;
+ fs.fs_flags = if let sb::Type::BlockDev = T::SUPER_TYPE {
+ bindings::FS_REQUIRES_DEV as i32
+ } else { 0 };
// SAFETY: Pointers stored in `fs` are static so will live for as long as the
// registration is active (it is undone in `drop`).
@@ -162,9 +175,16 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit<
unsafe extern "C" fn kill_sb_callback<T: FileSystem + ?Sized>(
sb_ptr: *mut bindings::super_block,
) {
- // SAFETY: In `get_tree_callback` we always call `get_tree_nodev`, so `kill_anon_super` is
- // the appropriate function to call for cleanup.
- unsafe { bindings::kill_anon_super(sb_ptr) };
+ match T::SUPER_TYPE {
+ // SAFETY: In `get_tree_callback` we always call `get_tree_bdev` for
+ // `sb::Type::BlockDev`, so `kill_block_super` is the appropriate function to call
+ // for cleanup.
+ sb::Type::BlockDev => unsafe { bindings::kill_block_super(sb_ptr) },
+ // SAFETY: In `get_tree_callback` we always call `get_tree_nodev` for
+ // `sb::Type::Independent`, so `kill_anon_super` is the appropriate function to call
+ // for cleanup.
+ sb::Type::Independent => unsafe { bindings::kill_anon_super(sb_ptr) },
+ }
// SAFETY: The C API contract guarantees that `sb_ptr` is valid for read.
let ptr = unsafe { (*sb_ptr).s_fs_info };
@@ -200,9 +220,18 @@ impl<T: FileSystem + ?Sized> Tables<T> {
};
unsafe extern "C" fn get_tree_callback(fc: *mut bindings::fs_context) -> ffi::c_int {
- // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has
- // the right type and is a valid callback.
- unsafe { bindings::get_tree_nodev(fc, Some(Self::fill_super_callback)) }
+ match T::SUPER_TYPE {
+ // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has
+ // the right type and is a valid callback.
+ sb::Type::BlockDev => unsafe {
+ bindings::get_tree_bdev(fc, Some(Self::fill_super_callback))
+ },
+ // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has
+ // the right type and is a valid callback.
+ sb::Type::Independent => unsafe {
+ bindings::get_tree_nodev(fc, Some(Self::fill_super_callback))
+ },
+ }
}
unsafe extern "C" fn fill_super_callback(
@@ -221,7 +250,14 @@ impl<T: FileSystem + ?Sized> Tables<T> {
sb.s_xattr = &Tables::<T>::XATTR_HANDLERS[0];
sb.s_flags |= bindings::SB_RDONLY;
- let data = T::fill_super(new_sb)?;
+ let mapper = if matches!(T::SUPER_TYPE, sb::Type::BlockDev) {
+ // SAFETY: This is the only mapper created for this inode, so it is unique.
+ Some(unsafe { new_sb.bdev().inode().mapper() })
+ } else {
+ None
+ };
+
+ let data = T::fill_super(new_sb, mapper)?;
// N.B.: Even on failure, `kill_sb` is called and frees the data.
sb.s_fs_info = data.into_foreign().cast_mut();
@@ -369,7 +405,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> {
///
/// ```
/// # mod module_fs_sample {
-/// use kernel::fs::{dentry, inode::INode, sb, sb::SuperBlock, self};
+/// use kernel::fs::{dentry, inode::INode, inode::Mapper, sb, sb::SuperBlock, self};
/// use kernel::prelude::*;
///
/// kernel::module_fs! {
@@ -384,7 +420,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> {
/// impl fs::FileSystem for MyFs {
/// type Data = ();
/// const NAME: &'static CStr = kernel::c_str!("myfs");
-/// fn fill_super(_: &mut SuperBlock<Self, sb::New>) -> Result {
+/// fn fill_super(_: &mut SuperBlock<Self, sb::New>, _: Option<Mapper>) -> Result {
/// todo!()
/// }
/// fn init_root(_sb: &SuperBlock<Self>) -> Result<dentry::Root<Self>> {
diff --git a/rust/kernel/fs/inode.rs b/rust/kernel/fs/inode.rs
index 75b68d697a6e..5b3602362521 100644
--- a/rust/kernel/fs/inode.rs
+++ b/rust/kernel/fs/inode.rs
@@ -7,13 +7,16 @@
//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h)
use super::{
- address_space, dentry, dentry::DEntry, file, sb::SuperBlock, FileSystem, Offset, UnspecifiedFS,
+ address_space, dentry, dentry::DEntry, file, sb::SuperBlock, FileSystem, Offset, PageOffset,
+ UnspecifiedFS,
};
-use crate::error::{code::*, Result};
+use crate::error::{code::*, from_err_ptr, Result};
use crate::types::{ARef, AlwaysRefCounted, Either, ForeignOwnable, Lockable, Locked, Opaque};
-use crate::{bindings, block, str::CStr, str::CString, time::Timespec};
+use crate::{
+ bindings, block, build_error, folio, folio::Folio, str::CStr, str::CString, time::Timespec,
+};
use core::mem::ManuallyDrop;
-use core::{marker::PhantomData, ptr};
+use core::{cmp, marker::PhantomData, ops::Deref, ptr};
use macros::vtable;
/// The number of an inode.
@@ -93,6 +96,129 @@ pub fn size(&self) -> Offset {
// SAFETY: `self` is guaranteed to be valid by the existence of a shared reference.
unsafe { bindings::i_size_read(self.0.get()) }
}
+
+    /// Creates a [`Mapper`] whose range starts at offset 0 and ends at [`Offset::MAX`].
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that mappers are unique for a given inode and range. For an inode
+    /// that backs a block device, one mapper is already created when the file system is mounted,
+    /// so callers in that situation must ensure that the mount-time mapper is never used.
+    pub unsafe fn mapper(&self) -> Mapper<T> {
+        // Take a new counted reference to the inode for the mapper to hold on to.
+        let inode = self.into();
+        Mapper {
+            inode,
+            begin: 0,
+            end: Offset::MAX,
+        }
+    }
+
+    /// Reads the folio at the page index of `offset` and returns it mapped at the in-page offset.
+    ///
+    /// Fails if the page index or the in-page offset cannot be converted to the unsigned types
+    /// expected by the callees (e.g. when `offset` is negative), or if reading or mapping the
+    /// folio fails.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that there are no concurrent mutable mappings of the folio.
+    pub unsafe fn mapped_folio(
+        &self,
+        offset: Offset,
+    ) -> Result<folio::Mapped<'_, folio::PageCache<T>>> {
+        // Split the byte offset into a page-cache index and the remainder within that page.
+        let in_page_mask = (bindings::PAGE_SIZE - 1) as Offset;
+        let index = offset >> bindings::PAGE_SHIFT;
+        let within_page = offset & in_page_mask;
+
+        let folio = self.read_mapping_folio(index.try_into()?)?;
+
+        // SAFETY: The safety requirements guarantee that there are no concurrent mutable mappings
+        // of the folio.
+        unsafe { Folio::map_owned(folio, within_page.try_into()?) }
+    }
+
+    /// Returns the folio at the given page index.
+    ///
+    /// The folio is obtained from the inode's `i_mapping` through C's `read_mapping_folio`, with
+    /// no `file` supplied. The returned [`ARef`] takes over the reference that the C call
+    /// acquired.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error encoded in the pointer returned by the C side, `EIO` if that pointer is
+    /// null, or a conversion error if `index` does not fit in the C index type.
+    pub fn read_mapping_folio(
+        &self,
+        index: PageOffset,
+    ) -> Result<ARef<Folio<folio::PageCache<T>>>> {
+        // SAFETY: `self` is guaranteed to be valid by the existence of a shared reference, so its
+        // `i_mapping` field can be read. The C API allows `file` to be null when the mapping does
+        // not need it.
+        let folio = from_err_ptr(unsafe {
+            bindings::read_mapping_folio(
+                (*self.0.get()).i_mapping,
+                index.try_into()?,
+                ptr::null_mut(),
+            )
+        })?;
+        // `from_err_ptr` only filters out error-encoded pointers, so reject null explicitly.
+        let ptr = ptr::NonNull::new(folio)
+            .ok_or(EIO)?
+            .cast::<Folio<folio::PageCache<T>>>();
+        // SAFETY: The folio returned by read_mapping_folio has had its refcount incremented.
+        Ok(unsafe { ARef::from_raw(ptr) })
+    }
+
+    /// Iterates over the given range, one folio at a time.
+    ///
+    /// The range starts at byte offset `first` and spans at most `len` bytes; it is clamped to
+    /// the size of the inode. `cb` is called with each mapped slice in order. Iteration stops as
+    /// soon as `cb` returns `Ok(Some(_))`, and that value is returned.
+    ///
+    /// Returns `Ok(None)` if `first` is at or beyond the size of the inode, or if the whole range
+    /// was visited without `cb` producing a value.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EIO` if the range cannot be represented as an [`Offset`], and propagates any
+    /// error from mapping a folio or from `cb`.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that there are no concurrent mutable mappings of the folio.
+    pub unsafe fn for_each_page<U>(
+        &self,
+        first: Offset,
+        len: Offset,
+        mut cb: impl FnMut(&[u8]) -> Result<Option<U>>,
+    ) -> Result<Option<U>> {
+        // Read the size once so that the bounds check and the clamping below use the same value.
+        let size = self.size();
+        if first >= size {
+            return Ok(None);
+        }
+
+        // `first` may be negative, in which case `size - first` can overflow; report that the
+        // same way as the overflow check on `first + remain` below.
+        let mut remain = cmp::min(len, size.checked_sub(first).ok_or(EIO)?);
+        first.checked_add(remain).ok_or(EIO)?;
+
+        let mut next = first;
+        while remain > 0 {
+            // SAFETY: The safety requirements of this function satisfy those of `mapped_folio`.
+            let data = unsafe { self.mapped_folio(next)? };
+            // Never hand out more than what is left of the requested range; if `remain` does not
+            // fit in a `usize`, the mapping length is the only limit.
+            let avail = cmp::min(data.len(), remain.try_into().unwrap_or(usize::MAX));
+            let ret = cb(&data[..avail])?;
+            if ret.is_some() {
+                return Ok(ret);
+            }
+
+            next += avail as Offset;
+            remain -= avail as Offset;
+        }
+
+        Ok(None)
+    }
+}
+
+impl<T: FileSystem + ?Sized, U: Deref<Target = INode<T>>> Locked<U, ReadSem> {
+ /// Returns a mapped folio at the given offset; the read lock held by `self` makes this safe.
+ // TODO: This conflicts with Locked<Folio>::write. Once we settle on a way to handle reading
+ // the contents of certain inodes (e.g., directories, links), then we switch to that and
+ // remove this.
+ pub fn mapped_folio<'a>(
+ &'a self,
+ offset: Offset,
+ ) -> Result<folio::Mapped<'a, folio::PageCache<T>>>
+ where
+ T: 'a,
+ {
+ if T::IS_UNSPECIFIED {
+ build_error!("unspecified file systems cannot safely map folios");
+ }
+
+ // SAFETY: The inode is locked in read mode, so it's ok to map its contents.
+ unsafe { self.deref().mapped_folio(offset) }
+ }
+
+ /// Iterates over the given range, one folio at a time, calling `cb` on each mapped slice.
+ // TODO: This has the same issue as `mapped_folio` above (conflict with `Locked<Folio>::write`).
+ pub fn for_each_page<V>(
+ &self,
+ first: Offset,
+ len: Offset,
+ cb: impl FnMut(&[u8]) -> Result<Option<V>>,
+ ) -> Result<Option<V>> {
+ if T::IS_UNSPECIFIED {
+ build_error!("unspecified file systems cannot safely map folios");
+ }
+
+ // SAFETY: The inode is locked in read mode, so it's ok to map its contents.
+ unsafe { self.deref().for_each_page(first, len, cb) }
+ }
}
// SAFETY: The type invariants guarantee that `INode` is always ref-counted.
@@ -111,6 +237,7 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
/// Indicates that the an inode's rw semapahore is locked in read (shared) mode.
pub struct ReadSem;
+// SAFETY: `raw_lock` calls `inode_lock_shared` which locks the inode in shared mode.
unsafe impl<T: FileSystem + ?Sized> Lockable<ReadSem> for INode<T> {
fn raw_lock(&self) {
// SAFETY: Since there's a reference to the inode, it must be valid.
@@ -432,3 +559,89 @@ extern "C" fn drop_cstring(ptr: *mut core::ffi::c_void) {
Self(&Table::<U>::TABLE, PhantomData)
}
}
+
+/// Allows mapping the contents of an inode within the byte range `begin..end` (`end` excluded).
+///
+/// # Invariants
+///
+/// Mappers are unique per range per inode.
+pub struct Mapper<T: FileSystem + ?Sized = UnspecifiedFS> {
+ inode: ARef<INode<T>>,
+ begin: Offset,
+ end: Offset,
+}
+
+// SAFETY: All inode and folio operations are safe from any thread.
+unsafe impl<T: FileSystem + ?Sized> Send for Mapper<T> {}
+
+// SAFETY: All inode and folio operations are safe from any thread.
+unsafe impl<T: FileSystem + ?Sized> Sync for Mapper<T> {}
+
+impl<T: FileSystem + ?Sized> Mapper<T> {
+    /// Splits the mapper into two ranges.
+    ///
+    /// The first range is from the beginning of `self` up to and including `offset - 1`. The
+    /// second range is from `offset` to the end of `self`.
+    ///
+    /// If `offset` is at or before the beginning of `self`, the first mapper is empty and the
+    /// second one is `self` unchanged. If `offset` is at or past the end of `self`, the first
+    /// mapper is `self` unchanged and the second one is empty.
+    pub fn split_at(mut self, offset: Offset) -> (Self, Self) {
+        // Both halves refer to the same inode, so the new half needs its own reference.
+        let inode = self.inode.clone();
+        if offset <= self.begin {
+            (
+                Self {
+                    inode,
+                    begin: offset,
+                    end: offset,
+                },
+                self,
+            )
+        } else if offset >= self.end {
+            (
+                self,
+                Self {
+                    inode,
+                    begin: offset,
+                    end: offset,
+                },
+            )
+        } else {
+            // `offset` is strictly inside the range: shrink `self` to the first half and hand
+            // the rest to a new mapper, so the two ranges never overlap.
+            let end = self.end;
+            self.end = offset;
+            (
+                self,
+                Self {
+                    inode,
+                    begin: offset,
+                    end,
+                },
+            )
+        }
+    }
+
+    /// Returns a mapped folio at the given offset.
+    ///
+    /// The length of the returned mapping is capped so that it does not extend past the end of
+    /// this mapper's range.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ERANGE` if `offset` is outside of this mapper's range, and propagates any error
+    /// from mapping the folio.
+    pub fn mapped_folio(&self, offset: Offset) -> Result<folio::Mapped<'_, folio::PageCache<T>>> {
+        if offset < self.begin || offset >= self.end {
+            return Err(ERANGE);
+        }
+
+        // SAFETY: By the type invariant, there are no other mutable mappings of the folio.
+        let mut map = unsafe { self.inode.mapped_folio(offset) }?;
+        // `offset < self.end` was checked above, so the difference is positive. It may still not
+        // fit in a `usize` (a whole-inode mapper ends at `Offset::MAX`), in which case the range
+        // imposes no cap that a `usize` length could exceed; saturate instead of failing.
+        map.cap_len((self.end - offset).try_into().unwrap_or(usize::MAX));
+        Ok(map)
+    }
+
+    /// Iterates over the given range, one folio at a time.
+    ///
+    /// The range starts at `first` and spans at most `len` bytes; it is clamped to the end of
+    /// this mapper's range. `cb` is called with each mapped slice, and iteration stops as soon
+    /// as it returns `Ok(Some(_))`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ERANGE` if `first` is outside of this mapper's range, and propagates any error
+    /// from the underlying inode iteration or from `cb`.
+    pub fn for_each_page<U>(
+        &self,
+        first: Offset,
+        len: Offset,
+        cb: impl FnMut(&[u8]) -> Result<Option<U>>,
+    ) -> Result<Option<U>> {
+        if first < self.begin || first >= self.end {
+            return Err(ERANGE);
+        }
+
+        // Never let the iteration run past the end of this mapper's range.
+        let actual_len = cmp::min(len, self.end - first);
+
+        // SAFETY: By the type invariant, there are no other mutable mappings of the folio.
+        unsafe { self.inode.for_each_page(first, actual_len, cb) }
+    }
+}
diff --git a/rust/kernel/fs/sb.rs b/rust/kernel/fs/sb.rs
index 7c0c52e6da0a..93c7b2770163 100644
--- a/rust/kernel/fs/sb.rs
+++ b/rust/kernel/fs/sb.rs
@@ -8,11 +8,22 @@
use super::inode::{self, INode, Ino};
use super::FileSystem;
-use crate::bindings;
use crate::error::{code::*, Result};
use crate::types::{ARef, Either, ForeignOwnable, Opaque};
+use crate::{bindings, block, build_error};
use core::{marker::PhantomData, ptr};
+/// Type of superblock keying.
+///
+/// It determines how C's `fs_context_operations::get_tree` is implemented.
+pub enum Type {
+ /// Multiple independent superblocks may exist; implemented with C's `get_tree_nodev`.
+ Independent,
+
+ /// Uses a block device; implemented with C's `get_tree_bdev` and sets `FS_REQUIRES_DEV`.
+ BlockDev,
+}
+
/// A typestate for [`SuperBlock`] that indicates that it's a new one, so not fully initialized
/// yet.
pub struct New;
@@ -75,6 +86,28 @@ pub fn rdonly(&self) -> bool {
// SAFETY: `s_flags` only changes during init, so it is safe to read it.
unsafe { (*self.0.get()).s_flags & bindings::SB_RDONLY != 0 }
}
+
+ /// Returns the block device associated with the superblock.
+ pub fn bdev(&self) -> &block::Device {
+ if !matches!(T::SUPER_TYPE, Type::BlockDev) {
+ build_error!("bdev is only available in blockdev superblocks");
+ }
+
+ // SAFETY: The superblock is valid and given that it's a blockdev superblock it must have a
+ // valid `s_bdev` that remains valid while the superblock (`self`) is valid.
+ unsafe { block::Device::from_raw((*self.0.get()).s_bdev) }
+ }
+
+ /// Returns the number of sectors in the underlying block device.
+ pub fn sector_count(&self) -> block::Sector {
+ if !matches!(T::SUPER_TYPE, Type::BlockDev) {
+ build_error!("sector_count is only available in blockdev superblocks");
+ }
+
+ // SAFETY: The superblock is valid and given that it's a blockdev superblock it must have a
+ // valid `s_bdev`.
+ unsafe { bindings::bdev_nr_sectors((*self.0.get()).s_bdev) }
+ }
}
impl<T: FileSystem + ?Sized> SuperBlock<T, New> {
@@ -85,6 +118,20 @@ pub fn set_magic(&mut self, magic: usize) -> &mut Self {
unsafe { (*self.0.get()).s_magic = magic as core::ffi::c_ulong };
self
}
+
+ /// Sets the device blocksize, subject to the minimum accepted by the device.
+ ///
+ /// Returns the actual value set.
+ pub fn min_blocksize(&mut self, size: i32) -> i32 {
+ if !matches!(T::SUPER_TYPE, Type::BlockDev) {
+ build_error!("min_blocksize is only available in blockdev superblocks");
+ }
+
+ // SAFETY: This is a new superblock that is being initialised, so it's ok to set the block
+ // size. Additionally, we've checked that this superblock is backed by a block device, so
+ // it is also valid.
+ unsafe { bindings::sb_min_blocksize(self.0.get(), size) }
+ }
}
impl<T: FileSystem + ?Sized, S: DataInited> SuperBlock<T, S> {
diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs
index 7027ca067f8f..fea3360b6e7a 100644
--- a/samples/rust/rust_rofs.rs
+++ b/samples/rust/rust_rofs.rs
@@ -101,7 +101,7 @@ impl fs::FileSystem for RoFs {
type Data = ();
const NAME: &'static CStr = c_str!("rust_rofs");
- fn fill_super(sb: &mut sb::SuperBlock<Self, sb::New>) -> Result {
+ fn fill_super(sb: &mut sb::SuperBlock<Self, sb::New>, _: Option<inode::Mapper>) -> Result {
sb.set_magic(0x52555354);
Ok(())
}
--
2.34.1
Powered by blists - more mailing lists