Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ptfs: refine the way to support O_DIRECT #176

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 8 additions & 17 deletions src/passthrough/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use std::os::fd::{AsFd, BorrowedFd};
use std::os::unix::ffi::OsStringExt;
use std::os::unix::io::{AsRawFd, RawFd};
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockWriteGuard};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, RwLock, RwLockWriteGuard};
use std::time::Duration;

use vm_memory::{bitmap::BitmapSlice, ByteValued};
Expand All @@ -35,7 +35,7 @@ use self::mount_fd::MountFds;
use self::statx::{statx, StatExt};
use self::util::{
ebadf, einval, enosys, eperm, is_dir, is_safe_inode, openat, reopen_fd_through_proc, stat_fd,
UniqueInodeGenerator,
FileFlagGuard, UniqueInodeGenerator,
};
use crate::abi::fuse_abi as fuse;
use crate::abi::fuse_abi::Opcode;
Expand Down Expand Up @@ -256,39 +256,30 @@ impl InodeMap {
struct HandleData {
inode: Inode,
file: File,
lock: Mutex<()>,
open_flags: AtomicU32,
open_flags: RwLock<u32>,
}

impl HandleData {
fn new(inode: Inode, file: File, flags: u32) -> Self {
HandleData {
inode,
file,
lock: Mutex::new(()),
open_flags: AtomicU32::new(flags),
open_flags: RwLock::new(flags),
}
}

fn get_file(&self) -> &File {
&self.file
}

fn get_file_mut(&self) -> (MutexGuard<()>, &File) {
(self.lock.lock().unwrap(), &self.file)
fn get_file_mut(&self) -> (FileFlagGuard<u32>, &File) {
let guard = self.open_flags.write().unwrap();
(FileFlagGuard::Writer(guard), &self.file)
}

fn borrow_fd(&self) -> BorrowedFd {
self.file.as_fd()
}

fn get_flags(&self) -> u32 {
self.open_flags.load(Ordering::Relaxed)
}

fn set_flags(&self, flags: u32) {
self.open_flags.store(flags, Ordering::Relaxed);
}
}

struct HandleMap {
Expand Down
60 changes: 36 additions & 24 deletions src/passthrough/sync_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,34 @@ impl<S: BitmapSlice + Send + Sync> PassthroughFs<S> {
/// if these do not match update the file descriptor flags and store the new
/// result in the HandleData entry
#[inline(always)]
fn check_fd_flags(&self, data: Arc<HandleData>, fd: RawFd, flags: u32) -> io::Result<()> {
let open_flags = data.get_flags();
if open_flags != flags {
let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flags) };
fn ensure_file_flags<'a>(
&self,
data: &'a Arc<HandleData>,
fd: &impl AsRawFd,
mut flags: u32,
) -> io::Result<FileFlagGuard<'a, u32>> {
let guard = data.open_flags.read().unwrap();
if *guard & libc::O_DIRECT as u32 == flags & libc::O_DIRECT as u32 {
return Ok(FileFlagGuard::Reader(guard));
}
drop(guard);

let mut guard = data.open_flags.write().unwrap();
// Update the O_DIRECT flag if needed
if *guard & libc::O_DIRECT as u32 != flags & libc::O_DIRECT as u32 {
if flags & libc::O_DIRECT as u32 != 0 {
flags = *guard | libc::O_DIRECT as u32;
} else {
flags = *guard & !libc::O_DIRECT as u32;
}
let ret = unsafe { libc::fcntl(fd.as_raw_fd(), libc::F_SETFL, flags) };
if ret != 0 {
return Err(io::Error::last_os_error());
}
data.set_flags(flags);
*guard = flags;
}
Ok(())

Ok(FileFlagGuard::Writer(guard))
}

fn do_readdir(
Expand Down Expand Up @@ -116,8 +134,7 @@ impl<S: BitmapSlice + Send + Sync> PassthroughFs<S> {

let (front, back) = rem.split_at(size_of::<LinuxDirent64>());

let dirent64 = LinuxDirent64::from_slice(front)
.expect("fuse: unable to get LinuxDirent64 from slice");
let dirent64 = LinuxDirent64::from_slice(front).ok_or_else(einval)?;

let namelen = dirent64.d_reclen as usize - size_of::<LinuxDirent64>();
debug_assert!(
Expand Down Expand Up @@ -659,16 +676,14 @@ impl<S: BitmapSlice + Send + Sync> FileSystem for PassthroughFs<S> {
flags: u32,
) -> io::Result<usize> {
let data = self.get_data(handle, inode, libc::O_RDONLY)?;
let fd = data.borrow_fd();

self.ensure_file_flags(&data, &fd, flags)?;

// Manually implement File::try_clone() by borrowing fd of data.file instead of dup().
// It's safe because the `data` variable's lifetime spans the whole function,
// so data.file won't be closed.
let f = unsafe { File::from_raw_fd(data.borrow_fd().as_raw_fd()) };

self.check_fd_flags(data, f.as_raw_fd(), flags)?;

let mut f = ManuallyDrop::new(f);

let mut f = unsafe { ManuallyDrop::new(File::from_raw_fd(fd.as_raw_fd())) };
w.write_from(&mut *f, size as usize, offset)
}

Expand All @@ -686,21 +701,14 @@ impl<S: BitmapSlice + Send + Sync> FileSystem for PassthroughFs<S> {
fuse_flags: u32,
) -> io::Result<usize> {
let data = self.get_data(handle, inode, libc::O_RDWR)?;
let fd = data.borrow_fd();

// Manually implement File::try_clone() by borrowing fd of data.file instead of dup().
// It's safe because the `data` variable's lifetime spans the whole function,
// so data.file won't be closed.
let f = unsafe { File::from_raw_fd(data.borrow_fd().as_raw_fd()) };

self.check_fd_flags(data, f.as_raw_fd(), flags)?;

self.ensure_file_flags(&data, &fd, flags)?;
if self.seal_size.load(Ordering::Relaxed) {
let st = stat_fd(&f, None)?;
let st = stat_fd(&fd, None)?;
self.seal_size_check(Opcode::Write, st.st_size as u64, offset, size as u64, 0)?;
}

let mut f = ManuallyDrop::new(f);

// Cap restored when _killpriv is dropped
let _killpriv =
if self.killpriv_v2.load(Ordering::Relaxed) && (fuse_flags & WRITE_KILL_PRIV != 0) {
Expand All @@ -709,6 +717,10 @@ impl<S: BitmapSlice + Send + Sync> FileSystem for PassthroughFs<S> {
None
};

// Manually implement File::try_clone() by borrowing fd of data.file instead of dup().
// It's safe because the `data` variable's lifetime spans the whole function,
// so data.file won't be closed.
let mut f = unsafe { ManuallyDrop::new(File::from_raw_fd(fd.as_raw_fd())) };
r.read_to(&mut *f, size as usize, offset)
}

Expand Down
20 changes: 19 additions & 1 deletion src/passthrough/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ use std::ffi::{CStr, CString};
use std::fs::File;
use std::io;
use std::mem::MaybeUninit;
use std::ops::Deref;
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::sync::atomic::{AtomicU64, AtomicU8, Ordering};
use std::sync::Mutex;
use std::sync::{Mutex, RwLockReadGuard, RwLockWriteGuard};

use super::inode_store::InodeId;
use super::MAX_HOST_INO;
Expand Down Expand Up @@ -225,6 +226,23 @@ pub fn eperm() -> io::Error {
io::Error::from_raw_os_error(libc::EPERM)
}

/// A helper structure to hold RwLock guard.
pub enum FileFlagGuard<'a, T> {
Reader(RwLockReadGuard<'a, T>),
Writer(RwLockWriteGuard<'a, T>),
}

impl<'a, T> Deref for FileFlagGuard<'a, T> {
type Target = T;

fn deref(&self) -> &Self::Target {
match self {
FileFlagGuard::Reader(v) => v.deref(),
FileFlagGuard::Writer(v) => v.deref(),
}
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading