@@ -1876,7 +1876,6 @@ struct cl_io {
/* The following are used for fallocate(2) */
int sa_falloc_mode;
loff_t sa_falloc_offset;
- loff_t sa_falloc_len;
loff_t sa_falloc_end;
} ci_setattr;
struct cl_data_version_io {
@@ -4998,7 +4998,7 @@ int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len)
struct lu_env *env;
struct cl_io *io;
u16 refcheck;
- int rc; loff_t sa_falloc_end;
+ int rc;
loff_t size = i_size_read(inode);
env = cl_env_get(&refcheck);
@@ -5011,34 +5011,32 @@ int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len)
io->u.ci_setattr.sa_parent_fid = lu_object_fid(&io->ci_obj->co_lu);
io->u.ci_setattr.sa_falloc_mode = mode;
io->u.ci_setattr.sa_falloc_offset = offset;
- io->u.ci_setattr.sa_falloc_len = len;
- io->u.ci_setattr.sa_falloc_end = io->u.ci_setattr.sa_falloc_offset +
- io->u.ci_setattr.sa_falloc_len;
+ io->u.ci_setattr.sa_falloc_end = offset + len;
io->u.ci_setattr.sa_subtype = CL_SETATTR_FALLOCATE;
- sa_falloc_end = io->u.ci_setattr.sa_falloc_end;
- if (sa_falloc_end > size) {
+ if (io->u.ci_setattr.sa_falloc_end > size) {
+ loff_t newsize = io->u.ci_setattr.sa_falloc_end;
+
/* Check new size against VFS/VM file size limit and rlimit */
- rc = inode_newsize_ok(inode, sa_falloc_end);
+ rc = inode_newsize_ok(inode, newsize);
if (rc)
goto out;
- if (sa_falloc_end > ll_file_maxbytes(inode)) {
+ if (newsize > ll_file_maxbytes(inode)) {
CDEBUG(D_INODE, "file size too large %llu > %llu\n",
- (unsigned long long)(sa_falloc_end),
+ (unsigned long long)newsize,
ll_file_maxbytes(inode));
rc = -EFBIG;
goto out;
}
}
-again:
- if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0)
- rc = cl_io_loop(env, io);
- else
- rc = io->ci_result;
-
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
+ do {
+ rc = cl_io_init(env, io, CIT_SETATTR, io->ci_obj);
+ if (rc)
+ rc = io->ci_result;
+ else
+ rc = cl_io_loop(env, io);
+ cl_io_fini(env, io);
+ } while (unlikely(io->ci_need_restart));
out:
cl_env_put(env, &refcheck);
@@ -5050,6 +5048,9 @@ long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
struct inode *inode = filp->f_path.dentry->d_inode;
int rc;
+ if (offset < 0 || len <= 0)
+ return -EINVAL;
+
/*
* Encrypted inodes can't handle collapse range or zero range or insert
* range since we would need to re-encrypt blocks with a different IV or
@@ -5062,10 +5063,10 @@ long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
return -EOPNOTSUPP;
/*
- * Only mode == 0 (which is standard prealloc) is supported now.
- * Punch is not supported yet.
+ * mode == 0 (which is standard prealloc) and PUNCH are supported.
+ * The rest of the mode options are not supported yet.
*/
- if (mode & ~FALLOC_FL_KEEP_SIZE)
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FALLOCATE, 1);
@@ -1046,6 +1046,9 @@ static int mdc_io_setattr_start(const struct lu_env *env,
return rc;
}
+ if (cl_io_is_fallocate(io))
+ return -EOPNOTSUPP;
+
if (oio->oi_lockless == 0) {
cl_object_attr_lock(obj);
rc = cl_object_attr_get(env, obj, attr);
@@ -530,6 +530,29 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
trunc_check_cb, (void *)&size);
}
+/**
+ * Flush the pages covering the punched range before the punch is sent.
+ * They must not simply be discarded locally: if the server does not support
+ * fallocate punch that would lose data. Writing them back first also keeps
+ * the flush from being re-ordered with the punch. Returns 0 or negative rc.
+ */
+static int osc_punch_start(const struct lu_env *env, struct cl_io *io,
+ struct cl_object *obj)
+{
+ struct osc_object *osc = cl2osc(obj);
+ pgoff_t pg_start = cl_index(obj, io->u.ci_setattr.sa_falloc_offset);
+ pgoff_t pg_end = cl_index(obj, io->u.ci_setattr.sa_falloc_end - 1);
+ int rc;
+
+ rc = osc_cache_writeback_range(env, osc, pg_start, pg_end, 1, 0);
+ if (rc < 0)
+ return rc;
+
+ osc_page_gang_lookup(env, io, osc, pg_start, pg_end, osc_discard_cb,
+ osc);
+ return 0;
+}
+
static int osc_io_setattr_start(const struct lu_env *env,
const struct cl_io_slice *slice)
{
@@ -543,19 +566,17 @@ static int osc_io_setattr_start(const struct lu_env *env,
unsigned int ia_avalid = io->u.ci_setattr.sa_avalid;
enum op_xvalid ia_xvalid = io->u.ci_setattr.sa_xvalid;
u64 size = io->u.ci_setattr.sa_attr.lvb_size;
- u64 end = OBD_OBJECT_EOF;
- bool io_is_falloc = false;
+ bool io_is_falloc = cl_io_is_fallocate(io);
int result = 0;
/* truncate cache dirty pages first */
- if (cl_io_is_trunc(io)) {
+ if (cl_io_is_trunc(io))
result = osc_cache_truncate_start(env, cl2osc(obj), size,
&oio->oi_trunc);
- } else if (cl_io_is_fallocate(io)) {
- io_is_falloc = true;
- size = io->u.ci_setattr.sa_falloc_offset;
- end = io->u.ci_setattr.sa_falloc_end;
- }
+ /* flush local pages prior to punching them on server */
+ if (io_is_falloc &&
+ io->u.ci_setattr.sa_falloc_mode & FALLOC_FL_PUNCH_HOLE)
+ result = osc_punch_start(env, io, obj);
if (result == 0 && oio->oi_lockless == 0) {
cl_object_attr_lock(obj);
@@ -565,14 +586,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
unsigned int cl_valid = 0;
if (ia_avalid & ATTR_SIZE) {
- if (io_is_falloc) {
- attr->cat_size =
- io->u.ci_setattr.sa_attr.lvb_size;
- attr->cat_kms = attr->cat_size;
- } else {
- attr->cat_size = size;
- attr->cat_kms = size;
- }
+ attr->cat_size = size;
+ attr->cat_kms = size;
cl_valid = CAT_SIZE | CAT_KMS;
}
if (ia_avalid & ATTR_MTIME_SET) {
@@ -612,17 +627,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
oa->o_valid |= OBD_MD_FLMTIME;
oa->o_mtime = attr->cat_mtime;
}
- if (ia_avalid & ATTR_SIZE) {
- if (io_is_falloc) {
- oa->o_size = size;
- oa->o_blocks = end;
- oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
- } else {
- oa->o_size = size;
- oa->o_blocks = OBD_OBJECT_EOF;
- oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
- }
+ if (ia_avalid & ATTR_SIZE || io_is_falloc) {
if (oio->oi_lockless) {
oa->o_flags = OBD_FL_SRVLOCK;
oa->o_valid |= OBD_MD_FLFLAGS;
@@ -646,10 +652,16 @@ static int osc_io_setattr_start(const struct lu_env *env,
if (io_is_falloc) {
int falloc_mode = io->u.ci_setattr.sa_falloc_mode;
+ oa->o_size = io->u.ci_setattr.sa_falloc_offset;
+ oa->o_blocks = io->u.ci_setattr.sa_falloc_end;
+ oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
result = osc_fallocate_base(osc_export(cl2osc(obj)),
oa, osc_async_upcall,
cbargs, falloc_mode);
} else if (ia_avalid & ATTR_SIZE) {
+ oa->o_size = size;
+ oa->o_blocks = OBD_OBJECT_EOF;
+ oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
result = osc_punch_send(osc_export(cl2osc(obj)),
oa, osc_async_upcall, cbargs);
} else {
@@ -682,11 +694,11 @@ void osc_io_setattr_end(const struct lu_env *env,
if (result == 0) {
if (oio->oi_lockless) {
/* lockless truncate */
- struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);
+ struct osc_device *osc = lu2osc_dev(obj->co_lu.lo_dev);
LASSERT(cl_io_is_trunc(io) || cl_io_is_fallocate(io));
/* XXX: Need a lock. */
- osd->od_stats.os_lockless_truncates++;
+ osc->od_stats.os_lockless_truncates++;
}
}
@@ -454,14 +454,7 @@ int osc_fallocate_base(struct obd_export *exp, struct obdo *oa,
struct obd_import *imp = class_exp2cliimp(exp);
int rc;
- /*
- * Only mode == 0 (which is standard prealloc) is supported now.
- * Punch is not supported yet.
- */
- if (mode & ~FALLOC_FL_KEEP_SIZE)
- return -EOPNOTSUPP;
oa->o_falloc_mode = mode;
-
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
&RQF_OST_FALLOCATE);
if (!req)