ACK: [SRU B][PATCH 1/1] ceph: track read contexts in ceph_file_info
Kleber Souza
kleber.souza at canonical.com
Fri Jul 27 10:36:35 UTC 2018
On 07/24/18 05:18, Daniel Axtens wrote:
> From: "Yan, Zheng" <zyan at redhat.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1783246
>
> Previously ceph_read_iter() used current->journal_info to pass context
> info to ceph_readpages(), so that ceph_readpages() could distinguish
> read(2) from readahead(2)/fadvise(2)/madvise(2). The problem is that a
> page fault can happen when copying data to userspace memory. The page
> fault may call another filesystem's page_mkwrite() if the userspace
> memory is mapped to a file. The latter filesystem may also want to use
> current->journal_info.
>
> The fix is to define an on-stack data structure in ceph_read_iter() and
> add it to the context list in ceph_file_info. ceph_readpages() searches
> the list to find whether there is a context that belongs to the current
> thread.
>
> Signed-off-by: "Yan, Zheng" <zyan at redhat.com>
> Signed-off-by: Ilya Dryomov <idryomov at gmail.com>
> (cherry picked from commit 5d988308283ecf062fa88f20ae05c52cce0bcdca)
> Signed-off-by: Daniel Axtens <daniel.axtens at canonical.com>
Acked-by: Kleber Sacilotto de Souza <kleber.souza at canonical.com>
> ---
> fs/ceph/addr.c | 19 ++++++++++++-------
> fs/ceph/file.c | 10 ++++++++--
> fs/ceph/super.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 66 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index dbf07051aacd..78a1208b878e 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -299,7 +299,8 @@ static void finish_read(struct ceph_osd_request *req)
> * start an async read(ahead) operation. return nr_pages we submitted
> * a read for on success, or negative error code.
> */
> -static int start_read(struct inode *inode, struct list_head *page_list, int max)
> +static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
> + struct list_head *page_list, int max)
> {
> struct ceph_osd_client *osdc =
> &ceph_inode_to_client(inode)->client->osdc;
> @@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
> int got = 0;
> int ret = 0;
>
> - if (!current->journal_info) {
> + if (!rw_ctx) {
> /* caller of readpages does not hold buffer and read caps
> * (fadvise, madvise and readahead cases) */
> int want = CEPH_CAP_FILE_CACHE;
> @@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
> {
> struct inode *inode = file_inode(file);
> struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> + struct ceph_file_info *ci = file->private_data;
> + struct ceph_rw_context *rw_ctx;
> int rc = 0;
> int max = 0;
>
> @@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
> if (rc == 0)
> goto out;
>
> + rw_ctx = ceph_find_rw_context(ci);
> max = fsc->mount_options->rsize >> PAGE_SHIFT;
> - dout("readpages %p file %p nr_pages %d max %d\n",
> - inode, file, nr_pages, max);
> + dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
> + inode, file, rw_ctx, nr_pages, max);
> while (!list_empty(page_list)) {
> - rc = start_read(inode, page_list, max);
> + rc = start_read(inode, rw_ctx, page_list, max);
> if (rc < 0)
> goto out;
> }
> @@ -1450,9 +1454,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
>
> if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
> ci->i_inline_version == CEPH_INLINE_NONE) {
> - current->journal_info = vma->vm_file;
> + CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
> + ceph_add_rw_context(fi, &rw_ctx);
> ret = filemap_fault(vmf);
> - current->journal_info = NULL;
> + ceph_del_rw_context(fi, &rw_ctx);
> } else
> ret = -EAGAIN;
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 0024d3e61bcd..7f75601d24d9 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> return -ENOMEM;
> }
> cf->fmode = fmode;
> +
> + spin_lock_init(&cf->rw_contexts_lock);
> + INIT_LIST_HEAD(&cf->rw_contexts);
> +
> cf->next_offset = 2;
> cf->readdir_cache_idx = -1;
> file->private_data = cf;
> @@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file)
> ceph_mdsc_put_request(cf->last_readdir);
> kfree(cf->last_name);
> kfree(cf->dir_info);
> + WARN_ON(!list_empty(&cf->rw_contexts));
> kmem_cache_free(ceph_file_cachep, cf);
>
> /* wake up anyone waiting for caps on this inode */
> @@ -1202,12 +1207,13 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
> retry_op = READ_INLINE;
> }
> } else {
> + CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
> dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
> inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
> ceph_cap_string(got));
> - current->journal_info = filp;
> + ceph_add_rw_context(fi, &rw_ctx);
> ret = generic_file_read_iter(iocb, to);
> - current->journal_info = NULL;
> + ceph_del_rw_context(fi, &rw_ctx);
> }
> dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
> inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 2beeec07fa76..dd59bc7d2c3d 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -668,6 +668,9 @@ struct ceph_file_info {
> short fmode; /* initialized on open */
> short flags; /* CEPH_F_* */
>
> + spinlock_t rw_contexts_lock;
> + struct list_head rw_contexts;
> +
> /* readdir: position within the dir */
> u32 frag;
> struct ceph_mds_request *last_readdir;
> @@ -684,6 +687,49 @@ struct ceph_file_info {
> int dir_info_len;
> };
>
> +struct ceph_rw_context {
> + struct list_head list;
> + struct task_struct *thread;
> + int caps;
> +};
> +
> +#define CEPH_DEFINE_RW_CONTEXT(_name, _caps) \
> + struct ceph_rw_context _name = { \
> + .thread = current, \
> + .caps = _caps, \
> + }
> +
> +static inline void ceph_add_rw_context(struct ceph_file_info *cf,
> + struct ceph_rw_context *ctx)
> +{
> + spin_lock(&cf->rw_contexts_lock);
> + list_add(&ctx->list, &cf->rw_contexts);
> + spin_unlock(&cf->rw_contexts_lock);
> +}
> +
> +static inline void ceph_del_rw_context(struct ceph_file_info *cf,
> + struct ceph_rw_context *ctx)
> +{
> + spin_lock(&cf->rw_contexts_lock);
> + list_del(&ctx->list);
> + spin_unlock(&cf->rw_contexts_lock);
> +}
> +
> +static inline struct ceph_rw_context*
> +ceph_find_rw_context(struct ceph_file_info *cf)
> +{
> + struct ceph_rw_context *ctx, *found = NULL;
> + spin_lock(&cf->rw_contexts_lock);
> + list_for_each_entry(ctx, &cf->rw_contexts, list) {
> + if (ctx->thread == current) {
> + found = ctx;
> + break;
> + }
> + }
> + spin_unlock(&cf->rw_contexts_lock);
> + return found;
> +}
> +
> struct ceph_readdir_cache_control {
> struct page *page;
> struct dentry **dentries;
>
More information about the kernel-team
mailing list