Skip to content

Commit

Permalink
Merge tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph updates from Ilya Dryomov:
 "A varied set of changes:

   - a large rework of cephx auth code to cope with CONFIG_VMAP_STACK
     (myself). Also fixed a deadlock caused by a bogus allocation on the
     writeback path and authorize reply verification.

   - a fix for long stalls during fsync (Jeff Layton). The client now
     has a way to force the MDS log flush, leading to ~100x speedups in
     some synthetic tests.

   - a new [no]require_active_mds mount option (Zheng Yan).

     On mount, we will now check whether any of the MDSes are available
     and bail rather than block if none are. This check can be avoided
     by specifying the "no" option.

   - a couple of MDS cap handling fixes and a few assorted patches
     throughout"

* tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client: (32 commits)
  libceph: remove now unused finish_request() wrapper
  libceph: always signal completion when done
  ceph: avoid creating orphan object when checking pool permission
  ceph: properly set issue_seq for cap release
  ceph: add flags parameter to send_cap_msg
  ceph: update cap message struct version to 10
  ceph: define new argument structure for send_cap_msg
  ceph: move xattr initialzation before the encoding past the ceph_mds_caps
  ceph: fix minor typo in unsafe_request_wait
  ceph: record truncate size/seq for snap data writeback
  ceph: check availability of mds cluster on mount
  ceph: fix splice read for no Fc capability case
  ceph: try getting buffer capability for readahead/fadvise
  ceph: fix scheduler warning due to nested blocking
  ceph: fix printing wrong return variable in ceph_direct_read_write()
  crush: include mapper.h in mapper.c
  rbd: silence bogus -Wmaybe-uninitialized warning
  libceph: no need to drop con->mutex for ->get_authorizer()
  libceph: drop len argument of *verify_authorizer_reply()
  libceph: verify authorize reply on connect
  ...
  • Loading branch information
torvalds committed Dec 16, 2016
2 parents ff0f962 + 45ee2c1 commit 59331c2
Show file tree
Hide file tree
Showing 23 changed files with 809 additions and 724 deletions.
2 changes: 1 addition & 1 deletion drivers/block/rbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -3756,7 +3756,7 @@ static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
struct rbd_device *rbd_dev = arg;
void *p = data;
void *const end = p + data_len;
u8 struct_v;
u8 struct_v = 0;
u32 len;
u32 notify_op;
int ret;
Expand Down
98 changes: 75 additions & 23 deletions fs/ceph/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,32 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
struct page **pages;
pgoff_t next_index;
int nr_pages = 0;
int ret;
int got = 0;
int ret = 0;

if (!current->journal_info) {
/* caller of readpages does not hold buffer and read caps
* (fadvise, madvise and readahead cases) */
int want = CEPH_CAP_FILE_CACHE;
ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
if (ret < 0) {
dout("start_read %p, error getting cap\n", inode);
} else if (!(got & want)) {
dout("start_read %p, no cache cap\n", inode);
ret = 0;
}
if (ret <= 0) {
if (got)
ceph_put_cap_refs(ci, got);
while (!list_empty(page_list)) {
page = list_entry(page_list->prev,
struct page, lru);
list_del(&page->lru);
put_page(page);
}
return ret;
}
}

off = (u64) page_offset(page);

Expand All @@ -338,15 +363,18 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
false);
if (IS_ERR(req))
return PTR_ERR(req);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
goto out;
}

/* build page vector */
nr_pages = calc_pages_for(0, len);
pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL);
ret = -ENOMEM;
if (!pages)
goto out;
if (!pages) {
ret = -ENOMEM;
goto out_put;
}
for (i = 0; i < nr_pages; ++i) {
page = list_entry(page_list->prev, struct page, lru);
BUG_ON(PageLocked(page));
Expand Down Expand Up @@ -378,6 +406,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
if (ret < 0)
goto out_pages;
ceph_osdc_put_request(req);

/* After adding locked pages to page cache, the inode holds cache cap.
* So we can drop our cap refs. */
if (got)
ceph_put_cap_refs(ci, got);

return nr_pages;

out_pages:
Expand All @@ -386,8 +420,11 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
unlock_page(pages[i]);
}
ceph_put_page_vector(pages, nr_pages, false);
out:
out_put:
ceph_osdc_put_request(req);
out:
if (got)
ceph_put_cap_refs(ci, got);
return ret;
}

Expand Down Expand Up @@ -424,7 +461,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
rc = start_read(inode, page_list, max);
if (rc < 0)
goto out;
BUG_ON(rc == 0);
}
out:
ceph_fscache_readpages_cancel(inode, page_list);
Expand All @@ -438,7 +474,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
* only snap context we are allowed to write back.
*/
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
loff_t *snap_size)
loff_t *snap_size,
u64 *truncate_size,
u32 *truncate_seq)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc = NULL;
Expand All @@ -452,13 +490,21 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
snapc = ceph_get_snap_context(capsnap->context);
if (snap_size)
*snap_size = capsnap->size;
if (truncate_size)
*truncate_size = capsnap->truncate_size;
if (truncate_seq)
*truncate_seq = capsnap->truncate_seq;
break;
}
}
if (!snapc && ci->i_wrbuffer_ref_head) {
snapc = ceph_get_snap_context(ci->i_head_snapc);
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
if (truncate_size)
*truncate_size = capsnap->truncate_size;
if (truncate_seq)
*truncate_seq = capsnap->truncate_seq;
}
spin_unlock(&ci->i_ceph_lock);
return snapc;
Expand Down Expand Up @@ -501,7 +547,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
}
oldest = get_oldest_context(inode, &snap_size);
oldest = get_oldest_context(inode, &snap_size,
&truncate_size, &truncate_seq);
if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
inode, page, snapc);
Expand All @@ -512,12 +559,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
}
ceph_put_snap_context(oldest);

spin_lock(&ci->i_ceph_lock);
truncate_seq = ci->i_truncate_seq;
truncate_size = ci->i_truncate_size;
if (snap_size == -1)
snap_size = i_size_read(inode);
spin_unlock(&ci->i_ceph_lock);

/* is this a partial page at end of file? */
if (page_off >= snap_size) {
Expand Down Expand Up @@ -764,7 +807,8 @@ static int ceph_writepages_start(struct address_space *mapping,
/* find oldest snap context with dirty data */
ceph_put_snap_context(snapc);
snap_size = -1;
snapc = get_oldest_context(inode, &snap_size);
snapc = get_oldest_context(inode, &snap_size,
&truncate_size, &truncate_seq);
if (!snapc) {
/* hmm, why does writepages get called when there
is no dirty data? */
Expand All @@ -774,11 +818,7 @@ static int ceph_writepages_start(struct address_space *mapping,
dout(" oldest snapc is %p seq %lld (%d snaps)\n",
snapc, snapc->seq, snapc->num_snaps);

spin_lock(&ci->i_ceph_lock);
truncate_seq = ci->i_truncate_seq;
truncate_size = ci->i_truncate_size;
i_size = i_size_read(inode);
spin_unlock(&ci->i_ceph_lock);

if (last_snapc && snapc != last_snapc) {
/* if we switched to a newer snapc, restart our scan at the
Expand Down Expand Up @@ -1124,7 +1164,8 @@ static int ceph_writepages_start(struct address_space *mapping,
static int context_is_writeable_or_written(struct inode *inode,
struct ceph_snap_context *snapc)
{
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
NULL, NULL);
int ret = !oldest || snapc->seq <= oldest->seq;

ceph_put_snap_context(oldest);
Expand Down Expand Up @@ -1169,7 +1210,7 @@ static int ceph_update_writeable_page(struct file *file,
* this page is already dirty in another (older) snap
* context! is it writeable now?
*/
oldest = get_oldest_context(inode, NULL);
oldest = get_oldest_context(inode, NULL, NULL, NULL);

if (snapc->seq > oldest->seq) {
ceph_put_snap_context(oldest);
Expand Down Expand Up @@ -1371,9 +1412,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));

if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE)
ci->i_inline_version == CEPH_INLINE_NONE) {
current->journal_info = vma->vm_file;
ret = filemap_fault(vma, vmf);
else
current->journal_info = NULL;
} else
ret = -EAGAIN;

dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
Expand Down Expand Up @@ -1905,6 +1948,15 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
struct ceph_string *pool_ns;
int ret, flags;

if (ci->i_vino.snap != CEPH_NOSNAP) {
/*
* Pool permission check needs to write to the first object.
* But for snapshot, head of the first object may have alread
* been deleted. Skip check to avoid creating orphan object.
*/
return 0;
}

if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
NOPOOLPERM))
return 0;
Expand Down
Loading

0 comments on commit 59331c2

Please sign in to comment.