Message ID | 20170209144836.12525-3-jlayton@redhat.com (mailing list archive) |
---|---|
State | New, archived |
> On 9 Feb 2017, at 22:48, Jeff Layton <jlayton@redhat.com> wrote:
>
> When a Ceph volume hits capacity, a flag is set in the OSD map to
> indicate that, and a new map is sprayed around the cluster. With cephfs
> we want it to shut down any abortable requests that are in progress with
> an -ENOSPC error as they'd just hang otherwise.
>
> Add a new ceph_osdc_abort_on_full helper function to handle this. It
> will first check whether there is an out-of-space condition in the
> cluster. It will then walk the tree and abort any request that has
> r_abort_on_full set with an ENOSPC error. Call this new function
> directly whenever we get a new OSD map.
>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
> net/ceph/osd_client.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
>
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index f68bb42da240..cdb0b58c4c99 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -1777,6 +1777,47 @@ static void complete_request(struct ceph_osd_request *req, int err)
> 	ceph_osdc_put_request(req);
> }
>
> +/*
> + * Drop all pending requests that are stalled waiting on a full condition to
> + * clear, and complete them with ENOSPC as the return code.
> + */
> +static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
> +{
> +	struct ceph_osd_request *req;
> +	struct ceph_osd *osd;
> +	struct rb_node *m, *n;
> +	u32 latest_epoch = 0;
> +	bool osdmap_full = ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL);
> +
> +	dout("enter abort_on_full\n");
> +
> +	if (!osdmap_full && !have_pool_full(osdc))
> +		goto out;
> +
> +	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
> +		osd = rb_entry(n, struct ceph_osd, o_node);
> +		mutex_lock(&osd->lock);
> +		m = rb_first(&osd->o_requests);
> +		while (m) {
> +			req = rb_entry(m, struct ceph_osd_request, r_node);
> +			m = rb_next(m);
> +

For requests that have already got an unsafe reply, we should either
ignore them or call req->r_unsafe_callback() to clean them up.

Regards
Yan, Zheng

> +			if (req->r_abort_on_full &&
> +			    (osdmap_full || pool_full(osdc, req->r_t.base_oloc.pool))) {
> +				u32 cur_epoch = le32_to_cpu(req->r_replay_version.epoch);
> +
> +				dout("%s: abort tid=%llu flags 0x%x\n", __func__, req->r_tid, req->r_flags);
> +				complete_request(req, -ENOSPC);
> +				if (cur_epoch > latest_epoch)
> +					latest_epoch = cur_epoch;
> +			}
> +		}
> +		mutex_unlock(&osd->lock);
> +	}
> +out:
> +	dout("return abort_on_full latest_epoch=%u\n", latest_epoch);
> +}
> +
> static void cancel_map_check(struct ceph_osd_request *req)
> {
> 	struct ceph_osd_client *osdc = req->r_osdc;
> @@ -3292,6 +3333,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
>
> 	ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
> 			  osdc->osdmap->epoch);
> +	ceph_osdc_abort_on_full(osdc);
> 	up_write(&osdc->lock);
> 	wake_up_all(&osdc->client->auth_wq);
> 	return;
> --
> 2.9.3
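(A minimal sketch of the check Zheng is suggesting, inside the o_requests
walk. The r_got_reply test and the r_unsafe_callback(req, false) pairing are
assumptions about how this era's osd_client tracked an already-received
in-memory reply; they are not taken from the patch:)

	while (m) {
		req = rb_entry(m, struct ceph_osd_request, r_node);
		m = rb_next(m);

		/*
		 * Already acked with an unsafe (in-memory) reply: don't
		 * fail the request, just run the cleanup side of the
		 * unsafe/safe callback pair and move on.
		 */
		if (req->r_got_reply) {
			if (req->r_unsafe_callback)
				req->r_unsafe_callback(req, false);
			continue;
		}

		/* ... existing r_abort_on_full / pool_full handling ... */
	}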
On Fri, 2017-02-10 at 20:01 +0800, Yan, Zheng wrote:
> > On 9 Feb 2017, at 22:48, Jeff Layton <jlayton@redhat.com> wrote:
> >
> > When a Ceph volume hits capacity, a flag is set in the OSD map to
> > indicate that, and a new map is sprayed around the cluster. With cephfs
> > we want it to shut down any abortable requests that are in progress with
> > an -ENOSPC error as they'd just hang otherwise.
> >
[...]
> > +		m = rb_first(&osd->o_requests);
> > +		while (m) {
> > +			req = rb_entry(m, struct ceph_osd_request, r_node);
> > +			m = rb_next(m);
> > +
>
> For requests that have already got an unsafe reply, we should either
> ignore them or call req->r_unsafe_callback() to clean them up.
>
> Regards
> Yan, Zheng

Ok, yeah. I had to stare at the r_unsafe_callback code a bit the other
day to handle the ERROR_WRITE flag, and what you say makes sense.

Honestly, what we really need is a function like complete_request that
hides all of these fiddly details about the request state. Maybe it
would be simpler to just have complete_request handle the case where
we've gotten an unsafe reply as well?

I'll see what I can come up with there.
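(One way to read that suggestion: fold the unsafe-reply bookkeeping into
complete_request() itself, so call sites like the abort path don't have to
care. A rough sketch only; the r_got_reply check and the callback pairing
are assumptions, not the final code:)

static void complete_request(struct ceph_osd_request *req, int err)
{
	dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);

	req->r_result = err;
	__finish_request(req);

	if (req->r_got_reply && req->r_unsafe_callback)
		req->r_unsafe_callback(req, false);	/* close the unsafe/safe pair */
	else if (req->r_callback)
		req->r_callback(req);
	else
		complete_all(&req->r_completion);

	ceph_osdc_put_request(req);
}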
On Fri, Feb 10, 2017 at 1:07 PM, Jeff Layton <jlayton@redhat.com> wrote:
> On Fri, 2017-02-10 at 20:01 +0800, Yan, Zheng wrote:
>> > On 9 Feb 2017, at 22:48, Jeff Layton <jlayton@redhat.com> wrote:
>> >
>> > When a Ceph volume hits capacity, a flag is set in the OSD map to
>> > indicate that, and a new map is sprayed around the cluster. With cephfs
>> > we want it to shut down any abortable requests that are in progress with
>> > an -ENOSPC error as they'd just hang otherwise.
>> >
[...]
>>
>> For requests that have already got an unsafe reply, we should either
>> ignore them or call req->r_unsafe_callback() to clean them up.
>>
>> Regards
>> Yan, Zheng
>
> Ok, yeah. I had to stare at the r_unsafe_callback code a bit the other
> day to handle the ERROR_WRITE flag, and what you say makes sense.
>
> Honestly, what we really need is a function like complete_request that
> hides all of these fiddly details about the request state. Maybe it
> would be simpler to just have complete_request handle the case where
> we've gotten an unsafe reply as well?
>
> I'll see what I can come up with there.

This is exactly what I meant in my reply to Artur yesterday. Note that
ceph_osdc_cancel_request() is explicit about not completing the request.
It shouldn't be hard to mend it though -- let me look into it.

Thanks,

                Ilya
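(The distinction Ilya draws: ceph_osdc_cancel_request() tears a request down
without notifying waiters, whereas aborting must also complete it. A small
helper along these lines would mend that -- roughly the shape that later
landed upstream as abort_request(), though the details here are
illustrative:)

static void abort_request(struct ceph_osd_request *req, int err)
{
	dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);

	cancel_map_check(req);		/* drop any pending map check */
	complete_request(req, err);	/* unlink and signal waiters with err */
}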
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f68bb42da240..cdb0b58c4c99 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1777,6 +1777,47 @@ static void complete_request(struct ceph_osd_request *req, int err)
 	ceph_osdc_put_request(req);
 }
 
+/*
+ * Drop all pending requests that are stalled waiting on a full condition to
+ * clear, and complete them with ENOSPC as the return code.
+ */
+static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
+{
+	struct ceph_osd_request *req;
+	struct ceph_osd *osd;
+	struct rb_node *m, *n;
+	u32 latest_epoch = 0;
+	bool osdmap_full = ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL);
+
+	dout("enter abort_on_full\n");
+
+	if (!osdmap_full && !have_pool_full(osdc))
+		goto out;
+
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		osd = rb_entry(n, struct ceph_osd, o_node);
+		mutex_lock(&osd->lock);
+		m = rb_first(&osd->o_requests);
+		while (m) {
+			req = rb_entry(m, struct ceph_osd_request, r_node);
+			m = rb_next(m);
+
+			if (req->r_abort_on_full &&
+			    (osdmap_full || pool_full(osdc, req->r_t.base_oloc.pool))) {
+				u32 cur_epoch = le32_to_cpu(req->r_replay_version.epoch);
+
+				dout("%s: abort tid=%llu flags 0x%x\n", __func__, req->r_tid, req->r_flags);
+				complete_request(req, -ENOSPC);
+				if (cur_epoch > latest_epoch)
+					latest_epoch = cur_epoch;
+			}
+		}
+		mutex_unlock(&osd->lock);
+	}
+out:
+	dout("return abort_on_full latest_epoch=%u\n", latest_epoch);
+}
+
 static void cancel_map_check(struct ceph_osd_request *req)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
@@ -3292,6 +3333,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 
 	ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
 			  osdc->osdmap->epoch);
+	ceph_osdc_abort_on_full(osdc);
 	up_write(&osdc->lock);
 	wake_up_all(&osdc->client->auth_wq);
 	return;
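(For context, the new behaviour is opt-in per request via r_abort_on_full.
A hypothetical submitter -- the fs/ceph call sites are not part of this
patch, and submit_abortable_write() is a made-up name for this sketch --
would set the flag before starting the request:)

/* Illustrative only: opt a write in to fast-fail on a full cluster. */
static int submit_abortable_write(struct ceph_osd_client *osdc,
				  struct ceph_osd_request *req)
{
	/* abort with -ENOSPC instead of stalling while the cluster is full */
	req->r_abort_on_full = true;

	return ceph_osdc_start_request(osdc, req, false);
}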
When a Ceph volume hits capacity, a flag is set in the OSD map to
indicate that, and a new map is sprayed around the cluster. With cephfs
we want it to shut down any abortable requests that are in progress with
an -ENOSPC error as they'd just hang otherwise.

Add a new ceph_osdc_abort_on_full helper function to handle this. It
will first check whether there is an out-of-space condition in the
cluster. It will then walk the tree and abort any request that has
r_abort_on_full set with an ENOSPC error. Call this new function
directly whenever we get a new OSD map.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 net/ceph/osd_client.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)