diff mbox

[1/2] rbd: define flags field, use it for exists flag

Message ID 50F4538B.4010106@inktank.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alex Elder Jan. 14, 2013, 6:50 p.m. UTC
Define a new rbd device flags field, manipulated using atomic bit
operations.  Replace the use of the current "exists" flag with a
bit in this new "flags" field.

Signed-off-by: Alex Elder <elder@inktank.com>
---
 drivers/block/rbd.c |   17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

 }
@@ -1654,7 +1661,7 @@ static void rbd_rq_fn(struct request_queue *q)
 			snapc = ceph_get_snap_context(rbd_dev->header.snapc);
 			up_read(&rbd_dev->header_rwsem);
 			rbd_assert(snapc != NULL);
-		} else if (!atomic_read(&rbd_dev->exists)) {
+		} else if (!test_bit(rbd_dev_flag_exists, &rbd_dev->flags)) {
 			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
 			dout("request for non-existent snapshot");
 			result = -ENXIO;
@@ -2270,7 +2277,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 		return NULL;

 	spin_lock_init(&rbd_dev->lock);
-	atomic_set(&rbd_dev->exists, 0);
+	rbd_dev->flags = 0;
 	INIT_LIST_HEAD(&rbd_dev->node);
 	INIT_LIST_HEAD(&rbd_dev->snaps);
 	init_rwsem(&rbd_dev->header_rwsem);
@@ -2902,7 +2909,7 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
 			/* Existing snapshot not in the new snap context */

 			if (rbd_dev->spec->snap_id == snap->id)
-				atomic_set(&rbd_dev->exists, 0);
+				clear_bit(rbd_dev_flag_exists, &rbd_dev->flags);
 			rbd_remove_snap_dev(snap);
 			dout("%ssnap id %llu has been removed\n",
 				rbd_dev->spec->snap_id == snap->id ?

Comments

Dan Mick Jan. 14, 2013, 8:32 p.m. UTC | #1
I see that set_bit is atomic, but I don't see that test_bit is.  Am I 
missing a subtlety?

On 01/14/2013 10:50 AM, Alex Elder wrote:
> Define a new rbd device flags field, manipulated using atomic bit
> operations.  Replace the use of the current "exists" flag with a
> bit in this new "flags" field.
>
> Signed-off-by: Alex Elder <elder@inktank.com>
> ---
>   drivers/block/rbd.c |   17 ++++++++++++-----
>   1 file changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 02002b1..9eb1631 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -232,7 +232,7 @@ struct rbd_device {
>   	spinlock_t		lock;		/* queue lock */
>
>   	struct rbd_image_header	header;
> -	atomic_t		exists;
> +	unsigned long		flags;
>   	struct rbd_spec		*spec;
>
>   	char			*header_name;
> @@ -260,6 +260,12 @@ struct rbd_device {
>   	unsigned long		open_count;
>   };
>
> +/* Flag bits for rbd_dev->flags */
> +
> +enum rbd_dev_flags {
> +	rbd_dev_flag_exists,	/* mapped snapshot has not been deleted */
> +};
> +
>   static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */
>
>   static LIST_HEAD(rbd_dev_list);    /* devices */
> @@ -756,7 +762,8 @@ static int rbd_dev_set_mapping(struct rbd_device
> *rbd_dev)
>   			goto done;
>   		rbd_dev->mapping.read_only = true;
>   	}
> -	atomic_set(&rbd_dev->exists, 1);
> +	set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
> +
>   done:
>   	return ret;
>   }
> @@ -1654,7 +1661,7 @@ static void rbd_rq_fn(struct request_queue *q)
>   			snapc = ceph_get_snap_context(rbd_dev->header.snapc);
>   			up_read(&rbd_dev->header_rwsem);
>   			rbd_assert(snapc != NULL);
> -		} else if (!atomic_read(&rbd_dev->exists)) {
> +		} else if (!test_bit(rbd_dev_flag_exists, &rbd_dev->flags)) {
>   			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
>   			dout("request for non-existent snapshot");
>   			result = -ENXIO;
> @@ -2270,7 +2277,7 @@ struct rbd_device *rbd_dev_create(struct
> rbd_client *rbdc,
>   		return NULL;
>
>   	spin_lock_init(&rbd_dev->lock);
> -	atomic_set(&rbd_dev->exists, 0);
> +	rbd_dev->flags = 0;
>   	INIT_LIST_HEAD(&rbd_dev->node);
>   	INIT_LIST_HEAD(&rbd_dev->snaps);
>   	init_rwsem(&rbd_dev->header_rwsem);
> @@ -2902,7 +2909,7 @@ static int rbd_dev_snaps_update(struct rbd_device
> *rbd_dev)
>   			/* Existing snapshot not in the new snap context */
>
>   			if (rbd_dev->spec->snap_id == snap->id)
> -				atomic_set(&rbd_dev->exists, 0);
> +				set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>   			rbd_remove_snap_dev(snap);
>   			dout("%ssnap id %llu has been removed\n",
>   				rbd_dev->spec->snap_id == snap->id ?
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Elder Jan. 14, 2013, 9:23 p.m. UTC | #2
On 01/14/2013 02:32 PM, Dan Mick wrote:
> I see that set_bit is atomic, but I don't see that test_bit is.  Am I
> missing a subtlety?

That's an interesting observation.  I'm certain it's safe, but
I needed to research it a bit, and I still haven't verified it
to my satisfaction.

I *think* (but please look over the following and see if you
come to the same conclusion) that this operation doesn't need
to be made atomic, because the routines that implement the
"set" operations guarantee that their effects are visible once
they are done.

But I'm not sure whether "visible" here means precisely that
another CPU will be forced to go read the updated memory when
it calls test_bit().

http://www.kernel.org/doc/Documentation/atomic_ops.txt
The section of interest can be found by looking for the
sentence I'm talking about:
  Likewise, the atomic bit operation must be visible globally before any
  subsequent memory operation is made visible.

It doesn't come right out and explain it though.  Please let me
know what you think.

					-Alex


> On 01/14/2013 10:50 AM, Alex Elder wrote:
>> Define a new rbd device flags field, manipulated using atomic bit
>> operations.  Replace the use of the current "exists" flag with a
>> bit in this new "flags" field.
>>
>> Signed-off-by: Alex Elder <elder@inktank.com>
>> ---
>>   drivers/block/rbd.c |   17 ++++++++++++-----
>>   1 file changed, 12 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 02002b1..9eb1631 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -232,7 +232,7 @@ struct rbd_device {
>>       spinlock_t        lock;        /* queue lock */
>>
>>       struct rbd_image_header    header;
>> -    atomic_t        exists;
>> +    unsigned long        flags;
>>       struct rbd_spec        *spec;
>>
>>       char            *header_name;
>> @@ -260,6 +260,12 @@ struct rbd_device {
>>       unsigned long        open_count;
>>   };
>>
>> +/* Flag bits for rbd_dev->flags */
>> +
>> +enum rbd_dev_flags {
>> +    rbd_dev_flag_exists,    /* mapped snapshot has not been deleted */
>> +};
>> +
>>   static DEFINE_MUTEX(ctl_mutex);      /* Serialize
>> open/close/setup/teardown */
>>
>>   static LIST_HEAD(rbd_dev_list);    /* devices */
>> @@ -756,7 +762,8 @@ static int rbd_dev_set_mapping(struct rbd_device
>> *rbd_dev)
>>               goto done;
>>           rbd_dev->mapping.read_only = true;
>>       }
>> -    atomic_set(&rbd_dev->exists, 1);
>> +    set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>> +
>>   done:
>>       return ret;
>>   }
>> @@ -1654,7 +1661,7 @@ static void rbd_rq_fn(struct request_queue *q)
>>               snapc = ceph_get_snap_context(rbd_dev->header.snapc);
>>               up_read(&rbd_dev->header_rwsem);
>>               rbd_assert(snapc != NULL);
>> -        } else if (!atomic_read(&rbd_dev->exists)) {
>> +        } else if (!test_bit(rbd_dev_flag_exists, &rbd_dev->flags)) {
>>               rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
>>               dout("request for non-existent snapshot");
>>               result = -ENXIO;
>> @@ -2270,7 +2277,7 @@ struct rbd_device *rbd_dev_create(struct
>> rbd_client *rbdc,
>>           return NULL;
>>
>>       spin_lock_init(&rbd_dev->lock);
>> -    atomic_set(&rbd_dev->exists, 0);
>> +    rbd_dev->flags = 0;
>>       INIT_LIST_HEAD(&rbd_dev->node);
>>       INIT_LIST_HEAD(&rbd_dev->snaps);
>>       init_rwsem(&rbd_dev->header_rwsem);
>> @@ -2902,7 +2909,7 @@ static int rbd_dev_snaps_update(struct rbd_device
>> *rbd_dev)
>>               /* Existing snapshot not in the new snap context */
>>
>>               if (rbd_dev->spec->snap_id == snap->id)
>> -                atomic_set(&rbd_dev->exists, 0);
>> +                set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>>               rbd_remove_snap_dev(snap);
>>               dout("%ssnap id %llu has been removed\n",
>>                   rbd_dev->spec->snap_id == snap->id ?
>>

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Mick Jan. 15, 2013, 6:09 a.m. UTC | #3
I think I agree that the claim is that the onus is on the set, and so
I think the proposed code is safe.

On 01/14/2013 01:23 PM, Alex Elder wrote:
> On 01/14/2013 02:32 PM, Dan Mick wrote:
>> I see that set_bit is atomic, but I don't see that test_bit is.  Am I
>> missing a subtlety?
>
> That's an interesting observation.  I'm certain it's safe, but
> I needed to research it a bit, and I still haven't verified it
> to my satisfaction.
>
> I *think* (but please look over the following and see if you
> come to the same conclusion) that this operation doesn't need
> to be made atomic, because the implementation of the routines
> that implement the "set" operations guarantee their effects are
> visible once they are done.
>
> But I'm not sure whether "visible" here means precisely that
> another CPU will be forced to go read the updated memory when
> it calls test_bit().
>
> http://www.kernel.org/doc/Documentation/atomic_ops.txt
> The section of interest can be found by looking for the
> sentence I'm talking about:
>    Likewise, the atomic bit operation must be visible globally before any
>    subsequent memory operation is made visible.
>
> It doesn't come right and explain it though.  Please let me
> know what you think.
>
> 					-Alex
>
>
>> On 01/14/2013 10:50 AM, Alex Elder wrote:
>>> Define a new rbd device flags field, manipulated using atomic bit
>>> operations.  Replace the use of the current "exists" flag with a
>>> bit in this new "flags" field.
>>>
>>> Signed-off-by: Alex Elder <elder@inktank.com>
>>> ---
>>>    drivers/block/rbd.c |   17 ++++++++++++-----
>>>    1 file changed, 12 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>>> index 02002b1..9eb1631 100644
>>> --- a/drivers/block/rbd.c
>>> +++ b/drivers/block/rbd.c
>>> @@ -232,7 +232,7 @@ struct rbd_device {
>>>        spinlock_t        lock;        /* queue lock */
>>>
>>>        struct rbd_image_header    header;
>>> -    atomic_t        exists;
>>> +    unsigned long        flags;
>>>        struct rbd_spec        *spec;
>>>
>>>        char            *header_name;
>>> @@ -260,6 +260,12 @@ struct rbd_device {
>>>        unsigned long        open_count;
>>>    };
>>>
>>> +/* Flag bits for rbd_dev->flags */
>>> +
>>> +enum rbd_dev_flags {
>>> +    rbd_dev_flag_exists,    /* mapped snapshot has not been deleted */
>>> +};
>>> +
>>>    static DEFINE_MUTEX(ctl_mutex);      /* Serialize
>>> open/close/setup/teardown */
>>>
>>>    static LIST_HEAD(rbd_dev_list);    /* devices */
>>> @@ -756,7 +762,8 @@ static int rbd_dev_set_mapping(struct rbd_device
>>> *rbd_dev)
>>>                goto done;
>>>            rbd_dev->mapping.read_only = true;
>>>        }
>>> -    atomic_set(&rbd_dev->exists, 1);
>>> +    set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>>> +
>>>    done:
>>>        return ret;
>>>    }
>>> @@ -1654,7 +1661,7 @@ static void rbd_rq_fn(struct request_queue *q)
>>>                snapc = ceph_get_snap_context(rbd_dev->header.snapc);
>>>                up_read(&rbd_dev->header_rwsem);
>>>                rbd_assert(snapc != NULL);
>>> -        } else if (!atomic_read(&rbd_dev->exists)) {
>>> +        } else if (!test_bit(rbd_dev_flag_exists, &rbd_dev->flags)) {
>>>                rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
>>>                dout("request for non-existent snapshot");
>>>                result = -ENXIO;
>>> @@ -2270,7 +2277,7 @@ struct rbd_device *rbd_dev_create(struct
>>> rbd_client *rbdc,
>>>            return NULL;
>>>
>>>        spin_lock_init(&rbd_dev->lock);
>>> -    atomic_set(&rbd_dev->exists, 0);
>>> +    rbd_dev->flags = 0;
>>>        INIT_LIST_HEAD(&rbd_dev->node);
>>>        INIT_LIST_HEAD(&rbd_dev->snaps);
>>>        init_rwsem(&rbd_dev->header_rwsem);
>>> @@ -2902,7 +2909,7 @@ static int rbd_dev_snaps_update(struct rbd_device
>>> *rbd_dev)
>>>                /* Existing snapshot not in the new snap context */
>>>
>>>                if (rbd_dev->spec->snap_id == snap->id)
>>> -                atomic_set(&rbd_dev->exists, 0);
>>> +                set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>>>                rbd_remove_snap_dev(snap);
>>>                dout("%ssnap id %llu has been removed\n",
>>>                    rbd_dev->spec->snap_id == snap->id ?
>>>
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Mick Jan. 15, 2013, 9 p.m. UTC | #4
Reviewed-by: Dan Mick <dan.mick@inktank.com>

On 01/14/2013 10:50 AM, Alex Elder wrote:
> Define a new rbd device flags field, manipulated using atomic bit
> operations.  Replace the use of the current "exists" flag with a
> bit in this new "flags" field.
>
> Signed-off-by: Alex Elder <elder@inktank.com>
> ---
>   drivers/block/rbd.c |   17 ++++++++++++-----
>   1 file changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 02002b1..9eb1631 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -232,7 +232,7 @@ struct rbd_device {
>   	spinlock_t		lock;		/* queue lock */
>
>   	struct rbd_image_header	header;
> -	atomic_t		exists;
> +	unsigned long		flags;
>   	struct rbd_spec		*spec;
>
>   	char			*header_name;
> @@ -260,6 +260,12 @@ struct rbd_device {
>   	unsigned long		open_count;
>   };
>
> +/* Flag bits for rbd_dev->flags */
> +
> +enum rbd_dev_flags {
> +	rbd_dev_flag_exists,	/* mapped snapshot has not been deleted */
> +};
> +
>   static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */
>
>   static LIST_HEAD(rbd_dev_list);    /* devices */
> @@ -756,7 +762,8 @@ static int rbd_dev_set_mapping(struct rbd_device
> *rbd_dev)
>   			goto done;
>   		rbd_dev->mapping.read_only = true;
>   	}
> -	atomic_set(&rbd_dev->exists, 1);
> +	set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
> +
>   done:
>   	return ret;
>   }
> @@ -1654,7 +1661,7 @@ static void rbd_rq_fn(struct request_queue *q)
>   			snapc = ceph_get_snap_context(rbd_dev->header.snapc);
>   			up_read(&rbd_dev->header_rwsem);
>   			rbd_assert(snapc != NULL);
> -		} else if (!atomic_read(&rbd_dev->exists)) {
> +		} else if (!test_bit(rbd_dev_flag_exists, &rbd_dev->flags)) {
>   			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
>   			dout("request for non-existent snapshot");
>   			result = -ENXIO;
> @@ -2270,7 +2277,7 @@ struct rbd_device *rbd_dev_create(struct
> rbd_client *rbdc,
>   		return NULL;
>
>   	spin_lock_init(&rbd_dev->lock);
> -	atomic_set(&rbd_dev->exists, 0);
> +	rbd_dev->flags = 0;
>   	INIT_LIST_HEAD(&rbd_dev->node);
>   	INIT_LIST_HEAD(&rbd_dev->snaps);
>   	init_rwsem(&rbd_dev->header_rwsem);
> @@ -2902,7 +2909,7 @@ static int rbd_dev_snaps_update(struct rbd_device
> *rbd_dev)
>   			/* Existing snapshot not in the new snap context */
>
>   			if (rbd_dev->spec->snap_id == snap->id)
> -				atomic_set(&rbd_dev->exists, 0);
> +				set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>   			rbd_remove_snap_dev(snap);
>   			dout("%ssnap id %llu has been removed\n",
>   				rbd_dev->spec->snap_id == snap->id ?
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josh Durgin Jan. 16, 2013, 1:08 a.m. UTC | #5
On 01/14/2013 01:23 PM, Alex Elder wrote:
> On 01/14/2013 02:32 PM, Dan Mick wrote:
>> I see that set_bit is atomic, but I don't see that test_bit is.  Am I
>> missing a subtlety?
>
> That's an interesting observation.  I'm certain it's safe, but
> I needed to research it a bit, and I still haven't verified it
> to my satisfaction.
>
> I *think* (but please look over the following and see if you
> come to the same conclusion) that this operation doesn't need
> to be made atomic, because the implementation of the routines
> that implement the "set" operations guarantee their effects are
> visible once they are done.
>
> But I'm not sure whether "visible" here means precisely that
> another CPU will be forced to go read the updated memory when
> it calls test_bit().
>
> http://www.kernel.org/doc/Documentation/atomic_ops.txt
> The section of interest can be found by looking for the
> sentence I'm talking about:
>    Likewise, the atomic bit operation must be visible globally before any
>    subsequent memory operation is made visible.

I read that differently. I think that only applies to the test_and_set 
style operations mentioned directly above, not set_bit.

Documentation/memory-barriers.txt confirms this interpretation:

     The following operations are potential problems as they do
     _not_ imply memory barriers, but might be used for
     implementing such things as UNLOCK-class operations:

             atomic_set();
             set_bit();
             clear_bit();
             change_bit();

     With these the appropriate explicit memory barrier should be
     used if necessary (smp_mb__before_clear_bit() for instance).

And:

     Memory operations that occur after an UNLOCK operation may appear to
     happen before it completes.

So I think we need a memory barrier before and after set_bit for the
removing flag, but we don't need barriers for the exists flag, since
it's a best-effort value that can't stop already-in-flight requests.

Josh

> It doesn't come right and explain it though.  Please let me
> know what you think.
>
> 					-Alex
>
>
>> On 01/14/2013 10:50 AM, Alex Elder wrote:
>>> Define a new rbd device flags field, manipulated using atomic bit
>>> operations.  Replace the use of the current "exists" flag with a
>>> bit in this new "flags" field.
>>>
>>> Signed-off-by: Alex Elder <elder@inktank.com>
>>> ---
>>>    drivers/block/rbd.c |   17 ++++++++++++-----
>>>    1 file changed, 12 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>>> index 02002b1..9eb1631 100644
>>> --- a/drivers/block/rbd.c
>>> +++ b/drivers/block/rbd.c
>>> @@ -232,7 +232,7 @@ struct rbd_device {
>>>        spinlock_t        lock;        /* queue lock */
>>>
>>>        struct rbd_image_header    header;
>>> -    atomic_t        exists;
>>> +    unsigned long        flags;
>>>        struct rbd_spec        *spec;
>>>
>>>        char            *header_name;
>>> @@ -260,6 +260,12 @@ struct rbd_device {
>>>        unsigned long        open_count;
>>>    };
>>>
>>> +/* Flag bits for rbd_dev->flags */
>>> +
>>> +enum rbd_dev_flags {
>>> +    rbd_dev_flag_exists,    /* mapped snapshot has not been deleted */
>>> +};
>>> +
>>>    static DEFINE_MUTEX(ctl_mutex);      /* Serialize
>>> open/close/setup/teardown */
>>>
>>>    static LIST_HEAD(rbd_dev_list);    /* devices */
>>> @@ -756,7 +762,8 @@ static int rbd_dev_set_mapping(struct rbd_device
>>> *rbd_dev)
>>>                goto done;
>>>            rbd_dev->mapping.read_only = true;
>>>        }
>>> -    atomic_set(&rbd_dev->exists, 1);
>>> +    set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>>> +
>>>    done:
>>>        return ret;
>>>    }
>>> @@ -1654,7 +1661,7 @@ static void rbd_rq_fn(struct request_queue *q)
>>>                snapc = ceph_get_snap_context(rbd_dev->header.snapc);
>>>                up_read(&rbd_dev->header_rwsem);
>>>                rbd_assert(snapc != NULL);
>>> -        } else if (!atomic_read(&rbd_dev->exists)) {
>>> +        } else if (!test_bit(rbd_dev_flag_exists, &rbd_dev->flags)) {
>>>                rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
>>>                dout("request for non-existent snapshot");
>>>                result = -ENXIO;
>>> @@ -2270,7 +2277,7 @@ struct rbd_device *rbd_dev_create(struct
>>> rbd_client *rbdc,
>>>            return NULL;
>>>
>>>        spin_lock_init(&rbd_dev->lock);
>>> -    atomic_set(&rbd_dev->exists, 0);
>>> +    rbd_dev->flags = 0;
>>>        INIT_LIST_HEAD(&rbd_dev->node);
>>>        INIT_LIST_HEAD(&rbd_dev->snaps);
>>>        init_rwsem(&rbd_dev->header_rwsem);
>>> @@ -2902,7 +2909,7 @@ static int rbd_dev_snaps_update(struct rbd_device
>>> *rbd_dev)
>>>                /* Existing snapshot not in the new snap context */
>>>
>>>                if (rbd_dev->spec->snap_id == snap->id)
>>> -                atomic_set(&rbd_dev->exists, 0);
>>> +                set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
>>>                rbd_remove_snap_dev(snap);
>>>                dout("%ssnap id %llu has been removed\n",
>>>                    rbd_dev->spec->snap_id == snap->id ?
>>>

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Elder Jan. 17, 2013, 11:16 p.m. UTC | #6
On 01/15/2013 07:08 PM, Josh Durgin wrote:
> On 01/14/2013 01:23 PM, Alex Elder wrote:
>> On 01/14/2013 02:32 PM, Dan Mick wrote:
>>> I see that set_bit is atomic, but I don't see that test_bit is.  Am I
>>> missing a subtlety?
>>
>> That's an interesting observation.  I'm certain it's safe, but
>> I needed to research it a bit, and I still haven't verified it
>> to my satisfaction.
>>
>> I *think* (but please look over the following and see if you
>> come to the same conclusion) that this operation doesn't need
>> to be made atomic, because the implementation of the routines
>> that implement the "set" operations guarantee their effects are
>> visible once they are done.
>>
>> But I'm not sure whether "visible" here means precisely that
>> another CPU will be forced to go read the updated memory when
>> it calls test_bit().
>>
>> http://www.kernel.org/doc/Documentation/atomic_ops.txt
>> The section of interest can be found by looking for the
>> sentence I'm talking about:
>>    Likewise, the atomic bit operation must be visible globally before any
>>    subsequent memory operation is made visible.
> 
> I read that differently. I think that only applies to the test_and_set
> style operations mentioned directly above, not set_bit.
> 
> Documentation/memory-barriers.txt confirms this interpretation:
> 
>     The following operations are potential problems as they do
>     _not_ imply memory barriers, but might be used for
>     implementing such things as UNLOCK-class operations:
> 
>             atomic_set();
>             set_bit();
>             clear_bit();
>             change_bit();
> 
>     With these the appropriate explicit memory barrier should be
>     used if necessary (smp_mb__before_clear_bit() for instance).
> 
> And:
> 
>     Memory operations that occur after an UNLOCK operation may appear to
>     happen before it completes.
> 
> So I think we need a memory barrier before and after set_bit for the
> removing flag, but we don't need barriers for the exists flag, since
> it's a best-effort value that can't stop already-in-flight requests.

You know, I agree with your analysis but now I'm not sure
even that's enough.

Here's the code in question (from the other patch):

Test side:
        mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
        if (!test_bit(rbd_dev_flag_removing, &rbd_dev->flags)) {
                (void) get_device(&rbd_dev->dev);
                set_device_ro(bdev, rbd_dev->mapping.read_only);
                rbd_dev->open_count++;
        } else {
                ret = -ENOENT;
        }
        mutex_unlock(&ctl_mutex);


Set side:
        if (rbd_dev->open_count) {
                ret = -EBUSY;
                goto done;
        }
        set_bit(rbd_dev_flag_removing, &rbd_dev->flags);

And here's the scenario I'm thinking about.  Initially,
suppose rbd_dev->open_count is 0 and the removing flag
is not set.

OPENING THREAD                  UNMAPPING THREAD
--------------                  ----------------
                                if (rbd_dev->open_count) {
                                    /* not taken, it's zero */
                                    ret = -EBUSY;
                                    goto done;
                                }
if (!test_bit(removing)) {
    /* not set yet! */          /* barrier won't help here */
                                set_bit(removing);
                                /* clean stuff up */
    rbd_dev->open_count++;      /* == kablooie == */
} else {
    ret = -ENOENT;
}

So I think we need a spinlock, or some other thing.

In any case, I'm not going to commit this change until
we've had a chance to talk about it a little more.

					-Alex
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 02002b1..9eb1631 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -232,7 +232,7 @@  struct rbd_device {
 	spinlock_t		lock;		/* queue lock */

 	struct rbd_image_header	header;
-	atomic_t		exists;
+	unsigned long		flags;
 	struct rbd_spec		*spec;

 	char			*header_name;
@@ -260,6 +260,12 @@  struct rbd_device {
 	unsigned long		open_count;
 };

+/* Flag bits for rbd_dev->flags */
+
+enum rbd_dev_flags {
+	rbd_dev_flag_exists,	/* mapped snapshot has not been deleted */
+};
+
 static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */

 static LIST_HEAD(rbd_dev_list);    /* devices */
@@ -756,7 +762,8 @@  static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
 			goto done;
 		rbd_dev->mapping.read_only = true;
 	}
-	atomic_set(&rbd_dev->exists, 1);
+	set_bit(rbd_dev_flag_exists, &rbd_dev->flags);
+
 done:
 	return ret;