diff mbox series

[v3,01/19] block: Add PR callouts for read keys and reservation

Message ID 20221026231945.6609-2-michael.christie@oracle.com (mailing list archive)
State Not Applicable, archived
Delegated to: Mike Snitzer
Headers show
Series Use block pr_ops in LIO | expand

Commit Message

Mike Christie Oct. 26, 2022, 11:19 p.m. UTC
Add callouts for reading keys and reservations. This allows LIO to support
the READ_KEYS and READ_RESERVATION commands and will allow dm-multipath
to optimize it's error handling so it can check if it's getting an error
because there's an existing reservation or if we need to retry different
paths.

Note: This only initially adds the struct definitions in the kernel as I'm
not sure if we wanted to export the interface to userspace yet. read_keys
and read_reservation are exactly what dm-multipath and LIO need, but for a
userspace interface we may want something like SCSI's READ_FULL_STATUS and
NVMe's report reservation commands. Those are overkill for dm/LIO and
READ_FULL_STATUS is sometimes broken for SCSI devices.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 include/linux/pr.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Bart Van Assche Nov. 2, 2022, 10:50 p.m. UTC | #1
On 10/26/22 16:19, Mike Christie wrote:
> +struct pr_keys {
> +	u32	generation;
> +	u32	num_keys;
> +	u64	keys[];
> +};
> +
> +struct pr_held_reservation {
> +	u64		key;
> +	u32		generation;
> +	enum pr_type	type;
> +};
> +
>   struct pr_ops {
>   	int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
>   			u32 flags);
> @@ -14,6 +26,18 @@ struct pr_ops {
>   	int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
>   			enum pr_type type, bool abort);
>   	int (*pr_clear)(struct block_device *bdev, u64 key);
> +	/*
> +	 * pr_read_keys - Read the registered keys and return them in the
> +	 * pr_keys->keys array. The keys array will have been allocated at the
> +	 * end of the pr_keys struct and is keys_len bytes. If there are more
> +	 * keys than can fit in the array, success will still be returned and
> +	 * pr_keys->num_keys will reflect the total number of keys the device
> +	 * contains, so the caller can retry with a larger array.
> +	 */
> +	int (*pr_read_keys)(struct block_device *bdev,
> +			struct pr_keys *keys_info, u32 keys_len);
> +	int (*pr_read_reservation)(struct block_device *bdev,
> +			struct pr_held_reservation *rsv);
>   };

Is there any pr_read_keys() implementation that won't have to divide 
@keys_len by 8? How about leaving out that argument and making callers 
store the number of elements in the keys[] array in the num_keys member 
before calling pr_read_keys()?

Thanks,

Bart.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Bart Van Assche Nov. 2, 2022, 10:53 p.m. UTC | #2
On 10/26/22 16:19, Mike Christie wrote:
> +struct pr_keys {
> +	u32	generation;
> +	u32	num_keys;
> +	u64	keys[];
> +};
Is my understanding correct that keys[] is treated as opaque data by the 
kernel? If so, is it necessary to convert the persistent reservation 
keys from big endian to CPU endianness? Some SCSI stacks keep 
reservation keys as __be64 format.

Thanks,

Bart.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Mike Christie Nov. 3, 2022, 1:54 a.m. UTC | #3
On 11/2/22 5:50 PM, Bart Van Assche wrote:
> On 10/26/22 16:19, Mike Christie wrote:
>> +struct pr_keys {
>> +    u32    generation;
>> +    u32    num_keys;
>> +    u64    keys[];
>> +};
>> +
>> +struct pr_held_reservation {
>> +    u64        key;
>> +    u32        generation;
>> +    enum pr_type    type;
>> +};
>> +
>>   struct pr_ops {
>>       int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
>>               u32 flags);
>> @@ -14,6 +26,18 @@ struct pr_ops {
>>       int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
>>               enum pr_type type, bool abort);
>>       int (*pr_clear)(struct block_device *bdev, u64 key);
>> +    /*
>> +     * pr_read_keys - Read the registered keys and return them in the
>> +     * pr_keys->keys array. The keys array will have been allocated at the
>> +     * end of the pr_keys struct and is keys_len bytes. If there are more
>> +     * keys than can fit in the array, success will still be returned and
>> +     * pr_keys->num_keys will reflect the total number of keys the device
>> +     * contains, so the caller can retry with a larger array.
>> +     */
>> +    int (*pr_read_keys)(struct block_device *bdev,
>> +            struct pr_keys *keys_info, u32 keys_len);
>> +    int (*pr_read_reservation)(struct block_device *bdev,
>> +            struct pr_held_reservation *rsv);
>>   };
> 
> Is there any pr_read_keys() implementation that won't have to divide @keys_len by 8? How about leaving out that argument and making callers store the number of elements in the keys[] array in the num_keys member before calling pr_read_keys()?

That seems ok to me. 

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Mike Christie Nov. 3, 2022, 2:25 a.m. UTC | #4
On 11/2/22 5:53 PM, Bart Van Assche wrote:
> On 10/26/22 16:19, Mike Christie wrote:
>> +struct pr_keys {
>> +    u32    generation;
>> +    u32    num_keys;
>> +    u64    keys[];
>> +};
> Is my understanding correct that keys[] is treated as opaque data by the kernel? If so, is it necessary to convert the persistent reservation keys from big endian to CPU endianness? Some SCSI stacks keep reservation keys as __be64 format.

The pr_read_keys/reservation calls work like the pr_register/reserve/
release calls where the scsi and nvme layer convert to/from the cpu
endianness to the specs endiennness (big for scsi and little for nvme).

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/include/linux/pr.h b/include/linux/pr.h
index 94ceec713afe..55c9ab7a202b 100644
--- a/include/linux/pr.h
+++ b/include/linux/pr.h
@@ -4,6 +4,18 @@ 
 
 #include <uapi/linux/pr.h>
 
+struct pr_keys {
+	u32	generation;
+	u32	num_keys;
+	u64	keys[];
+};
+
+struct pr_held_reservation {
+	u64		key;
+	u32		generation;
+	enum pr_type	type;
+};
+
 struct pr_ops {
 	int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
 			u32 flags);
@@ -14,6 +26,18 @@  struct pr_ops {
 	int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
 			enum pr_type type, bool abort);
 	int (*pr_clear)(struct block_device *bdev, u64 key);
+	/*
+	 * pr_read_keys - Read the registered keys and return them in the
+	 * pr_keys->keys array. The keys array will have been allocated at the
+	 * end of the pr_keys struct and is keys_len bytes. If there are more
+	 * keys than can fit in the array, success will still be returned and
+	 * pr_keys->num_keys will reflect the total number of keys the device
+	 * contains, so the caller can retry with a larger array.
+	 */
+	int (*pr_read_keys)(struct block_device *bdev,
+			struct pr_keys *keys_info, u32 keys_len);
+	int (*pr_read_reservation)(struct block_device *bdev,
+			struct pr_held_reservation *rsv);
 };
 
 #endif /* LINUX_PR_H */