[v1,03/11] fs/proc/vmcore: disallow vmcore modifications after the vmcore was opened

Message ID 20241025151134.1275575-4-david@redhat.com (mailing list archive)
State New
Series fs/proc/vmcore: kdump support for virtio-mem on s390

Commit Message

David Hildenbrand Oct. 25, 2024, 3:11 p.m. UTC
Let's protect all vmcore modifications with the vmcore_mutex and
disallow vmcore modifications after the vmcore has been opened:
modifications would no longer be safe. Properly synchronize against
concurrent opening of the vmcore.

As a nice side-effect, we now properly protect concurrent vmcore
modifications.

There is no need to grab the mutex during mmap()/read(): once the
vmcore has been opened, modifications are impossible.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 fs/proc/vmcore.c | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)
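
The core of the change is an optimistic lockless check followed by a
recheck under the mutex. Below is a minimal sketch of the pattern: the
open handler shown is an assumption about how the vmcore_opened flag
gets set (it is not part of the quoted hunks), and the add path
condenses the hunks in the patch further down.

static DEFINE_MUTEX(vmcore_mutex);
static bool vmcore_opened;

/* Assumed open handler: mark the vmcore as opened, under the mutex. */
static int open_vmcore(struct inode *inode, struct file *file)
{
	mutex_lock(&vmcore_mutex);
	vmcore_opened = true;
	mutex_unlock(&vmcore_mutex);
	return 0;
}

int vmcore_add_device_dump(struct vmcoredd_data *data)
{
	struct vmcoredd_node *dump;

	/* Optimistic lockless check; rechecked under the lock below. */
	if (data_race(vmcore_opened))
		return -EBUSY;

	/* ... validate @data, allocate @dump and its buffer ... */

	mutex_lock(&vmcore_mutex);
	if (vmcore_opened) {
		/* The vmcore was opened concurrently; back out. */
		mutex_unlock(&vmcore_mutex);
		return -EBUSY;
	}
	list_add_tail(&dump->list, &vmcoredd_list);
	vmcoredd_update_size(dump->size);
	mutex_unlock(&vmcore_mutex);
	return 0;
}

Once vmcore_opened is set it is never cleared again, which is why the
readers of vmcoredd_list no longer need the mutex after open.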

Comments

Baoquan He Nov. 22, 2024, 9:16 a.m. UTC | #1
On 10/25/24 at 05:11pm, David Hildenbrand wrote:
......snip...
> @@ -1482,6 +1470,10 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
>  		return -EINVAL;
>  	}
>  
> +	/* We'll recheck under lock later. */
> +	if (data_race(vmcore_opened))
> +		return -EBUSY;

As I commented on patch 7, if the vmcore is opened and closed after
the check, do we need to give up any chance of adding a device dump,
as below?

fd = open(/proc/vmcore);
...do checking;
close(fd);

quit any device dump adding;

run makedumpfile on s390;
  ->fd = open(/proc/vmcore);
    -> try to dump;
  ->close(fd);

> +
>  	if (!data || !strlen(data->dump_name) ||
>  	    !data->vmcoredd_callback || !data->size)
>  		return -EINVAL;
> @@ -1515,12 +1507,16 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
>  	dump->buf = buf;
>  	dump->size = data_size;
>  
> -	/* Add the dump to driver sysfs list */
> +	/* Add the dump to driver sysfs list and update the elfcore hdr */
>  	mutex_lock(&vmcore_mutex);
> -	list_add_tail(&dump->list, &vmcoredd_list);
> -	mutex_unlock(&vmcore_mutex);
> +	if (vmcore_opened) {
> +		ret = -EBUSY;
> +		goto out_err;
> +	}
>  
> +	list_add_tail(&dump->list, &vmcoredd_list);
>  	vmcoredd_update_size(data_size);
> +	mutex_unlock(&vmcore_mutex);
>  	return 0;
>  
>  out_err:
> -- 
> 2.46.1
>
David Hildenbrand Nov. 22, 2024, 9:30 a.m. UTC | #2
On 22.11.24 10:16, Baoquan He wrote:
> On 10/25/24 at 05:11pm, David Hildenbrand wrote:
> ......snip...
>> @@ -1482,6 +1470,10 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
>>   		return -EINVAL;
>>   	}
>>   
>> +	/* We'll recheck under lock later. */
>> +	if (data_race(vmcore_opened))
>> +		return -EBUSY;
> 

Hi,

> As I commented on patch 7, if the vmcore is opened and closed after
> the check, do we need to give up any chance of adding a device dump,
> as below?
> 
> fd = open(/proc/vmcore);
> ...do checking;
> close(fd);
> 
> quit any device dump adding;
> 
> run makedumpfile on s390;
>    ->fd = open(/proc/vmcore);
>      -> try to dump;
>    ->close(fd);

The only reasonable case where this could happen (with virtio_mem) would 
be when you hotplug a virtio-mem device into a VM that is currently in 
the kdump kernel. However, in this case, the device would not provide 
any memory we want to dump:

(1) The memory was not available to the 1st (crashed) kernel, because
     the device got hotplugged later.
(2) Hotplugged virtio-mem devices show up with "no plugged memory",
     meaning there wouldn't even be anything to dump.

Drivers will be loaded (as part of the kernel or as part of the initrd) 
before any kdump action happens. Similarly, just imagine your NIC 
driver not being loaded when you start dumping to a network share ...

This should similarly apply to vmcoredd providers.

There is another concern I had at some point with changing the effective 
/proc/vmcore size after someone already opened it: they might assume the 
size will stay unmodified (IOW, the file was completely static before 
vmcoredd showed up).

So unless there is a real use case that requires tracking whether the 
file is no longer open, so that the vmcore could be modified afterwards, 
we should keep it simple.

I am not aware of any such cases, and my experiments with virtio_mem 
showed that the driver gets loaded extremely early from the initrd, 
compared to when we actually start messing with /proc/vmcore from user 
space.

Thanks!
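
As an aside, the practical consequence for vmcoredd providers is that
vmcore_add_device_dump() can now fail with -EBUSY once /proc/vmcore
has been opened. A hypothetical caller (driver name and dump size made
up; the vmcoredd_data fields are those from <linux/crash_dump.h>):

static int mydrv_collect(struct vmcoredd_data *data, void *buf)
{
	/* Fill @buf with up to data->size bytes of device state. */
	memset(buf, 0, data->size);
	return 0;
}

static int mydrv_register_dump(void)
{
	struct vmcoredd_data data = {};
	int ret;

	strscpy(data.dump_name, "mydrv_state", sizeof(data.dump_name));
	data.size = SZ_64K;
	data.vmcoredd_callback = mydrv_collect;

	ret = vmcore_add_device_dump(&data);
	if (ret == -EBUSY)
		pr_warn("vmcore already opened; device dump not added\n");
	return ret;
}

Such a provider should simply treat -EBUSY as "too late", matching the
reasoning above.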

Patch

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b91c304463c9..6371dbaa21be 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -243,33 +243,27 @@  static int vmcoredd_copy_dumps(struct iov_iter *iter, u64 start, size_t size)
 {
 	struct vmcoredd_node *dump;
 	u64 offset = 0;
-	int ret = 0;
 	size_t tsz;
 	char *buf;
 
-	mutex_lock(&vmcore_mutex);
 	list_for_each_entry(dump, &vmcoredd_list, list) {
 		if (start < offset + dump->size) {
 			tsz = min(offset + (u64)dump->size - start, (u64)size);
 			buf = dump->buf + start - offset;
-			if (copy_to_iter(buf, tsz, iter) < tsz) {
-				ret = -EFAULT;
-				goto out_unlock;
-			}
+			if (copy_to_iter(buf, tsz, iter) < tsz)
+				return -EFAULT;
 
 			size -= tsz;
 			start += tsz;
 
 			/* Leave now if buffer filled already */
 			if (!size)
-				goto out_unlock;
+				return 0;
 		}
 		offset += dump->size;
 	}
 
-out_unlock:
-	mutex_unlock(&vmcore_mutex);
-	return ret;
+	return 0;
 }
 
 #ifdef CONFIG_MMU
@@ -278,20 +272,16 @@  static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
 {
 	struct vmcoredd_node *dump;
 	u64 offset = 0;
-	int ret = 0;
 	size_t tsz;
 	char *buf;
 
-	mutex_lock(&vmcore_mutex);
 	list_for_each_entry(dump, &vmcoredd_list, list) {
 		if (start < offset + dump->size) {
 			tsz = min(offset + (u64)dump->size - start, (u64)size);
 			buf = dump->buf + start - offset;
 			if (remap_vmalloc_range_partial(vma, dst, buf, 0,
-							tsz)) {
-				ret = -EFAULT;
-				goto out_unlock;
-			}
+							tsz))
+				return -EFAULT;
 
 			size -= tsz;
 			start += tsz;
@@ -299,14 +289,12 @@  static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
 
 			/* Leave now if buffer filled already */
 			if (!size)
-				goto out_unlock;
+				return 0;
 		}
 		offset += dump->size;
 	}
 
-out_unlock:
-	mutex_unlock(&vmcore_mutex);
-	return ret;
+	return 0;
 }
 #endif /* CONFIG_MMU */
 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
@@ -1482,6 +1470,10 @@  int vmcore_add_device_dump(struct vmcoredd_data *data)
 		return -EINVAL;
 	}
 
+	/* We'll recheck under lock later. */
+	if (data_race(vmcore_opened))
+		return -EBUSY;
+
 	if (!data || !strlen(data->dump_name) ||
 	    !data->vmcoredd_callback || !data->size)
 		return -EINVAL;
@@ -1515,12 +1507,16 @@  int vmcore_add_device_dump(struct vmcoredd_data *data)
 	dump->buf = buf;
 	dump->size = data_size;
 
-	/* Add the dump to driver sysfs list */
+	/* Add the dump to driver sysfs list and update the elfcore hdr */
 	mutex_lock(&vmcore_mutex);
-	list_add_tail(&dump->list, &vmcoredd_list);
-	mutex_unlock(&vmcore_mutex);
+	if (vmcore_opened) {
+		ret = -EBUSY;
+		goto out_err;
+	}
 
+	list_add_tail(&dump->list, &vmcoredd_list);
 	vmcoredd_update_size(data_size);
+	mutex_unlock(&vmcore_mutex);
 	return 0;
 
 out_err: