[v5] ocfs2: add bounds checking to ocfs2_check_dir_entry()

Message ID	20240618065427.143612-1-llfamsec@gmail.com (mailing list archive)
State	New
Headers	show Received: from mail-pl1-f171.google.com (mail-pl1-f171.google.com [209.85.214.171]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 04A6713AA5E for <ocfs2-devel@lists.linux.dev>; Tue, 18 Jun 2024 06:54:59 +0000 (UTC) From: lei lu <llfamsec@gmail.com> To: ocfs2-devel@lists.linux.dev, joseph.qi@linux.alibaba.com Cc: heming.zhao@suse.com Subject: [PATCH v5] ocfs2: add bounds checking to ocfs2_check_dir_entry() Date: Tue, 18 Jun 2024 14:54:27 +0800 Message-Id: <20240618065427.143612-1-llfamsec@gmail.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	[v5] ocfs2: add bounds checking to ocfs2_check_dir_entry() \| expand [v5] ocfs2: add bounds checking to ocfs2_check_dir_entry()

lei lu June 18, 2024, 6:54 a.m. UTC

This adds sanity checks for ocfs2_dir_entry to make sure all members of
ocfs2_dir_entry don't stray beyond valid memory region.

Signed-off-by: lei lu <llfamsec@gmail.com>
---
 fs/ocfs2/dir.c | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

heming.zhao@suse.com June 18, 2024, 12:04 p.m. UTC | #1

On 6/18/24 14:54, lei lu wrote:
> This adds sanity checks for ocfs2_dir_entry to make sure all members of
> ocfs2_dir_entry don't stray beyond valid memory region.
> 
> Signed-off-by: lei lu <llfamsec@gmail.com>
> ---
>   fs/ocfs2/dir.c | 39 +++++++++++++++++++++++++--------------
>   1 file changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
> index d620d4c53c6f..cdd92b4f9bf1 100644
> --- a/fs/ocfs2/dir.c
> +++ b/fs/ocfs2/dir.c
> @@ -294,13 +294,16 @@ static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
>    * bh passed here can be an inode block or a dir data block, depending
>    * on the inode inline data flag.
>    */
> -static int ocfs2_check_dir_entry(struct inode * dir,
> -				 struct ocfs2_dir_entry * de,
> -				 struct buffer_head * bh,
> +static int ocfs2_check_dir_entry(struct inode *dir,
> +				 struct ocfs2_dir_entry *de,
> +				 struct buffer_head *bh,
> +				 char *buf,
> +				 unsigned int size,
>   				 unsigned long offset)
>   {
>   	const char *error_msg = NULL;
>   	const int rlen = le16_to_cpu(de->rec_len);
> +	const unsigned long next_offset = ((char *) de - buf) + rlen;

Hi Lei & Joseph,

I withdraw my previous saying: the rest codes look good to me.

I checked some code. In my view, if the code runs as expected,
next_offset should be equal to:  "((char *) de - bh->b_data) + rlen".

You could add codes here to verify, e.g:
   printk("...");
   BUG_ON(next_offset != (((char *)de - bh->b_data) + rlen));

This BUG_ON() will never be triggered.

Please take some time to build and run the test, If my thinking is right,
you need to send v6 patch.

btw, please don't forget to check my below comments.
It seems your code also has other bug.

@Joseph, I also need your opinion.

>   
>   	if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
>   		error_msg = "rec_len is smaller than minimal";
> @@ -308,6 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
>   		error_msg = "rec_len % 4 != 0";
>   	else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
>   		error_msg = "rec_len is too small for name_len";
> +	else if (unlikely(next_offset > size))
> +		error_msg = "directory entry overrun";
> +	else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
> +		 next_offset != size)
> +		error_msg = "directory entry too close to end";
>   	else if (unlikely(
>   		 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
>   		error_msg = "directory entry across blocks";
> @@ -352,16 +360,16 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
>   	de_buf = first_de;
>   	dlimit = de_buf + bytes;
>   
> -	while (de_buf < dlimit) {
> +	while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
>   		/* this code is executed quadratically often */
>   		/* do minimal checking `by hand' */
>   
>   		de = (struct ocfs2_dir_entry *) de_buf;
>   
> -		if (de_buf + namelen <= dlimit &&
> +		if (de->name + namelen <= dlimit &&
>   		    ocfs2_match(namelen, name, de)) {
>   			/* found a match - just to be sure, do a full check */
> -			if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
> +			if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, offset)) {


ocfs2 uses two ways to store dir entry:
  inline mode (OCFS2_INLINE_DATA_FL) & dir block mode

Above function (ocfs2_search_dirblock) handles dir block case, the code
can work under expectation.

>   				ret = -1;
>   				goto bail;
>   			}
> @@ -1138,7 +1146,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
>   	pde = NULL;
>   	de = (struct ocfs2_dir_entry *) first_de;
>   	while (i < bytes) {
> -		if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
> +		if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
>   			status = -EIO;
>   			mlog_errno(status);
>   			goto bail;
> @@ -1635,7 +1643,7 @@ int __ocfs2_add_entry(handle_t *handle,
>   		/* These checks should've already been passed by the
>   		 * prepare function, but I guess we can leave them
>   		 * here anyway. */
> -		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
> +		if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, size, offset)) {
>   			retval = -ENOENT;
>   			goto bail;
>   		}
> @@ -1774,7 +1782,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
>   		}
>   
>   		de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
> -		if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
> +		if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
> +					   i_size_read(inode), ctx->pos)) {

This function (ocfs2_dir_foreach_blk_id) handles inline mode.

The de & data->id_data are both located in the inode struct, starting at
offset 0xc8 in ((struct ocfs2_dinode)->id2.i_data). however di_bh->b_data
points to the start address of the inode.

if ocfs2_check_dir_entry() uses de & data->id_data to compare the offset, it
misses the 0xc8 space.

  struct ocfs2_dinode
+----------------------------------+
|   ...      |    id2.i_data       |
|            |---------------------|
|            |           |id_data[]|
^------------------------^-------^-+
|                        |   ... |
0x0                     0xc8     0xc8+xxx: de
di_bh->b_data         data->id_da


(This patch) in ocfs2_check_dir_entry(), it uses "de - data->id_da" to get
the offset. However, from the above pic, the algorithm is wrong; it forgets
the [0,0xc8].
the correct way: (de - bh->b_data)

the same issue also occurs in ocfs2_find_dir_space_id().


Thanks,
Heming

>   			/* On error, skip the f_pos to the end. */
>   			ctx->pos = i_size_read(inode);
>   			break;
> @@ -1867,7 +1876,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
>   		while (ctx->pos < i_size_read(inode)
>   		       && offset < sb->s_blocksize) {
>   			de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
> -			if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
> +			if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
> +						   sb->s_blocksize, offset)) {
>   				/* On error, skip the f_pos to the
>   				   next block. */
>   				ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
> @@ -3339,7 +3349,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>   	struct super_block *sb = dir->i_sb;
>   	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
>   	struct ocfs2_dir_entry *de, *last_de = NULL;
> -	char *de_buf, *limit;
> +	char *first_de, *de_buf, *limit;
>   	unsigned long offset = 0;
>   	unsigned int rec_len, new_rec_len, free_space;
>   
> @@ -3352,14 +3362,15 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>   	else
>   		free_space = dir->i_sb->s_blocksize - i_size_read(dir);
>   
> -	de_buf = di->id2.i_data.id_data;
> +	first_de = di->id2.i_data.id_data;
> +	de_buf = first_de;
>   	limit = de_buf + i_size_read(dir);
>   	rec_len = OCFS2_DIR_REC_LEN(namelen);
>   
>   	while (de_buf < limit) {
>   		de = (struct ocfs2_dir_entry *)de_buf;
>   
> -		if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
> +		if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, i_size_read(dir), offset)) {
>   			ret = -ENOENT;
>   			goto out;
>   		}
> @@ -3441,7 +3452,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
>   			/* move to next block */
>   			de = (struct ocfs2_dir_entry *) bh->b_data;
>   		}
> -		if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
> +		if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, offset)) {
>   			status = -ENOENT;
>   			goto bail;
>   		}

lei lu June 18, 2024, 1:09 p.m. UTC | #2

On Tue, Jun 18, 2024 at 8:04 PM Heming Zhao <heming.zhao@suse.com> wrote:
>
> On 6/18/24 14:54, lei lu wrote:
> > This adds sanity checks for ocfs2_dir_entry to make sure all members of
> > ocfs2_dir_entry don't stray beyond valid memory region.
> >
> > Signed-off-by: lei lu <llfamsec@gmail.com>
> > ---
> >   fs/ocfs2/dir.c | 39 +++++++++++++++++++++++++--------------
> >   1 file changed, 25 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
> > index d620d4c53c6f..cdd92b4f9bf1 100644
> > --- a/fs/ocfs2/dir.c
> > +++ b/fs/ocfs2/dir.c
> > @@ -294,13 +294,16 @@ static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
> >    * bh passed here can be an inode block or a dir data block, depending
> >    * on the inode inline data flag.
> >    */
> > -static int ocfs2_check_dir_entry(struct inode * dir,
> > -                              struct ocfs2_dir_entry * de,
> > -                              struct buffer_head * bh,
> > +static int ocfs2_check_dir_entry(struct inode *dir,
> > +                              struct ocfs2_dir_entry *de,
> > +                              struct buffer_head *bh,
> > +                              char *buf,
> > +                              unsigned int size,
> >                                unsigned long offset)
> >   {
> >       const char *error_msg = NULL;
> >       const int rlen = le16_to_cpu(de->rec_len);
> > +     const unsigned long next_offset = ((char *) de - buf) + rlen;
>
> Hi Lei & Joseph,
>
> I withdraw my previous saying: the rest codes look good to me.
>
> I checked some code. In my view, if the code runs as expected,
> next_offset should be equal to:  "((char *) de - bh->b_data) + rlen".

Hi,

As you said below, In inline mode, dir entry list starts at
bh->b_data->id2.i_data,
In the dir block mode, dir entry list starts at bh->b_data directly.

So the prerequisite for checking here is to make sure that buf is the first_de
and size is the de list size. We make sure that de don't stray beyond the de
list regardless of its mode.

The above patch code refers to the __ext4_check_dir_entry.

>
> You could add codes here to verify, e.g:
>    printk("...");
>    BUG_ON(next_offset != (((char *)de - bh->b_data) + rlen));
>
> This BUG_ON() will never be triggered.
>
> Please take some time to build and run the test, If my thinking is right,
> you need to send v6 patch.
>
> btw, please don't forget to check my below comments.
> It seems your code also has other bug.
>
> @Joseph, I also need your opinion.
>
> >
> >       if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
> >               error_msg = "rec_len is smaller than minimal";
> > @@ -308,6 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
> >               error_msg = "rec_len % 4 != 0";
> >       else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
> >               error_msg = "rec_len is too small for name_len";
> > +     else if (unlikely(next_offset > size))
> > +             error_msg = "directory entry overrun";
> > +     else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
> > +              next_offset != size)
> > +             error_msg = "directory entry too close to end";
> >       else if (unlikely(
> >                ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
> >               error_msg = "directory entry across blocks";
> > @@ -352,16 +360,16 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
> >       de_buf = first_de;
> >       dlimit = de_buf + bytes;
> >
> > -     while (de_buf < dlimit) {
> > +     while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
> >               /* this code is executed quadratically often */
> >               /* do minimal checking `by hand' */
> >
> >               de = (struct ocfs2_dir_entry *) de_buf;
> >
> > -             if (de_buf + namelen <= dlimit &&
> > +             if (de->name + namelen <= dlimit &&
> >                   ocfs2_match(namelen, name, de)) {
> >                       /* found a match - just to be sure, do a full check */
> > -                     if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
> > +                     if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, offset)) {
>
>
> ocfs2 uses two ways to store dir entry:
>   inline mode (OCFS2_INLINE_DATA_FL) & dir block mode
>
> Above function (ocfs2_search_dirblock) handles dir block case, the code
> can work under expectation.

I think ocfs2_search_dirblock handles both two modes? Because In the caller
ocfs2_find_entry_id, the first_de is from di_bh->b_data->id2.i_data->id_data.

>
> >                               ret = -1;
> >                               goto bail;
> >                       }
> > @@ -1138,7 +1146,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
> >       pde = NULL;
> >       de = (struct ocfs2_dir_entry *) first_de;
> >       while (i < bytes) {
> > -             if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
> > +             if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
> >                       status = -EIO;
> >                       mlog_errno(status);
> >                       goto bail;
> > @@ -1635,7 +1643,7 @@ int __ocfs2_add_entry(handle_t *handle,
> >               /* These checks should've already been passed by the
> >                * prepare function, but I guess we can leave them
> >                * here anyway. */
> > -             if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
> > +             if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, size, offset)) {
> >                       retval = -ENOENT;
> >                       goto bail;
> >               }
> > @@ -1774,7 +1782,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
> >               }
> >
> >               de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
> > -             if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
> > +             if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
> > +                                        i_size_read(inode), ctx->pos)) {
>
> This function (ocfs2_dir_foreach_blk_id) handles inline mode.
>
> The de & data->id_data are both located in the inode struct, starting at
> offset 0xc8 in ((struct ocfs2_dinode)->id2.i_data). however di_bh->b_data
> points to the start address of the inode.
>
> if ocfs2_check_dir_entry() uses de & data->id_data to compare the offset, it
> misses the 0xc8 space.
>
>   struct ocfs2_dinode
> +----------------------------------+
> |   ...      |    id2.i_data       |
> |            |---------------------|
> |            |           |id_data[]|
> ^------------------------^-------^-+
> |                        |   ... |
> 0x0                     0xc8     0xc8+xxx: de
> di_bh->b_data         data->id_da
>
>
> (This patch) in ocfs2_check_dir_entry(), it uses "de - data->id_da" to get
> the offset. However, from the above pic, the algorithm is wrong; it forgets
> the [0,0xc8].
> the correct way: (de - bh->b_data)
>
> the same issue also occurs in ocfs2_find_dir_space_id().
>
>
> Thanks,
> Heming
>
> >                       /* On error, skip the f_pos to the end. */
> >                       ctx->pos = i_size_read(inode);
> >                       break;
> > @@ -1867,7 +1876,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
> >               while (ctx->pos < i_size_read(inode)
> >                      && offset < sb->s_blocksize) {
> >                       de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
> > -                     if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
> > +                     if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
> > +                                                sb->s_blocksize, offset)) {
> >                               /* On error, skip the f_pos to the
> >                                  next block. */
> >                               ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
> > @@ -3339,7 +3349,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
> >       struct super_block *sb = dir->i_sb;
> >       struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
> >       struct ocfs2_dir_entry *de, *last_de = NULL;
> > -     char *de_buf, *limit;
> > +     char *first_de, *de_buf, *limit;
> >       unsigned long offset = 0;
> >       unsigned int rec_len, new_rec_len, free_space;
> >
> > @@ -3352,14 +3362,15 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
> >       else
> >               free_space = dir->i_sb->s_blocksize - i_size_read(dir);
> >
> > -     de_buf = di->id2.i_data.id_data;
> > +     first_de = di->id2.i_data.id_data;
> > +     de_buf = first_de;
> >       limit = de_buf + i_size_read(dir);
> >       rec_len = OCFS2_DIR_REC_LEN(namelen);
> >
> >       while (de_buf < limit) {
> >               de = (struct ocfs2_dir_entry *)de_buf;
> >
> > -             if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
> > +             if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, i_size_read(dir), offset)) {
> >                       ret = -ENOENT;
> >                       goto out;
> >               }
> > @@ -3441,7 +3452,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
> >                       /* move to next block */
> >                       de = (struct ocfs2_dir_entry *) bh->b_data;
> >               }
> > -             if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
> > +             if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, offset)) {
> >                       status = -ENOENT;
> >                       goto bail;
> >               }
>

Joseph Qi June 22, 2024, 12:31 a.m. UTC | #3

Hi Heming，
Any more comments on this version?

Thanks,
Joseph

On 6/18/24 9:09 PM, lei lu wrote:
> On Tue, Jun 18, 2024 at 8:04 PM Heming Zhao <heming.zhao@suse.com> wrote:
>>
>> On 6/18/24 14:54, lei lu wrote:
>>> This adds sanity checks for ocfs2_dir_entry to make sure all members of
>>> ocfs2_dir_entry don't stray beyond valid memory region.
>>>
>>> Signed-off-by: lei lu <llfamsec@gmail.com>
>>> ---
>>>   fs/ocfs2/dir.c | 39 +++++++++++++++++++++++++--------------
>>>   1 file changed, 25 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
>>> index d620d4c53c6f..cdd92b4f9bf1 100644
>>> --- a/fs/ocfs2/dir.c
>>> +++ b/fs/ocfs2/dir.c
>>> @@ -294,13 +294,16 @@ static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
>>>    * bh passed here can be an inode block or a dir data block, depending
>>>    * on the inode inline data flag.
>>>    */
>>> -static int ocfs2_check_dir_entry(struct inode * dir,
>>> -                              struct ocfs2_dir_entry * de,
>>> -                              struct buffer_head * bh,
>>> +static int ocfs2_check_dir_entry(struct inode *dir,
>>> +                              struct ocfs2_dir_entry *de,
>>> +                              struct buffer_head *bh,
>>> +                              char *buf,
>>> +                              unsigned int size,
>>>                                unsigned long offset)
>>>   {
>>>       const char *error_msg = NULL;
>>>       const int rlen = le16_to_cpu(de->rec_len);
>>> +     const unsigned long next_offset = ((char *) de - buf) + rlen;
>>
>> Hi Lei & Joseph,
>>
>> I withdraw my previous saying: the rest codes look good to me.
>>
>> I checked some code. In my view, if the code runs as expected,
>> next_offset should be equal to:  "((char *) de - bh->b_data) + rlen".
> 
> Hi,
> 
> As you said below, In inline mode, dir entry list starts at
> bh->b_data->id2.i_data,
> In the dir block mode, dir entry list starts at bh->b_data directly.
> 
> So the prerequisite for checking here is to make sure that buf is the first_de
> and size is the de list size. We make sure that de don't stray beyond the de
> list regardless of its mode.
> 
> The above patch code refers to the __ext4_check_dir_entry.
> 
>>
>> You could add codes here to verify, e.g:
>>    printk("...");
>>    BUG_ON(next_offset != (((char *)de - bh->b_data) + rlen));
>>
>> This BUG_ON() will never be triggered.
>>
>> Please take some time to build and run the test, If my thinking is right,
>> you need to send v6 patch.
>>
>> btw, please don't forget to check my below comments.
>> It seems your code also has other bug.
>>
>> @Joseph, I also need your opinion.
>>
>>>
>>>       if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
>>>               error_msg = "rec_len is smaller than minimal";
>>> @@ -308,6 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
>>>               error_msg = "rec_len % 4 != 0";
>>>       else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
>>>               error_msg = "rec_len is too small for name_len";
>>> +     else if (unlikely(next_offset > size))
>>> +             error_msg = "directory entry overrun";
>>> +     else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
>>> +              next_offset != size)
>>> +             error_msg = "directory entry too close to end";
>>>       else if (unlikely(
>>>                ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
>>>               error_msg = "directory entry across blocks";
>>> @@ -352,16 +360,16 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
>>>       de_buf = first_de;
>>>       dlimit = de_buf + bytes;
>>>
>>> -     while (de_buf < dlimit) {
>>> +     while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
>>>               /* this code is executed quadratically often */
>>>               /* do minimal checking `by hand' */
>>>
>>>               de = (struct ocfs2_dir_entry *) de_buf;
>>>
>>> -             if (de_buf + namelen <= dlimit &&
>>> +             if (de->name + namelen <= dlimit &&
>>>                   ocfs2_match(namelen, name, de)) {
>>>                       /* found a match - just to be sure, do a full check */
>>> -                     if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
>>> +                     if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, offset)) {
>>
>>
>> ocfs2 uses two ways to store dir entry:
>>   inline mode (OCFS2_INLINE_DATA_FL) & dir block mode
>>
>> Above function (ocfs2_search_dirblock) handles dir block case, the code
>> can work under expectation.
> 
> I think ocfs2_search_dirblock handles both two modes? Because In the caller
> ocfs2_find_entry_id, the first_de is from di_bh->b_data->id2.i_data->id_data.
> 
>>
>>>                               ret = -1;
>>>                               goto bail;
>>>                       }
>>> @@ -1138,7 +1146,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
>>>       pde = NULL;
>>>       de = (struct ocfs2_dir_entry *) first_de;
>>>       while (i < bytes) {
>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
>>>                       status = -EIO;
>>>                       mlog_errno(status);
>>>                       goto bail;
>>> @@ -1635,7 +1643,7 @@ int __ocfs2_add_entry(handle_t *handle,
>>>               /* These checks should've already been passed by the
>>>                * prepare function, but I guess we can leave them
>>>                * here anyway. */
>>> -             if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
>>> +             if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, size, offset)) {
>>>                       retval = -ENOENT;
>>>                       goto bail;
>>>               }
>>> @@ -1774,7 +1782,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
>>>               }
>>>
>>>               de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
>>> -             if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
>>> +             if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
>>> +                                        i_size_read(inode), ctx->pos)) {
>>
>> This function (ocfs2_dir_foreach_blk_id) handles inline mode.
>>
>> The de & data->id_data are both located in the inode struct, starting at
>> offset 0xc8 in ((struct ocfs2_dinode)->id2.i_data). however di_bh->b_data
>> points to the start address of the inode.
>>
>> if ocfs2_check_dir_entry() uses de & data->id_data to compare the offset, it
>> misses the 0xc8 space.
>>
>>   struct ocfs2_dinode
>> +----------------------------------+
>> |   ...      |    id2.i_data       |
>> |            |---------------------|
>> |            |           |id_data[]|
>> ^------------------------^-------^-+
>> |                        |   ... |
>> 0x0                     0xc8     0xc8+xxx: de
>> di_bh->b_data         data->id_da
>>
>>
>> (This patch) in ocfs2_check_dir_entry(), it uses "de - data->id_da" to get
>> the offset. However, from the above pic, the algorithm is wrong; it forgets
>> the [0,0xc8].
>> the correct way: (de - bh->b_data)
>>
>> the same issue also occurs in ocfs2_find_dir_space_id().
>>
>>
>> Thanks,
>> Heming
>>
>>>                       /* On error, skip the f_pos to the end. */
>>>                       ctx->pos = i_size_read(inode);
>>>                       break;
>>> @@ -1867,7 +1876,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
>>>               while (ctx->pos < i_size_read(inode)
>>>                      && offset < sb->s_blocksize) {
>>>                       de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
>>> -                     if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
>>> +                     if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
>>> +                                                sb->s_blocksize, offset)) {
>>>                               /* On error, skip the f_pos to the
>>>                                  next block. */
>>>                               ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
>>> @@ -3339,7 +3349,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>>>       struct super_block *sb = dir->i_sb;
>>>       struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
>>>       struct ocfs2_dir_entry *de, *last_de = NULL;
>>> -     char *de_buf, *limit;
>>> +     char *first_de, *de_buf, *limit;
>>>       unsigned long offset = 0;
>>>       unsigned int rec_len, new_rec_len, free_space;
>>>
>>> @@ -3352,14 +3362,15 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>>>       else
>>>               free_space = dir->i_sb->s_blocksize - i_size_read(dir);
>>>
>>> -     de_buf = di->id2.i_data.id_data;
>>> +     first_de = di->id2.i_data.id_data;
>>> +     de_buf = first_de;
>>>       limit = de_buf + i_size_read(dir);
>>>       rec_len = OCFS2_DIR_REC_LEN(namelen);
>>>
>>>       while (de_buf < limit) {
>>>               de = (struct ocfs2_dir_entry *)de_buf;
>>>
>>> -             if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
>>> +             if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, i_size_read(dir), offset)) {
>>>                       ret = -ENOENT;
>>>                       goto out;
>>>               }
>>> @@ -3441,7 +3452,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
>>>                       /* move to next block */
>>>                       de = (struct ocfs2_dir_entry *) bh->b_data;
>>>               }
>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, offset)) {
>>>                       status = -ENOENT;
>>>                       goto bail;
>>>               }
>>

heming.zhao@suse.com June 23, 2024, 11:29 p.m. UTC | #4

On 6/22/24 08:31, Joseph Qi wrote:
> Hi Heming，
> Any more comments on this version?
> 

I was a little bit busy last week, and will recheck the code this
week and provide my comment.

-Heming

> 
> On 6/18/24 9:09 PM, lei lu wrote:
>> On Tue, Jun 18, 2024 at 8:04 PM Heming Zhao <heming.zhao@suse.com> wrote:
>>>
>>> On 6/18/24 14:54, lei lu wrote:
>>>> This adds sanity checks for ocfs2_dir_entry to make sure all members of
>>>> ocfs2_dir_entry don't stray beyond valid memory region.
>>>>
>>>> Signed-off-by: lei lu <llfamsec@gmail.com>
>>>> ---
>>>>    fs/ocfs2/dir.c | 39 +++++++++++++++++++++++++--------------
>>>>    1 file changed, 25 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
>>>> index d620d4c53c6f..cdd92b4f9bf1 100644
>>>> --- a/fs/ocfs2/dir.c
>>>> +++ b/fs/ocfs2/dir.c
>>>> @@ -294,13 +294,16 @@ static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
>>>>     * bh passed here can be an inode block or a dir data block, depending
>>>>     * on the inode inline data flag.
>>>>     */
>>>> -static int ocfs2_check_dir_entry(struct inode * dir,
>>>> -                              struct ocfs2_dir_entry * de,
>>>> -                              struct buffer_head * bh,
>>>> +static int ocfs2_check_dir_entry(struct inode *dir,
>>>> +                              struct ocfs2_dir_entry *de,
>>>> +                              struct buffer_head *bh,
>>>> +                              char *buf,
>>>> +                              unsigned int size,
>>>>                                 unsigned long offset)
>>>>    {
>>>>        const char *error_msg = NULL;
>>>>        const int rlen = le16_to_cpu(de->rec_len);
>>>> +     const unsigned long next_offset = ((char *) de - buf) + rlen;
>>>
>>> Hi Lei & Joseph,
>>>
>>> I withdraw my previous saying: the rest codes look good to me.
>>>
>>> I checked some code. In my view, if the code runs as expected,
>>> next_offset should be equal to:  "((char *) de - bh->b_data) + rlen".
>>
>> Hi,
>>
>> As you said below, In inline mode, dir entry list starts at
>> bh->b_data->id2.i_data,
>> In the dir block mode, dir entry list starts at bh->b_data directly.
>>
>> So the prerequisite for checking here is to make sure that buf is the first_de
>> and size is the de list size. We make sure that de don't stray beyond the de
>> list regardless of its mode.
>>
>> The above patch code refers to the __ext4_check_dir_entry.
>>
>>>
>>> You could add codes here to verify, e.g:
>>>     printk("...");
>>>     BUG_ON(next_offset != (((char *)de - bh->b_data) + rlen));
>>>
>>> This BUG_ON() will never be triggered.
>>>
>>> Please take some time to build and run the test, If my thinking is right,
>>> you need to send v6 patch.
>>>
>>> btw, please don't forget to check my below comments.
>>> It seems your code also has other bug.
>>>
>>> @Joseph, I also need your opinion.
>>>
>>>>
>>>>        if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
>>>>                error_msg = "rec_len is smaller than minimal";
>>>> @@ -308,6 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
>>>>                error_msg = "rec_len % 4 != 0";
>>>>        else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
>>>>                error_msg = "rec_len is too small for name_len";
>>>> +     else if (unlikely(next_offset > size))
>>>> +             error_msg = "directory entry overrun";
>>>> +     else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
>>>> +              next_offset != size)
>>>> +             error_msg = "directory entry too close to end";
>>>>        else if (unlikely(
>>>>                 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
>>>>                error_msg = "directory entry across blocks";
>>>> @@ -352,16 +360,16 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
>>>>        de_buf = first_de;
>>>>        dlimit = de_buf + bytes;
>>>>
>>>> -     while (de_buf < dlimit) {
>>>> +     while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
>>>>                /* this code is executed quadratically often */
>>>>                /* do minimal checking `by hand' */
>>>>
>>>>                de = (struct ocfs2_dir_entry *) de_buf;
>>>>
>>>> -             if (de_buf + namelen <= dlimit &&
>>>> +             if (de->name + namelen <= dlimit &&
>>>>                    ocfs2_match(namelen, name, de)) {
>>>>                        /* found a match - just to be sure, do a full check */
>>>> -                     if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
>>>> +                     if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, offset)) {
>>>
>>>
>>> ocfs2 uses two ways to store dir entry:
>>>    inline mode (OCFS2_INLINE_DATA_FL) & dir block mode
>>>
>>> Above function (ocfs2_search_dirblock) handles dir block case, the code
>>> can work under expectation.
>>
>> I think ocfs2_search_dirblock handles both two modes? Because In the caller
>> ocfs2_find_entry_id, the first_de is from di_bh->b_data->id2.i_data->id_data.
>>
>>>
>>>>                                ret = -1;
>>>>                                goto bail;
>>>>                        }
>>>> @@ -1138,7 +1146,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
>>>>        pde = NULL;
>>>>        de = (struct ocfs2_dir_entry *) first_de;
>>>>        while (i < bytes) {
>>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
>>>>                        status = -EIO;
>>>>                        mlog_errno(status);
>>>>                        goto bail;
>>>> @@ -1635,7 +1643,7 @@ int __ocfs2_add_entry(handle_t *handle,
>>>>                /* These checks should've already been passed by the
>>>>                 * prepare function, but I guess we can leave them
>>>>                 * here anyway. */
>>>> -             if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, size, offset)) {
>>>>                        retval = -ENOENT;
>>>>                        goto bail;
>>>>                }
>>>> @@ -1774,7 +1782,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
>>>>                }
>>>>
>>>>                de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
>>>> -             if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
>>>> +             if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
>>>> +                                        i_size_read(inode), ctx->pos)) {
>>>
>>> This function (ocfs2_dir_foreach_blk_id) handles inline mode.
>>>
>>> The de & data->id_data are both located in the inode struct, starting at
>>> offset 0xc8 in ((struct ocfs2_dinode)->id2.i_data). however di_bh->b_data
>>> points to the start address of the inode.
>>>
>>> if ocfs2_check_dir_entry() uses de & data->id_data to compare the offset, it
>>> misses the 0xc8 space.
>>>
>>>    struct ocfs2_dinode
>>> +----------------------------------+
>>> |   ...      |    id2.i_data       |
>>> |            |---------------------|
>>> |            |           |id_data[]|
>>> ^------------------------^-------^-+
>>> |                        |   ... |
>>> 0x0                     0xc8     0xc8+xxx: de
>>> di_bh->b_data         data->id_da
>>>
>>>
>>> (This patch) in ocfs2_check_dir_entry(), it uses "de - data->id_da" to get
>>> the offset. However, from the above pic, the algorithm is wrong; it forgets
>>> the [0,0xc8].
>>> the correct way: (de - bh->b_data)
>>>
>>> the same issue also occurs in ocfs2_find_dir_space_id().
>>>
>>>
>>> Thanks,
>>> Heming
>>>
>>>>                        /* On error, skip the f_pos to the end. */
>>>>                        ctx->pos = i_size_read(inode);
>>>>                        break;
>>>> @@ -1867,7 +1876,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
>>>>                while (ctx->pos < i_size_read(inode)
>>>>                       && offset < sb->s_blocksize) {
>>>>                        de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
>>>> -                     if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
>>>> +                     if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
>>>> +                                                sb->s_blocksize, offset)) {
>>>>                                /* On error, skip the f_pos to the
>>>>                                   next block. */
>>>>                                ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
>>>> @@ -3339,7 +3349,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>>>>        struct super_block *sb = dir->i_sb;
>>>>        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
>>>>        struct ocfs2_dir_entry *de, *last_de = NULL;
>>>> -     char *de_buf, *limit;
>>>> +     char *first_de, *de_buf, *limit;
>>>>        unsigned long offset = 0;
>>>>        unsigned int rec_len, new_rec_len, free_space;
>>>>
>>>> @@ -3352,14 +3362,15 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>>>>        else
>>>>                free_space = dir->i_sb->s_blocksize - i_size_read(dir);
>>>>
>>>> -     de_buf = di->id2.i_data.id_data;
>>>> +     first_de = di->id2.i_data.id_data;
>>>> +     de_buf = first_de;
>>>>        limit = de_buf + i_size_read(dir);
>>>>        rec_len = OCFS2_DIR_REC_LEN(namelen);
>>>>
>>>>        while (de_buf < limit) {
>>>>                de = (struct ocfs2_dir_entry *)de_buf;
>>>>
>>>> -             if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, i_size_read(dir), offset)) {
>>>>                        ret = -ENOENT;
>>>>                        goto out;
>>>>                }
>>>> @@ -3441,7 +3452,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
>>>>                        /* move to next block */
>>>>                        de = (struct ocfs2_dir_entry *) bh->b_data;
>>>>                }
>>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, offset)) {
>>>>                        status = -ENOENT;
>>>>                        goto bail;
>>>>                }
>>>

heming.zhao@suse.com June 26, 2024, 8:30 a.m. UTC | #5

On 6/22/24 08:31, Joseph Qi wrote:
> Hi Heming，
> Any more comments on this version?
> 
> Thanks,
> Joseph

I made a mistake in the last mail. For inline mode, the 'size' parameter 
of ocfs2_check_dir_entry is the size of di->id2.i_data.id_data[], not 
the whole inode size.

ref:
ocfs2_fill_new_dir_id
  + size = le16_to_cpu(data->id_count); //id_data[] size 

  + i_size_write(inode, size); //record in inode

So the algorithm of v5 patch is correct. there is only one minor issue 
needs to be fixed. see below.

> 
> On 6/18/24 9:09 PM, lei lu wrote:
>> On Tue, Jun 18, 2024 at 8:04 PM Heming Zhao <heming.zhao@suse.com> wrote:
>>>
>>> On 6/18/24 14:54, lei lu wrote:
>>>> This adds sanity checks for ocfs2_dir_entry to make sure all members of
>>>> ocfs2_dir_entry don't stray beyond valid memory region.
>>>>
>>>> Signed-off-by: lei lu <llfamsec@gmail.com>
>>>> ---
>>>>    fs/ocfs2/dir.c | 39 +++++++++++++++++++++++++--------------
>>>>    1 file changed, 25 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
>>>> index d620d4c53c6f..cdd92b4f9bf1 100644
>>>> --- a/fs/ocfs2/dir.c
>>>> +++ b/fs/ocfs2/dir.c
>>>> @@ -294,13 +294,16 @@ static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
>>>>     * bh passed here can be an inode block or a dir data block, depending
>>>>     * on the inode inline data flag.
>>>>     */
>>>> -static int ocfs2_check_dir_entry(struct inode * dir,
>>>> -                              struct ocfs2_dir_entry * de,
>>>> -                              struct buffer_head * bh,
>>>> +static int ocfs2_check_dir_entry(struct inode *dir,
>>>> +                              struct ocfs2_dir_entry *de,
>>>> +                              struct buffer_head *bh,
>>>> +                              char *buf,
>>>> +                              unsigned int size,
>>>>                                 unsigned long offset)
>>>>    {
>>>>        const char *error_msg = NULL;
>>>>        const int rlen = le16_to_cpu(de->rec_len);
>>>> +     const unsigned long next_offset = ((char *) de - buf) + rlen;
>>>
>>> Hi Lei & Joseph,
>>>
>>> I withdraw my previous saying: the rest codes look good to me.
>>>
>>> I checked some code. In my view, if the code runs as expected,
>>> next_offset should be equal to:  "((char *) de - bh->b_data) + rlen".
>>
>> Hi,
>>
>> As you said below, In inline mode, dir entry list starts at
>> bh->b_data->id2.i_data,
>> In the dir block mode, dir entry list starts at bh->b_data directly.
>>
>> So the prerequisite for checking here is to make sure that buf is the first_de
>> and size is the de list size. We make sure that de don't stray beyond the de
>> list regardless of its mode.
>>
>> The above patch code refers to the __ext4_check_dir_entry.
>>
>>>
>>> You could add codes here to verify, e.g:
>>>     printk("...");
>>>     BUG_ON(next_offset != (((char *)de - bh->b_data) + rlen));
>>>
>>> This BUG_ON() will never be triggered.
>>>
>>> Please take some time to build and run the test, If my thinking is right,
>>> you need to send v6 patch.
>>>
>>> btw, please don't forget to check my below comments.
>>> It seems your code also has other bug.
>>>
>>> @Joseph, I also need your opinion.
>>>
>>>>
>>>>        if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
>>>>                error_msg = "rec_len is smaller than minimal";
>>>> @@ -308,6 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
>>>>                error_msg = "rec_len % 4 != 0";
>>>>        else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
>>>>                error_msg = "rec_len is too small for name_len";
>>>> +     else if (unlikely(next_offset > size))
>>>> +             error_msg = "directory entry overrun";
>>>> +     else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
>>>> +              next_offset != size)
>>>> +             error_msg = "directory entry too close to end";
>>>>        else if (unlikely(
>>>>                 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
>>>>                error_msg = "directory entry across blocks";

the existing code " '((char *) de - bh->b_data) + rlen > 
dir->i_sb->s_blocksize' " should be removed. the reason is same with 
commit 226ba972b0863 ("ext4: refactor __ext4_check_dir_entry() to accept 
start and size").

Thanks,
Heming

>>>> @@ -352,16 +360,16 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
>>>>        de_buf = first_de;
>>>>        dlimit = de_buf + bytes;
>>>>
>>>> -     while (de_buf < dlimit) {
>>>> +     while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
>>>>                /* this code is executed quadratically often */
>>>>                /* do minimal checking `by hand' */
>>>>
>>>>                de = (struct ocfs2_dir_entry *) de_buf;
>>>>
>>>> -             if (de_buf + namelen <= dlimit &&
>>>> +             if (de->name + namelen <= dlimit &&
>>>>                    ocfs2_match(namelen, name, de)) {
>>>>                        /* found a match - just to be sure, do a full check */
>>>> -                     if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
>>>> +                     if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, offset)) {
>>>
>>>
>>> ocfs2 uses two ways to store dir entry:
>>>    inline mode (OCFS2_INLINE_DATA_FL) & dir block mode
>>>
>>> Above function (ocfs2_search_dirblock) handles dir block case, the code
>>> can work under expectation.
>>
>> I think ocfs2_search_dirblock handles both two modes? Because In the caller
>> ocfs2_find_entry_id, the first_de is from di_bh->b_data->id2.i_data->id_data.
>>
>>>
>>>>                                ret = -1;
>>>>                                goto bail;
>>>>                        }
>>>> @@ -1138,7 +1146,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
>>>>        pde = NULL;
>>>>        de = (struct ocfs2_dir_entry *) first_de;
>>>>        while (i < bytes) {
>>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
>>>>                        status = -EIO;
>>>>                        mlog_errno(status);
>>>>                        goto bail;
>>>> @@ -1635,7 +1643,7 @@ int __ocfs2_add_entry(handle_t *handle,
>>>>                /* These checks should've already been passed by the
>>>>                 * prepare function, but I guess we can leave them
>>>>                 * here anyway. */
>>>> -             if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, size, offset)) {
>>>>                        retval = -ENOENT;
>>>>                        goto bail;
>>>>                }
>>>> @@ -1774,7 +1782,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
>>>>                }
>>>>
>>>>                de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
>>>> -             if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
>>>> +             if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
>>>> +                                        i_size_read(inode), ctx->pos)) {
>>>
>>> This function (ocfs2_dir_foreach_blk_id) handles inline mode.
>>>
>>> The de & data->id_data are both located in the inode struct, starting at
>>> offset 0xc8 in ((struct ocfs2_dinode)->id2.i_data). however di_bh->b_data
>>> points to the start address of the inode.
>>>
>>> if ocfs2_check_dir_entry() uses de & data->id_data to compare the offset, it
>>> misses the 0xc8 space.
>>>
>>>    struct ocfs2_dinode
>>> +----------------------------------+
>>> |   ...      |    id2.i_data       |
>>> |            |---------------------|
>>> |            |           |id_data[]|
>>> ^------------------------^-------^-+
>>> |                        |   ... |
>>> 0x0                     0xc8     0xc8+xxx: de
>>> di_bh->b_data         data->id_da
>>>
>>>
>>> (This patch) in ocfs2_check_dir_entry(), it uses "de - data->id_da" to get
>>> the offset. However, from the above pic, the algorithm is wrong; it forgets
>>> the [0,0xc8].
>>> the correct way: (de - bh->b_data)
>>>
>>> the same issue also occurs in ocfs2_find_dir_space_id().
>>>
>>>
>>> Thanks,
>>> Heming
>>>
>>>>                        /* On error, skip the f_pos to the end. */
>>>>                        ctx->pos = i_size_read(inode);
>>>>                        break;
>>>> @@ -1867,7 +1876,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
>>>>                while (ctx->pos < i_size_read(inode)
>>>>                       && offset < sb->s_blocksize) {
>>>>                        de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
>>>> -                     if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
>>>> +                     if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
>>>> +                                                sb->s_blocksize, offset)) {
>>>>                                /* On error, skip the f_pos to the
>>>>                                   next block. */
>>>>                                ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
>>>> @@ -3339,7 +3349,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>>>>        struct super_block *sb = dir->i_sb;
>>>>        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
>>>>        struct ocfs2_dir_entry *de, *last_de = NULL;
>>>> -     char *de_buf, *limit;
>>>> +     char *first_de, *de_buf, *limit;
>>>>        unsigned long offset = 0;
>>>>        unsigned int rec_len, new_rec_len, free_space;
>>>>
>>>> @@ -3352,14 +3362,15 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
>>>>        else
>>>>                free_space = dir->i_sb->s_blocksize - i_size_read(dir);
>>>>
>>>> -     de_buf = di->id2.i_data.id_data;
>>>> +     first_de = di->id2.i_data.id_data;
>>>> +     de_buf = first_de;
>>>>        limit = de_buf + i_size_read(dir);
>>>>        rec_len = OCFS2_DIR_REC_LEN(namelen);
>>>>
>>>>        while (de_buf < limit) {
>>>>                de = (struct ocfs2_dir_entry *)de_buf;
>>>>
>>>> -             if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, i_size_read(dir), offset)) {
>>>>                        ret = -ENOENT;
>>>>                        goto out;
>>>>                }
>>>> @@ -3441,7 +3452,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
>>>>                        /* move to next block */
>>>>                        de = (struct ocfs2_dir_entry *) bh->b_data;
>>>>                }
>>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
>>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, offset)) {
>>>>                        status = -ENOENT;
>>>>                        goto bail;
>>>>                }
>>>

lei lu June 26, 2024, 9:40 a.m. UTC | #6

On Wed, Jun 26, 2024 at 4:30 PM heming.zhao@suse.com
<heming.zhao@suse.com> wrote:
>
> On 6/22/24 08:31, Joseph Qi wrote:
> > Hi Heming，
> > Any more comments on this version?
> >
> > Thanks,
> > Joseph
>
> I made a mistake in the last mail. For inline mode, the 'size' parameter
> of ocfs2_check_dir_entry is the size of di->id2.i_data.id_data[], not
> the whole inode size.
>
> ref:
> ocfs2_fill_new_dir_id
>   + size = le16_to_cpu(data->id_count); //id_data[] size
>
>   + i_size_write(inode, size); //record in inode
>
> So the algorithm of v5 patch is correct. there is only one minor issue
> needs to be fixed. see below.
>
> >
> > On 6/18/24 9:09 PM, lei lu wrote:
> >> On Tue, Jun 18, 2024 at 8:04 PM Heming Zhao <heming.zhao@suse.com> wrote:
> >>>
> >>> On 6/18/24 14:54, lei lu wrote:
> >>>> This adds sanity checks for ocfs2_dir_entry to make sure all members of
> >>>> ocfs2_dir_entry don't stray beyond valid memory region.
> >>>>
> >>>> Signed-off-by: lei lu <llfamsec@gmail.com>
> >>>> ---
> >>>>    fs/ocfs2/dir.c | 39 +++++++++++++++++++++++++--------------
> >>>>    1 file changed, 25 insertions(+), 14 deletions(-)
> >>>>
> >>>> diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
> >>>> index d620d4c53c6f..cdd92b4f9bf1 100644
> >>>> --- a/fs/ocfs2/dir.c
> >>>> +++ b/fs/ocfs2/dir.c
> >>>> @@ -294,13 +294,16 @@ static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
> >>>>     * bh passed here can be an inode block or a dir data block, depending
> >>>>     * on the inode inline data flag.
> >>>>     */
> >>>> -static int ocfs2_check_dir_entry(struct inode * dir,
> >>>> -                              struct ocfs2_dir_entry * de,
> >>>> -                              struct buffer_head * bh,
> >>>> +static int ocfs2_check_dir_entry(struct inode *dir,
> >>>> +                              struct ocfs2_dir_entry *de,
> >>>> +                              struct buffer_head *bh,
> >>>> +                              char *buf,
> >>>> +                              unsigned int size,
> >>>>                                 unsigned long offset)
> >>>>    {
> >>>>        const char *error_msg = NULL;
> >>>>        const int rlen = le16_to_cpu(de->rec_len);
> >>>> +     const unsigned long next_offset = ((char *) de - buf) + rlen;
> >>>
> >>> Hi Lei & Joseph,
> >>>
> >>> I withdraw my previous saying: the rest codes look good to me.
> >>>
> >>> I checked some code. In my view, if the code runs as expected,
> >>> next_offset should be equal to:  "((char *) de - bh->b_data) + rlen".
> >>
> >> Hi,
> >>
> >> As you said below, In inline mode, dir entry list starts at
> >> bh->b_data->id2.i_data,
> >> In the dir block mode, dir entry list starts at bh->b_data directly.
> >>
> >> So the prerequisite for checking here is to make sure that buf is the first_de
> >> and size is the de list size. We make sure that de don't stray beyond the de
> >> list regardless of its mode.
> >>
> >> The above patch code refers to the __ext4_check_dir_entry.
> >>
> >>>
> >>> You could add codes here to verify, e.g:
> >>>     printk("...");
> >>>     BUG_ON(next_offset != (((char *)de - bh->b_data) + rlen));
> >>>
> >>> This BUG_ON() will never be triggered.
> >>>
> >>> Please take some time to build and run the test, If my thinking is right,
> >>> you need to send v6 patch.
> >>>
> >>> btw, please don't forget to check my below comments.
> >>> It seems your code also has other bug.
> >>>
> >>> @Joseph, I also need your opinion.
> >>>
> >>>>
> >>>>        if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
> >>>>                error_msg = "rec_len is smaller than minimal";
> >>>> @@ -308,6 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
> >>>>                error_msg = "rec_len % 4 != 0";
> >>>>        else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
> >>>>                error_msg = "rec_len is too small for name_len";
> >>>> +     else if (unlikely(next_offset > size))
> >>>> +             error_msg = "directory entry overrun";
> >>>> +     else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
> >>>> +              next_offset != size)
> >>>> +             error_msg = "directory entry too close to end";
> >>>>        else if (unlikely(
> >>>>                 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
> >>>>                error_msg = "directory entry across blocks";
>
> the existing code " '((char *) de - bh->b_data) + rlen >
> dir->i_sb->s_blocksize' " should be removed. the reason is same with
> commit 226ba972b0863 ("ext4: refactor __ext4_check_dir_entry() to accept
> start and size").

Sure, I will send a v6 patch to fix this issue.

Thanks,
LL

>
> Thanks,
> Heming
>
> >>>> @@ -352,16 +360,16 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
> >>>>        de_buf = first_de;
> >>>>        dlimit = de_buf + bytes;
> >>>>
> >>>> -     while (de_buf < dlimit) {
> >>>> +     while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
> >>>>                /* this code is executed quadratically often */
> >>>>                /* do minimal checking `by hand' */
> >>>>
> >>>>                de = (struct ocfs2_dir_entry *) de_buf;
> >>>>
> >>>> -             if (de_buf + namelen <= dlimit &&
> >>>> +             if (de->name + namelen <= dlimit &&
> >>>>                    ocfs2_match(namelen, name, de)) {
> >>>>                        /* found a match - just to be sure, do a full check */
> >>>> -                     if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
> >>>> +                     if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, offset)) {
> >>>
> >>>
> >>> ocfs2 uses two ways to store dir entry:
> >>>    inline mode (OCFS2_INLINE_DATA_FL) & dir block mode
> >>>
> >>> Above function (ocfs2_search_dirblock) handles dir block case, the code
> >>> can work under expectation.
> >>
> >> I think ocfs2_search_dirblock handles both two modes? Because In the caller
> >> ocfs2_find_entry_id, the first_de is from di_bh->b_data->id2.i_data->id_data.
> >>
> >>>
> >>>>                                ret = -1;
> >>>>                                goto bail;
> >>>>                        }
> >>>> @@ -1138,7 +1146,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
> >>>>        pde = NULL;
> >>>>        de = (struct ocfs2_dir_entry *) first_de;
> >>>>        while (i < bytes) {
> >>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
> >>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
> >>>>                        status = -EIO;
> >>>>                        mlog_errno(status);
> >>>>                        goto bail;
> >>>> @@ -1635,7 +1643,7 @@ int __ocfs2_add_entry(handle_t *handle,
> >>>>                /* These checks should've already been passed by the
> >>>>                 * prepare function, but I guess we can leave them
> >>>>                 * here anyway. */
> >>>> -             if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
> >>>> +             if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, size, offset)) {
> >>>>                        retval = -ENOENT;
> >>>>                        goto bail;
> >>>>                }
> >>>> @@ -1774,7 +1782,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
> >>>>                }
> >>>>
> >>>>                de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
> >>>> -             if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
> >>>> +             if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
> >>>> +                                        i_size_read(inode), ctx->pos)) {
> >>>
> >>> This function (ocfs2_dir_foreach_blk_id) handles inline mode.
> >>>
> >>> The de & data->id_data are both located in the inode struct, starting at
> >>> offset 0xc8 in ((struct ocfs2_dinode)->id2.i_data). however di_bh->b_data
> >>> points to the start address of the inode.
> >>>
> >>> if ocfs2_check_dir_entry() uses de & data->id_data to compare the offset, it
> >>> misses the 0xc8 space.
> >>>
> >>>    struct ocfs2_dinode
> >>> +----------------------------------+
> >>> |   ...      |    id2.i_data       |
> >>> |            |---------------------|
> >>> |            |           |id_data[]|
> >>> ^------------------------^-------^-+
> >>> |                        |   ... |
> >>> 0x0                     0xc8     0xc8+xxx: de
> >>> di_bh->b_data         data->id_da
> >>>
> >>>
> >>> (This patch) in ocfs2_check_dir_entry(), it uses "de - data->id_da" to get
> >>> the offset. However, from the above pic, the algorithm is wrong; it forgets
> >>> the [0,0xc8].
> >>> the correct way: (de - bh->b_data)
> >>>
> >>> the same issue also occurs in ocfs2_find_dir_space_id().
> >>>
> >>>
> >>> Thanks,
> >>> Heming
> >>>
> >>>>                        /* On error, skip the f_pos to the end. */
> >>>>                        ctx->pos = i_size_read(inode);
> >>>>                        break;
> >>>> @@ -1867,7 +1876,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
> >>>>                while (ctx->pos < i_size_read(inode)
> >>>>                       && offset < sb->s_blocksize) {
> >>>>                        de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
> >>>> -                     if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
> >>>> +                     if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
> >>>> +                                                sb->s_blocksize, offset)) {
> >>>>                                /* On error, skip the f_pos to the
> >>>>                                   next block. */
> >>>>                                ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
> >>>> @@ -3339,7 +3349,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
> >>>>        struct super_block *sb = dir->i_sb;
> >>>>        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
> >>>>        struct ocfs2_dir_entry *de, *last_de = NULL;
> >>>> -     char *de_buf, *limit;
> >>>> +     char *first_de, *de_buf, *limit;
> >>>>        unsigned long offset = 0;
> >>>>        unsigned int rec_len, new_rec_len, free_space;
> >>>>
> >>>> @@ -3352,14 +3362,15 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
> >>>>        else
> >>>>                free_space = dir->i_sb->s_blocksize - i_size_read(dir);
> >>>>
> >>>> -     de_buf = di->id2.i_data.id_data;
> >>>> +     first_de = di->id2.i_data.id_data;
> >>>> +     de_buf = first_de;
> >>>>        limit = de_buf + i_size_read(dir);
> >>>>        rec_len = OCFS2_DIR_REC_LEN(namelen);
> >>>>
> >>>>        while (de_buf < limit) {
> >>>>                de = (struct ocfs2_dir_entry *)de_buf;
> >>>>
> >>>> -             if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
> >>>> +             if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, i_size_read(dir), offset)) {
> >>>>                        ret = -ENOENT;
> >>>>                        goto out;
> >>>>                }
> >>>> @@ -3441,7 +3452,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
> >>>>                        /* move to next block */
> >>>>                        de = (struct ocfs2_dir_entry *) bh->b_data;
> >>>>                }
> >>>> -             if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
> >>>> +             if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, offset)) {
> >>>>                        status = -ENOENT;
> >>>>                        goto bail;
> >>>>                }
> >>>

[v5] ocfs2: add bounds checking to ocfs2_check_dir_entry()

Commit Message

Comments

Patch