Message ID | 20230131170436.31280-1-iuliana.prodan@oss.nxp.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [v2] remoteproc: imx_dsp_rproc: add custom memory copy implementation for i.MX DSP Cores | expand |
On 2/1/2023 1:04 AM, Iuliana Prodan (OSS) wrote: > From: Iuliana Prodan <iuliana.prodan@nxp.com> > > The IRAM is part of the HiFi DSP. > According to hardware specification only 32-bits write are allowed > otherwise we get a Kernel panic. > > Therefore add a custom memory copy and memset functions to deal with > the above restriction. Which platform has this limitation? This driver has been landed for quite some time, is there any specific condition to trigger the issue? Regards, Peng. > > Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com> > > --- > Changes since v1 > - added missing check for cases when the memory slot is bigger than the file size; > - added a custom memset function > - removed is_iomem flag since is not used here > - updated custom memcpy function to avoid reading after end of source > --- > drivers/remoteproc/imx_dsp_rproc.c | 181 ++++++++++++++++++++++++++++- > 1 file changed, 180 insertions(+), 1 deletion(-) > > diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c > index e4b1e962d56ad..d0dcc0820fadd 100644 > --- a/drivers/remoteproc/imx_dsp_rproc.c > +++ b/drivers/remoteproc/imx_dsp_rproc.c > @@ -715,6 +715,185 @@ static void imx_dsp_rproc_kick(struct rproc *rproc, int vqid) > dev_err(dev, "%s: failed (%d, err:%d)\n", __func__, vqid, err); > } > > +/* > + * Custom memory copy implementation for i.MX DSP Cores > + * > + * The IRAM is part of the HiFi DSP. > + * According to hw specs only 32-bits writes are allowed. 
> + */ > +static int imx_dsp_rproc_memcpy(void *dest, const void *src, size_t size) > +{ > + const u8 *src_byte = src; > + u32 affected_mask; > + u32 tmp; > + int i, q, r; > + > + /* destination must be 32bit aligned */ > + if (!IS_ALIGNED((u64)dest, 4)) > + return -EINVAL; > + > + q = size / 4; > + r = size % 4; > + > + /* __iowrite32_copy use 32bit size values so divide by 4 */ > + __iowrite32_copy(dest, src, q); > + > + if (r) { > + affected_mask = (1 << (8 * r)) - 1; > + > + /* first read the 32bit data of dest, then change affected > + * bytes, and write back to dest. > + * For unaffected bytes, it should not be changed > + */ > + tmp = ioread32(dest + q * 4); > + tmp &= ~affected_mask; > + > + /* avoid reading after end of source */ > + for (i = 0; i < r; i++) > + tmp |= (src_byte[q * 4 + i] << (8 * i)); > + > + iowrite32(tmp, dest + q * 4); > + } > + > + return 0; > +} > + > +/* > + * Custom memset implementation for i.MX DSP Cores > + * > + * The IRAM is part of the HiFi DSP. > + * According to hw specs only 32-bits writes are allowed. > + */ > +static int imx_dsp_rproc_memset(void *addr, u8 value, size_t size) > +{ > + u32 affected_mask; > + u32 tmp_val = value; > + u32 *tmp_dst = addr; > + u32 tmp; > + int q, r; > + > + /* destination must be 32bit aligned */ > + if (!IS_ALIGNED((u64)addr, 4)) > + return -EINVAL; > + > + tmp_val |= tmp_val << 8; > + tmp_val |= tmp_val << 16; > + > + q = size / 4; > + r = size % 4; > + > + while (q--) > + iowrite32(tmp_val, tmp_dst++); > + > + if (r) { > + affected_mask = (1 << (8 * r)) - 1; > + > + /* first read the 32bit data of addr, then change affected > + * bytes, and write back to addr. 
> + * For unaffected bytes, it should not be changed > + */ > + tmp = ioread32(tmp_dst); > + tmp &= ~affected_mask; > + > + tmp |= (tmp_val & affected_mask); > + iowrite32(tmp, tmp_dst); > + } > + > + return 0; > +} > +/** > + * imx_dsp_rproc_elf_load_segments() - load firmware segments to memory > + * @rproc: remote processor which will be booted using these fw segments > + * @fw: the ELF firmware image > + * > + * This function loads the firmware segments to memory, where the remote > + * processor expects them. > + * > + * Return: 0 on success and an appropriate error code otherwise > + */ > +static int imx_dsp_rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw) > +{ > + struct device *dev = &rproc->dev; > + const void *ehdr, *phdr; > + int i, ret = 0; > + u16 phnum; > + const u8 *elf_data = fw->data; > + u8 class = fw_elf_get_class(fw); > + u32 elf_phdr_get_size = elf_size_of_phdr(class); > + > + ehdr = elf_data; > + phnum = elf_hdr_get_e_phnum(class, ehdr); > + phdr = elf_data + elf_hdr_get_e_phoff(class, ehdr); > + > + /* go through the available ELF segments */ > + for (i = 0; i < phnum; i++, phdr += elf_phdr_get_size) { > + u64 da = elf_phdr_get_p_paddr(class, phdr); > + u64 memsz = elf_phdr_get_p_memsz(class, phdr); > + u64 filesz = elf_phdr_get_p_filesz(class, phdr); > + u64 offset = elf_phdr_get_p_offset(class, phdr); > + u32 type = elf_phdr_get_p_type(class, phdr); > + void *ptr; > + > + if (type != PT_LOAD || !memsz) > + continue; > + > + dev_dbg(dev, "phdr: type %d da 0x%llx memsz 0x%llx filesz 0x%llx\n", > + type, da, memsz, filesz); > + > + if (filesz > memsz) { > + dev_err(dev, "bad phdr filesz 0x%llx memsz 0x%llx\n", > + filesz, memsz); > + ret = -EINVAL; > + break; > + } > + > + if (offset + filesz > fw->size) { > + dev_err(dev, "truncated fw: need 0x%llx avail 0x%zx\n", > + offset + filesz, fw->size); > + ret = -EINVAL; > + break; > + } > + > + if (!rproc_u64_fit_in_size_t(memsz)) { > + dev_err(dev, "size (%llx) does not fit 
in size_t type\n", > + memsz); > + ret = -EOVERFLOW; > + break; > + } > + > + /* grab the kernel address for this device address */ > + ptr = rproc_da_to_va(rproc, da, memsz, NULL); > + if (!ptr) { > + dev_err(dev, "bad phdr da 0x%llx mem 0x%llx\n", da, > + memsz); > + ret = -EINVAL; > + break; > + } > + > + /* put the segment where the remote processor expects it */ > + if (filesz) { > + ret = imx_dsp_rproc_memcpy(ptr, elf_data + offset, filesz); > + if (ret) { > + dev_err(dev, "memory copy failed for da 0x%llx memsz 0x%llx\n", > + da, memsz); > + break; > + } > + } > + > + /* zero out remaining memory for this segment */ > + if (memsz > filesz) { > + ret = imx_dsp_rproc_memset(ptr + filesz, 0, memsz - filesz); > + if (ret) { > + dev_err(dev, "memset failed for da 0x%llx memsz 0x%llx\n", > + da, memsz); > + break; > + } > + } > + } > + > + return ret; > +} > + > static int imx_dsp_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw) > { > if (rproc_elf_load_rsc_table(rproc, fw)) > @@ -729,7 +908,7 @@ static const struct rproc_ops imx_dsp_rproc_ops = { > .start = imx_dsp_rproc_start, > .stop = imx_dsp_rproc_stop, > .kick = imx_dsp_rproc_kick, > - .load = rproc_elf_load_segments, > + .load = imx_dsp_rproc_elf_load_segments, > .parse_fw = imx_dsp_rproc_parse_fw, > .sanity_check = rproc_elf_sanity_check, > .get_boot_addr = rproc_elf_get_boot_addr,
Hi Peng,

On 2/1/2023 2:27 AM, Peng Fan wrote:
>
>
> On 2/1/2023 1:04 AM, Iuliana Prodan (OSS) wrote:
>> From: Iuliana Prodan <iuliana.prodan@nxp.com>
>>
>> The IRAM is part of the HiFi DSP.
>> According to hardware specification only 32-bits write are allowed
>> otherwise we get a Kernel panic.
>>
>> Therefore add a custom memory copy and memset functions to deal with
>> the above restriction.
>
> Which platform has this limitation? This driver has been landed for
> quite some time, is there any specific condition to trigger the issue?
>
> Regards,
> Peng.
>

Any platform with HiFi DSP.
As I explained in the previous version, until now it was used in a limited scenario and the firmware was correctly built to respect the write restriction - having the IRAM sections' size a multiple of 4 bytes.
Now, I was trying a simple hello_world sample from Zephyr, compiled with GCC, and I crashed the Kernel trying to load it on the hifi4 DSP.

Thanks,
Iulia
Hi Iuliana, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on remoteproc/rproc-next] [also build test WARNING on linus/master v6.2-rc6] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Iuliana-Prodan-OSS/remoteproc-imx_dsp_rproc-add-custom-memory-copy-implementation-for-i-MX-DSP-Cores/20230201-011011 base: git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git rproc-next patch link: https://lore.kernel.org/r/20230131170436.31280-1-iuliana.prodan%40oss.nxp.com patch subject: [PATCH v2] remoteproc: imx_dsp_rproc: add custom memory copy implementation for i.MX DSP Cores config: arm-allyesconfig (https://download.01.org/0day-ci/archive/20230204/202302041520.m9CY8p6U-lkp@intel.com/config) compiler: arm-linux-gnueabi-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/65bca8739891378a92cf6a5774e2ad72630a4276 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Iuliana-Prodan-OSS/remoteproc-imx_dsp_rproc-add-custom-memory-copy-implementation-for-i-MX-DSP-Cores/20230201-011011 git checkout 65bca8739891378a92cf6a5774e2ad72630a4276 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/remoteproc/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): In file included from 
include/linux/kernel.h:15, from include/linux/clk.h:13, from drivers/remoteproc/imx_dsp_rproc.c:6: drivers/remoteproc/imx_dsp_rproc.c: In function 'imx_dsp_rproc_memcpy': >> drivers/remoteproc/imx_dsp_rproc.c:732:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] 732 | if (!IS_ALIGNED((u64)dest, 4)) | ^ include/linux/align.h:13:44: note: in definition of macro 'IS_ALIGNED' 13 | #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) | ^ >> drivers/remoteproc/imx_dsp_rproc.c:732:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] 732 | if (!IS_ALIGNED((u64)dest, 4)) | ^ include/linux/align.h:13:58: note: in definition of macro 'IS_ALIGNED' 13 | #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) | ^ drivers/remoteproc/imx_dsp_rproc.c: In function 'imx_dsp_rproc_memset': drivers/remoteproc/imx_dsp_rproc.c:776:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] 776 | if (!IS_ALIGNED((u64)addr, 4)) | ^ include/linux/align.h:13:44: note: in definition of macro 'IS_ALIGNED' 13 | #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) | ^ drivers/remoteproc/imx_dsp_rproc.c:776:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] 776 | if (!IS_ALIGNED((u64)addr, 4)) | ^ include/linux/align.h:13:58: note: in definition of macro 'IS_ALIGNED' 13 | #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) | ^ vim +732 drivers/remoteproc/imx_dsp_rproc.c 717 718 /* 719 * Custom memory copy implementation for i.MX DSP Cores 720 * 721 * The IRAM is part of the HiFi DSP. 722 * According to hw specs only 32-bits writes are allowed. 
723 */ 724 static int imx_dsp_rproc_memcpy(void *dest, const void *src, size_t size) 725 { 726 const u8 *src_byte = src; 727 u32 affected_mask; 728 u32 tmp; 729 int i, q, r; 730 731 /* destination must be 32bit aligned */ > 732 if (!IS_ALIGNED((u64)dest, 4)) 733 return -EINVAL; 734 735 q = size / 4; 736 r = size % 4; 737 738 /* __iowrite32_copy use 32bit size values so divide by 4 */ 739 __iowrite32_copy(dest, src, q); 740 741 if (r) { 742 affected_mask = (1 << (8 * r)) - 1; 743 744 /* first read the 32bit data of dest, then change affected 745 * bytes, and write back to dest. 746 * For unaffected bytes, it should not be changed 747 */ 748 tmp = ioread32(dest + q * 4); 749 tmp &= ~affected_mask; 750 751 /* avoid reading after end of source */ 752 for (i = 0; i < r; i++) 753 tmp |= (src_byte[q * 4 + i] << (8 * i)); 754 755 iowrite32(tmp, dest + q * 4); 756 } 757 758 return 0; 759 } 760
diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c
index e4b1e962d56ad..d0dcc0820fadd 100644
--- a/drivers/remoteproc/imx_dsp_rproc.c
+++ b/drivers/remoteproc/imx_dsp_rproc.c
@@ -715,6 +715,185 @@ static void imx_dsp_rproc_kick(struct rproc *rproc, int vqid)
 		dev_err(dev, "%s: failed (%d, err:%d)\n", __func__, vqid, err);
 }
 
+/*
+ * Custom memory copy implementation for i.MX DSP Cores
+ *
+ * The IRAM is part of the HiFi DSP.
+ * According to hw specs only 32-bit writes are allowed.
+ */
+static int imx_dsp_rproc_memcpy(void *dest, const void *src, size_t size)
+{
+	const u8 *src_byte = src;
+	u32 affected_mask;
+	u32 tmp;
+	size_t i, q, r;
+
+	/* destination must be 32bit aligned (uintptr_t: pointer-sized cast) */
+	if (!IS_ALIGNED((uintptr_t)dest, 4))
+		return -EINVAL;
+
+	q = size / 4;
+	r = size % 4;
+
+	/* __iowrite32_copy() counts in 32-bit words, hence size / 4 */
+	__iowrite32_copy(dest, src, q);
+
+	if (r) {
+		affected_mask = (1 << (8 * r)) - 1;
+
+		/* first read the 32bit data of dest, then change affected
+		 * bytes, and write back to dest.
+		 * For unaffected bytes, it should not be changed
+		 */
+		tmp = ioread32(dest + q * 4);
+		tmp &= ~affected_mask;
+
+		/* avoid reading after end of source */
+		for (i = 0; i < r; i++)
+			tmp |= (src_byte[q * 4 + i] << (8 * i));
+
+		iowrite32(tmp, dest + q * 4);
+	}
+
+	return 0;
+}
+
+/*
+ * Custom memset implementation for i.MX DSP Cores
+ *
+ * The IRAM is part of the HiFi DSP.
+ * According to hw specs only 32-bit writes are allowed.
+ */
+static int imx_dsp_rproc_memset(void *addr, u8 value, size_t size)
+{
+	u32 affected_mask;
+	u32 tmp_val = value;
+	u32 *tmp_dst = addr;
+	u32 tmp;
+	size_t q, r;
+
+	/* destination must be 32bit aligned (uintptr_t: pointer-sized cast) */
+	if (!IS_ALIGNED((uintptr_t)addr, 4))
+		return -EINVAL;
+
+	tmp_val |= tmp_val << 8;
+	tmp_val |= tmp_val << 16;
+
+	q = size / 4;
+	r = size % 4;
+
+	while (q--)
+		iowrite32(tmp_val, tmp_dst++);
+
+	if (r) {
+		affected_mask = (1 << (8 * r)) - 1;
+
+		/* first read the 32bit data of addr, then change affected
+		 * bytes, and write back to addr.
+		 * For unaffected bytes, it should not be changed
+		 */
+		tmp = ioread32(tmp_dst);
+		tmp &= ~affected_mask;
+
+		tmp |= (tmp_val & affected_mask);
+		iowrite32(tmp, tmp_dst);
+	}
+
+	return 0;
+}
+/**
+ * imx_dsp_rproc_elf_load_segments() - load firmware segments to memory
+ * @rproc: remote processor which will be booted using these fw segments
+ * @fw: the ELF firmware image
+ *
+ * This function loads the firmware segments to memory, where the remote
+ * processor expects them.
+ *
+ * Return: 0 on success and an appropriate error code otherwise
+ */
+static int imx_dsp_rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
+{
+	struct device *dev = &rproc->dev;
+	const void *ehdr, *phdr;
+	int i, ret = 0;
+	u16 phnum;
+	const u8 *elf_data = fw->data;
+	u8 class = fw_elf_get_class(fw);
+	u32 elf_phdr_get_size = elf_size_of_phdr(class);
+
+	ehdr = elf_data;
+	phnum = elf_hdr_get_e_phnum(class, ehdr);
+	phdr = elf_data + elf_hdr_get_e_phoff(class, ehdr);
+
+	/* go through the available ELF segments */
+	for (i = 0; i < phnum; i++, phdr += elf_phdr_get_size) {
+		u64 da = elf_phdr_get_p_paddr(class, phdr);
+		u64 memsz = elf_phdr_get_p_memsz(class, phdr);
+		u64 filesz = elf_phdr_get_p_filesz(class, phdr);
+		u64 offset = elf_phdr_get_p_offset(class, phdr);
+		u32 type = elf_phdr_get_p_type(class, phdr);
+		void *ptr;
+
+		if (type != PT_LOAD || !memsz)
+			continue;
+
+		dev_dbg(dev, "phdr: type %d da 0x%llx memsz 0x%llx filesz 0x%llx\n",
+			type, da, memsz, filesz);
+
+		if (filesz > memsz) {
+			dev_err(dev, "bad phdr filesz 0x%llx memsz 0x%llx\n",
+				filesz, memsz);
+			ret = -EINVAL;
+			break;
+		}
+
+		if (offset + filesz > fw->size) {
+			dev_err(dev, "truncated fw: need 0x%llx avail 0x%zx\n",
+				offset + filesz, fw->size);
+			ret = -EINVAL;
+			break;
+		}
+
+		if (!rproc_u64_fit_in_size_t(memsz)) {
+			dev_err(dev, "size (%llx) does not fit in size_t type\n",
+				memsz);
+			ret = -EOVERFLOW;
+			break;
+		}
+
+		/* grab the kernel address for this device address */
+		ptr = rproc_da_to_va(rproc, da, memsz, NULL);
+		if (!ptr) {
+			dev_err(dev, "bad phdr da 0x%llx mem 0x%llx\n", da,
+				memsz);
+			ret = -EINVAL;
+			break;
+		}
+
+		/* put the segment where the remote processor expects it */
+		if (filesz) {
+			ret = imx_dsp_rproc_memcpy(ptr, elf_data + offset, filesz);
+			if (ret) {
+				dev_err(dev, "memory copy failed for da 0x%llx memsz 0x%llx\n",
+					da, memsz);
+				break;
+			}
+		}
+
+		/* zero out remaining memory for this segment */
+		if (memsz > filesz) {
+			ret = imx_dsp_rproc_memset(ptr + filesz, 0, memsz - filesz);
+			if (ret) {
+				dev_err(dev, "memset failed for da 0x%llx memsz 0x%llx\n",
+					da, memsz);
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+
 static int imx_dsp_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
 {
 	if (rproc_elf_load_rsc_table(rproc, fw))
@@ -729,7 +908,7 @@ static const struct rproc_ops imx_dsp_rproc_ops = {
 	.start		= imx_dsp_rproc_start,
 	.stop		= imx_dsp_rproc_stop,
 	.kick		= imx_dsp_rproc_kick,
-	.load		= rproc_elf_load_segments,
+	.load		= imx_dsp_rproc_elf_load_segments,
 	.parse_fw	= imx_dsp_rproc_parse_fw,
 	.sanity_check	= rproc_elf_sanity_check,
 	.get_boot_addr	= rproc_elf_get_boot_addr,