diff mbox series

[v11,07/10] mtd: spi-nor: Add stacked memories support in spi-nor

Message ID 20231125092137.2948-8-amit.kumar-mahapatra@amd.com (mailing list archive)
State New, archived
Headers show
Series spi: Add support for stacked/parallel memories | expand

Commit Message

Mahapatra, Amit Kumar Nov. 25, 2023, 9:21 a.m. UTC
Each flash that is connected in stacked mode should have a separate
parameter structure. So, the flash parameter member(*params) of the spi_nor
structure is changed to an array (*params[2]). The array is used to store
the parameters of each flash connected in stacked configuration.

The current implementation assumes that a maximum of two flashes are
connected in stacked mode and both the flashes are of same make but can
differ in sizes. So, except the sizes all other flash parameters of both
the flashes are identical.

SPI-NOR is not aware of the chip_select values, for any incoming request
SPI-NOR will decide the flash index with the help of individual flash size
and the configuration type (single/stacked). SPI-NOR will pass on the flash
index information to the SPI core & SPI driver by setting the appropriate
bit in nor->spimem->spi->cs_index_mask. For example, if nth bit of
nor->spimem->spi->cs_index_mask is set then the driver would
assert/de-assert spi->chip_slect[n].

Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
---
 drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-------
 drivers/mtd/spi-nor/core.h  |   4 +
 include/linux/mtd/spi-nor.h |  15 +-
 3 files changed, 240 insertions(+), 51 deletions(-)

Comments

Tudor Ambarus Dec. 6, 2023, 2:30 p.m. UTC | #1
Hi, Amit,

On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
> Each flash that is connected in stacked mode should have a separate
> parameter structure. So, the flash parameter member(*params) of the spi_nor
> structure is changed to an array (*params[2]). The array is used to store
> the parameters of each flash connected in stacked configuration.
> 
> The current implementation assumes that a maximum of two flashes are
> connected in stacked mode and both the flashes are of same make but can
> differ in sizes. So, except the sizes all other flash parameters of both
> the flashes are identical.

Do you plan to add support for different flashes in stacked mode? If
not, wouldn't it be simpler to have just an array of flash sizes instead
of duplicating the entire params struct?

> 
> SPI-NOR is not aware of the chip_select values, for any incoming request
> SPI-NOR will decide the flash index with the help of individual flash size
> and the configuration type (single/stacked). SPI-NOR will pass on the flash
> index information to the SPI core & SPI driver by setting the appropriate
> bit in nor->spimem->spi->cs_index_mask. For example, if nth bit of
> nor->spimem->spi->cs_index_mask is set then the driver would
> assert/de-assert spi->chip_slect[n].
> 
> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
> ---
>  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-------
>  drivers/mtd/spi-nor/core.h  |   4 +
>  include/linux/mtd/spi-nor.h |  15 +-
>  3 files changed, 240 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
> index 93ae69b7ff83..e990be7c7eb6 100644
> --- a/drivers/mtd/spi-nor/core.c
> +++ b/drivers/mtd/spi-nor/core.c

cut

> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct spi_nor *nor)
>  static int spi_nor_late_init_params(struct spi_nor *nor)
>  {
>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor, 0);
> -	int ret;
> +	struct device_node *np = spi_nor_get_flash_node(nor);
> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
> +	u32 idx = 0;
> +	int rc, ret;
>  
>  	if (nor->manufacturer && nor->manufacturer->fixups &&
>  	    nor->manufacturer->fixups->late_init) {
> @@ -2937,6 +3042,44 @@ static int spi_nor_late_init_params(struct spi_nor *nor)
>  	if (params->n_banks > 1)
>  		params->bank_size = div64_u64(params->size, params->n_banks);
>  
> +	nor->num_flash = 0;
> +
> +	/*
> +	 * The flashes that are connected in stacked mode should be of same make.
> +	 * Except the flash size all other properties are identical for all the
> +	 * flashes connected in stacked mode.
> +	 * The flashes that are connected in parallel mode should be identical.
> +	 */
> +	while (idx < SNOR_FLASH_CNT_MAX) {
> +		rc = of_property_read_u64_index(np, "stacked-memories", idx, &flash_size[idx]);

This is a little late in my opinion, as we don't have any sanity check
on the flashes that are stacked on top of the first. We shall at least
read and compare the ID for all.

Cheers,
ta
Tudor Ambarus Dec. 6, 2023, 2:43 p.m. UTC | #2
On 12/6/23 14:30, Tudor Ambarus wrote:
> Hi, Amit,
> 
> On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
>> Each flash that is connected in stacked mode should have a separate
>> parameter structure. So, the flash parameter member(*params) of the spi_nor
>> structure is changed to an array (*params[2]). The array is used to store
>> the parameters of each flash connected in stacked configuration.
>>
>> The current implementation assumes that a maximum of two flashes are
>> connected in stacked mode and both the flashes are of same make but can
>> differ in sizes. So, except the sizes all other flash parameters of both
>> the flashes are identical.
> 
> Do you plan to add support for different flashes in stacked mode? If
> not, wouldn't it be simpler to have just an array of flash sizes instead
> of duplicating the entire params struct?
> 
>>
>> SPI-NOR is not aware of the chip_select values, for any incoming request
>> SPI-NOR will decide the flash index with the help of individual flash size
>> and the configuration type (single/stacked). SPI-NOR will pass on the flash
>> index information to the SPI core & SPI driver by setting the appropriate
>> bit in nor->spimem->spi->cs_index_mask. For example, if nth bit of
>> nor->spimem->spi->cs_index_mask is set then the driver would
>> assert/de-assert spi->chip_slect[n].
>>
>> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
>> ---
>>  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-------
>>  drivers/mtd/spi-nor/core.h  |   4 +
>>  include/linux/mtd/spi-nor.h |  15 +-
>>  3 files changed, 240 insertions(+), 51 deletions(-)
>>
>> diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
>> index 93ae69b7ff83..e990be7c7eb6 100644
>> --- a/drivers/mtd/spi-nor/core.c
>> +++ b/drivers/mtd/spi-nor/core.c
> 
> cut
> 
>> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct spi_nor *nor)
>>  static int spi_nor_late_init_params(struct spi_nor *nor)
>>  {
>>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor, 0);
>> -	int ret;
>> +	struct device_node *np = spi_nor_get_flash_node(nor);
>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
>> +	u32 idx = 0;
>> +	int rc, ret;
>>  
>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
>>  	    nor->manufacturer->fixups->late_init) {
>> @@ -2937,6 +3042,44 @@ static int spi_nor_late_init_params(struct spi_nor *nor)
>>  	if (params->n_banks > 1)
>>  		params->bank_size = div64_u64(params->size, params->n_banks);
>>  
>> +	nor->num_flash = 0;
>> +
>> +	/*
>> +	 * The flashes that are connected in stacked mode should be of same make.
>> +	 * Except the flash size all other properties are identical for all the
>> +	 * flashes connected in stacked mode.
>> +	 * The flashes that are connected in parallel mode should be identical.
>> +	 */
>> +	while (idx < SNOR_FLASH_CNT_MAX) {
>> +		rc = of_property_read_u64_index(np, "stacked-memories", idx, &flash_size[idx]);

also, it's not clear to me why you read this property multiple times.
Have you sent a device tree patch somewhere? It will help me understand
what you're trying to achieve.

> 
> This is a little late in my opinion, as we don't have any sanity check
> on the flashes that are stacked on top of the first. We shall at least
> read and compare the ID for all.
> 
> Cheers,
> ta
Tudor Ambarus Dec. 7, 2023, 5:24 p.m. UTC | #3
On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
> The current implementation assumes that a maximum of two flashes are
> connected in stacked mode and both the flashes are of same make but can
> differ in sizes. So, except the sizes all other flash parameters of both
> the flashes are identical.

Too much restrictions, isn't it? Have you thought about adding a layer
on top of SPI NOR managing the stacked/parallel flashes?
Mahapatra, Amit Kumar Dec. 8, 2023, 5:05 p.m. UTC | #4
Hello Tudor,

> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Wednesday, December 6, 2023 8:00 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> Hi, Amit,
> 
> On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
> > Each flash that is connected in stacked mode should have a separate
> > parameter structure. So, the flash parameter member(*params) of the
> > spi_nor structure is changed to an array (*params[2]). The array is
> > used to store the parameters of each flash connected in stacked
> configuration.
> >
> > The current implementation assumes that a maximum of two flashes are
> > connected in stacked mode and both the flashes are of same make but
> > can differ in sizes. So, except the sizes all other flash parameters
> > of both the flashes are identical.
> 
> Do you plan to add support for different flashes in stacked mode? If not,

No, according to the current implementation, in stacked mode, both flashes 
must be of the same make.

> wouldn't it be simpler to have just an array of flash sizes instead of
> duplicating the entire params struct?

Yes, that is accurate. In alignment with our current stacked support use case we 
can have an array of flash sizes instead.
The primary purpose of having an array of params struct was to facilitate 
potential future extensions, allowing the addition of stacked support for 
different flashes

> 
> >
> > SPI-NOR is not aware of the chip_select values, for any incoming
> > request SPI-NOR will decide the flash index with the help of
> > individual flash size and the configuration type (single/stacked).
> > SPI-NOR will pass on the flash index information to the SPI core & SPI
> > driver by setting the appropriate bit in
> > nor->spimem->spi->cs_index_mask. For example, if nth bit of
> > nor->spimem->spi->cs_index_mask is set then the driver would
> > assert/de-assert spi->chip_slect[n].
> >
> > Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
> > ---
> >  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-------
> >  drivers/mtd/spi-nor/core.h  |   4 +
> >  include/linux/mtd/spi-nor.h |  15 +-
> >  3 files changed, 240 insertions(+), 51 deletions(-)
> >
> > diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
> > index 93ae69b7ff83..e990be7c7eb6 100644
> > --- a/drivers/mtd/spi-nor/core.c
> > +++ b/drivers/mtd/spi-nor/core.c
> 
> cut
> 
> > @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
> > spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
> > *nor)  {
> >  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
> 0);
> > -	int ret;
> > +	struct device_node *np = spi_nor_get_flash_node(nor);
> > +	u64 flash_size[SNOR_FLASH_CNT_MAX];
> > +	u32 idx = 0;
> > +	int rc, ret;
> >
> >  	if (nor->manufacturer && nor->manufacturer->fixups &&
> >  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44 @@
> > static int spi_nor_late_init_params(struct spi_nor *nor)
> >  	if (params->n_banks > 1)
> >  		params->bank_size = div64_u64(params->size, params-
> >n_banks);
> >
> > +	nor->num_flash = 0;
> > +
> > +	/*
> > +	 * The flashes that are connected in stacked mode should be of same
> make.
> > +	 * Except the flash size all other properties are identical for all the
> > +	 * flashes connected in stacked mode.
> > +	 * The flashes that are connected in parallel mode should be identical.
> > +	 */
> > +	while (idx < SNOR_FLASH_CNT_MAX) {
> > +		rc = of_property_read_u64_index(np, "stacked-memories",
> idx,
> > +&flash_size[idx]);
> 
> This is a little late in my opinion, as we don't have any sanity check on the
> flashes that are stacked on top of the first. We shall at least read and compare
> the ID for all.

Alright, I will incorporate a sanity check for reading and comparing the 
ID of the stacked flash. Subsequently, I believe this stacked logic 
should be relocated to spi_nor_get_flash_info() where we identify the 
first flash. Please share your thoughts on this. Additionally, do you 
anticipate that SPI-NOR should throw an error if the second or any 
subsequent flash within the stacked connection is different? Or would you 
prefer it to print a warning and operate in single mode (i.e., only the 
first flash)?

Regards,
Amit
> 
> Cheers,
> ta
Mahapatra, Amit Kumar Dec. 8, 2023, 5:06 p.m. UTC | #5
Hello Tudor,

> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Wednesday, December 6, 2023 8:14 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 12/6/23 14:30, Tudor Ambarus wrote:
> > Hi, Amit,
> >
> > On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
> >> Each flash that is connected in stacked mode should have a separate
> >> parameter structure. So, the flash parameter member(*params) of the
> >> spi_nor structure is changed to an array (*params[2]). The array is
> >> used to store the parameters of each flash connected in stacked
> configuration.
> >>
> >> The current implementation assumes that a maximum of two flashes are
> >> connected in stacked mode and both the flashes are of same make but
> >> can differ in sizes. So, except the sizes all other flash parameters
> >> of both the flashes are identical.
> >
> > Do you plan to add support for different flashes in stacked mode? If
> > not, wouldn't it be simpler to have just an array of flash sizes
> > instead of duplicating the entire params struct?
> >
> >>
> >> SPI-NOR is not aware of the chip_select values, for any incoming
> >> request SPI-NOR will decide the flash index with the help of
> >> individual flash size and the configuration type (single/stacked).
> >> SPI-NOR will pass on the flash index information to the SPI core &
> >> SPI driver by setting the appropriate bit in
> >> nor->spimem->spi->cs_index_mask. For example, if nth bit of
> >> nor->spimem->spi->cs_index_mask is set then the driver would
> >> assert/de-assert spi->chip_slect[n].
> >>
> >> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-
> mahapatra@amd.com>
> >> ---
> >>  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-------
> >>  drivers/mtd/spi-nor/core.h  |   4 +
> >>  include/linux/mtd/spi-nor.h |  15 +-
> >>  3 files changed, 240 insertions(+), 51 deletions(-)
> >>
> >> diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
> >> index 93ae69b7ff83..e990be7c7eb6 100644
> >> --- a/drivers/mtd/spi-nor/core.c
> >> +++ b/drivers/mtd/spi-nor/core.c
> >
> > cut
> >
> >> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
> >> spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
> >> *nor)  {
> >>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
> 0);
> >> -	int ret;
> >> +	struct device_node *np = spi_nor_get_flash_node(nor);
> >> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
> >> +	u32 idx = 0;
> >> +	int rc, ret;
> >>
> >>  	if (nor->manufacturer && nor->manufacturer->fixups &&
> >>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44 @@
> >> static int spi_nor_late_init_params(struct spi_nor *nor)
> >>  	if (params->n_banks > 1)
> >>  		params->bank_size = div64_u64(params->size, params-
> >n_banks);
> >>
> >> +	nor->num_flash = 0;
> >> +
> >> +	/*
> >> +	 * The flashes that are connected in stacked mode should be of same
> make.
> >> +	 * Except the flash size all other properties are identical for all the
> >> +	 * flashes connected in stacked mode.
> >> +	 * The flashes that are connected in parallel mode should be identical.
> >> +	 */
> >> +	while (idx < SNOR_FLASH_CNT_MAX) {
> >> +		rc = of_property_read_u64_index(np, "stacked-memories",
> idx,
> >> +&flash_size[idx]);
> 
> also, it's not clear to me why you read this property multiple times.
> Have you sent a device tree patch somewhere? It will help me understand
> what you're trying to achieve.

Miquel submitted the device tree patch; here is the series.
https://lore.kernel.org/all/20220126112608.955728-1-miquel.raynal@bootlin.com/

Regards,
Amit
> 
> >
> > This is a little late in my opinion, as we don't have any sanity check
> > on the flashes that are stacked on top of the first. We shall at least
> > read and compare the ID for all.
> >
> > Cheers,
> > ta
Tudor Ambarus Dec. 11, 2023, 3:33 a.m. UTC | #6
On 12/8/23 17:05, Mahapatra, Amit Kumar wrote:
> Hello Tudor,

Hi!

> 
>> -----Original Message-----
>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
>> Sent: Wednesday, December 6, 2023 8:00 PM
>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
>> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
>> michael@walle.cc; linux-mtd@lists.infradead.org;
>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
>> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
>> in spi-nor
>>
>> Hi, Amit,
>>
>> On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
>>> Each flash that is connected in stacked mode should have a separate
>>> parameter structure. So, the flash parameter member(*params) of the
>>> spi_nor structure is changed to an array (*params[2]). The array is
>>> used to store the parameters of each flash connected in stacked
>> configuration.
>>>
>>> The current implementation assumes that a maximum of two flashes are
>>> connected in stacked mode and both the flashes are of same make but
>>> can differ in sizes. So, except the sizes all other flash parameters
>>> of both the flashes are identical.
>>
>> Do you plan to add support for different flashes in stacked mode? If not,
> 
> No, according to the current implementation, in stacked mode, both flashes 
> must be of the same make.
> 
>> wouldn't it be simpler to have just an array of flash sizes instead of
>> duplicating the entire params struct?
> 
> Yes, that is accurate. In alignment with our current stacked support use case we 
> can have an array of flash sizes instead.
> The primary purpose of having an array of params struct was to facilitate 
> potential future extensions, allowing the addition of stacked support for 
> different flashes
> 

right. Don't do this change yet, let's decide on the overall
architecture first.

>>
>>>
>>> SPI-NOR is not aware of the chip_select values, for any incoming
>>> request SPI-NOR will decide the flash index with the help of
>>> individual flash size and the configuration type (single/stacked).
>>> SPI-NOR will pass on the flash index information to the SPI core & SPI
>>> driver by setting the appropriate bit in
>>> nor->spimem->spi->cs_index_mask. For example, if nth bit of
>>> nor->spimem->spi->cs_index_mask is set then the driver would
>>> assert/de-assert spi->chip_slect[n].
>>>
>>> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@amd.com>
>>> ---
>>>  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-------
>>>  drivers/mtd/spi-nor/core.h  |   4 +
>>>  include/linux/mtd/spi-nor.h |  15 +-
>>>  3 files changed, 240 insertions(+), 51 deletions(-)
>>>
>>> diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
>>> index 93ae69b7ff83..e990be7c7eb6 100644
>>> --- a/drivers/mtd/spi-nor/core.c
>>> +++ b/drivers/mtd/spi-nor/core.c
>>
>> cut
>>
>>> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
>>> spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
>>> *nor)  {
>>>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
>> 0);
>>> -	int ret;
>>> +	struct device_node *np = spi_nor_get_flash_node(nor);
>>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
>>> +	u32 idx = 0;
>>> +	int rc, ret;
>>>
>>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
>>>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44 @@
>>> static int spi_nor_late_init_params(struct spi_nor *nor)
>>>  	if (params->n_banks > 1)
>>>  		params->bank_size = div64_u64(params->size, params-
>>> n_banks);
>>>
>>> +	nor->num_flash = 0;
>>> +
>>> +	/*
>>> +	 * The flashes that are connected in stacked mode should be of same
>> make.
>>> +	 * Except the flash size all other properties are identical for all the
>>> +	 * flashes connected in stacked mode.
>>> +	 * The flashes that are connected in parallel mode should be identical.
>>> +	 */
>>> +	while (idx < SNOR_FLASH_CNT_MAX) {
>>> +		rc = of_property_read_u64_index(np, "stacked-memories",
>> idx,
>>> +&flash_size[idx]);
>>
>> This is a little late in my opinion, as we don't have any sanity check on the
>> flashes that are stacked on top of the first. We shall at least read and compare
>> the ID for all.
> 
> Alright, I will incorporate a sanity check for reading and comparing the 
> ID of the stacked flash. Subsequently, I believe this stacked logic 
> should be relocated to spi_nor_get_flash_info() where we identify the 
> first flash. Please share your thoughts on this. Additionally, do you 

I'm wondering whether we can add a layer on top of the flash type to
handle the stacked/parallel modes. This way everything will become flash
type independent. Would it be possible to stack 2 SPI NANDs? How about a
SPI NOR and a SPI NAND?

Is the datasheet of the controller public?

> anticipate that SPI-NOR should throw an error if the second or any 
> subsequent flash within the stacked connection is different? Or would you 
> prefer it to print a warning and operate in single mode (i.e., only the 
> first flash)?

Both options are fine, but I haven't yet decided on the overall
architecture.

Cheers,
ta
Tudor Ambarus Dec. 11, 2023, 3:44 a.m. UTC | #7
On 12/8/23 17:06, Mahapatra, Amit Kumar wrote:
> Hello Tudor,

Hi!

> 
>> -----Original Message-----
>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
>> Sent: Wednesday, December 6, 2023 8:14 PM
>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
>> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
>> michael@walle.cc; linux-mtd@lists.infradead.org;
>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
>> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
>> in spi-nor
>>
>>
>>
>> On 12/6/23 14:30, Tudor Ambarus wrote:
>>> Hi, Amit,
>>>
>>> On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
>>>> Each flash that is connected in stacked mode should have a separate
>>>> parameter structure. So, the flash parameter member(*params) of the
>>>> spi_nor structure is changed to an array (*params[2]). The array is
>>>> used to store the parameters of each flash connected in stacked
>> configuration.
>>>>
>>>> The current implementation assumes that a maximum of two flashes are
>>>> connected in stacked mode and both the flashes are of same make but
>>>> can differ in sizes. So, except the sizes all other flash parameters
>>>> of both the flashes are identical.
>>>
>>> Do you plan to add support for different flashes in stacked mode? If
>>> not, wouldn't it be simpler to have just an array of flash sizes
>>> instead of duplicating the entire params struct?
>>>
>>>>
>>>> SPI-NOR is not aware of the chip_select values, for any incoming
>>>> request SPI-NOR will decide the flash index with the help of
>>>> individual flash size and the configuration type (single/stacked).
>>>> SPI-NOR will pass on the flash index information to the SPI core &
>>>> SPI driver by setting the appropriate bit in
>>>> nor->spimem->spi->cs_index_mask. For example, if nth bit of
>>>> nor->spimem->spi->cs_index_mask is set then the driver would
>>>> assert/de-assert spi->chip_slect[n].
>>>>
>>>> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-
>> mahapatra@amd.com>
>>>> ---
>>>>  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-------
>>>>  drivers/mtd/spi-nor/core.h  |   4 +
>>>>  include/linux/mtd/spi-nor.h |  15 +-
>>>>  3 files changed, 240 insertions(+), 51 deletions(-)
>>>>
>>>> diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
>>>> index 93ae69b7ff83..e990be7c7eb6 100644
>>>> --- a/drivers/mtd/spi-nor/core.c
>>>> +++ b/drivers/mtd/spi-nor/core.c
>>>
>>> cut
>>>
>>>> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
>>>> spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
>>>> *nor)  {
>>>>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
>> 0);
>>>> -	int ret;
>>>> +	struct device_node *np = spi_nor_get_flash_node(nor);
>>>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
>>>> +	u32 idx = 0;
>>>> +	int rc, ret;
>>>>
>>>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
>>>>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44 @@
>>>> static int spi_nor_late_init_params(struct spi_nor *nor)
>>>>  	if (params->n_banks > 1)
>>>>  		params->bank_size = div64_u64(params->size, params-
>>> n_banks);
>>>>
>>>> +	nor->num_flash = 0;
>>>> +
>>>> +	/*
>>>> +	 * The flashes that are connected in stacked mode should be of same
>> make.
>>>> +	 * Except the flash size all other properties are identical for all the
>>>> +	 * flashes connected in stacked mode.
>>>> +	 * The flashes that are connected in parallel mode should be identical.
>>>> +	 */
>>>> +	while (idx < SNOR_FLASH_CNT_MAX) {
>>>> +		rc = of_property_read_u64_index(np, "stacked-memories",
>> idx,
>>>> +&flash_size[idx]);
>>
>> also, it's not clear to me why you read this property multiple times.
>> Have you sent a device tree patch somewhere? It will help me understand
>> what you're trying to achieve.
> 
> Miquel submitted the device tree patch; here is the series.
> https://lore.kernel.org/all/20220126112608.955728-1-miquel.raynal@bootlin.com/
> 

oh, yes, I remember seeing this on the ml, but I couldn't allocate time
to review it. Looking at:
https://lore.kernel.org/all/20220126112608.955728-4-miquel.raynal@bootlin.com/

Flash size is not necessary for SPI NORs, as it can be discovered via
SFDP. And spi-max-frequency should have been specified for all flashes,
as I expect it can differ. At least so that the controller chooses the
minimum frequency from all the max (if it can't operate the stacks at
different frequencies).

Cheers,
ta
Mahapatra, Amit Kumar Dec. 11, 2023, 6:56 a.m. UTC | #8
> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Monday, December 11, 2023 9:03 AM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 12/8/23 17:05, Mahapatra, Amit Kumar wrote:
> > Hello Tudor,
> 
> Hi!

Hello Tudor,

> 
> >
> >> -----Original Message-----
> >> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> >> Sent: Wednesday, December 6, 2023 8:00 PM
> >> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> >> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> >> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> >> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> >> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> >> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> >> michael@walle.cc; linux-mtd@lists.infradead.org;
> >> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> >> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
> >> linux- arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> >> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> >> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> >> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
> >> support in spi-nor
> >>
> >> Hi, Amit,
> >>
> >> On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
> >>> Each flash that is connected in stacked mode should have a separate
> >>> parameter structure. So, the flash parameter member(*params) of the
> >>> spi_nor structure is changed to an array (*params[2]). The array is
> >>> used to store the parameters of each flash connected in stacked
> >> configuration.
> >>>
> >>> The current implementation assumes that a maximum of two flashes are
> >>> connected in stacked mode and both the flashes are of same make but
> >>> can differ in sizes. So, except the sizes all other flash parameters
> >>> of both the flashes are identical.
> >>
> >> Do you plan to add support for different flashes in stacked mode? If
> >> not,
> >
> > No, according to the current implementation, in stacked mode, both
> > flashes must be of the same make.
> >
> >> wouldn't it be simpler to have just an array of flash sizes instead
> >> of duplicating the entire params struct?
> >
> > Yes, that is accurate. In alignment with our current stacked support
> > use case we can have an array of flash sizes instead.
> > The primary purpose of having an array of params struct was to
> > facilitate potential future extensions, allowing the addition of
> > stacked support for different flashes
> >
> 
> right. Don't do this change yet, let's decide on the overall architecture first.

Sure.
> 
> >>
> >>>
> >>> SPI-NOR is not aware of the chip_select values, for any incoming
> >>> request SPI-NOR will decide the flash index with the help of
> >>> individual flash size and the configuration type (single/stacked).
> >>> SPI-NOR will pass on the flash index information to the SPI core &
> >>> SPI driver by setting the appropriate bit in
> >>> nor->spimem->spi->cs_index_mask. For example, if nth bit of
> >>> nor->spimem->spi->cs_index_mask is set then the driver would
> >>> assert/de-assert spi->chip_slect[n].
> >>>
> >>> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-
> mahapatra@amd.com>
> >>> ---
> >>>  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-----
> --
> >>>  drivers/mtd/spi-nor/core.h  |   4 +
> >>>  include/linux/mtd/spi-nor.h |  15 +-
> >>>  3 files changed, 240 insertions(+), 51 deletions(-)
> >>>
> >>> diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
> >>> index 93ae69b7ff83..e990be7c7eb6 100644
> >>> --- a/drivers/mtd/spi-nor/core.c
> >>> +++ b/drivers/mtd/spi-nor/core.c
> >>
> >> cut
> >>
> >>> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
> >>> spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
> >>> *nor)  {
> >>>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
> >> 0);
> >>> -	int ret;
> >>> +	struct device_node *np = spi_nor_get_flash_node(nor);
> >>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
> >>> +	u32 idx = 0;
> >>> +	int rc, ret;
> >>>
> >>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
> >>>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44 @@
> >>> static int spi_nor_late_init_params(struct spi_nor *nor)
> >>>  	if (params->n_banks > 1)
> >>>  		params->bank_size = div64_u64(params->size, params-
> n_banks);
> >>>
> >>> +	nor->num_flash = 0;
> >>> +
> >>> +	/*
> >>> +	 * The flashes that are connected in stacked mode should be of
> >>> +same
> >> make.
> >>> +	 * Except the flash size all other properties are identical for all the
> >>> +	 * flashes connected in stacked mode.
> >>> +	 * The flashes that are connected in parallel mode should be identical.
> >>> +	 */
> >>> +	while (idx < SNOR_FLASH_CNT_MAX) {
> >>> +		rc = of_property_read_u64_index(np, "stacked-memories",
> >> idx,
> >>> +&flash_size[idx]);
> >>
> >> This is a little late in my opinion, as we don't have any sanity
> >> check on the flashes that are stacked on top of the first. We shall
> >> at least read and compare the ID for all.
> >
> > Alright, I will incorporate a sanity check for reading and comparing
> > the ID of the stacked flash. Subsequently, I believe this stacked
> > logic should be relocated to spi_nor_get_flash_info() where we
> > identify the first flash. Please share your thoughts on this.
> > Additionally, do you
> 
> I'm wondering whether we can add a layer on top of the flash type to handle

When you mention "on top," are you referring to incorporating it into 
the MTD layer? Initially, Miquel had submitted this patch to address 
stacked/parallel handling in the MTD layer.
https://lore.kernel.org/linux-mtd/20200114191052.0a16d116@xps13/t/
However, the Device Tree bindings were initially not accepted. 
Following a series of discussions, the below bindings were 
eventually merged.
https://lore.kernel.org/all/20220126112608.955728-4-miquel.raynal@bootlin.com/

> the stacked/parallel modes. This way everything will become flash type
> independent. Would it be possible to stack 2 SPI NANDs? How about a SPI
> NOR and a SPI NAND?
> 
> Is the datasheet of the controller public?

Yes, https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/Quad-SPI-Controller

> 
> > anticipate that SPI-NOR should throw an error if the second or any
> > subsequent flash within the stacked connection is different? Or would
> > you prefer it to print a warning and operate in single mode (i.e.,
> > only the first flash)?
> 
> Both options are fine, but I haven't yet decided on the overall architecture.

Ok.

Regards,
Amit
> 
> Cheers,
> ta
Tudor Ambarus Dec. 11, 2023, 9:35 a.m. UTC | #9
On 12/11/23 06:56, Mahapatra, Amit Kumar wrote:
> 
> 
>> -----Original Message-----
>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
>> Sent: Monday, December 11, 2023 9:03 AM
>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
>> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
>> michael@walle.cc; linux-mtd@lists.infradead.org;
>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
>> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
>> in spi-nor
>>
>>
>>
>> On 12/8/23 17:05, Mahapatra, Amit Kumar wrote:
>>> Hello Tudor,
>>
>> Hi!
> 
> Hello Tudor,
> 

Hi!

>>
>>>
>>>> -----Original Message-----
>>>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
>>>> Sent: Wednesday, December 6, 2023 8:00 PM
>>>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
>>>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
>>>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
>>>> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
>>>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
>>>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
>>>> michael@walle.cc; linux-mtd@lists.infradead.org;
>>>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
>>>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
>>>> linux- arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
>>>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
>>>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
>>>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
>>>> support in spi-nor
>>>>
>>>> Hi, Amit,
>>>>
>>>> On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
>>>>> Each flash that is connected in stacked mode should have a separate
>>>>> parameter structure. So, the flash parameter member(*params) of the
>>>>> spi_nor structure is changed to an array (*params[2]). The array is
>>>>> used to store the parameters of each flash connected in stacked
>>>> configuration.
>>>>>
>>>>> The current implementation assumes that a maximum of two flashes are
>>>>> connected in stacked mode and both the flashes are of same make but
>>>>> can differ in sizes. So, except the sizes all other flash parameters
>>>>> of both the flashes are identical.
>>>>
>>>> Do you plan to add support for different flashes in stacked mode? If
>>>> not,
>>>
>>> No, according to the current implementation, in stacked mode, both
>>> flashes must be of the same make.
>>>
>>>> wouldn't it be simpler to have just an array of flash sizes instead
>>>> of duplicating the entire params struct?
>>>
>>> Yes, that is accurate. In alignment with our current stacked support
>>> use case we can have an array of flash sizes instead.
>>> The primary purpose of having an array of params struct was to
>>> facilitate potential future extensions, allowing the addition of
>>> stacked support for different flashes
>>>
>>
>> right. Don't do this change yet, let's decide on the overall architecture first.
> 
> Sure.
>>
>>>>
>>>>>
>>>>> SPI-NOR is not aware of the chip_select values, for any incoming
>>>>> request SPI-NOR will decide the flash index with the help of
>>>>> individual flash size and the configuration type (single/stacked).
>>>>> SPI-NOR will pass on the flash index information to the SPI core &
>>>>> SPI driver by setting the appropriate bit in
>>>>> nor->spimem->spi->cs_index_mask. For example, if nth bit of
>>>>> nor->spimem->spi->cs_index_mask is set then the driver would
>>>>> assert/de-assert spi->chip_slect[n].
>>>>>
>>>>> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-
>> mahapatra@amd.com>
>>>>> ---
>>>>>  drivers/mtd/spi-nor/core.c  | 272 +++++++++++++++++++++++++++++-----
>> --
>>>>>  drivers/mtd/spi-nor/core.h  |   4 +
>>>>>  include/linux/mtd/spi-nor.h |  15 +-
>>>>>  3 files changed, 240 insertions(+), 51 deletions(-)
>>>>>
>>>>> diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
>>>>> index 93ae69b7ff83..e990be7c7eb6 100644
>>>>> --- a/drivers/mtd/spi-nor/core.c
>>>>> +++ b/drivers/mtd/spi-nor/core.c
>>>>
>>>> cut
>>>>
>>>>> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
>>>>> spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
>>>>> *nor)  {
>>>>>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
>>>> 0);
>>>>> -	int ret;
>>>>> +	struct device_node *np = spi_nor_get_flash_node(nor);
>>>>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
>>>>> +	u32 idx = 0;
>>>>> +	int rc, ret;
>>>>>
>>>>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
>>>>>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44 @@
>>>>> static int spi_nor_late_init_params(struct spi_nor *nor)
>>>>>  	if (params->n_banks > 1)
>>>>>  		params->bank_size = div64_u64(params->size, params-
>> n_banks);
>>>>>
>>>>> +	nor->num_flash = 0;
>>>>> +
>>>>> +	/*
>>>>> +	 * The flashes that are connected in stacked mode should be of
>>>>> +same
>>>> make.
>>>>> +	 * Except the flash size all other properties are identical for all the
>>>>> +	 * flashes connected in stacked mode.
>>>>> +	 * The flashes that are connected in parallel mode should be identical.
>>>>> +	 */
>>>>> +	while (idx < SNOR_FLASH_CNT_MAX) {
>>>>> +		rc = of_property_read_u64_index(np, "stacked-memories",
>>>> idx,
>>>>> +&flash_size[idx]);
>>>>
>>>> This is a little late in my opinion, as we don't have any sanity
>>>> check on the flashes that are stacked on top of the first. We shall
>>>> at least read and compare the ID for all.
>>>
>>> Alright, I will incorporate a sanity check for reading and comparing
>>> the ID of the stacked flash. Subsequently, I believe this stacked
>>> logic should be relocated to spi_nor_get_flash_info() where we
>>> identify the first flash. Please share your thoughts on this.
>>> Additionally, do you
>>
>> I'm wondering whether we can add a layer on top of the flash type to handle
> 
> When you mention "on top," are you referring to incorporating it into 
> the MTD layer? Initially, Miquel had submitted this patch to address 

I mean something above SPI MEM flashes, be it NOR, NANDs or whatever.
Instead of treating the stacked flashes as a monolithic device and treat
in SPI NOR some array of flashes, to have a layer above which probes the
SPI MEM flash driver for each stacked flash. In your case SPI NOR would
be probed twice, as you use 2 SPI NOR flashes.

> stacked/parallel handling in the MTD layer.
> https://lore.kernel.org/linux-mtd/20200114191052.0a16d116@xps13/t/
> However, the Device Tree bindings were initially not accepted. 

Okay, thanks for the pointer. I'll take a look.

> Following a series of discussions, the below bindings were 
> eventually merged.
> https://lore.kernel.org/all/20220126112608.955728-4-miquel.raynal@bootlin.com/

I saw, thanks.

> 
>> the stacked/parallel modes. This way everything will become flash type
>> independent. Would it be possible to stack 2 SPI NANDs? How about a SPI
>> NOR and a SPI NAND?
>>
>> Is the datasheet of the controller public?
> 
> Yes, https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/Quad-SPI-Controller
> 

Wonderful, I'll take a look. I see two clocks there. Does this mean that
the stacked flashes can be operated at different frequencies? Do you
know if we can combine a SPI NOR with a SPI NAND in stacked configuration?

I need to study this a bit. I'll try to involve Michael and Pratyush too.

Cheers,
ta
Mahapatra, Amit Kumar Dec. 11, 2023, 1:37 p.m. UTC | #10
> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Monday, December 11, 2023 3:05 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 12/11/23 06:56, Mahapatra, Amit Kumar wrote:
> >
> >
> >> -----Original Message-----
> >> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> >> Sent: Monday, December 11, 2023 9:03 AM
> >> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> >> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> >> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> >> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> >> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> >> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> >> michael@walle.cc; linux-mtd@lists.infradead.org;
> >> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> >> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
> >> linux- arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> >> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> >> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> >> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
> >> support in spi-nor
> >>
> >>
> >>
> >> On 12/8/23 17:05, Mahapatra, Amit Kumar wrote:
> >>> Hello Tudor,
> >>
> >> Hi!
> >
> > Hello Tudor,
> >
> 
> Hi!

Hello Tudor,
> 
> >>
> >>>
> >>>> -----Original Message-----
> >>>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> >>>> Sent: Wednesday, December 6, 2023 8:00 PM
> >>>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> >>>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> >>>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> >>>> lee@kernel.org; james.schulman@cirrus.com;
> david.rhodes@cirrus.com;
> >>>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> >>>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> >>>> michael@walle.cc; linux-mtd@lists.infradead.org;
> >>>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> >>>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
> >>>> linux- arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> >>>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git
> >>>> (AMD-
> >>>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> >>>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
> >>>> support in spi-nor
> >>>>
> >>>> Hi, Amit,
> >>>>
> >>>> On 11/25/23 09:21, Amit Kumar Mahapatra wrote:
> >>>>> Each flash that is connected in stacked mode should have a
> >>>>> separate parameter structure. So, the flash parameter
> >>>>> member(*params) of the spi_nor structure is changed to an array
> >>>>> (*params[2]). The array is used to store the parameters of each
> >>>>> flash connected in stacked
> >>>> configuration.
> >>>>>
> >>>>> The current implementation assumes that a maximum of two flashes
> >>>>> are connected in stacked mode and both the flashes are of same
> >>>>> make but can differ in sizes. So, except the sizes all other flash
> >>>>> parameters of both the flashes are identical.
> >>>>
> >>>> Do you plan to add support for different flashes in stacked mode?
> >>>> If not,
> >>>
> >>> No, according to the current implementation, in stacked mode, both
> >>> flashes must be of the same make.
> >>>
> >>>> wouldn't it be simpler to have just an array of flash sizes instead
> >>>> of duplicating the entire params struct?
> >>>
> >>> Yes, that is accurate. In alignment with our current stacked support
> >>> use case we can have an array of flash sizes instead.
> >>> The primary purpose of having an array of params struct was to
> >>> facilitate potential future extensions, allowing the addition of
> >>> stacked support for different flashes
> >>>
> >>
> >> right. Don't do this change yet, let's decide on the overall architecture first.
> >
> > Sure.
> >>
> >>>>
> >>>>>
> >>>>> SPI-NOR is not aware of the chip_select values, for any incoming
> >>>>> request SPI-NOR will decide the flash index with the help of
> >>>>> individual flash size and the configuration type (single/stacked).
> >>>>> SPI-NOR will pass on the flash index information to the SPI core &
> >>>>> SPI driver by setting the appropriate bit in
> >>>>> nor->spimem->spi->cs_index_mask. For example, if nth bit of
> >>>>> nor->spimem->spi->cs_index_mask is set then the driver would
> >>>>> assert/de-assert spi->chip_slect[n].
> >>>>>
> >>>>> Signed-off-by: Amit Kumar Mahapatra <amit.kumar-
> >> mahapatra@amd.com>
> >>>>> ---
> >>>>>  drivers/mtd/spi-nor/core.c  | 272
> >>>>> +++++++++++++++++++++++++++++-----
> >> --
> >>>>>  drivers/mtd/spi-nor/core.h  |   4 +
> >>>>>  include/linux/mtd/spi-nor.h |  15 +-
> >>>>>  3 files changed, 240 insertions(+), 51 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/mtd/spi-nor/core.c
> >>>>> b/drivers/mtd/spi-nor/core.c index 93ae69b7ff83..e990be7c7eb6
> >>>>> 100644
> >>>>> --- a/drivers/mtd/spi-nor/core.c
> >>>>> +++ b/drivers/mtd/spi-nor/core.c
> >>>>
> >>>> cut
> >>>>
> >>>>> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
> >>>>> spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
> >>>>> *nor)  {
> >>>>>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
> >>>> 0);
> >>>>> -	int ret;
> >>>>> +	struct device_node *np = spi_nor_get_flash_node(nor);
> >>>>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
> >>>>> +	u32 idx = 0;
> >>>>> +	int rc, ret;
> >>>>>
> >>>>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
> >>>>>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44
> >>>>> @@ static int spi_nor_late_init_params(struct spi_nor *nor)
> >>>>>  	if (params->n_banks > 1)
> >>>>>  		params->bank_size = div64_u64(params->size, params-
> >> n_banks);
> >>>>>
> >>>>> +	nor->num_flash = 0;
> >>>>> +
> >>>>> +	/*
> >>>>> +	 * The flashes that are connected in stacked mode should be
> of
> >>>>> +same
> >>>> make.
> >>>>> +	 * Except the flash size all other properties are identical for all
> the
> >>>>> +	 * flashes connected in stacked mode.
> >>>>> +	 * The flashes that are connected in parallel mode should be
> identical.
> >>>>> +	 */
> >>>>> +	while (idx < SNOR_FLASH_CNT_MAX) {
> >>>>> +		rc = of_property_read_u64_index(np, "stacked-
> memories",
> >>>> idx,
> >>>>> +&flash_size[idx]);
> >>>>
> >>>> This is a little late in my opinion, as we don't have any sanity
> >>>> check on the flashes that are stacked on top of the first. We shall
> >>>> at least read and compare the ID for all.
> >>>
> >>> Alright, I will incorporate a sanity check for reading and comparing
> >>> the ID of the stacked flash. Subsequently, I believe this stacked
> >>> logic should be relocated to spi_nor_get_flash_info() where we
> >>> identify the first flash. Please share your thoughts on this.
> >>> Additionally, do you
> >>
> >> I'm wondering whether we can add a layer on top of the flash type to
> >> handle
> >
> > When you mention "on top," are you referring to incorporating it into
> > the MTD layer? Initially, Miquel had submitted this patch to address
> 
> I mean something above SPI MEM flashes, be it NOR, NANDs or whatever.
> Instead of treating the stacked flashes as a monolithic device and treat in SPI
> NOR some array of flashes, to have a layer above which probes the SPI MEM
> flash driver for each stacked flash. In your case SPI NOR would be probed
> twice, as you use 2 SPI NOR flashes.
> 
> > stacked/parallel handling in the MTD layer.
> > https://lore.kernel.org/linux-mtd/20200114191052.0a16d116@xps13/t/
> > However, the Device Tree bindings were initially not accepted.
> 
> Okay, thanks for the pointer. I'll take a look.
> 
> > Following a series of discussions, the below bindings were eventually
> > merged.
> > https://lore.kernel.org/all/20220126112608.955728-4-miquel.raynal@boot
> > lin.com/
> 
> I saw, thanks.
> 
> >
> >> the stacked/parallel modes. This way everything will become flash
> >> type independent. Would it be possible to stack 2 SPI NANDs? How
> >> about a SPI NOR and a SPI NAND?
> >>
> >> Is the datasheet of the controller public?
> >
> > Yes,
> > https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/Quad-SPI-Control
> > ler
> >
> 
> Wonderful, I'll take a look. I see two clocks there. Does this mean that the
> stacked flashes can be operated at different frequencies? Do you know if we

In stacked configuration, both flashes utilize a common clock (single 
clock line). In a parallel setup, the flashes share the same clock but 
have distinct clock lines. Please refer the diagrams in the sections 
below for reference.
https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Interface-Diagrams
> can combine a SPI NOR with a SPI NAND in stacked configuration?

No, Xilinx/AMD QSPI controllers doesn't support this configuration.

Regards,
Amit
> 
> I need to study this a bit. I'll try to involve Michael and Pratyush too.
> 
> Cheers,
> ta
Tudor Ambarus Dec. 12, 2023, 3:02 p.m. UTC | #11
On 12/11/23 13:37, Mahapatra, Amit Kumar wrote:

Hi!

cut

>>>>>>>  drivers/mtd/spi-nor/core.c  | 272
>>>>>>> +++++++++++++++++++++++++++++-----
>>>> --
>>>>>>>  drivers/mtd/spi-nor/core.h  |   4 +
>>>>>>>  include/linux/mtd/spi-nor.h |  15 +-
>>>>>>>  3 files changed, 240 insertions(+), 51 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/mtd/spi-nor/core.c
>>>>>>> b/drivers/mtd/spi-nor/core.c index 93ae69b7ff83..e990be7c7eb6
>>>>>>> 100644
>>>>>>> --- a/drivers/mtd/spi-nor/core.c
>>>>>>> +++ b/drivers/mtd/spi-nor/core.c
>>>>>>
>>>>>> cut
>>>>>>
>>>>>>> @@ -2905,7 +3007,10 @@ static void spi_nor_init_fixup_flags(struct
>>>>>>> spi_nor *nor)  static int spi_nor_late_init_params(struct spi_nor
>>>>>>> *nor)  {
>>>>>>>  	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor,
>>>>>> 0);
>>>>>>> -	int ret;
>>>>>>> +	struct device_node *np = spi_nor_get_flash_node(nor);
>>>>>>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
>>>>>>> +	u32 idx = 0;
>>>>>>> +	int rc, ret;
>>>>>>>
>>>>>>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
>>>>>>>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6 +3042,44
>>>>>>> @@ static int spi_nor_late_init_params(struct spi_nor *nor)
>>>>>>>  	if (params->n_banks > 1)
>>>>>>>  		params->bank_size = div64_u64(params->size, params-
>>>> n_banks);
>>>>>>>
>>>>>>> +	nor->num_flash = 0;
>>>>>>> +
>>>>>>> +	/*
>>>>>>> +	 * The flashes that are connected in stacked mode should be
>> of
>>>>>>> +same
>>>>>> make.
>>>>>>> +	 * Except the flash size all other properties are identical for all
>> the
>>>>>>> +	 * flashes connected in stacked mode.
>>>>>>> +	 * The flashes that are connected in parallel mode should be
>> identical.
>>>>>>> +	 */
>>>>>>> +	while (idx < SNOR_FLASH_CNT_MAX) {
>>>>>>> +		rc = of_property_read_u64_index(np, "stacked-
>> memories",
>>>>>> idx,
>>>>>>> +&flash_size[idx]);
>>>>>>
>>>>>> This is a little late in my opinion, as we don't have any sanity
>>>>>> check on the flashes that are stacked on top of the first. We shall
>>>>>> at least read and compare the ID for all.
>>>>>
>>>>> Alright, I will incorporate a sanity check for reading and comparing
>>>>> the ID of the stacked flash. Subsequently, I believe this stacked
>>>>> logic should be relocated to spi_nor_get_flash_info() where we
>>>>> identify the first flash. Please share your thoughts on this.
>>>>> Additionally, do you
>>>>
>>>> I'm wondering whether we can add a layer on top of the flash type to
>>>> handle
>>>
>>> When you mention "on top," are you referring to incorporating it into
>>> the MTD layer? Initially, Miquel had submitted this patch to address
>>
>> I mean something above SPI MEM flashes, be it NOR, NANDs or whatever.
>> Instead of treating the stacked flashes as a monolithic device and treat in SPI
>> NOR some array of flashes, to have a layer above which probes the SPI MEM
>> flash driver for each stacked flash. In your case SPI NOR would be probed
>> twice, as you use 2 SPI NOR flashes.
>>
>>> stacked/parallel handling in the MTD layer.
>>> https://lore.kernel.org/linux-mtd/20200114191052.0a16d116@xps13/t/
>>> However, the Device Tree bindings were initially not accepted.
>>
>> Okay, thanks for the pointer. I'll take a look.

haven't yet read the thread from above, but I skimmed over the AMD
controller datasheet.

>>
>>> Following a series of discussions, the below bindings were eventually
>>> merged.
>>> https://lore.kernel.org/all/20220126112608.955728-4-miquel.raynal@boot
>>> lin.com/
>>
>> I saw, thanks.
>>
>>>
>>>> the stacked/parallel modes. This way everything will become flash
>>>> type independent. Would it be possible to stack 2 SPI NANDs? How
>>>> about a SPI NOR and a SPI NAND?
>>>>
>>>> Is the datasheet of the controller public?
>>>
>>> Yes,
>>> https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/Quad-SPI-Control
>>> ler
>>>
>>
>> Wonderful, I'll take a look. I see two clocks there. Does this mean that the
>> stacked flashes can be operated at different frequencies? Do you know if we
> 
> In stacked configuration, both flashes utilize a common clock (single 
> clock line). In a parallel setup, the flashes share the same clock but 
> have distinct clock lines. Please refer the diagrams in the sections 
> below for reference.
> https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Interface-Diagrams

Thanks! Can you share with us what flashes you used for testing in the
stacked and parallel configurations?

>> can combine a SPI NOR with a SPI NAND in stacked configuration?
> 
> No, Xilinx/AMD QSPI controllers doesn't support this configuration.
> 

2 SPI NANDs shall work with the AMD controller, right?

I skimmed over the QSPI controller datasheet and wondered why one would
get complicated with 2 Quad Flashes when we have Octal. But then I saw
that the same SoC can configure an Octal controller (the Octal and Quad
controllers seems mutual exclusive) and that the Octal one can operate 2
octal flashes in stacked mode :).

Cheers,
ta
Mahapatra, Amit Kumar Dec. 15, 2023, 7:55 a.m. UTC | #12
Hello Tudor,

> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Tuesday, December 12, 2023 8:33 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 12/11/23 13:37, Mahapatra, Amit Kumar wrote:
> 
> Hi!
> 
> cut
> 
> >>>>>>>  drivers/mtd/spi-nor/core.c  | 272
> >>>>>>> +++++++++++++++++++++++++++++-----
> >>>> --
> >>>>>>>  drivers/mtd/spi-nor/core.h  |   4 +
> >>>>>>>  include/linux/mtd/spi-nor.h |  15 +-
> >>>>>>>  3 files changed, 240 insertions(+), 51 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/mtd/spi-nor/core.c
> >>>>>>> b/drivers/mtd/spi-nor/core.c index 93ae69b7ff83..e990be7c7eb6
> >>>>>>> 100644
> >>>>>>> --- a/drivers/mtd/spi-nor/core.c
> >>>>>>> +++ b/drivers/mtd/spi-nor/core.c
> >>>>>>
> >>>>>> cut
> >>>>>>
> >>>>>>> @@ -2905,7 +3007,10 @@ static void
> >>>>>>> spi_nor_init_fixup_flags(struct spi_nor *nor)  static int
> >>>>>>> spi_nor_late_init_params(struct spi_nor
> >>>>>>> *nor)  {
> >>>>>>>  	struct spi_nor_flash_parameter *params =
> >>>>>>> spi_nor_get_params(nor,
> >>>>>> 0);
> >>>>>>> -	int ret;
> >>>>>>> +	struct device_node *np = spi_nor_get_flash_node(nor);
> >>>>>>> +	u64 flash_size[SNOR_FLASH_CNT_MAX];
> >>>>>>> +	u32 idx = 0;
> >>>>>>> +	int rc, ret;
> >>>>>>>
> >>>>>>>  	if (nor->manufacturer && nor->manufacturer->fixups &&
> >>>>>>>  	    nor->manufacturer->fixups->late_init) { @@ -2937,6
> >>>>>>> +3042,44 @@ static int spi_nor_late_init_params(struct spi_nor
> *nor)
> >>>>>>>  	if (params->n_banks > 1)
> >>>>>>>  		params->bank_size = div64_u64(params->size,
> params-
> >>>> n_banks);
> >>>>>>>
> >>>>>>> +	nor->num_flash = 0;
> >>>>>>> +
> >>>>>>> +	/*
> >>>>>>> +	 * The flashes that are connected in stacked mode should be
> >> of
> >>>>>>> +same
> >>>>>> make.
> >>>>>>> +	 * Except the flash size all other properties are identical
> >>>>>>> +for all
> >> the
> >>>>>>> +	 * flashes connected in stacked mode.
> >>>>>>> +	 * The flashes that are connected in parallel mode should be
> >> identical.
> >>>>>>> +	 */
> >>>>>>> +	while (idx < SNOR_FLASH_CNT_MAX) {
> >>>>>>> +		rc = of_property_read_u64_index(np, "stacked-
> >> memories",
> >>>>>> idx,
> >>>>>>> +&flash_size[idx]);
> >>>>>>
> >>>>>> This is a little late in my opinion, as we don't have any sanity
> >>>>>> check on the flashes that are stacked on top of the first. We
> >>>>>> shall at least read and compare the ID for all.
> >>>>>
> >>>>> Alright, I will incorporate a sanity check for reading and
> >>>>> comparing the ID of the stacked flash. Subsequently, I believe
> >>>>> this stacked logic should be relocated to spi_nor_get_flash_info()
> >>>>> where we identify the first flash. Please share your thoughts on this.
> >>>>> Additionally, do you
> >>>>
> >>>> I'm wondering whether we can add a layer on top of the flash type
> >>>> to handle
> >>>
> >>> When you mention "on top," are you referring to incorporating it
> >>> into the MTD layer? Initially, Miquel had submitted this patch to
> >>> address
> >>
> >> I mean something above SPI MEM flashes, be it NOR, NANDs or whatever.
> >> Instead of treating the stacked flashes as a monolithic device and
> >> treat in SPI NOR some array of flashes, to have a layer above which
> >> probes the SPI MEM flash driver for each stacked flash. In your case
> >> SPI NOR would be probed twice, as you use 2 SPI NOR flashes.
> >>
> >>> stacked/parallel handling in the MTD layer.
> >>> https://lore.kernel.org/linux-mtd/20200114191052.0a16d116@xps13/t/
> >>> However, the Device Tree bindings were initially not accepted.
> >>
> >> Okay, thanks for the pointer. I'll take a look.
> 
> haven't yet read the thread from above, but I skimmed over the AMD
> controller datasheet.
> 
> >>
> >>> Following a series of discussions, the below bindings were
> >>> eventually merged.
> >>> https://lore.kernel.org/all/20220126112608.955728-4-miquel.raynal@bo
> >>> ot
> >>> lin.com/
> >>
> >> I saw, thanks.
> >>
> >>>
> >>>> the stacked/parallel modes. This way everything will become flash
> >>>> type independent. Would it be possible to stack 2 SPI NANDs? How
> >>>> about a SPI NOR and a SPI NAND?
> >>>>
> >>>> Is the datasheet of the controller public?
> >>>
> >>> Yes,
> >>> https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/Quad-SPI-Contr
> >>> ol
> >>> ler
> >>>
> >>
> >> Wonderful, I'll take a look. I see two clocks there. Does this mean
> >> that the stacked flashes can be operated at different frequencies? Do
> >> you know if we
> >
> > In stacked configuration, both flashes utilize a common clock (single
> > clock line). In a parallel setup, the flashes share the same clock but
> > have distinct clock lines. Please refer the diagrams in the sections
> > below for reference.
> > https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Inter
> > face-Diagrams
> 
> Thanks! Can you share with us what flashes you used for testing in the
> stacked and parallel configurations?

I used SPI-NOR QSPI flashes for testing stacked and parallel.

> 
> >> can combine a SPI NOR with a SPI NAND in stacked configuration?
> >
> > No, Xilinx/AMD QSPI controllers doesn't support this configuration.
> >
> 
> 2 SPI NANDs shall work with the AMD controller, right?

We never tested 2 SPI-NAND with AMD controller.
> 
> I skimmed over the QSPI controller datasheet and wondered why one would
> get complicated with 2 Quad Flashes when we have Octal. But then I saw that
> the same SoC can configure an Octal controller (the Octal and Quad
> controllers seems mutual exclusive) and that the Octal one can operate 2
> octal flashes in stacked mode :).

Thats correct .

Please let me know how you want me to proceed on this.

Regards,
Amit
> 
> Cheers,
> ta
Tudor Ambarus Dec. 15, 2023, 8:09 a.m. UTC | #13
On 15.12.2023 09:55, Mahapatra, Amit Kumar wrote:
>> Thanks! Can you share with us what flashes you used for testing in the
>> stacked and parallel configurations?
> I used SPI-NOR QSPI flashes for testing stacked and parallel.

I got that, I wanted the flash name or device ID.

What I'm interested is if each flash is in its own package. Are they?

> 
>>>> can combine a SPI NOR with a SPI NAND in stacked configuration?
>>> No, Xilinx/AMD QSPI controllers doesn't support this configuration.
>>>
>> 2 SPI NANDs shall work with the AMD controller, right?
> We never tested 2 SPI-NAND with AMD controller.

I was asking because I think the stacked layer shall be SPI MEM generic,
and not particular to SPI NOR.

>> I skimmed over the QSPI controller datasheet and wondered why one would
>> get complicated with 2 Quad Flashes when we have Octal. But then I saw that
>> the same SoC can configure an Octal controller (the Octal and Quad
>> controllers seems mutual exclusive) and that the Octal one can operate 2
>> octal flashes in stacked mode 
Mahapatra, Amit Kumar Dec. 15, 2023, 10:02 a.m. UTC | #14
Hello Tudor,

> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Friday, December 15, 2023 1:40 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 15.12.2023 09:55, Mahapatra, Amit Kumar wrote:
> >> Thanks! Can you share with us what flashes you used for testing in
> >> the stacked and parallel configurations?
> > I used SPI-NOR QSPI flashes for testing stacked and parallel.
> 
> I got that, I wanted the flash name or device ID.

N25Q00A, MX66U2G45G, IS25LP01G & W25H02JV are some of the QSPI flashes on 
which we tested. Additionally, we conducted tests on over 30 different 
QSPI flashes from four distinct vendors (Miron, Winbond, Macronix, and ISSI).

> What I'm interested is if each flash is in its own package. Are they?

I'm sorry, but I don't quite understand what you mean by "if each flash in 
its own package."

> 
> >
> >>>> can combine a SPI NOR with a SPI NAND in stacked configuration?
> >>> No, Xilinx/AMD QSPI controllers doesn't support this configuration.
> >>>
> >> 2 SPI NANDs shall work with the AMD controller, right?
> > We never tested 2 SPI-NAND with AMD controller.
> 
> I was asking because I think the stacked layer shall be SPI MEM generic, and
> not particular to SPI NOR.

Yes, I agree.

> 
> >> I skimmed over the QSPI controller datasheet and wondered why one
> >> would get complicated with 2 Quad Flashes when we have Octal. But
> >> then I saw that the same SoC can configure an Octal controller (the
> >> Octal and Quad controllers seems mutual exclusive) and that the Octal
> >> one can operate 2 octal flashes in stacked mode 
Tudor Ambarus Dec. 15, 2023, 10:33 a.m. UTC | #15
On 12/15/23 10:02, Mahapatra, Amit Kumar wrote:
> Hello Tudor,

Hi,

> 
>> -----Original Message-----
>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
>> Sent: Friday, December 15, 2023 1:40 PM
>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
>> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
>> michael@walle.cc; linux-mtd@lists.infradead.org;
>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
>> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
>> in spi-nor
>>
>>
>>
>> On 15.12.2023 09:55, Mahapatra, Amit Kumar wrote:
>>>> Thanks! Can you share with us what flashes you used for testing in
>>>> the stacked and parallel configurations?
>>> I used SPI-NOR QSPI flashes for testing stacked and parallel.
>>
>> I got that, I wanted the flash name or device ID.
> 
> N25Q00A, MX66U2G45G, IS25LP01G & W25H02JV are some of the QSPI flashes on 
> which we tested. Additionally, we conducted tests on over 30 different 
> QSPI flashes from four distinct vendors (Miron, Winbond, Macronix, and ISSI).
> 

Great.

>> What I'm interested is if each flash is in its own package. Are they?
> 
> I'm sorry, but I don't quite understand what you mean by "if each flash in 
> its own package."
> 

There are flashes that are stacked at the physical level. It's a single
flash with multiple dies, that are all under a single physical package.

As I understand, your stacked flash model is at logical level. You have
2 flashes each in its own package. 2 different entities. Is my
understanding correct?

Cheers,
ta
Mahapatra, Amit Kumar Dec. 15, 2023, 11:20 a.m. UTC | #16
Hello Tudor,

> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Friday, December 15, 2023 4:03 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org; michael@walle.cc;
> linux-mtd@lists.infradead.org; nicolas.ferre@microchip.com;
> alexandre.belloni@bootlin.com; claudiu.beznea@tuxon.dev; Simek, Michal
> <michal.simek@amd.com>; linux-arm-kernel@lists.infradead.org; alsa-
> devel@alsa-project.org; patches@opensource.cirrus.com; linux-
> sound@vger.kernel.org; git (AMD-Xilinx) <git@amd.com>;
> amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 12/15/23 10:02, Mahapatra, Amit Kumar wrote:
> > Hello Tudor,
> 
> Hi,
> 
> >
> >> -----Original Message-----
> >> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> >> Sent: Friday, December 15, 2023 1:40 PM
> >> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> >> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> >> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> >> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> >> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> >> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> >> michael@walle.cc; linux-mtd@lists.infradead.org;
> >> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> >> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
> >> linux- arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> >> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> >> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> >> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
> >> support in spi-nor
> >>
> >>
> >>
> >> On 15.12.2023 09:55, Mahapatra, Amit Kumar wrote:
> >>>> Thanks! Can you share with us what flashes you used for testing in
> >>>> the stacked and parallel configurations?
> >>> I used SPI-NOR QSPI flashes for testing stacked and parallel.
> >>
> >> I got that, I wanted the flash name or device ID.
> >
> > N25Q00A, MX66U2G45G, IS25LP01G & W25H02JV are some of the QSPI
> flashes
> > on which we tested. Additionally, we conducted tests on over 30
> > different QSPI flashes from four distinct vendors (Miron, Winbond,
> Macronix, and ISSI).
> >
> 
> Great.
> 
> >> What I'm interested is if each flash is in its own package. Are they?
> >
> > I'm sorry, but I don't quite understand what you mean by "if each
> > flash in its own package."
> >
> 
> There are flashes that are stacked at the physical level. It's a single flash with
> multiple dies, that are all under a single physical package.

Got it. The W25H02JV QSPI flash I mentioned earlier is a device with 
with four dies that are stacked at the physical level.

> 
> As I understand, your stacked flash model is at logical level. You have
> 2 flashes each in its own package. 2 different entities. Is my understanding
> correct?

Yes, that’s correct.

I'd like to contribute to your earlier point regarding the placement of 
the stacked layer. As you correctly highlighted, it should be in the 
spi-mem generic layer. For instance, when a read/write operation extends 
across multiple flashes (whether SPI-NOR or SPI-NAND), the stacked layer 
must handle the flash crossover. This requires setting the appropriate CS 
index in mem->spi->cs_index_mask to select the correct slave device and 
updating the data buffer, address & data length in spi_mem_op struct 
variable. Does this align with your understanding?

Regards,
Amit
> 
> Cheers,
> ta
Tudor Ambarus Dec. 19, 2023, 8:26 a.m. UTC | #17
On 15.12.2023 13:20, Mahapatra, Amit Kumar wrote:
> Hello Tudor,
> 

Hi!

>> -----Original Message-----
>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
>> Sent: Friday, December 15, 2023 4:03 PM
>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
>> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org; michael@walle.cc;
>> linux-mtd@lists.infradead.org; nicolas.ferre@microchip.com;
>> alexandre.belloni@bootlin.com; claudiu.beznea@tuxon.dev; Simek, Michal
>> <michal.simek@amd.com>; linux-arm-kernel@lists.infradead.org; alsa-
>> devel@alsa-project.org; patches@opensource.cirrus.com; linux-
>> sound@vger.kernel.org; git (AMD-Xilinx) <git@amd.com>;
>> amitrkcian2002@gmail.com
>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
>> in spi-nor
>>
>>
>>
>> On 12/15/23 10:02, Mahapatra, Amit Kumar wrote:
>>> Hello Tudor,
>>
>> Hi,
>>
>>>
>>>> -----Original Message-----
>>>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
>>>> Sent: Friday, December 15, 2023 1:40 PM
>>>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
>>>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
>>>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
>>>> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
>>>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
>>>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
>>>> michael@walle.cc; linux-mtd@lists.infradead.org;
>>>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
>>>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
>>>> linux- arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
>>>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
>>>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
>>>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
>>>> support in spi-nor
>>>>
>>>>
>>>>
>>>> On 15.12.2023 09:55, Mahapatra, Amit Kumar wrote:
>>>>>> Thanks! Can you share with us what flashes you used for testing in
>>>>>> the stacked and parallel configurations?
>>>>> I used SPI-NOR QSPI flashes for testing stacked and parallel.
>>>>
>>>> I got that, I wanted the flash name or device ID.
>>>
>>> N25Q00A, MX66U2G45G, IS25LP01G & W25H02JV are some of the QSPI
>> flashes
>>> on which we tested. Additionally, we conducted tests on over 30
>>> different QSPI flashes from four distinct vendors (Miron, Winbond,
>> Macronix, and ISSI).
>>>
>>
>> Great.
>>
>>>> What I'm interested is if each flash is in its own package. Are they?
>>>
>>> I'm sorry, but I don't quite understand what you mean by "if each
>>> flash in its own package."
>>>
>>
>> There are flashes that are stacked at the physical level. It's a single flash with
>> multiple dies, that are all under a single physical package.
> 
> Got it. The W25H02JV QSPI flash I mentioned earlier is a device with 
> with four dies that are stacked at the physical level.
> 
>>
>> As I understand, your stacked flash model is at logical level. You have
>> 2 flashes each in its own package. 2 different entities. Is my understanding
>> correct?
> 
> Yes, that’s correct.
> 
> I'd like to contribute to your earlier point regarding the placement of 
> the stacked layer. As you correctly highlighted, it should be in the 
> spi-mem generic layer. For instance, when a read/write operation extends 
> across multiple flashes (whether SPI-NOR or SPI-NAND), the stacked layer 
> must handle the flash crossover. This requires setting the appropriate CS 
> index in mem->spi->cs_index_mask to select the correct slave device and 
> updating the data buffer, address & data length in spi_mem_op struct 
> variable. Does this align with your understanding?
> 

This was the initial idea, yes, but we'll have to see how mtd concat
fits in. Maybe the abstraction can be made at the mtd level, which I
suspect mtd concat does. I have to read that driver, never opened it.

Something else to consider: I see that Micron has a twin quad mode:
https://media-www.micron.com/-/media/client/global/documents/products/data-sheet/nor-flash/serial-nor/mt25t/generation-b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500

The micron's "Separate Chip-Select and Clock Signals" resembles the
AMD's dual parallel 8-bit.
Micron's "Shared Chip-Select and Clock Signals" differs from the AMD's
stacked mode, as Micron uses DQ[3:0] and DQ[7:4], whereas AMD considers
both as DQ[3:0].

I had a short chat with Michael and he highlighted that instead of the
parallel mode, one would be better of with an octal device. I wonder
whether the quad parallel is worth the effort. I see AMD can select
either quad (single/stacked/parallel) or octal (single/stacked). Is the
parallel mode considered obsolete for new IPs?

Cheers,
ta
Mahapatra, Amit Kumar Dec. 21, 2023, 6:54 a.m. UTC | #18
Hello Tudor,

> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Tuesday, December 19, 2023 1:56 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 15.12.2023 13:20, Mahapatra, Amit Kumar wrote:
> > Hello Tudor,
> >
> 
> Hi!
> 
> >> -----Original Message-----
> >> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> >> Sent: Friday, December 15, 2023 4:03 PM
> >> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> >> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> >> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> >> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> >> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> >> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> >> michael@walle.cc; linux-mtd@lists.infradead.org;
> >> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> >> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
> >> linux-arm-kernel@lists.infradead.org; alsa- devel@alsa-project.org;
> >> patches@opensource.cirrus.com; linux- sound@vger.kernel.org; git
> >> (AMD-Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> >> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
> >> support in spi-nor
> >>
> >>
> >>
> >> On 12/15/23 10:02, Mahapatra, Amit Kumar wrote:
> >>> Hello Tudor,
> >>
> >> Hi,
> >>
> >>>
> >>>> -----Original Message-----
> >>>> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> >>>> Sent: Friday, December 15, 2023 1:40 PM
> >>>> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> >>>> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> >>>> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> >>>> lee@kernel.org; james.schulman@cirrus.com;
> david.rhodes@cirrus.com;
> >>>> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> >>>> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> >>>> michael@walle.cc; linux-mtd@lists.infradead.org;
> >>>> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> >>>> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
> >>>> linux- arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> >>>> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git
> >>>> (AMD-
> >>>> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com
> >>>> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
> >>>> support in spi-nor
> >>>>
> >>>>
> >>>>
> >>>> On 15.12.2023 09:55, Mahapatra, Amit Kumar wrote:
> >>>>>> Thanks! Can you share with us what flashes you used for testing
> >>>>>> in the stacked and parallel configurations?
> >>>>> I used SPI-NOR QSPI flashes for testing stacked and parallel.
> >>>>
> >>>> I got that, I wanted the flash name or device ID.
> >>>
> >>> N25Q00A, MX66U2G45G, IS25LP01G & W25H02JV are some of the QSPI
> >> flashes
> >>> on which we tested. Additionally, we conducted tests on over 30
> >>> different QSPI flashes from four distinct vendors (Miron, Winbond,
> >> Macronix, and ISSI).
> >>>
> >>
> >> Great.
> >>
> >>>> What I'm interested is if each flash is in its own package. Are they?
> >>>
> >>> I'm sorry, but I don't quite understand what you mean by "if each
> >>> flash in its own package."
> >>>
> >>
> >> There are flashes that are stacked at the physical level. It's a
> >> single flash with multiple dies, that are all under a single physical package.
> >
> > Got it. The W25H02JV QSPI flash I mentioned earlier is a device with
> > with four dies that are stacked at the physical level.
> >
> >>
> >> As I understand, your stacked flash model is at logical level. You
> >> have
> >> 2 flashes each in its own package. 2 different entities. Is my
> >> understanding correct?
> >
> > Yes, that’s correct.
> >
> > I'd like to contribute to your earlier point regarding the placement
> > of the stacked layer. As you correctly highlighted, it should be in
> > the spi-mem generic layer. For instance, when a read/write operation
> > extends across multiple flashes (whether SPI-NOR or SPI-NAND), the
> > stacked layer must handle the flash crossover. This requires setting
> > the appropriate CS index in mem->spi->cs_index_mask to select the
> > correct slave device and updating the data buffer, address & data
> > length in spi_mem_op struct variable. Does this align with your
> understanding?
> >
> 
> This was the initial idea, yes, but we'll have to see how mtd concat fits in.
> Maybe the abstraction can be made at the mtd level, which I suspect mtd
> concat does. I have to read that driver, never opened it.

I haven't explored the mtd concat driver either.

> 
> Something else to consider: I see that Micron has a twin quad mode:
> https://media-www.micron.com/-
> /media/client/global/documents/products/data-sheet/nor-flash/serial-
> nor/mt25t/generation-
> b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> 
> The micron's "Separate Chip-Select and Clock Signals" resembles the AMD's
> dual parallel 8-bit.

Yes, I agree.

> Micron's "Shared Chip-Select and Clock Signals" differs from the AMD's
> stacked mode, as Micron uses DQ[3:0] and DQ[7:4], whereas AMD considers
> both as DQ[3:0].

Yes, correct.

> 
> I had a short chat with Michael and he highlighted that instead of the parallel
> mode, one would be better of with an octal device. I wonder whether the
> quad parallel is worth the effort. I see AMD can select either quad
> (single/stacked/parallel) or octal (single/stacked). Is the parallel mode
 
Indeed, customers have the flexibility to choose between quad or octal 
options. However, some opt for a cost-effective strategy by selecting 
only Quad SPI in their chipset and boosting throughput through the 
parallel use of two flashes. To gauge the popularity of this 
configuration, I will consult with our marketing team for further 
insights. Given that parallel is a controller feature, it can be 
integrated into the driver file. At present, we can emphasis on 
implementing support for stacked mode, either through a new interface 
like mtd/spi-nor/stacked.c or by utilizing the mtd concat driver.

> considered obsolete for new IPs?

No, the parallel mode feature is still present in AMD's new IPs.

Regards,
Amit
> 
> Cheers,
> ta
Tudor Ambarus Feb. 9, 2024, 11:06 a.m. UTC | #19
On 12/21/23 06:54, Mahapatra, Amit Kumar wrote:
>> Something else to consider: I see that Micron has a twin quad mode:
>> https://media-www.micron.com/-
>> /media/client/global/documents/products/data-sheet/nor-flash/serial-
>> nor/mt25t/generation-
>> b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
>>
>> The micron's "Separate Chip-Select and Clock Signals" resembles the AMD's
>> dual parallel 8-bit.
> Yes, I agree.
> 
>> Micron's "Shared Chip-Select and Clock Signals" differs from the AMD's
>> stacked mode, as Micron uses DQ[3:0] and DQ[7:4], whereas AMD considers
>> both as DQ[3:0].
> Yes, correct.

Amit, please help me to assess this. I assume Micron and Microchip is
using the same concepts as AMD uses for the "Dual Parallel 8-bit IO
mode", but they call it "Twin Quad Mode".

I was wrong, the AMD datasheet [1] was misleading [2], it described the
IOs for both flashes as IO[3:0], but later on in the "Table QSPI
Interface Signals" the second flash is described with IO[7:4].

The AMD's 8-bit Dual Flash Parallel Interface is using dedicated CS# and
CLK# lines for each flash. As Micron does, isn't it?

Micron says [3] that:
"The device contains two quad I/O die, each able to operate
independently for a total of eight I/Os. The memory map applies to each
die. Each die has internal registers for status, configuration, and
device protection that can be set and read independently from one other.
Micron recommends that internal configuration settings for the two die
be set identically."

it also says that:
"When using quad commands in XIO-SPI or when using QIO-SPI,
DQ[3:0]/DQ[7:4] are I/O."

So I guess the upper layers just ask for a chunk of memory to be written
and the controller handles the cs# lines automatically. How is the AMD
controller working, do you have to drive the cs# lines manually, or you
just set the parallel mode and the controller takes care of everything?

I assume this is how mchp is handling things, they seem to just set a
bit the protocol into the QSPI_IFR.PROTTYP register field and that's all
[4]. They even seem to write the registers of both flashes at the same time.

In what regards the AMD's "dual stack interface", AMD is sharing the
clock and IO lines and uses dedicated CS# lines for the flashes, whereas
Micron shares the CS# and CLK# lines with different IO lines.

Amit, please study the architectures used by mchp, micron and amd and
let us know if they are the same or they differ, and if they differ what
are the differences.

I added Conor from mchp in cc, I see Nicolas is already there, and Bean
from micron.

Thanks,
ta

[1]
https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Interface-Signals
[2]
https://docs.xilinx.com/viewer/attachment/dwmjhDJGICdJqD4swyVzcQ/fD8nv4ry78xM0_EF5kv4mA
[3]
https://media-www.micron.com/-/media/client/global/documents/products/data-sheet/nor-flash/serial-nor/mt25t/generation-b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
[4]
https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/ProductDocuments/DataSheets/SAMA7G5-Series-Data-Sheet-DS60001765.pdf
Tudor Ambarus Feb. 9, 2024, 4:13 p.m. UTC | #20
On 2/9/24 11:06, Tudor Ambarus wrote:
> 
> 
> On 12/21/23 06:54, Mahapatra, Amit Kumar wrote:
>>> Something else to consider: I see that Micron has a twin quad mode:
>>> https://media-www.micron.com/-
>>> /media/client/global/documents/products/data-sheet/nor-flash/serial-
>>> nor/mt25t/generation-
>>> b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
>>>
>>> The micron's "Separate Chip-Select and Clock Signals" resembles the AMD's
>>> dual parallel 8-bit.
>> Yes, I agree.
>>
>>> Micron's "Shared Chip-Select and Clock Signals" differs from the AMD's
>>> stacked mode, as Micron uses DQ[3:0] and DQ[7:4], whereas AMD considers
>>> both as DQ[3:0].
>> Yes, correct.
> 
> Amit, please help me to assess this. I assume Micron and Microchip is
> using the same concepts as AMD uses for the "Dual Parallel 8-bit IO
> mode", but they call it "Twin Quad Mode".
> 
> I was wrong, the AMD datasheet [1] was misleading [2], it described the
> IOs for both flashes as IO[3:0], but later on in the "Table QSPI
> Interface Signals" the second flash is described with IO[7:4].
> 
> The AMD's 8-bit Dual Flash Parallel Interface is using dedicated CS# and
> CLK# lines for each flash. As Micron does, isn't it?
> 
> Micron says [3] that:
> "The device contains two quad I/O die, each able to operate
> independently for a total of eight I/Os. The memory map applies to each
> die. Each die has internal registers for status, configuration, and
> device protection that can be set and read independently from one other.
> Micron recommends that internal configuration settings for the two die
> be set identically."

Amit,

I forgot to say my first conclusion about the quote from above. Even if
the dies are in the same physical package, micron asks users to
configure each die as it is a self-standing entity, IOW to configure
each die as it is a flash on its own. To me it looks like 2 concatenated
flashes at first look. Thus identical to how AMD controller works.
Please clarify this.

> 
> it also says that:
> "When using quad commands in XIO-SPI or when using QIO-SPI,
> DQ[3:0]/DQ[7:4] are I/O."

and this would be a parallel concatenation of two flashes.

Then it would be good if you let us now the similarities and differences
between how amd and mchp controller work, I scrawled few ideas below.

thanks,
ta
> 
> So I guess the upper layers just ask for a chunk of memory to be written
> and the controller handles the cs# lines automatically. How is the AMD
> controller working, do you have to drive the cs# lines manually, or you
> just set the parallel mode and the controller takes care of everything?
> 
> I assume this is how mchp is handling things, they seem to just set a
> bit the protocol into the QSPI_IFR.PROTTYP register field and that's all
> [4]. They even seem to write the registers of both flashes at the same time.
> 
> In what regards the AMD's "dual stack interface", AMD is sharing the
> clock and IO lines and uses dedicated CS# lines for the flashes, whereas
> Micron shares the CS# and CLK# lines with different IO lines.
> 
> Amit, please study the architectures used by mchp, micron and amd and
> let us know if they are the same or they differ, and if they differ what
> are the differences.
> 
> I added Conor from mchp in cc, I see Nicolas is already there, and Bean
> from micron.
> 
> Thanks,
> ta
> 
> [1]
> https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Interface-Signals
> [2]
> https://docs.xilinx.com/viewer/attachment/dwmjhDJGICdJqD4swyVzcQ/fD8nv4ry78xM0_EF5kv4mA
> [3]
> https://media-www.micron.com/-/media/client/global/documents/products/data-sheet/nor-flash/serial-nor/mt25t/generation-b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> [4]
> https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/ProductDocuments/DataSheets/SAMA7G5-Series-Data-Sheet-DS60001765.pdf
Mahapatra, Amit Kumar March 13, 2024, 4:03 p.m. UTC | #21
Hello Tudor,

> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Friday, February 9, 2024 4:36 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com; Conor Dooley
> <conor.dooley@microchip.com>; beanhuo@micron.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> CAUTION: This message has originated from an External Source. Please use
> proper judgment and caution when opening attachments, clicking links, or
> responding to this email.
> 
> 
> On 12/21/23 06:54, Mahapatra, Amit Kumar wrote:
> >> Something else to consider: I see that Micron has a twin quad mode:
> >> https://media-www.micron.com/-
> >> /media/client/global/documents/products/data-sheet/nor-flash/serial-
> >> nor/mt25t/generation-
> >>
> b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> >>
> >> The micron's "Separate Chip-Select and Clock Signals" resembles the
> >> AMD's dual parallel 8-bit.
> > Yes, I agree.
> >
> >> Micron's "Shared Chip-Select and Clock Signals" differs from the
> >> AMD's stacked mode, as Micron uses DQ[3:0] and DQ[7:4], whereas AMD
> >> considers both as DQ[3:0].
> > Yes, correct.
> 
> Amit, please help me to assess this. I assume Micron and Microchip is using
> the same concepts as AMD uses for the "Dual Parallel 8-bit IO mode", but
> they call it "Twin Quad Mode".

That's accurate. It's the same concept.
> 
> I was wrong, the AMD datasheet [1] was misleading [2], it described the IOs
> for both flashes as IO[3:0], but later on in the "Table QSPI Interface Signals"
> the second flash is described with IO[7:4].

That’s correct.
> 
> The AMD's 8-bit Dual Flash Parallel Interface is using dedicated CS# and CLK#
> lines for each flash. As Micron does, isn't it?

Correct.
> 
> Micron says [3] that:
> "The device contains two quad I/O die, each able to operate independently
> for a total of eight I/Os. The memory map applies to each die. Each die has
> internal registers for status, configuration, and device protection that can be
> set and read independently from one other.
> Micron recommends that internal configuration settings for the two die be set
> identically."
> 
> it also says that:
> "When using quad commands in XIO-SPI or when using QIO-SPI,
> DQ[3:0]/DQ[7:4] are I/O."
> 
> So I guess the upper layers just ask for a chunk of memory to be written and
> the controller handles the cs# lines automatically. How is the AMD controller
> working, do you have to drive the cs# lines manually, or you just set the
> parallel mode and the controller takes care of everything?

https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/Word-Format
In parallel mode, the driver sets both the CS_LOWER and CS_UPPER bits 
in the Cmd_FIFO_Data register(please refer the above link), and sets 
BUS_SEL to 2’b11 to utilize all 8 IO lines. The controller then manages 
the assertion and de-assertion of the CS# lines.
> 
> I assume this is how mchp is handling things, they seem to just set a bit the
> protocol into the QSPI_IFR.PROTTYP register field and that's all [4]. They even
> seem to write the registers of both flashes at the same time.

Yes, that's accurate, but the key distinction is that in Microchip, the 
QSPI controller has one CS# (referred to as QCS), whereas the AMD QSPI 
controller has 2 CS#(referred to as CS0 & CS1).

Regards,
Amit

> 
> In what regards the AMD's "dual stack interface", AMD is sharing the clock
> and IO lines and uses dedicated CS# lines for the flashes, whereas Micron
> shares the CS# and CLK# lines with different IO lines.
> 
> Amit, please study the architectures used by mchp, micron and amd and let
> us know if they are the same or they differ, and if they differ what are the
> differences.
> 
> I added Conor from mchp in cc, I see Nicolas is already there, and Bean from
> micron.
> 
> Thanks,
> ta
> 
> [1]
> https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Interface-
> Signals
> [2]
> https://docs.xilinx.com/viewer/attachment/dwmjhDJGICdJqD4swyVzcQ/fD8nv
> 4ry78xM0_EF5kv4mA
> [3]
> https://media-www.micron.com/-
> /media/client/global/documents/products/data-sheet/nor-flash/serial-
> nor/mt25t/generation-
> b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> [4]
> https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/
> ProductDocuments/DataSheets/SAMA7G5-Series-Data-Sheet-
> DS60001765.pdf
Mahapatra, Amit Kumar March 13, 2024, 4:03 p.m. UTC | #22
> -----Original Message-----
> From: Tudor Ambarus <tudor.ambarus@linaro.org>
> Sent: Friday, February 9, 2024 9:44 PM
> To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com; Conor Dooley
> <conor.dooley@microchip.com>; beanhuo@micron.com
> Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> On 2/9/24 11:06, Tudor Ambarus wrote:
> >
> >
> > On 12/21/23 06:54, Mahapatra, Amit Kumar wrote:
> >>> Something else to consider: I see that Micron has a twin quad mode:
> >>> https://media-www.micron.com/-
> >>> /media/client/global/documents/products/data-sheet/nor-flash/serial-
> >>> nor/mt25t/generation-
> >>>
> b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> >>>
> >>> The micron's "Separate Chip-Select and Clock Signals" resembles the
> >>> AMD's dual parallel 8-bit.
> >> Yes, I agree.
> >>
> >>> Micron's "Shared Chip-Select and Clock Signals" differs from the
> >>> AMD's stacked mode, as Micron uses DQ[3:0] and DQ[7:4], whereas AMD
> >>> considers both as DQ[3:0].
> >> Yes, correct.
> >
> > Amit, please help me to assess this. I assume Micron and Microchip is
> > using the same concepts as AMD uses for the "Dual Parallel 8-bit IO
> > mode", but they call it "Twin Quad Mode".
> >
> > I was wrong, the AMD datasheet [1] was misleading [2], it described
> > the IOs for both flashes as IO[3:0], but later on in the "Table QSPI
> > Interface Signals" the second flash is described with IO[7:4].
> >
> > The AMD's 8-bit Dual Flash Parallel Interface is using dedicated CS#
> > and CLK# lines for each flash. As Micron does, isn't it?
> >
> > Micron says [3] that:
> > "The device contains two quad I/O die, each able to operate
> > independently for a total of eight I/Os. The memory map applies to
> > each die. Each die has internal registers for status, configuration,
> > and device protection that can be set and read independently from one
> other.
> > Micron recommends that internal configuration settings for the two die
> > be set identically."
> 

Hello Tudor,

> Amit,
> 
> I forgot to say my first conclusion about the quote from above. Even if the
> dies are in the same physical package, micron asks users to configure each die
> as it is a self-standing entity, IOW to configure each die as it is a flash on its
> own. To me it looks like 2 concatenated flashes at first look. Thus identical to
> how AMD controller works.
> Please clarify this.

That’s correct, the Micron flash that you referred can communicate with 
the AMD QSPI controller in both parallel and stacked mode.
> 
> >
> > it also says that:
> > "When using quad commands in XIO-SPI or when using QIO-SPI,
> > DQ[3:0]/DQ[7:4] are I/O."
> 
> and this would be a parallel concatenation of two flashes.

That's correct.

Regards,
Amit
> 
> Then it would be good if you let us now the similarities and differences
> between how amd and mchp controller work, I scrawled few ideas below.
> 
> thanks,
> ta
> >
> > So I guess the upper layers just ask for a chunk of memory to be
> > written and the controller handles the cs# lines automatically. How is
> > the AMD controller working, do you have to drive the cs# lines
> > manually, or you just set the parallel mode and the controller takes care of
> everything?
> >
> > I assume this is how mchp is handling things, they seem to just set a
> > bit the protocol into the QSPI_IFR.PROTTYP register field and that's
> > all [4]. They even seem to write the registers of both flashes at the same
> time.
> >
> > In what regards the AMD's "dual stack interface", AMD is sharing the
> > clock and IO lines and uses dedicated CS# lines for the flashes,
> > whereas Micron shares the CS# and CLK# lines with different IO lines.
> >
> > Amit, please study the architectures used by mchp, micron and amd and
> > let us know if they are the same or they differ, and if they differ
> > what are the differences.
> >
> > I added Conor from mchp in cc, I see Nicolas is already there, and
> > Bean from micron.
> >
> > Thanks,
> > ta
> >
> > [1]
> > https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Inter
> > face-Signals
> > [2]
> >
> https://docs.xilinx.com/viewer/attachment/dwmjhDJGICdJqD4swyVzcQ/fD8nv
> > 4ry78xM0_EF5kv4mA
> > [3]
> > https://media-www.micron.com/-
> /media/client/global/documents/products/
> > data-sheet/nor-flash/serial-nor/mt25t/generation-b/mt25t_qljs_l_512_xb
> > a_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> > [4]
> >
> https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/
> Produ
> > ctDocuments/DataSheets/SAMA7G5-Series-Data-Sheet-DS60001765.pdf
Mahapatra, Amit Kumar July 26, 2024, 12:35 p.m. UTC | #23
> -----Original Message-----
> From: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>
> Sent: Wednesday, March 13, 2024 9:34 PM
> To: Tudor Ambarus <tudor.ambarus@linaro.org>; broonie@kernel.org;
> pratyush@kernel.org; miquel.raynal@bootlin.com; richard@nod.at;
> vigneshr@ti.com; sbinding@opensource.cirrus.com; lee@kernel.org;
> james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> michael@walle.cc; linux-mtd@lists.infradead.org;
> nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>; linux-
> arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com; Conor Dooley
> <conor.dooley@microchip.com>; beanhuo@micron.com
> Subject: RE: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> 
> 
> > -----Original Message-----
> > From: Tudor Ambarus <tudor.ambarus@linaro.org>
> > Sent: Friday, February 9, 2024 9:44 PM
> > To: Mahapatra, Amit Kumar <amit.kumar-mahapatra@amd.com>;
> > broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> > richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> > lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> > rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> > Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org;
> > michael@walle.cc; linux-mtd@lists.infradead.org;
> > nicolas.ferre@microchip.com; alexandre.belloni@bootlin.com;
> > claudiu.beznea@tuxon.dev; Simek, Michal <michal.simek@amd.com>;
> linux-
> > arm-kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> > patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> > Xilinx) <git@amd.com>; amitrkcian2002@gmail.com; Conor Dooley
> > <conor.dooley@microchip.com>; beanhuo@micron.com
> > Subject: Re: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories
> > support in spi-nor

Hello Everyone,

I would like to propose another approach for handling stacked and 
parallel connection modes and would appreciate your thoughts on it. 
But before that, here is some background on what we are trying to achieve.

The AMD QSPI controller supports two advanced connection modes(Stacked and 
Dual Parallel) which allow the controller to treat two different flashes 
as one storage.

Stacked:
Flashes share the same SPI bus, but different CS line, controller asserts 
the CS of the flash to which it needs to communicate.

Dual Parallel:
Both the flashes have their separate SPI bus CS of both the flashes will 
be asserted/de-asserted at the same time. In this mode data will be split 
across both the flashes by enabling the STRIPE setting in the controller. 
If STRIPE is not enabled, then same data will be sent to both the devices.

For more information on the modes please feel free to go through the 
controller flash interface below
https://docs.amd.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Device-Interface

Mirochip QSPI controller also supports "Dual Parallel 8-bit IO mode", but 
they call it "Twin Quad Mode".
https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/ProductDocuments/DataSheets/SAMA7G5-Series-Data-Sheet-DS60001765.pdf

DT binding changes were added through the following commits:
https://github.com/torvalds/linux/commit/f89504300e94524d5d5846ff8b728592ac72cec4
https://github.com/torvalds/linux/commit/eba5368503b4291db7819512600fa014ea17c5a8
https://github.com/torvalds/linux/commit/e2edd1b64f1c79e8abda365149ed62a2a9a494b4

SPI core changes were adds through the following commit:
https://github.com/torvalds/linux/commit/4d8ff6b0991d5e86b17b235fc46ec62e9195cb9b

Based on the inputs/suggestions from Tudor, i am planning to add a new 
layer between the SPI-NOR and MTD layers to support stacked and parallel 
configurations. This new layer will be part of the spi-nor and located in 
mtd/spi-nor/

This layer would perform the following tasks:
 - During probing, store information from all the connected flashes, 
   whether in stacked or parallel mode, and present it as a single device 
   to the MTD layer.
 - Register callbacks through this new layer instead of spi-nor/core.c and 
   handle MTD device registration.
 - In stacked mode, select the appropriate spi-nor flash based on the 
   address provided by the MTD layer during flash operations.
 - Manage flash crossover operations in stacked mode.
 - Ensure both connected flashes are identical in parallel mode.
 - Handle odd byte count requests from the MTD layer during flash 
   operations in parallel mode.

For implementing this the current DT binding need to be updated as 
follows.

stacked-memories DT changes:
 - Flash size information can be retrieved directly from the flash, so it 
   has been removed from the DT binding.
 - Each stacked flash will have its own flash node. This approach allows 
   flashes of different makes and sizes to be stacked together, as each 
   flash will be probed individually.
 - The stacked-memories DT bindings will contain the phandles of the flash 
   nodes connected in stacked mode.

spi@0 {
  
  flash@0 {
    compatible = "jedec,spi-nor"
    reg = <0x00>;
    stacked-memories = <&flash@0 &flash@1>;
    spi-max-frequency = <50000000>;
    ...
              partition@0 { 
        label = "qspi-0";
        reg = <0x0 0xf00000>;
    };
                        

  }
  
  flash@1 {
    compatible = "jedec,spi-nor"
              reg = <0x01>;
    spi-max-frequency = <50000000>;
    ...
              partition@0 { 
        label = "qspi-1";
        reg = <0x0 0x800000>;
    };
  }
}

parallel-memories DT changes:
 - Flash size information can be retrieved directly from the flash, so it 
   has been removed from the DT binding.
 - Each flash connected in parallel mode will have its own flash node. 
   This allows us to verify that both flashes connected in parallel are 
   identical, as each flash node will be probed separately.
 - The parallel-memories DT bindings will contain the phandles of the 
   flash nodes connected in parallel.

spi@0 {
  
  flash@0 {
    compatible = "jedec,spi-nor"
    reg = <0x00>;
    parallel-memories = <&flash@0 &flash@1>;
    spi-max-frequency = <50000000>;
    ...
              partition@0 { 
        label = "qspi-0";
        reg = <0x0 0xf00000>;
    };
                        

  }
  
  flash@1 {
    compatible = "jedec,spi-nor"
              reg = <0x01>;
    spi-max-frequency = <50000000>;
    ...
              partition@0 { 
        label = "qspi-1";
        reg = <0x0 0x800000>;
    };
  }
}

Regards,
Amit

> >
> >
> >
> > On 2/9/24 11:06, Tudor Ambarus wrote:
> > >
> > >
> > > On 12/21/23 06:54, Mahapatra, Amit Kumar wrote:
> > >>> Something else to consider: I see that Micron has a twin quad mode:
> > >>> https://media-www.micron.com/-
> > >>> /media/client/global/documents/products/data-sheet/nor-flash/seria
> > >>> l-
> > >>> nor/mt25t/generation-
> > >>>
> > b/mt25t_qljs_l_512_xba_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> > >>>
> > >>> The micron's "Separate Chip-Select and Clock Signals" resembles
> > >>> the AMD's dual parallel 8-bit.
> > >> Yes, I agree.
> > >>
> > >>> Micron's "Shared Chip-Select and Clock Signals" differs from the
> > >>> AMD's stacked mode, as Micron uses DQ[3:0] and DQ[7:4], whereas
> > >>> AMD considers both as DQ[3:0].
> > >> Yes, correct.
> > >
> > > Amit, please help me to assess this. I assume Micron and Microchip
> > > is using the same concepts as AMD uses for the "Dual Parallel 8-bit
> > > IO mode", but they call it "Twin Quad Mode".
> > >
> > > I was wrong, the AMD datasheet [1] was misleading [2], it described
> > > the IOs for both flashes as IO[3:0], but later on in the "Table QSPI
> > > Interface Signals" the second flash is described with IO[7:4].
> > >
> > > The AMD's 8-bit Dual Flash Parallel Interface is using dedicated CS#
> > > and CLK# lines for each flash. As Micron does, isn't it?
> > >
> > > Micron says [3] that:
> > > "The device contains two quad I/O die, each able to operate
> > > independently for a total of eight I/Os. The memory map applies to
> > > each die. Each die has internal registers for status, configuration,
> > > and device protection that can be set and read independently from
> > > one
> > other.
> > > Micron recommends that internal configuration settings for the two
> > > die be set identically."
> >
> 
> Hello Tudor,
> 
> > Amit,
> >
> > I forgot to say my first conclusion about the quote from above. Even
> > if the dies are in the same physical package, micron asks users to
> > configure each die as it is a self-standing entity, IOW to configure
> > each die as it is a flash on its own. To me it looks like 2
> > concatenated flashes at first look. Thus identical to how AMD controller
> works.
> > Please clarify this.
> 
> That’s correct, the Micron flash that you referred can communicate with the
> AMD QSPI controller in both parallel and stacked mode.
> >
> > >
> > > it also says that:
> > > "When using quad commands in XIO-SPI or when using QIO-SPI,
> > > DQ[3:0]/DQ[7:4] are I/O."
> >
> > and this would be a parallel concatenation of two flashes.
> 
> That's correct.
> 
> Regards,
> Amit
> >
> > Then it would be good if you let us now the similarities and
> > differences between how amd and mchp controller work, I scrawled few
> ideas below.
> >
> > thanks,
> > ta
> > >
> > > So I guess the upper layers just ask for a chunk of memory to be
> > > written and the controller handles the cs# lines automatically. How
> > > is the AMD controller working, do you have to drive the cs# lines
> > > manually, or you just set the parallel mode and the controller takes
> > > care of
> > everything?
> > >
> > > I assume this is how mchp is handling things, they seem to just set
> > > a bit the protocol into the QSPI_IFR.PROTTYP register field and
> > > that's all [4]. They even seem to write the registers of both
> > > flashes at the same
> > time.
> > >
> > > In what regards the AMD's "dual stack interface", AMD is sharing the
> > > clock and IO lines and uses dedicated CS# lines for the flashes,
> > > whereas Micron shares the CS# and CLK# lines with different IO lines.
> > >
> > > Amit, please study the architectures used by mchp, micron and amd
> > > and let us know if they are the same or they differ, and if they
> > > differ what are the differences.
> > >
> > > I added Conor from mchp in cc, I see Nicolas is already there, and
> > > Bean from micron.
> > >
> > > Thanks,
> > > ta
> > >
> > > [1]
> > > https://docs.xilinx.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Int
> > > er
> > > face-Signals
> > > [2]
> > >
> >
> https://docs.xilinx.com/viewer/attachment/dwmjhDJGICdJqD4swyVzcQ/fD8nv
> > > 4ry78xM0_EF5kv4mA
> > > [3]
> > > https://media-www.micron.com/-
> > /media/client/global/documents/products/
> > > data-sheet/nor-flash/serial-nor/mt25t/generation-b/mt25t_qljs_l_512_
> > > xb
> > > a_0.pdf?rev=de70b770c5dc4da8b8ead06b57c03500
> > > [4]
> > >
> >
> https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/
> > Produ
> > > ctDocuments/DataSheets/SAMA7G5-Series-Data-Sheet-DS60001765.pdf
Michael Walle July 26, 2024, 12:55 p.m. UTC | #24
Hi,

> Based on the inputs/suggestions from Tudor, i am planning to add a new 
> layer between the SPI-NOR and MTD layers to support stacked and parallel 
> configurations. This new layer will be part of the spi-nor and located in 
> mtd/spi-nor/

Will AMD submit to maintain this layer? What happens if the
maintainer will leave AMD? TBH, personally, I don't like to
maintain such a niche feature.
I'd really like to see some use cases and performance reports for
this, like actual boards (and no evaluation boards don't count). Why
wouldn't someone just use an octal flash?

And as already mentioned there is also mtdcat, which seems to
duplicate some features?

-michael
Michal Simek July 31, 2024, 8:58 a.m. UTC | #25
Hi Michael,

On 7/26/24 14:55, Michael Walle wrote:
> Hi,
> 
>> Based on the inputs/suggestions from Tudor, i am planning to add a new
>> layer between the SPI-NOR and MTD layers to support stacked and parallel
>> configurations. This new layer will be part of the spi-nor and located in
>> mtd/spi-nor/
> 
> Will AMD submit to maintain this layer? What happens if the
> maintainer will leave AMD? TBH, personally, I don't like to
> maintain such a niche feature.
> I'd really like to see some use cases and performance reports for
> this, like actual boards (and no evaluation boards don't count). Why
> wouldn't someone just use an octal flash?

AMD/Xilinx is not creating products that's why we don't have data on actual 
boards but I don't really understand why evaluation boards don't count. A lot of 
customers are taking schematics from us and removing parts which they don't need 
and add their custom part.

But one product for parallel configuration which is publicly saying that it is 
using it is for example this SOM.
https://shop.trenz-electronic.de/en/TE0820-05-2AI21MA-MPSoC-Module-with-AMD-Zynq-UltraScale-ZU2CG-1I-2-GByte-DDR4-SDRAM-4-x-5-cm

I am not marketing guy to tell if there is any other which publicly saying we 
are using this feature but we can only develop/support/maintain support for 
these configurations on our evaluation boards because that's what we have access 
to and what we know how it is done.

Also performance numbers from us can be only provided against our evaluation boards.

Thanks,
Michal
Michael Walle July 31, 2024, 9:19 a.m. UTC | #26
Hi Michal,

> >> Based on the inputs/suggestions from Tudor, i am planning to add a new
> >> layer between the SPI-NOR and MTD layers to support stacked and parallel
> >> configurations. This new layer will be part of the spi-nor and located in
> >> mtd/spi-nor/
> > 
> > Will AMD submit to maintain this layer? What happens if the
> > maintainer will leave AMD? TBH, personally, I don't like to
> > maintain such a niche feature.
> > I'd really like to see some use cases and performance reports for
> > this, like actual boards (and no evaluation boards don't count). Why
> > wouldn't someone just use an octal flash?
>
> AMD/Xilinx is not creating products that's why we don't have data on actual 
> boards but I don't really understand why evaluation boards don't count.

Because on an eval board the vendor just puts everything possible on
the board.

> A lot of 
> customers are taking schematics from us and removing parts which they don't need 
> and add their custom part.
>
> But one product for parallel configuration which is publicly saying that it is 
> using it is for example this SOM.
> https://shop.trenz-electronic.de/en/TE0820-05-2AI21MA-MPSoC-Module-with-AMD-Zynq-UltraScale-ZU2CG-1I-2-GByte-DDR4-SDRAM-4-x-5-cm
>
> I am not marketing guy to tell if there is any other which publicly saying we 
> are using this feature but we can only develop/support/maintain support for 
> these configurations on our evaluation boards because that's what we have access 
> to and what we know how it is done.
>
> Also performance numbers from us can be only provided against our evaluation boards.

Which is good enough.

All I'm saying is that you shouldn't put burden on us (the SPI NOR
maintainers) for what seems to me at least as a niche. Thus I was
asking for performance numbers and users. Convince me that I'm
wrong and that is worth our time.

The first round of patches were really invasive regarding the core
code. So if there is a clean layering approach which can be enabled
as a module and you are maintaining it I'm fine with that (even if
the core code needs some changes then like hooks or so, not sure).

-michael
Michal Simek July 31, 2024, 1:40 p.m. UTC | #27
On 7/31/24 11:19, Michael Walle wrote:
> Hi Michal,
> 
>>>> Based on the inputs/suggestions from Tudor, i am planning to add a new
>>>> layer between the SPI-NOR and MTD layers to support stacked and parallel
>>>> configurations. This new layer will be part of the spi-nor and located in
>>>> mtd/spi-nor/
>>>
>>> Will AMD submit to maintain this layer? What happens if the
>>> maintainer will leave AMD? TBH, personally, I don't like to
>>> maintain such a niche feature.
>>> I'd really like to see some use cases and performance reports for
>>> this, like actual boards (and no evaluation boards don't count). Why
>>> wouldn't someone just use an octal flash?
>>
>> AMD/Xilinx is not creating products that's why we don't have data on actual
>> boards but I don't really understand why evaluation boards don't count.
> 
> Because on an eval board the vendor just puts everything possible on
> the board.
> 
>> A lot of
>> customers are taking schematics from us and removing parts which they don't need
>> and add their custom part.
>>
>> But one product for parallel configuration which is publicly saying that it is
>> using it is for example this SOM.
>> https://shop.trenz-electronic.de/en/TE0820-05-2AI21MA-MPSoC-Module-with-AMD-Zynq-UltraScale-ZU2CG-1I-2-GByte-DDR4-SDRAM-4-x-5-cm
>>
>> I am not marketing guy to tell if there is any other which publicly saying we
>> are using this feature but we can only develop/support/maintain support for
>> these configurations on our evaluation boards because that's what we have access
>> to and what we know how it is done.
>>
>> Also performance numbers from us can be only provided against our evaluation boards.
> 
> Which is good enough.
> 
> All I'm saying is that you shouldn't put burden on us (the SPI NOR
> maintainers) for what seems to me at least as a niche. Thus I was
> asking for performance numbers and users. Convince me that I'm
> wrong and that is worth our time.

No. It is not really just feature of our evaluation boards. Customers are using 
it. I was talking to one guy from field and he confirms me that these 
configurations are used by his multiple customers in real products.

> 
> The first round of patches were really invasive regarding the core
> code. So if there is a clean layering approach which can be enabled
> as a module and you are maintaining it I'm fine with that (even if
> the core code needs some changes then like hooks or so, not sure).

That discussion started with Miquel some years ago when he was trying to to 
solve description in DT which is merged for a while in the kernel.
And Amit is trying to figure it out which way to go.
I don't want to predict where that code should be going or how it should look 
like because don't have spi-nor experience. But I hope we finally move forward 
on this topic to see the path how to upstream support for it.

Thanks,
Michal
Michael Walle July 31, 2024, 2:11 p.m. UTC | #28
> > All I'm saying is that you shouldn't put burden on us (the SPI NOR
> > maintainers) for what seems to me at least as a niche. Thus I was
> > asking for performance numbers and users. Convince me that I'm
> > wrong and that is worth our time.
>
> No. It is not really just feature of our evaluation boards. Customers are using 
> it. I was talking to one guy from field and he confirms me that these 
> configurations are used by his multiple customers in real products.

Which begs the question, do we really have to support every feature
in the core (I'd like to hear Tudors and Pratyush opinion here).
Honestly, this just looks like a concatenation of two QSPI
controllers. Why didn't you just use a normal octal controller which
is a protocol also backed by the JEDEC standard. Is it any faster?
Do you get more capacity? Does anyone really use large SPI-NOR
flashes? If so, why? I mean you've put that controller on your SoC,
you must have some convincing arguments why a customer should use
it.

> > The first round of patches were really invasive regarding the core
> > code. So if there is a clean layering approach which can be enabled
> > as a module and you are maintaining it I'm fine with that (even if
> > the core code needs some changes then like hooks or so, not sure).
>
> That discussion started with Miquel some years ago when he was trying to to 
> solve description in DT which is merged for a while in the kernel.

What's your point here? From what I can tell the DT binding is wrong
and needs to be reworked anyway.

> And Amit is trying to figure it out which way to go.
> I don't want to predict where that code should be going or how it should look 
> like because don't have spi-nor experience. But I hope we finally move forward 
> on this topic to see the path how to upstream support for it.

You still didn't answer my initial question. Will AMD support and
maintain that code? I was pushing you towards putting that code into
your own driver because then that's up to you what you are doing
there.

So how do we move forward? I'd like to see as little as core changes
possible to get your dual flash setup working. I'm fine with having a
layer in spi-nor/ (not sure that's how it will work with mtdcat
which looks like it's similar as your stacked flash) as long as it
can be a module (selected by the driver).

-michael
Michal Simek Aug. 1, 2024, 6:22 a.m. UTC | #29
On 7/31/24 16:11, Michael Walle wrote:
>>> All I'm saying is that you shouldn't put burden on us (the SPI NOR
>>> maintainers) for what seems to me at least as a niche. Thus I was
>>> asking for performance numbers and users. Convince me that I'm
>>> wrong and that is worth our time.
>>
>> No. It is not really just feature of our evaluation boards. Customers are using
>> it. I was talking to one guy from field and he confirms me that these
>> configurations are used by his multiple customers in real products.
> 
> Which begs the question, do we really have to support every feature
> in the core (I'd like to hear Tudors and Pratyush opinion here).
> Honestly, this just looks like a concatenation of two QSPI
> controllers. 

Based on my understanding for stacked yes. For parallel no.

> Why didn't you just use a normal octal controller which
> is a protocol also backed by the JEDEC standard. 

On newer SOC octal IP core is used.
Amit please comment.

> Is it any faster?

Amit: please provide numbers.

> Do you get more capacity? Does anyone really use large SPI-NOR
> flashes? If so, why?

You get twice more capacity based on that configuration. I can't answer the 
second question because not working with field. But both of that configurations 
are used by customers. Adding Neal if he wants to add something more to it.

> I mean you've put that controller on your SoC,
> you must have some convincing arguments why a customer should use
> it.

I expect recommendation is to use single configuration but if you need bigger 
space for your application the only way to extend it is to use stacked 
configuration with two the same flashes next to each other.
If you want to have bigger size and also be faster answer is parallel 
configuration.


>>> The first round of patches were really invasive regarding the core
>>> code. So if there is a clean layering approach which can be enabled
>>> as a module and you are maintaining it I'm fine with that (even if
>>> the core code needs some changes then like hooks or so, not sure).
>>
>> That discussion started with Miquel some years ago when he was trying to to
>> solve description in DT which is merged for a while in the kernel.
> 
> What's your point here? From what I can tell the DT binding is wrong
> and needs to be reworked anyway.

I am just saying that this is not any adhoc new feature but configuration which 
has been already discussed and some steps made. If DT binding is wrong it can be 
deprecated and use new one but for that it has be clear which way to go.


>> And Amit is trying to figure it out which way to go.
>> I don't want to predict where that code should be going or how it should look
>> like because don't have spi-nor experience. But I hope we finally move forward
>> on this topic to see the path how to upstream support for it.
> 
> You still didn't answer my initial question. Will AMD support and
> maintain that code? I was pushing you towards putting that code into
> your own driver because then that's up to you what you are doing
> there.

Of course. We care about our code and about supporting our SOC and features 
which are related to it. It means yes, we will be regularly testing it and 
taking care about it.


> So how do we move forward? I'd like to see as little as core changes
> possible to get your dual flash setup working. I'm fine with having a
> layer in spi-nor/ (not sure that's how it will work with mtdcat
> which looks like it's similar as your stacked flash) as long as it
> can be a module (selected by the driver).

ok.

Thanks,
Michal
Frager, Neal Aug. 1, 2024, 6:37 a.m. UTC | #30
Hi Michael,

>>> All I'm saying is that you shouldn't put burden on us (the SPI NOR
>>> maintainers) for what seems to me at least as a niche. Thus I was
>>> asking for performance numbers and users. Convince me that I'm
>>> wrong and that is worth our time.
>>
>> No. It is not really just feature of our evaluation boards. Customers are using
>> it. I was talking to one guy from field and he confirms me that these
>> configurations are used by his multiple customers in real products.
> 
> Which begs the question, do we really have to support every feature
> in the core (I'd like to hear Tudors and Pratyush opinion here).
> Honestly, this just looks like a concatenation of two QSPI
> controllers. 

> Based on my understanding for stacked yes. For parallel no.

> Why didn't you just use a normal octal controller which
> is a protocol also backed by the JEDEC standard. 

> On newer SOC octal IP core is used.
> Amit please comment.

> Is it any faster?

> Amit: please provide numbers.

> Do you get more capacity? Does anyone really use large SPI-NOR
> flashes? If so, why?

> You get twice more capacity based on that configuration. I can't answer the 
> second question because not working with field. But both of that configurations 
> are used by customers. Adding Neal if he wants to add something more to it.

Just to add a comment as I work directly with our customers.  The main reason
this support is important is for our older SoCs, zynq and zynqmp.

Most of our customers are using QSPI flash as the first boot memory to get
from the boot ROM to u-boot.  They then typically use other memories, such as
eMMC for the Linux kernel, OS and file system.

The issue we have on the zynq and zynqmp SoCs is that the boot ROM (code that
cannot be changed) will not boot from an OSPI flash.  It will only boot from a
QSPI flash.  This is what is forcing many of our customers down the QSPI path.
Since many of these customers are interested in additional speed and memory
size, they then end up using a parallel or stacked configuration because they
cannot use an OSPI with zynq or zynqmp.

All of our newer SoCs can boot from OSPI.  I agree with you that if someone
could choose OSPI for performance, they would, so I do not expect parallel or
stacked configurations with our newer SoCs.

I get why you see this configuration as a niche, but for us, it is a very
large niche because zynq and zynqmp are two of our most successful SoC
families.

> I mean you've put that controller on your SoC,
> you must have some convincing arguments why a customer should use
> it.

> I expect recommendation is to use single configuration but if you need bigger 
> space for your application the only way to extend it is to use stacked 
> configuration with two the same flashes next to each other.
> If you want to have bigger size and also be faster answer is parallel 
> configuration.


>>> The first round of patches were really invasive regarding the core
>>> code. So if there is a clean layering approach which can be enabled
>>> as a module and you are maintaining it I'm fine with that (even if
>>> the core code needs some changes then like hooks or so, not sure).
>>
>> That discussion started with Miquel some years ago when he was trying to to
>> solve description in DT which is merged for a while in the kernel.
> 
> What's your point here? From what I can tell the DT binding is wrong
> and needs to be reworked anyway.

> I am just saying that this is not any adhoc new feature but configuration which 
> has been already discussed and some steps made. If DT binding is wrong it can be 
> deprecated and use new one but for that it has be clear which way to go.


>> And Amit is trying to figure it out which way to go.
>> I don't want to predict where that code should be going or how it should look
>> like because don't have spi-nor experience. But I hope we finally move forward
>> on this topic to see the path how to upstream support for it.
> 
> You still didn't answer my initial question. Will AMD support and
> maintain that code? I was pushing you towards putting that code into
> your own driver because then that's up to you what you are doing
> there.

> Of course. We care about our code and about supporting our SOC and features 
> which are related to it. It means yes, we will be regularly testing it and 
> taking care about it.


> So how do we move forward? I'd like to see as little as core changes
> possible to get your dual flash setup working. I'm fine with having a
> layer in spi-nor/ (not sure that's how it will work with mtdcat
> which looks like it's similar as your stacked flash) as long as it
> can be a module (selected by the driver).

> ok.

Best regards,
Neal Frager
AMD
Mahapatra, Amit Kumar Aug. 1, 2024, 9:28 a.m. UTC | #31
Hello,


> -----Original Message-----
> From: Frager, Neal <neal.frager@amd.com>
> Sent: Thursday, August 1, 2024 12:07 PM
> To: Simek, Michal <michal.simek@amd.com>; Michael Walle
> <michael@walle.cc>; Mahapatra, Amit Kumar <amit.kumar-
> mahapatra@amd.com>; Tudor Ambarus <tudor.ambarus@linaro.org>;
> broonie@kernel.org; pratyush@kernel.org; miquel.raynal@bootlin.com;
> richard@nod.at; vigneshr@ti.com; sbinding@opensource.cirrus.com;
> lee@kernel.org; james.schulman@cirrus.com; david.rhodes@cirrus.com;
> rf@opensource.cirrus.com; perex@perex.cz; tiwai@suse.com
> Cc: linux-spi@vger.kernel.org; linux-kernel@vger.kernel.org; linux-
> mtd@lists.infradead.org; nicolas.ferre@microchip.com;
> alexandre.belloni@bootlin.com; claudiu.beznea@tuxon.dev; linux-arm-
> kernel@lists.infradead.org; alsa-devel@alsa-project.org;
> patches@opensource.cirrus.com; linux-sound@vger.kernel.org; git (AMD-
> Xilinx) <git@amd.com>; amitrkcian2002@gmail.com; Conor Dooley
> <conor.dooley@microchip.com>; beanhuo@micron.com
> Subject: RE: [PATCH v11 07/10] mtd: spi-nor: Add stacked memories support
> in spi-nor
> 
> Hi Michael,
> 
> >>> All I'm saying is that you shouldn't put burden on us (the SPI NOR
> >>> maintainers) for what seems to me at least as a niche. Thus I was
> >>> asking for performance numbers and users. Convince me that I'm wrong
> >>> and that is worth our time.
> >>
> >> No. It is not really just feature of our evaluation boards. Customers
> >> are using it. I was talking to one guy from field and he confirms me
> >> that these configurations are used by his multiple customers in real
> products.
> >
> > Which begs the question, do we really have to support every feature in
> > the core (I'd like to hear Tudors and Pratyush opinion here).
> > Honestly, this just looks like a concatenation of two QSPI
> > controllers.
> 
> > Based on my understanding for stacked yes. For parallel no.
> 
> > Why didn't you just use a normal octal controller which is a protocol
> > also backed by the JEDEC standard.
> 
> > On newer SOC octal IP core is used.
> > Amit please comment.
> 
> > Is it any faster?
> 
> > Amit: please provide numbers.
> 
Here are some QSPI performance numbers comparing a single flash mode and 
two flashes connected in parallel mode. I ran the test on a VCK190 Eval 
Board https://www.xilinx.com/products/boards-and-kits/vck190.html, 
measuring the timing for mtd_debug erase, write, and read operations. 
The QSPI bus clock was set to 150MHz, and the data size was 32MB, 
comparing a single flash setup with a two-flash parallel mode setup.

Single Flash mode:

xilinx-vck190-20242:/home/petalinux# time mtd_debug erase /dev/mtd2 0x00 0x1e00000
Erased 31457280 bytes from address 0x00000000 in flash

real	0m54.713s
user	0m0.000s
sys	0m32.639s
xilinx-vck190-20242:/home/petalinux# time mtd_debug write /dev/mtd2 0x00 0x1e00000 test.bin
Copied 31457280 bytes from test.bin to address 0x00000000 in flash

real	0m30.187s
user	0m0.000s
sys	0m16.359s
xilinx-vck190-20242:/home/petalinux# time mtd_debug read /dev/mtd2 0x00 0x1e00000 test-read.bin
Copied 31457280 bytes from address 0x00000000 in flash to test-read.bin

real	0m0.472s
user	0m0.001s
sys	0m0.040s

Two flashes connected in parallel mode:

xilinx-vck190-20242:/home/petalinux# time mtd_debug erase /dev/mtd2 0x00 0x1e00000
Erased 31457280 bytes from address 0x00000000 in flash

real	0m27.727s
user	0m0.004s
sys	0m14.923s
xilinx-vck190-20242:/home/petalinux# time mtd_debug write /dev/mtd2 0x00 0x1e00000 test.bin
Copied 31457280 bytes from test.bin to address 0x00000000 in flash

real	0m16.538s
user	0m0.000s
sys	0m8.512s
xilinx-vck190-20242:/home/petalinux# time mtd_debug read /dev/mtd2 0x00 0x1e00000 test-read.bin
Copied 31457280 bytes from address 0x00000000 in flash to test-read.bin

real	0m0.263s
user	0m0.000s
sys	0m0.044s

Regards,
Amit

> > Do you get more capacity? Does anyone really use large SPI-NOR
> > flashes? If so, why?
> 
> > You get twice more capacity based on that configuration. I can't
> > answer the second question because not working with field. But both of
> > that configurations are used by customers. Adding Neal if he wants to add
> something more to it.
> 
> Just to add a comment as I work directly with our customers.  The main
> reason this support is important is for our older SoCs, zynq and zynqmp.
> 
> Most of our customers are using QSPI flash as the first boot memory to get
> from the boot ROM to u-boot.  They then typically use other memories, such
> as eMMC for the Linux kernel, OS and file system.
> 
> The issue we have on the zynq and zynqmp SoCs is that the boot ROM (code
> that cannot be changed) will not boot from an OSPI flash.  It will only boot
> from a QSPI flash.  This is what is forcing many of our customers down the
> QSPI path.
> Since many of these customers are interested in additional speed and
> memory size, they then end up using a parallel or stacked configuration
> because they cannot use an OSPI with zynq or zynqmp.
> 
> All of our newer SoCs can boot from OSPI.  I agree with you that if someone
> could choose OSPI for performance, they would, so I do not expect parallel or
> stacked configurations with our newer SoCs.
> 
> I get why you see this configuration as a niche, but for us, it is a very large
> niche because zynq and zynqmp are two of our most successful SoC families.
> 
> > I mean you've put that controller on your SoC, you must have some
> > convincing arguments why a customer should use it.
> 
> > I expect recommendation is to use single configuration but if you need
> > bigger space for your application the only way to extend it is to use
> > stacked configuration with two the same flashes next to each other.
> > If you want to have bigger size and also be faster answer is parallel
> > configuration.
> 
> 
> >>> The first round of patches were really invasive regarding the core
> >>> code. So if there is a clean layering approach which can be enabled
> >>> as a module and you are maintaining it I'm fine with that (even if
> >>> the core code needs some changes then like hooks or so, not sure).
> >>
> >> That discussion started with Miquel some years ago when he was trying
> >> to to solve description in DT which is merged for a while in the kernel.
> >
> > What's your point here? From what I can tell the DT binding is wrong
> > and needs to be reworked anyway.
> 
> > I am just saying that this is not any adhoc new feature but
> > configuration which has been already discussed and some steps made. If
> > DT binding is wrong it can be deprecated and use new one but for that it
> has be clear which way to go.
> 
> 
> >> And Amit is trying to figure it out which way to go.
> >> I don't want to predict where that code should be going or how it
> >> should look like because don't have spi-nor experience. But I hope we
> >> finally move forward on this topic to see the path how to upstream
> support for it.
> >
> > You still didn't answer my initial question. Will AMD support and
> > maintain that code? I was pushing you towards putting that code into
> > your own driver because then that's up to you what you are doing
> > there.
> 
> > Of course. We care about our code and about supporting our SOC and
> > features which are related to it. It means yes, we will be regularly
> > testing it and taking care about it.
> 
> 
> > So how do we move forward? I'd like to see as little as core changes
> > possible to get your dual flash setup working. I'm fine with having a
> > layer in spi-nor/ (not sure that's how it will work with mtdcat which
> > looks like it's similar as your stacked flash) as long as it can be a
> > module (selected by the driver).
> 
> > ok.
> 
> Best regards,
> Neal Frager
> AMD
Michael Walle Aug. 5, 2024, 8:14 a.m. UTC | #32
Hi,

> > You get twice more capacity based on that configuration. I can't answer the 
> > second question because not working with field. But both of that configurations 
> > are used by customers. Adding Neal if he wants to add something more to it.
>
> Just to add a comment as I work directly with our customers.  The main reason
> this support is important is for our older SoCs, zynq and zynqmp.
>
> Most of our customers are using QSPI flash as the first boot memory to get
> from the boot ROM to u-boot.  They then typically use other memories, such as
> eMMC for the Linux kernel, OS and file system.

Agreed and that's probably the most prominent use case for NOR
flashes anyway.

> The issue we have on the zynq and zynqmp SoCs is that the boot ROM (code that
> cannot be changed) will not boot from an OSPI flash.  It will only boot from a
> QSPI flash.  This is what is forcing many of our customers down the QSPI path.
> Since many of these customers are interested in additional speed and memory
> size, they then end up using a parallel or stacked configuration because they
> cannot use an OSPI with zynq or zynqmp.

Above you've said, the bootloader is stored on the NOR flash and the
bulk storage is eMMC. So why do you need bigger NOR flashes (where
even the biggest NOR flash isn't enough)?

I also don't understand "the boot ROM will just boot from QSPI".
First, you cannot connect an octal flash anyway, because you only
have an QSPI controller, right? Secondly, normally the bootrom will
(at least) boot the first stage using normal (single line) SPI
commands. Is that not the case for zynq and zynqmp?

> All of our newer SoCs can boot from OSPI.  I agree with you that if someone
> could choose OSPI for performance, they would, so I do not expect parallel or
> stacked configurations with our newer SoCs.

Ok, but then the argument with bigger flashes are void, because you
are back to be bound to one OSPI flash.

> I get why you see this configuration as a niche, but for us, it is a very
> large niche because zynq and zynqmp are two of our most successful SoC
> families.

Fair enough. But please find a way to support it without butchering
the whole core.

-michael
Michael Walle Aug. 5, 2024, 8:27 a.m. UTC | #33
Hi,

> >>> All I'm saying is that you shouldn't put burden on us (the SPI NOR
> >>> maintainers) for what seems to me at least as a niche. Thus I was
> >>> asking for performance numbers and users. Convince me that I'm
> >>> wrong and that is worth our time.
> >>
> >> No. It is not really just feature of our evaluation boards. Customers are using
> >> it. I was talking to one guy from field and he confirms me that these
> >> configurations are used by his multiple customers in real products.
> > 
> > Which begs the question, do we really have to support every feature
> > in the core (I'd like to hear Tudors and Pratyush opinion here).
> > Honestly, this just looks like a concatenation of two QSPI
> > controllers. 
>
> Based on my understanding for stacked yes. For parallel no.

See below.

> > Why didn't you just use a normal octal controller which
> > is a protocol also backed by the JEDEC standard. 
>
> On newer SOC octal IP core is used.
> Amit please comment.
>
> > Is it any faster?
>
> Amit: please provide numbers.
>
> > Do you get more capacity? Does anyone really use large SPI-NOR
> > flashes? If so, why?
>
> You get twice more capacity based on that configuration. I can't answer the 
> second question because not working with field. But both of that configurations 
> are used by customers. Adding Neal if he wants to add something more to it.
>
> > I mean you've put that controller on your SoC,
> > you must have some convincing arguments why a customer should use
> > it.
>
> I expect recommendation is to use single configuration but if you need bigger 
> space for your application the only way to extend it is to use stacked 
> configuration with two the same flashes next to each other.
> If you want to have bigger size and also be faster answer is parallel 
> configuration.

But who is using expensive NOR flash for bulk storage anyway? You're
only mentioning parallel mode. Also the performance numbers were
just about the parallel mode. What about stacked mode? Because
there's a chance that parallel mode works without modification of
the core (?).

> >>> The first round of patches were really invasive regarding the core
> >>> code. So if there is a clean layering approach which can be enabled
> >>> as a module and you are maintaining it I'm fine with that (even if
> >>> the core code needs some changes then like hooks or so, not sure).
> >>
> >> That discussion started with Miquel some years ago when he was trying to to
> >> solve description in DT which is merged for a while in the kernel.
> > 
> > What's your point here? From what I can tell the DT binding is wrong
> > and needs to be reworked anyway.
>
> I am just saying that this is not any adhoc new feature but configuration which 
> has been already discussed and some steps made. If DT binding is wrong it can be 
> deprecated and use new one but for that it has be clear which way to go.

Well, AMD could have side stepped all this if they had just
integrated a normal OSPI flash controller, which would have the same
requirements regarding the pins (if not even less) and it would have
been *easy* to integrate it into the already available ecosystem.
That was what my initial question was about. Why did you choose two
QSPI ports instead of one OSPI port.

> >> And Amit is trying to figure it out which way to go.
> >> I don't want to predict where that code should be going or how it should look
> >> like because don't have spi-nor experience. But I hope we finally move forward
> >> on this topic to see the path how to upstream support for it.
> > 
> > You still didn't answer my initial question. Will AMD support and
> > maintain that code? I was pushing you towards putting that code into
> > your own driver because then that's up to you what you are doing
> > there.
>
> Of course. We care about our code and about supporting our SOC and features 
> which are related to it. It means yes, we will be regularly testing it and 
> taking care about it.

Great!

-michael
Michal Simek Aug. 5, 2024, 11 a.m. UTC | #34
Hi,

On 8/5/24 10:27, Michael Walle wrote:
> Hi,
> 
>>>>> All I'm saying is that you shouldn't put burden on us (the SPI NOR
>>>>> maintainers) for what seems to me at least as a niche. Thus I was
>>>>> asking for performance numbers and users. Convince me that I'm
>>>>> wrong and that is worth our time.
>>>>
>>>> No. It is not really just feature of our evaluation boards. Customers are using
>>>> it. I was talking to one guy from field and he confirms me that these
>>>> configurations are used by his multiple customers in real products.
>>>
>>> Which begs the question, do we really have to support every feature
>>> in the core (I'd like to hear Tudors and Pratyush opinion here).
>>> Honestly, this just looks like a concatenation of two QSPI
>>> controllers.
>>
>> Based on my understanding for stacked yes. For parallel no.
> 
> See below.
> 
>>> Why didn't you just use a normal octal controller which
>>> is a protocol also backed by the JEDEC standard.
>>
>> On newer SOC octal IP core is used.
>> Amit please comment.
>>
>>> Is it any faster?
>>
>> Amit: please provide numbers.
>>
>>> Do you get more capacity? Does anyone really use large SPI-NOR
>>> flashes? If so, why?
>>
>> You get twice more capacity based on that configuration. I can't answer the
>> second question because not working with field. But both of that configurations
>> are used by customers. Adding Neal if he wants to add something more to it.
>>
>>> I mean you've put that controller on your SoC,
>>> you must have some convincing arguments why a customer should use
>>> it.
>>
>> I expect recommendation is to use single configuration but if you need bigger
>> space for your application the only way to extend it is to use stacked
>> configuration with two the same flashes next to each other.
>> If you want to have bigger size and also be faster answer is parallel
>> configuration.
> 
> But who is using expensive NOR flash for bulk storage anyway? 

I expect you understand that even if I know companies which does it I am not 
allow to share their names.

But customers don't need to have other free pins to connect for example emmc.
That's why adding one more "expensive flash" can be for them only one option.

Also I bet that price for one more qspi flash is nothing compare to chip itself 
and other related expenses for low volume production.

> You're
> only mentioning parallel mode. Also the performance numbers were
> just about the parallel mode. What about stacked mode? Because
> there's a chance that parallel mode works without modification of
> the core (?).

I will let Amit to comment it.


> 
>>>>> The first round of patches were really invasive regarding the core
>>>>> code. So if there is a clean layering approach which can be enabled
>>>>> as a module and you are maintaining it I'm fine with that (even if
>>>>> the core code needs some changes then like hooks or so, not sure).
>>>>
>>>> That discussion started with Miquel some years ago when he was trying to to
>>>> solve description in DT which is merged for a while in the kernel.
>>>
>>> What's your point here? From what I can tell the DT binding is wrong
>>> and needs to be reworked anyway.
>>
>> I am just saying that this is not any adhoc new feature but configuration which
>> has been already discussed and some steps made. If DT binding is wrong it can be
>> deprecated and use new one but for that it has be clear which way to go.
> 
> Well, AMD could have side stepped all this if they had just
> integrated a normal OSPI flash controller, which would have the same
> requirements regarding the pins (if not even less) and it would have
> been *easy* to integrate it into the already available ecosystem.
> That was what my initial question was about. Why did you choose two
> QSPI ports instead of one OSPI port.

Keep in your mind that ZynqMP is 9years old SoC. Zynq 12+ years with a lot of 
internal development happening before. Not sure if ospi even exists at that 
time. Also if any IP was available for the price which they were targeting.
I don't think make sense to discuss OSPI in this context because that's not in 
these SoCs.
I have never worked with spi that's why don't know historical context to provide 
more details.

Thanks,
Michal
Mahapatra, Amit Kumar Aug. 7, 2024, 1:21 p.m. UTC | #35
Hello Michael,

> On 8/5/24 10:27, Michael Walle wrote:
> > Hi,
> >
> >>>>> All I'm saying is that you shouldn't put burden on us (the SPI NOR
> >>>>> maintainers) for what seems to me at least as a niche. Thus I was
> >>>>> asking for performance numbers and users. Convince me that I'm
> >>>>> wrong and that is worth our time.
> >>>>
> >>>> No. It is not really just feature of our evaluation boards.
> >>>> Customers are using it. I was talking to one guy from field and he
> >>>> confirms me that these configurations are used by his multiple
> customers in real products.
> >>>
> >>> Which begs the question, do we really have to support every feature
> >>> in the core (I'd like to hear Tudors and Pratyush opinion here).
> >>> Honestly, this just looks like a concatenation of two QSPI
> >>> controllers.
> >>
> >> Based on my understanding for stacked yes. For parallel no.
> >
> > See below.
> >
> >>> Why didn't you just use a normal octal controller which is a
> >>> protocol also backed by the JEDEC standard.
> >>
> >> On newer SOC octal IP core is used.
> >> Amit please comment.
> >>
> >>> Is it any faster?
> >>
> >> Amit: please provide numbers.
> >>
> >>> Do you get more capacity? Does anyone really use large SPI-NOR
> >>> flashes? If so, why?
> >>
> >> You get twice more capacity based on that configuration. I can't
> >> answer the second question because not working with field. But both
> >> of that configurations are used by customers. Adding Neal if he wants to
> add something more to it.
> >>
> >>> I mean you've put that controller on your SoC, you must have some
> >>> convincing arguments why a customer should use it.
> >>
> >> I expect recommendation is to use single configuration but if you
> >> need bigger space for your application the only way to extend it is
> >> to use stacked configuration with two the same flashes next to each other.
> >> If you want to have bigger size and also be faster answer is parallel
> >> configuration.
> >
> > But who is using expensive NOR flash for bulk storage anyway?
> 
> I expect you understand that even if I know companies which does it I am not
> allow to share their names.
> 
> But customers don't need to have other free pins to connect for example
> emmc.
> That's why adding one more "expensive flash" can be for them only one
> option.
> 
> Also I bet that price for one more qspi flash is nothing compare to chip itself
> and other related expenses for low volume production.
> 
> > You're
> > only mentioning parallel mode. Also the performance numbers were just
> > about the parallel mode. What about stacked mode? Because there's a
> > chance that parallel mode works without modification of the core (?).
> 
> I will let Amit to comment it.

The performance of the stacked configuration will be the same as that of 
the single mode. As Michal mentioned earlier, stacked mode is used for 
scenarios where the customer requires larger flash space while maintaining 
the same performance.

I want to provide some background on why I choose to handle stacked and 
parallel modes through an additional layer or file, such as 
/mtd/spi-nor/stacked.c, rather than mtd-concat. Initially, when Miquel 
began upstreaming stacked support by extending the mtd-concat driver, 
the DT binding was not accepted. He proposed a couple of DT bindings 
[1] & [2] to support stacking through mtd-concat, but none were accepted. 
Additionally, after reviewing the MTD core code, he found that adding 
stacked support through mtd-concat could be complicated and involve many 
corner cases, which he mentioned in his RFC [3]. He then suggested 
concatenating the flashes instead of the mtd partitions, and eventually, 
the current DT bindings were added. This is why I propose handling the 
stacked and parallel configurations through an additional layer or file, 
as the mtd-concat approach was already discussed and rejected.

[1] https://lore.kernel.org/all/20191113171505.26128-4-miquel.raynal@bootlin.com/
[2] https://lore.kernel.org/all/20191127105522.31445-5-miquel.raynal@bootlin.com/
[3]https://lore.kernel.org/all/20211112152411.818321-1-miquel.raynal@bootlin.com/

Regards,
Amit

> 
> 
> >
> >>>>> The first round of patches were really invasive regarding the core
> >>>>> code. So if there is a clean layering approach which can be
> >>>>> enabled as a module and you are maintaining it I'm fine with that
> >>>>> (even if the core code needs some changes then like hooks or so, not
> sure).
> >>>>
> >>>> That discussion started with Miquel some years ago when he was
> >>>> trying to to solve description in DT which is merged for a while in the
> kernel.
> >>>
> >>> What's your point here? From what I can tell the DT binding is wrong
> >>> and needs to be reworked anyway.
> >>
> >> I am just saying that this is not any adhoc new feature but
> >> configuration which has been already discussed and some steps made.
> >> If DT binding is wrong it can be deprecated and use new one but for that it
> has be clear which way to go.
> >
> > Well, AMD could have side stepped all this if they had just integrated
> > a normal OSPI flash controller, which would have the same requirements
> > regarding the pins (if not even less) and it would have been *easy* to
> > integrate it into the already available ecosystem.
> > That was what my initial question was about. Why did you choose two
> > QSPI ports instead of one OSPI port.
> 
> Keep in your mind that ZynqMP is 9years old SoC. Zynq 12+ years with a lot of
> internal development happening before. Not sure if ospi even exists at that
> time. Also if any IP was available for the price which they were targeting.
> I don't think make sense to discuss OSPI in this context because that's not in
> these SoCs.
> I have never worked with spi that's why don't know historical context to
> provide more details.
> 
> Thanks,
> Michal
Miquel Raynal Aug. 12, 2024, 7:29 a.m. UTC | #36
Hi Michael,

> > > The first round of patches were really invasive regarding the core
> > > code. So if there is a clean layering approach which can be enabled
> > > as a module and you are maintaining it I'm fine with that (even if
> > > the core code needs some changes then like hooks or so, not sure).  
> >
> > That discussion started with Miquel some years ago when he was trying to to 
> > solve description in DT which is merged for a while in the kernel.  
> 
> What's your point here? From what I can tell the DT binding is wrong
> and needs to be reworked anyway.

I'm sorry I'm now catching up, can you point at the thread explaining
what is wrong in the bindings? I didn't find where this was detailed. Or
otherwise summarize quickly what needs to change?

Thanks!
Miquèl
Michael Walle Aug. 12, 2024, 7:37 a.m. UTC | #37
Hi,

> > > > The first round of patches were really invasive regarding the core
> > > > code. So if there is a clean layering approach which can be enabled
> > > > as a module and you are maintaining it I'm fine with that (even if
> > > > the core code needs some changes then like hooks or so, not sure).  
> > >
> > > That discussion started with Miquel some years ago when he was trying to to 
> > > solve description in DT which is merged for a while in the kernel.  
> > 
> > What's your point here? From what I can tell the DT binding is wrong
> > and needs to be reworked anyway.
>
> I'm sorry I'm now catching up, can you point at the thread explaining
> what is wrong in the bindings? I didn't find where this was detailed. Or
> otherwise summarize quickly what needs to change?

Somewhere in this mega thread Tudor had some remarks about the
bindings. Amit also mentioned it here:

https://lore.kernel.org/r/IA0PR12MB769944254171C39FF4171B52DCB42@IA0PR12MB7699.namprd12.prod.outlook.com/

-michael
Miquel Raynal Aug. 12, 2024, 8:38 a.m. UTC | #38
Hi,

> Hello Everyone,
> 
> I would like to propose another approach for handling stacked and 
> parallel connection modes and would appreciate your thoughts on it. 
> But before that, here is some background on what we are trying to achieve.
> 
> The AMD QSPI controller supports two advanced connection modes(Stacked and 
> Dual Parallel) which allow the controller to treat two different flashes 
> as one storage.
> 
> Stacked:
> Flashes share the same SPI bus, but different CS line, controller asserts 
> the CS of the flash to which it needs to communicate.
> 
> Dual Parallel:
> Both the flashes have their separate SPI bus CS of both the flashes will 
> be asserted/de-asserted at the same time. In this mode data will be split 
> across both the flashes by enabling the STRIPE setting in the controller. 
> If STRIPE is not enabled, then same data will be sent to both the devices.
> 
> For more information on the modes please feel free to go through the 
> controller flash interface below
> https://docs.amd.com/r/en-US/am011-versal-acap-trm/QSPI-Flash-Device-Interface
> 
> Mirochip QSPI controller also supports "Dual Parallel 8-bit IO mode", but 
> they call it "Twin Quad Mode".
> https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/ProductDocuments/DataSheets/SAMA7G5-Series-Data-Sheet-DS60001765.pdf
> 
> DT binding changes were added through the following commits:
> https://github.com/torvalds/linux/commit/f89504300e94524d5d5846ff8b728592ac72cec4
> https://github.com/torvalds/linux/commit/eba5368503b4291db7819512600fa014ea17c5a8
> https://github.com/torvalds/linux/commit/e2edd1b64f1c79e8abda365149ed62a2a9a494b4
> 
> SPI core changes were adds through the following commit:
> https://github.com/torvalds/linux/commit/4d8ff6b0991d5e86b17b235fc46ec62e9195cb9b
> 
> Based on the inputs/suggestions from Tudor, i am planning to add a new 
> layer between the SPI-NOR and MTD layers to support stacked and parallel 
> configurations. This new layer will be part of the spi-nor and located in 
> mtd/spi-nor/

For now I don't think you need a totally generic implementation. As
long as there is only one controller supporting these modes, I'd say
this is not super relevant.

> This layer would perform the following tasks:
>  - During probing, store information from all the connected flashes, 
>    whether in stacked or parallel mode, and present it as a single device 
>    to the MTD layer.
>  - Register callbacks through this new layer instead of spi-nor/core.c and 
>    handle MTD device registration.
>  - In stacked mode, select the appropriate spi-nor flash based on the 
>    address provided by the MTD layer during flash operations.
>  - Manage flash crossover operations in stacked mode.
>  - Ensure both connected flashes are identical in parallel mode.
>  - Handle odd byte count requests from the MTD layer during flash 
>    operations in parallel mode.
> 
> For implementing this the current DT binding need to be updated as 
> follows.

So you want to go back to step 1 and redefine bindings? Is that worth?

> stacked-memories DT changes:
>  - Flash size information can be retrieved directly from the flash, so it 
>    has been removed from the DT binding.
>  - Each stacked flash will have its own flash node. This approach allows 
>    flashes of different makes and sizes to be stacked together, as each 
>    flash will be probed individually.

And how will you define partitions crossing device boundaries? I
believe this constraint has been totally forgotten in this proposal.
The whole idea of stacking two devices this way was to simplify the
user's life with a single device exposed and the controller handling
itself the CS changes. That is precisely what the current binding do.
The final goal being to double your storage transparently. If your goal
was to put two chips aside, then none of this was actually needed. If
you don't care about that anymore, then all the energy put into
discussing the bindings initially was useless and a controller property
could also have made the trick.

>  - The stacked-memories DT bindings will contain the phandles of the flash 
>    nodes connected in stacked mode.
> 
> spi@0 {
>   
>   flash@0 {
>     compatible = "jedec,spi-nor"
>     reg = <0x00>;
>     stacked-memories = <&flash@0 &flash@1>;
>     spi-max-frequency = <50000000>;
>     ...
>               partition@0 { 
>         label = "qspi-0";
>         reg = <0x0 0xf00000>;
>     };
>                         
> 
>   }
>   
>   flash@1 {
>     compatible = "jedec,spi-nor"
>               reg = <0x01>;
>     spi-max-frequency = <50000000>;
>     ...
>               partition@0 { 
>         label = "qspi-1";
>         reg = <0x0 0x800000>;
>     };
>   }
> }
> 
> parallel-memories DT changes:
>  - Flash size information can be retrieved directly from the flash, so it 
>    has been removed from the DT binding.
>  - Each flash connected in parallel mode will have its own flash node. 
>    This allows us to verify that both flashes connected in parallel are 
>    identical, as each flash node will be probed separately.

Well, you don't have to verify that. It's a basic hardware design
constraint for using this mode.

Otherwise same comment as above, this description doesn't allow
correct partitioning and that was one of the main constraints back when
we discussed these needs.

>  - The parallel-memories DT bindings will contain the phandles of the 
>    flash nodes connected in parallel.
> 
> spi@0 {
>   
>   flash@0 {
>     compatible = "jedec,spi-nor"
>     reg = <0x00>;
>     parallel-memories = <&flash@0 &flash@1>;
>     spi-max-frequency = <50000000>;
>     ...
>               partition@0 { 
>         label = "qspi-0";
>         reg = <0x0 0xf00000>;
>     };
>                         
> 
>   }
>   
>   flash@1 {
>     compatible = "jedec,spi-nor"
>               reg = <0x01>;
>     spi-max-frequency = <50000000>;
>     ...
>               partition@0 { 
>         label = "qspi-1";
>         reg = <0x0 0x800000>;
>     };
>   }
> }

Thanks,
Miquèl
Miquel Raynal Aug. 12, 2024, 8:39 a.m. UTC | #39
Hi Michael,

michael@walle.cc wrote on Mon, 12 Aug 2024 09:37:06 +0200:

> Hi,
> 
> > > > > The first round of patches were really invasive regarding the core
> > > > > code. So if there is a clean layering approach which can be enabled
> > > > > as a module and you are maintaining it I'm fine with that (even if
> > > > > the core code needs some changes then like hooks or so, not sure).    
> > > >
> > > > That discussion started with Miquel some years ago when he was trying to to 
> > > > solve description in DT which is merged for a while in the kernel.    
> > > 
> > > What's your point here? From what I can tell the DT binding is wrong
> > > and needs to be reworked anyway.  
> >
> > I'm sorry I'm now catching up, can you point at the thread explaining
> > what is wrong in the bindings? I didn't find where this was detailed. Or
> > otherwise summarize quickly what needs to change?  
> 
> Somewhere in this mega thread Tudor had some remarks about the
> bindings. Amit also mentioned it here:
> 
> https://lore.kernel.org/r/IA0PR12MB769944254171C39FF4171B52DCB42@IA0PR12MB7699.namprd12.prod.outlook.com/

Great. I jumped-in there. Thanks!

Miquèl
Tudor Ambarus Aug. 12, 2024, 9:45 a.m. UTC | #40
Hiya,

Amit, the thread becomes unreadable, better start a new one describing
what you plan to do.

On 8/12/24 9:38 AM, Miquel Raynal wrote:
> For now I don't think you need a totally generic implementation. As
> long as there is only one controller supporting these modes, I'd say
> this is not super relevant.


Miquel,

Microchip supports a twin quad mode too, and micron has some flashes
with similar architecture, see:

https://lore.kernel.org/linux-mtd/20231125092137.2948-1-amit.kumar-mahapatra@amd.com/T/#mbe999dde1d29371183aa53089ef6f0087d6902a7

We shall consider them from the beginning. I guess we need to read
everything all over again.
Mahapatra, Amit Kumar Aug. 14, 2024, 7:13 a.m. UTC | #41
Hello Miquel,

> > Based on the inputs/suggestions from Tudor, i am planning to add a new
> > layer between the SPI-NOR and MTD layers to support stacked and
> > parallel configurations. This new layer will be part of the spi-nor
> > and located in mtd/spi-nor/
> 
> For now I don't think you need a totally generic implementation. As long as
> there is only one controller supporting these modes, I'd say this is not super
> relevant.

IMHO, there should be a general solution since this isn't limited to just 
one controller. Any controller can support stacked mode with multiple 
native-cs or multiple gpio-cs, or with a combination of a native-cs and a 
gpio-cs. For parallel configurations, there are other controllers from 
Microchip and some flash devices that operate similarly to AMD's parallel 
mode.

> 
> > This layer would perform the following tasks:
> >  - During probing, store information from all the connected flashes,
> >    whether in stacked or parallel mode, and present it as a single device
> >    to the MTD layer.
> >  - Register callbacks through this new layer instead of spi-nor/core.c and
> >    handle MTD device registration.
> >  - In stacked mode, select the appropriate spi-nor flash based on the
> >    address provided by the MTD layer during flash operations.
> >  - Manage flash crossover operations in stacked mode.
> >  - Ensure both connected flashes are identical in parallel mode.
> >  - Handle odd byte count requests from the MTD layer during flash
> >    operations in parallel mode.
> >
> > For implementing this the current DT binding need to be updated as
> > follows.
> 
> So you want to go back to step 1 and redefine bindings? Is that worth?

The current bindings are effective if we only support identical 
flash devices or flashes of the same make but with different sizes 
connected in stacked mode. However, if we want to extend stacked support 
to include flashes of different makes in stacked mode, the current 
bindings may not be adequate. That's why I suggested updating the bindings 
to accommodate all possible scenario.

> 
> > stacked-memories DT changes:
> >  - Flash size information can be retrieved directly from the flash, so it
> >    has been removed from the DT binding.
> >  - Each stacked flash will have its own flash node. This approach allows
> >    flashes of different makes and sizes to be stacked together, as each
> >    flash will be probed individually.
> 
> And how will you define partitions crossing device boundaries? I believe this
> constraint has been totally forgotten in this proposal.

According to the new binding proposal, one of the two flash nodes will 
have a partition that crosses the device boundary.

> The whole idea of stacking two devices this way was to simplify the user's life
> with a single device exposed and the controller handling itself the CS changes.
> That is precisely what the current binding do.

That's true, but as I mentioned earlier, if we're not inclined to support 
stacked mode for flashes of different makes, then the current bindings 
are sufficient.

> The final goal being to double your storage transparently. If your goal was to
> put two chips aside, then none of this was actually needed. If you don't care
> about that anymore, then all the energy put into discussing the bindings
> initially was useless and a controller property could also have made the trick.
> 
> >  - The stacked-memories DT bindings will contain the phandles of the flash
> >    nodes connected in stacked mode.
> >
> > spi@0 {
> >
> >   flash@0 {
> >     compatible = "jedec,spi-nor"
> >     reg = <0x00>;
> >     stacked-memories = <&flash@0 &flash@1>;
> >     spi-max-frequency = <50000000>;
> >     ...
> >               partition@0 {
> >         label = "qspi-0";
> >         reg = <0x0 0xf00000>;
> >     };
> >
> >
> >   }
> >
> >   flash@1 {
> >     compatible = "jedec,spi-nor"
> >               reg = <0x01>;
> >     spi-max-frequency = <50000000>;
> >     ...
> >               partition@0 {
> >         label = "qspi-1";
> >         reg = <0x0 0x800000>;
> >     };
> >   }
> > }
> >
> > parallel-memories DT changes:
> >  - Flash size information can be retrieved directly from the flash, so it
> >    has been removed from the DT binding.
> >  - Each flash connected in parallel mode will have its own flash node.
> >    This allows us to verify that both flashes connected in parallel are
> >    identical, as each flash node will be probed separately.
> 
> Well, you don't have to verify that. It's a basic hardware design constraint for
> using this mode.

Agree

Regards,
Amit

> 
> Otherwise same comment as above, this description doesn't allow correct
> partitioning and that was one of the main constraints back when we discussed
> these needs.
> 
> >  - The parallel-memories DT bindings will contain the phandles of the
> >    flash nodes connected in parallel.
> >
> > spi@0 {
> >
> >   flash@0 {
> >     compatible = "jedec,spi-nor"
> >     reg = <0x00>;
> >     parallel-memories = <&flash@0 &flash@1>;
> >     spi-max-frequency = <50000000>;
> >     ...
> >               partition@0 {
> >         label = "qspi-0";
> >         reg = <0x0 0xf00000>;
> >     };
> >
> >
> >   }
> >
> >   flash@1 {
> >     compatible = "jedec,spi-nor"
> >               reg = <0x01>;
> >     spi-max-frequency = <50000000>;
> >     ...
> >               partition@0 {
> >         label = "qspi-1";
> >         reg = <0x0 0x800000>;
> >     };
> >   }
> > }
> 
> Thanks,
> Miquèl
Miquel Raynal Aug. 14, 2024, 8:46 a.m. UTC | #42
Hi Amit,

amit.kumar-mahapatra@amd.com wrote on Wed, 14 Aug 2024 07:13:35 +0000:

> Hello Miquel,
> 
> > > Based on the inputs/suggestions from Tudor, i am planning to add a new
> > > layer between the SPI-NOR and MTD layers to support stacked and
> > > parallel configurations. This new layer will be part of the spi-nor
> > > and located in mtd/spi-nor/  
> > 
> > For now I don't think you need a totally generic implementation. As long as
> > there is only one controller supporting these modes, I'd say this is not super
> > relevant.  
> 
> IMHO, there should be a general solution since this isn't limited to just 
> one controller. Any controller can support stacked mode with multiple 
> native-cs or multiple gpio-cs, or with a combination of a native-cs and a 
> gpio-cs.

That's not what was initially discussed. The Xilinx use case was:
a controller is managing two devices "at the same time" transparently
from the host. So the two flashes appear like a single one and thus,
are described like a single one.

You don't need anything in the bindings nor in the core to manage two
flashes connected to the same controller otherwise. The only use case
the Xilinx model was bringing, was to consider the storage bigger from
a host perspective and thus be able to store files across the device
boundary natively.

> For parallel configurations, there are other controllers from 
> Microchip and some flash devices that operate similarly to AMD's parallel 
> mode.

Yes, Tudor reminded me about these.

> > > This layer would perform the following tasks:
> > >  - During probing, store information from all the connected flashes,
> > >    whether in stacked or parallel mode, and present it as a single device
> > >    to the MTD layer.
> > >  - Register callbacks through this new layer instead of spi-nor/core.c and
> > >    handle MTD device registration.
> > >  - In stacked mode, select the appropriate spi-nor flash based on the
> > >    address provided by the MTD layer during flash operations.
> > >  - Manage flash crossover operations in stacked mode.
> > >  - Ensure both connected flashes are identical in parallel mode.
> > >  - Handle odd byte count requests from the MTD layer during flash
> > >    operations in parallel mode.
> > >
> > > For implementing this the current DT binding need to be updated as
> > > follows.  
> > 
> > So you want to go back to step 1 and redefine bindings? Is that worth?  
> 
> The current bindings are effective if we only support identical 
> flash devices or flashes of the same make but with different sizes 
> connected in stacked mode. However, if we want to extend stacked support 
> to include flashes of different makes in stacked mode,

The only actual feature the stacked mode brings is the ability to
consider two devices like one. This is abstracted by hardware, this is
a controller capability. The only way this can work is if the two
storage devices are of the same kind and accept the same commands/init
cycles.

If you consider two different devices, then there is no hardware
abstraction anymore, the controller is no longer "smart" enough and you
are back to the standard situation with two devices connected using
their own independent CS, known by the host. In this case the
"stacked-mode" bindings no longer apply. You need to describe the two
chips independently in the DT, and your stacked feature in the
controller can no longer be used.

You need other bindings to support this case because it is a different
situation. For this case, there was a mtd-concat approach (which was
never merged). But this is really something different than the stacked
mode in your controller because there is no specific hardware feature
involved, it's just pure software.

> the current 
> bindings may not be adequate. That's why I suggested updating the bindings 
> to accommodate all possible scenario.
> 
> >   
> > > stacked-memories DT changes:
> > >  - Flash size information can be retrieved directly from the flash, so it
> > >    has been removed from the DT binding.
> > >  - Each stacked flash will have its own flash node. This approach allows
> > >    flashes of different makes and sizes to be stacked together, as each
> > >    flash will be probed individually.  
> > 
> > And how will you define partitions crossing device boundaries? I believe this
> > constraint has been totally forgotten in this proposal.  
> 
> According to the new binding proposal, one of the two flash nodes will 
> have a partition that crosses the device boundary.

From a bindings perspective, it feels very awkward and I doubt it will
be accepted. From a code perspective, you're gonna need to butcher the
core...

> > The whole idea of stacking two devices this way was to simplify the user's life
> > with a single device exposed and the controller handling itself the CS changes.
> > That is precisely what the current binding do.  
> 
> That's true, but as I mentioned earlier, if we're not inclined to support 
> stacked mode for flashes of different makes, then the current bindings 
> are sufficient.

Thanks,
Miquèl
Mahapatra, Amit Kumar Aug. 14, 2024, 12:53 p.m. UTC | #43
Hello Miquel,

> That's not what was initially discussed. The Xilinx use case was:
> a controller is managing two devices "at the same time" transparently from

Yes, but in stacked mode, the controller communicates with one of the two 
connected flash devices at any given time, based on the address and data 
length. It doesn't assert both chip selects simultaneously.

> the host. So the two flashes appear like a single one and thus, are described
> like a single one.
> 
> You don't need anything in the bindings nor in the core to manage two
> flashes connected to the same controller otherwise. The only use case the
> Xilinx model was bringing, was to consider the storage bigger from a host
> perspective and thus be able to store files across the device boundary
> natively.

When adding stacked support to the SPI core, Mark also asked us to support 
the GPIO chip select use case, so it is not only restricted to native cs.

> 
> > For parallel configurations, there are other controllers from
> > Microchip and some flash devices that operate similarly to AMD's
> > parallel mode.
> 
> Yes, Tudor reminded me about these.
> 
> > > > This layer would perform the following tasks:
> > > >  - During probing, store information from all the connected flashes,
> > > >    whether in stacked or parallel mode, and present it as a single device
> > > >    to the MTD layer.
> > > >  - Register callbacks through this new layer instead of spi-nor/core.c and
> > > >    handle MTD device registration.
> > > >  - In stacked mode, select the appropriate spi-nor flash based on the
> > > >    address provided by the MTD layer during flash operations.
> > > >  - Manage flash crossover operations in stacked mode.
> > > >  - Ensure both connected flashes are identical in parallel mode.
> > > >  - Handle odd byte count requests from the MTD layer during flash
> > > >    operations in parallel mode.
> > > >
> > > > For implementing this the current DT binding need to be updated as
> > > > follows.
> > >
> > > So you want to go back to step 1 and redefine bindings? Is that worth?
> >
> > The current bindings are effective if we only support identical flash
> > devices or flashes of the same make but with different sizes connected
> > in stacked mode. However, if we want to extend stacked support to
> > include flashes of different makes in stacked mode,
> 
> The only actual feature the stacked mode brings is the ability to consider two
> devices like one. This is abstracted by hardware, this is a controller capability.

Stacked mode is a software abstraction rather than a controller feature or 
capability. At any given time, the controller communicates with one of the 
two connected flash devices, as determined by the requested address and data 
length. If an operation starts on one flash and ends on the other, the core 
needs to split it into two separate operations and adjust the data length 
accordingly.

> The only way this can work is if the two storage devices are of the same kind
> and accept the same commands/init cycles.
> 
> If you consider two different devices, then there is no hardware abstraction
> anymore, the controller is no longer "smart" enough and you are back to the
> standard situation with two devices connected using their own independent
> CS, known by the host. In this case the "stacked-mode" bindings no longer
> apply. You need to describe the two chips independently in the DT, and your
> stacked feature in the controller can no longer be used.

As stated earlier stacked is not a controller feature but rather a software 
abstraction to assert the appropriate cs as per the requested address & data 
length.

Regards,
Amit
> 
> You need other bindings to support this case because it is a different
> situation. For this case, there was a mtd-concat approach (which was never
> merged). But this is really something different than the stacked mode in your
> controller because there is no specific hardware feature involved, it's just pure
> software.
> 
> > the current
> > bindings may not be adequate. That's why I suggested updating the
> > bindings to accommodate all possible scenario.
> >
> > >
> > > > stacked-memories DT changes:
> > > >  - Flash size information can be retrieved directly from the flash, so it
> > > >    has been removed from the DT binding.
> > > >  - Each stacked flash will have its own flash node. This approach allows
> > > >    flashes of different makes and sizes to be stacked together, as each
> > > >    flash will be probed individually.
> > >
> > > And how will you define partitions crossing device boundaries? I
> > > believe this constraint has been totally forgotten in this proposal.
> >
> > According to the new binding proposal, one of the two flash nodes will
> > have a partition that crosses the device boundary.
> 
> From a bindings perspective, it feels very awkward and I doubt it will be
> accepted. From a code perspective, you're gonna need to butcher the core...
> 
> > > The whole idea of stacking two devices this way was to simplify the
> > > user's life with a single device exposed and the controller handling itself
> the CS changes.
> > > That is precisely what the current binding do.
> >
> > That's true, but as I mentioned earlier, if we're not inclined to
> > support stacked mode for flashes of different makes, then the current
> > bindings are sufficient.
> 
> Thanks,
> Miquèl
Miquel Raynal Aug. 14, 2024, 2:46 p.m. UTC | #44
Hi Amit,

> > > > > For implementing this the current DT binding need to be updated as
> > > > > follows.  
> > > >
> > > > So you want to go back to step 1 and redefine bindings? Is that worth?  
> > >
> > > The current bindings are effective if we only support identical flash
> > > devices or flashes of the same make but with different sizes connected
> > > in stacked mode. However, if we want to extend stacked support to
> > > include flashes of different makes in stacked mode,  
> > 
> > The only actual feature the stacked mode brings is the ability to consider two
> > devices like one. This is abstracted by hardware, this is a controller capability.  
> 
> Stacked mode is a software abstraction rather than a controller feature or 
> capability. At any given time, the controller communicates with one of the 
> two connected flash devices, as determined by the requested address and data 
> length. If an operation starts on one flash and ends on the other, the core 
> needs to split it into two separate operations and adjust the data length 
> accordingly.

I'm sorry, that was not my understanding, cf the initial RFC:

	Subject: [RFC PATCH 0/3] Dual stacked/parallel memories bindings
	Date: Fri, 12 Nov 2021 16:24:08 +0100

	"[...] supporting specific SPI controller modes like Xilinx's
	where the controller can highly abstract the hardware and
	provide access to a single bigger device instead [...]"

Furthermore, I rapidly checked the Zynq7000 TRM, it suggests that the
controller is capable of addressing the right memory itself based on
the address, especially in linear mode? 

	https://docs.amd.com/r/en-US/ug585-zynq-7000-SoC-TRM/Dual-SS-4-bit-Stacked-I/O

	"The lower SPI flash memory should always be connected if the
	linear Quad-SPI memory subsystem is used, and the upper flash
	memory is optional. Total address space is 32 MB with a 25-bit
	address. In IO mode, the MSB of the address is defined by
	U_PAGE which is located at bit 28 of register 0xA0 . In Linear
	address mode, AXI address bit 24 determines the upper or lower
	memory page. All of the commands will be executed by the device
	selected by U_PAGE in I/O mode and address bit 24 in linear
	mode."

Anyway, you may decide to go down the "pure software" route, which is
probably easier from an implementation perspective, but means you're
gonna have to argue -again- in favor of the representation of a purely
virtual device that is not hardware.

Cheers,
Miquèl
Mahapatra, Amit Kumar Aug. 19, 2024, 10:28 a.m. UTC | #45
Hello Miquel,

> Hi Amit,
> 
> > > > > > For implementing this the current DT binding need to be
> > > > > > updated as follows.
> > > > >
> > > > > So you want to go back to step 1 and redefine bindings? Is that worth?
> > > >
> > > > The current bindings are effective if we only support identical
> > > > flash devices or flashes of the same make but with different sizes
> > > > connected in stacked mode. However, if we want to extend stacked
> > > > support to include flashes of different makes in stacked mode,
> > >
> > > The only actual feature the stacked mode brings is the ability to
> > > consider two devices like one. This is abstracted by hardware, this is a
> controller capability.
> >
> > Stacked mode is a software abstraction rather than a controller
> > feature or capability. At any given time, the controller communicates
> > with one of the two connected flash devices, as determined by the
> > requested address and data length. If an operation starts on one flash
> > and ends on the other, the core needs to split it into two separate
> > operations and adjust the data length accordingly.
> 
> I'm sorry, that was not my understanding, cf the initial RFC:
> 
> 	Subject: [RFC PATCH 0/3] Dual stacked/parallel memories bindings
> 	Date: Fri, 12 Nov 2021 16:24:08 +0100
> 
> 	"[...] supporting specific SPI controller modes like Xilinx's
> 	where the controller can highly abstract the hardware and
> 	provide access to a single bigger device instead [...]"
> 
> Furthermore, I rapidly checked the Zynq7000 TRM, it suggests that the
> controller is capable of addressing the right memory itself based on the
> address, especially in linear mode?

Yes, this is true only when operating in Linear Mode. In this mode, the 
Zynq 7000 can only perform read operations. Erase and write operations 
are possible only in I/O mode, so while operating in I/O mode the CS pins 
must be manually asserted by the driver according to the address and data 
length
> 
> 	https://docs.amd.com/r/en-US/ug585-zynq-7000-SoC-TRM/Dual-SS-
> 4-bit-Stacked-I/O
> 
> 	"The lower SPI flash memory should always be connected if the
> 	linear Quad-SPI memory subsystem is used, and the upper flash
> 	memory is optional. Total address space is 32 MB with a 25-bit
> 	address. In IO mode, the MSB of the address is defined by
> 	U_PAGE which is located at bit 28 of register 0xA0 . In Linear
> 	address mode, AXI address bit 24 determines the upper or lower
> 	memory page. All of the commands will be executed by the device
> 	selected by U_PAGE in I/O mode and address bit 24 in linear
> 	mode."
> 
> Anyway, you may decide to go down the "pure software" route, which is
> probably easier from an implementation perspective, but means you're
> gonna have to argue -again- in favor of the representation of a purely virtual
> device that is not hardware.

Yes, that's correct, but I was planning to manage it through a new layer rather 
than using mtd-concat. As Tudor mentioned, I will start a new email thread 
outlining my proposal so we can resume the discussion on the patch moving 
forward.

Regards,
Amit
> 
> Cheers,
> Miquèl
diff mbox series

Patch

diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 93ae69b7ff83..e990be7c7eb6 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -1821,13 +1821,18 @@  static int spi_nor_erase_multi_sectors(struct spi_nor *nor, u64 addr, u32 len)
 static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 {
 	struct spi_nor *nor = mtd_to_spi_nor(mtd);
-	u32 addr, len;
+	struct spi_nor_flash_parameter *params;
+	u32 addr, len, offset, cur_cs_num = 0;
 	uint32_t rem;
 	int ret;
+	u64 sz;
 
 	dev_dbg(nor->dev, "at 0x%llx, len %lld\n", (long long)instr->addr,
 			(long long)instr->len);
 
+	params = spi_nor_get_params(nor, 0);
+	sz = params->size;
+
 	if (spi_nor_has_uniform_erase(nor)) {
 		div_u64_rem(instr->len, mtd->erasesize, &rem);
 		if (rem)
@@ -1849,23 +1854,27 @@  static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 		if (ret)
 			goto erase_err;
 
-		ret = spi_nor_erase_chip(nor);
-		spi_nor_unlock_device(nor);
-		if (ret)
-			goto erase_err;
+		while (cur_cs_num < nor->num_flash) {
+			nor->spimem->spi->cs_index_mask = 0x01 << cur_cs_num;
+			ret = spi_nor_erase_chip(nor);
+			spi_nor_unlock_device(nor);
+			if (ret)
+				goto erase_err;
 
-		/*
-		 * Scale the timeout linearly with the size of the flash, with
-		 * a minimum calibrated to an old 2MB flash. We could try to
-		 * pull these from CFI/SFDP, but these values should be good
-		 * enough for now.
-		 */
-		timeout = max(CHIP_ERASE_2MB_READY_WAIT_JIFFIES,
-			      CHIP_ERASE_2MB_READY_WAIT_JIFFIES *
-			      (unsigned long)(mtd->size / SZ_2M));
-		ret = spi_nor_wait_till_ready_with_timeout(nor, timeout);
-		if (ret)
-			goto erase_err;
+			/*
+			 * Scale the timeout linearly with the size of the flash, with
+			 * a minimum calibrated to an old 2MB flash. We could try to
+			 * pull these from CFI/SFDP, but these values should be good
+			 * enough for now.
+			 */
+			timeout = max(CHIP_ERASE_2MB_READY_WAIT_JIFFIES,
+				      CHIP_ERASE_2MB_READY_WAIT_JIFFIES *
+				      (unsigned long)(params->size / SZ_2M));
+			ret = spi_nor_wait_till_ready_with_timeout(nor, timeout);
+			if (ret)
+				goto erase_err;
+			cur_cs_num++;
+		}
 
 	/* REVISIT in some cases we could speed up erasing large regions
 	 * by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K.  We may have set up
@@ -1874,12 +1883,26 @@  static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 
 	/* "sector"-at-a-time erase */
 	} else if (spi_nor_has_uniform_erase(nor)) {
+		/* Determine the flash from which the operation need to start */
+		while ((cur_cs_num < nor->num_flash) && (addr > sz - 1)) {
+			cur_cs_num++;
+			params = spi_nor_get_params(nor, cur_cs_num);
+			sz += params->size;
+		}
+
 		while (len) {
 			ret = spi_nor_lock_device(nor);
 			if (ret)
 				goto erase_err;
 
-			ret = spi_nor_erase_sector(nor, addr);
+			nor->spimem->spi->cs_index_mask = 0x01 << cur_cs_num;
+			offset = addr;
+			if (nor->flags & SNOR_F_HAS_STACKED) {
+				params = spi_nor_get_params(nor, cur_cs_num);
+				offset -= (sz - params->size);
+			}
+
+			ret = spi_nor_erase_sector(nor, offset);
 			spi_nor_unlock_device(nor);
 			if (ret)
 				goto erase_err;
@@ -1890,13 +1913,45 @@  static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 
 			addr += mtd->erasesize;
 			len -= mtd->erasesize;
+
+			/*
+			 * Flash cross over condition in stacked mode.
+			 */
+			if ((nor->flags & SNOR_F_HAS_STACKED) && (addr > sz - 1)) {
+				cur_cs_num++;
+				params = spi_nor_get_params(nor, cur_cs_num);
+				sz += params->size;
+			}
 		}
 
 	/* erase multiple sectors */
 	} else {
-		ret = spi_nor_erase_multi_sectors(nor, addr, len);
-		if (ret)
-			goto erase_err;
+		u64 erase_len = 0;
+
+		/* Determine the flash from which the operation need to start */
+		while ((cur_cs_num < nor->num_flash) && (addr > sz - 1)) {
+			cur_cs_num++;
+			params = spi_nor_get_params(nor, cur_cs_num);
+			sz += params->size;
+		}
+		/* perform multi sector erase onec per Flash*/
+		while (len) {
+			erase_len = (len > (sz - addr)) ? (sz - addr) : len;
+			offset = addr;
+			nor->spimem->spi->cs_index_mask = 0x01 << cur_cs_num;
+			if (nor->flags & SNOR_F_HAS_STACKED) {
+				params = spi_nor_get_params(nor, cur_cs_num);
+				offset -= (sz - params->size);
+			}
+			ret = spi_nor_erase_multi_sectors(nor, offset, erase_len);
+			if (ret)
+				goto erase_err;
+			len -= erase_len;
+			addr += erase_len;
+			cur_cs_num++;
+			params = spi_nor_get_params(nor, cur_cs_num);
+			sz += params->size;
+		}
 	}
 
 	ret = spi_nor_write_disable(nor);
@@ -2087,9 +2142,11 @@  static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
 			size_t *retlen, u_char *buf)
 {
 	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	struct spi_nor_flash_parameter *params;
+	ssize_t ret, read_len, len_lock =  len;
 	loff_t from_lock = from;
-	size_t len_lock = len;
-	ssize_t ret;
+	u32 cur_cs_num = 0;
+	u64 sz;
 
 	dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len);
 
@@ -2097,9 +2154,23 @@  static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
 	if (ret)
 		return ret;
 
+	params = spi_nor_get_params(nor, 0);
+	sz = params->size;
+
+	/* Determine the flash from which the operation need to start */
+	while ((cur_cs_num < nor->num_flash) && (from > sz - 1)) {
+		cur_cs_num++;
+		params = spi_nor_get_params(nor, cur_cs_num);
+		sz += params->size;
+	}
 	while (len) {
 		loff_t addr = from;
 
+		nor->spimem->spi->cs_index_mask = 0x01 << cur_cs_num;
+		read_len = (len > (sz - addr)) ? (sz - addr) : len;
+		params = spi_nor_get_params(nor, cur_cs_num);
+		addr -= (sz - params->size);
+
 		addr = spi_nor_convert_addr(nor, addr);
 
 		ret = spi_nor_read_data(nor, addr, len, buf);
@@ -2111,11 +2182,22 @@  static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
 		if (ret < 0)
 			goto read_err;
 
-		WARN_ON(ret > len);
+		WARN_ON(ret > read_len);
 		*retlen += ret;
 		buf += ret;
 		from += ret;
 		len -= ret;
+
+		/*
+		 * Flash cross over condition in stacked mode.
+		 *
+		 */
+		if ((nor->flags & SNOR_F_HAS_STACKED) && (from > sz - 1)) {
+			cur_cs_num++;
+			params = spi_nor_get_params(nor, cur_cs_num);
+			sz += params->size;
+		}
+
 	}
 	ret = 0;
 
@@ -2136,8 +2218,9 @@  static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 	struct spi_nor *nor = mtd_to_spi_nor(mtd);
 	struct spi_nor_flash_parameter *params;
 	size_t page_offset, page_remain, i;
+	u32 page_size, cur_cs_num = 0;
 	ssize_t ret;
-	u32 page_size;
+	u64 sz;
 
 	dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
 
@@ -2147,6 +2230,14 @@  static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 	params = spi_nor_get_params(nor, 0);
 	page_size = params->page_size;
+	sz = params->size;
+
+	/* Determine the flash from which the operation need to start */
+	while ((cur_cs_num < nor->num_flash) && (to > sz - 1)) {
+		cur_cs_num++;
+		params = spi_nor_get_params(nor, cur_cs_num);
+		sz += params->size;
+	}
 
 	for (i = 0; i < len; ) {
 		ssize_t written;
@@ -2167,6 +2258,10 @@  static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 		/* the size of data remaining on the first page */
 		page_remain = min_t(size_t, page_size - page_offset, len - i);
 
+		nor->spimem->spi->cs_index_mask = 0x01 << cur_cs_num;
+		params = spi_nor_get_params(nor, cur_cs_num);
+		addr -= (sz - params->size);
+
 		addr = spi_nor_convert_addr(nor, addr);
 
 		ret = spi_nor_lock_device(nor);
@@ -2184,6 +2279,15 @@  static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 			goto write_err;
 		*retlen += written;
 		i += written;
+
+		/*
+		 * Flash cross over condition in stacked mode.
+		 */
+		if ((nor->flags & SNOR_F_HAS_STACKED) && ((to + i) > sz - 1)) {
+			cur_cs_num++;
+			params = spi_nor_get_params(nor, cur_cs_num);
+			sz += params->size;
+		}
 	}
 
 write_err:
@@ -2297,8 +2401,6 @@  int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
 static int spi_nor_spimem_check_op(struct spi_nor *nor,
 				   struct spi_mem_op *op)
 {
-	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor, 0);
-
 	/*
 	 * First test with 4 address bytes. The opcode itself might
 	 * be a 3B addressing opcode but we don't care, because
@@ -2307,7 +2409,7 @@  static int spi_nor_spimem_check_op(struct spi_nor *nor,
 	 */
 	op->addr.nbytes = 4;
 	if (!spi_mem_supports_op(nor->spimem, op)) {
-		if (params->size > SZ_16M)
+		if (nor->mtd.size > SZ_16M)
 			return -EOPNOTSUPP;
 
 		/* If flash size <= 16MB, 3 address bytes are sufficient */
@@ -2905,7 +3007,10 @@  static void spi_nor_init_fixup_flags(struct spi_nor *nor)
 static int spi_nor_late_init_params(struct spi_nor *nor)
 {
 	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor, 0);
-	int ret;
+	struct device_node *np = spi_nor_get_flash_node(nor);
+	u64 flash_size[SNOR_FLASH_CNT_MAX];
+	u32 idx = 0;
+	int rc, ret;
 
 	if (nor->manufacturer && nor->manufacturer->fixups &&
 	    nor->manufacturer->fixups->late_init) {
@@ -2937,6 +3042,44 @@  static int spi_nor_late_init_params(struct spi_nor *nor)
 	if (params->n_banks > 1)
 		params->bank_size = div64_u64(params->size, params->n_banks);
 
+	nor->num_flash = 0;
+
+	/*
+	 * The flashes that are connected in stacked mode should be of same make.
+	 * Except the flash size all other properties are identical for all the
+	 * flashes connected in stacked mode.
+	 * The flashes that are connected in parallel mode should be identical.
+	 */
+	while (idx < SNOR_FLASH_CNT_MAX) {
+		rc = of_property_read_u64_index(np, "stacked-memories", idx, &flash_size[idx]);
+		if (rc)
+			break;
+		idx++;
+		if (!(nor->flags & SNOR_F_HAS_STACKED))
+			nor->flags |= SNOR_F_HAS_STACKED;
+
+		nor->num_flash++;
+	}
+
+	/*
+	 * By default one flash device should be connected
+	 * so, nor->num_flash is 1.
+	 */
+	if (!nor->num_flash)
+		nor->num_flash = 1;
+
+	if (nor->flags & SNOR_F_HAS_STACKED) {
+		for (idx = 1; idx < nor->num_flash; idx++) {
+			params = spi_nor_get_params(nor, idx);
+			params = devm_kzalloc(nor->dev, sizeof(*params), GFP_KERNEL);
+			if (params) {
+				memcpy(params, spi_nor_get_params(nor, 0), sizeof(*params));
+				params->size = flash_size[idx];
+				spi_nor_set_params(nor, idx, params);
+			}
+		}
+	}
+
 	return 0;
 }
 
@@ -3145,16 +3288,28 @@  static int spi_nor_set_octal_dtr(struct spi_nor *nor, bool enable)
  */
 static int spi_nor_quad_enable(struct spi_nor *nor)
 {
-	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor, 0);
+	struct spi_nor_flash_parameter *params;
+	int err, idx;
 
-	if (!params->quad_enable)
-		return 0;
+	for (idx = 0; idx < nor->num_flash; idx++) {
+		params = spi_nor_get_params(nor, idx);
+		if (!params->quad_enable)
+			return 0;
 
-	if (!(spi_nor_get_protocol_width(nor->read_proto) == 4 ||
-	      spi_nor_get_protocol_width(nor->write_proto) == 4))
-		return 0;
+		if (!(spi_nor_get_protocol_width(nor->read_proto) == 4 ||
+		      spi_nor_get_protocol_width(nor->write_proto) == 4))
+			return 0;
+		/*
+		 * Set the appropriate CS index before
+		 * issuing the command.
+		 */
+		nor->spimem->spi->cs_index_mask = 0x01 << idx;
 
-	return params->quad_enable(nor);
+		err = params->quad_enable(nor);
+		if (err)
+			return err;
+	}
+	return err;
 }
 
 /**
@@ -3186,7 +3341,7 @@  int spi_nor_set_4byte_addr_mode(struct spi_nor *nor, bool enable)
 
 static int spi_nor_init(struct spi_nor *nor)
 {
-	int err;
+	int err, idx;
 
 	err = spi_nor_set_octal_dtr(nor, true);
 	if (err) {
@@ -3227,9 +3382,16 @@  static int spi_nor_init(struct spi_nor *nor)
 		 */
 		WARN_ONCE(nor->flags & SNOR_F_BROKEN_RESET,
 			  "enabling reset hack; may not recover from unexpected reboots\n");
-		err = spi_nor_set_4byte_addr_mode(nor, true);
-		if (err)
-			return err;
+		for (idx = 0; idx < nor->num_flash; idx++) {
+			/*
+			 * Select the appropriate CS index before
+			 * issuing the command.
+			 */
+			nor->spimem->spi->cs_index_mask = 0x01 << idx;
+			err = spi_nor_set_4byte_addr_mode(nor, true);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
@@ -3344,18 +3506,28 @@  static void spi_nor_put_device(struct mtd_info *mtd)
 static void spi_nor_restore(struct spi_nor *nor)
 {
 	int ret;
+	int idx;
 
 	/* restore the addressing mode */
 	if (nor->addr_nbytes == 4 && !(nor->flags & SNOR_F_4B_OPCODES) &&
 	    nor->flags & SNOR_F_BROKEN_RESET) {
-		ret = spi_nor_set_4byte_addr_mode(nor, false);
-		if (ret)
+		for (idx = 0; idx < nor->num_flash; idx++) {
 			/*
-			 * Do not stop the execution in the hope that the flash
-			 * will default to the 3-byte address mode after the
-			 * software reset.
+			 * Select the appropriate CS index before
+			 * issuing the command.
 			 */
-			dev_err(nor->dev, "Failed to exit 4-byte address mode, err = %d\n", ret);
+			nor->spimem->spi->cs_index_mask = 1 << idx;
+			ret = spi_nor_set_4byte_addr_mode(nor, false);
+			if (ret)
+				/*
+				 * Do not stop the execution in the hope that the flash
+				 * will default to the 3-byte address mode after the
+				 * software reset.
+				 */
+				dev_err(nor->dev,
+					"Failed to exit 4-byte address mode, err = %d\n",
+					ret);
+		}
 	}
 
 	if (nor->flags & SNOR_F_SOFT_RESET)
@@ -3422,6 +3594,8 @@  static void spi_nor_set_mtd_info(struct spi_nor *nor)
 	struct spi_nor_flash_parameter *params = spi_nor_get_params(nor, 0);
 	struct mtd_info *mtd = &nor->mtd;
 	struct device *dev = nor->dev;
+	u64 total_sz = 0;
+	int idx;
 
 	spi_nor_set_mtd_locking_ops(nor);
 	spi_nor_set_mtd_otp_ops(nor);
@@ -3440,7 +3614,11 @@  static void spi_nor_set_mtd_info(struct spi_nor *nor)
 		mtd->_erase = spi_nor_erase;
 	mtd->writesize = params->writesize;
 	mtd->writebufsize = params->page_size;
-	mtd->size = params->size;
+	for (idx = 0; idx < nor->num_flash; idx++) {
+		params = spi_nor_get_params(nor, idx);
+		total_sz += params->size;
+	}
+	mtd->size = total_sz;
 	mtd->_read = spi_nor_read;
 	/* Might be already set by some SST flashes. */
 	if (!mtd->_write)
diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index 93cd2fc3606d..b2997eca7551 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -18,6 +18,9 @@ 
 #define SPI_NOR_DEFAULT_N_BANKS 1
 #define SPI_NOR_DEFAULT_SECTOR_SIZE SZ_64K
 
+/* In single configuration enable CS0 */
+#define SPI_NOR_ENABLE_CS0     BIT(0)
+
 /* Standard SPI NOR flash operations. */
 #define SPI_NOR_READID_OP(naddr, ndummy, buf, len)			\
 	SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 0),			\
@@ -140,6 +143,7 @@  enum spi_nor_option_flags {
 	SNOR_F_RWW		= BIT(14),
 	SNOR_F_ECC		= BIT(15),
 	SNOR_F_NO_WP		= BIT(16),
+	SNOR_F_HAS_STACKED      = BIT(17),
 };
 
 struct spi_nor_read_command {
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 59909e7d6f53..9d72b0bbab94 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -127,6 +127,12 @@ 
 #define SR2_LB3			BIT(5)	/* Security Register Lock Bit 3 */
 #define SR2_QUAD_EN_BIT7	BIT(7)
 
+/*
+ * Maximum number of flashes that can be connected
+ * in stacked/parallel configuration
+ */
+#define	SNOR_FLASH_CNT_MAX	4
+
 /* Supported SPI protocols */
 #define SNOR_PROTO_INST_MASK	GENMASK(23, 16)
 #define SNOR_PROTO_INST_SHIFT	16
@@ -378,6 +384,7 @@  struct spi_nor_flash_parameter;
  *                      hooks, or dynamically when parsing the SFDP tables.
  * @dirmap:		pointers to struct spi_mem_dirmap_desc for reads/writes.
  * @priv:		pointer to the private data
+ * @num_flash		number of flashes connected in parallel or stacked mode
  */
 struct spi_nor {
 	struct mtd_info		mtd;
@@ -412,13 +419,13 @@  struct spi_nor {
 
 	const struct spi_nor_controller_ops *controller_ops;
 
-	struct spi_nor_flash_parameter *params;
+	struct spi_nor_flash_parameter *params[SNOR_FLASH_CNT_MAX];
 
 	struct {
 		struct spi_mem_dirmap_desc *rdesc;
 		struct spi_mem_dirmap_desc *wdesc;
 	} dirmap;
-
+	u32			num_flash;
 	void *priv;
 };
 
@@ -435,13 +442,13 @@  static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
 
 static inline struct spi_nor_flash_parameter *spi_nor_get_params(const struct spi_nor *nor, u8 idx)
 {
-	return nor->params;
+	return nor->params[idx];
 }
 
 static inline void spi_nor_set_params(struct spi_nor *nor, u8 idx,
 				      struct spi_nor_flash_parameter *params)
 {
-	nor->params = params;
+	nor->params[idx] = params;
 }
 /**
  * spi_nor_scan() - scan the SPI NOR