Message ID | 1304522525-27351-1-git-send-email-alexander.stein@systec-electronic.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 05/04/11 16:22, Alexander Stein wrote: > req.sample needs its own cacheline otherwise accessing req.msg fetches > it in again. > Put req onto stack as it doesn't need to be DMA save. > req.sample is kzalloced itself for DMA access. > req.command doesn't need own cache line because it will only be written to > memory on dma_map_single. req.scratch is unsed at all. > > Note: This effect doesn't occur if the underlying SPI driver doesn't use > DMA at all. > How about using ____cacheline_aligned having moved 'sample' to the end of the structure? e.g. ... struct spi_transfer xfer[6]; __be16 sample ____cacheline_aligned; } Rather smaller patch when you get down to it. > Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com> > --- > Anatolij, > > could you please check the ads7845 parts? > Thanks! > > drivers/input/touchscreen/ads7846.c | 92 ++++++++++++++++++----------------- > 1 files changed, 48 insertions(+), 44 deletions(-) > > diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c > index c24946f..6157a80 100644 > --- a/drivers/input/touchscreen/ads7846.c > +++ b/drivers/input/touchscreen/ads7846.c > @@ -281,7 +281,7 @@ struct ser_req { > u8 command; > u8 ref_off; > u16 scratch; > - __be16 sample; > + __be16 *sample; > struct spi_message msg; > struct spi_transfer xfer[6]; > }; > @@ -289,7 +289,7 @@ struct ser_req { > struct ads7845_ser_req { > u8 command[3]; > u8 pwrdown[3]; > - u8 sample[3]; > + u8 *sample; > struct spi_message msg; > struct spi_transfer xfer[2]; > }; > @@ -298,71 +298,73 @@ static int ads7846_read12_ser(struct device *dev, unsigned command) > { > struct spi_device *spi = to_spi_device(dev); > struct ads7846 *ts = dev_get_drvdata(dev); > - struct ser_req *req; > + struct ser_req req; > int status; > int use_internal; > > - req = kzalloc(sizeof *req, GFP_KERNEL); > - if (!req) > + memset(&req, 0, sizeof(req)); > + > + req.sample = kzalloc(sizeof *(req.sample), GFP_KERNEL); > + if (!req.sample) > return -ENOMEM; > > - spi_message_init(&req->msg); > + spi_message_init(&req.msg); > > /* FIXME boards with ads7846 might use external vref instead ... */ > use_internal = (ts->model == 7846); > > /* maybe turn on internal vREF, and let it settle */ > if (use_internal) { > - req->ref_on = REF_ON; > - req->xfer[0].tx_buf = &req->ref_on; > - req->xfer[0].len = 1; > - spi_message_add_tail(&req->xfer[0], &req->msg); > + req.ref_on = REF_ON; > + req.xfer[0].tx_buf = &req.ref_on; > + req.xfer[0].len = 1; > + spi_message_add_tail(&req.xfer[0], &req.msg); > > - req->xfer[1].rx_buf = &req->scratch; > - req->xfer[1].len = 2; > + req.xfer[1].rx_buf = &req.scratch; > + req.xfer[1].len = 2; > > /* for 1uF, settle for 800 usec; no cap, 100 usec. */ > - req->xfer[1].delay_usecs = ts->vref_delay_usecs; > - spi_message_add_tail(&req->xfer[1], &req->msg); > + req.xfer[1].delay_usecs = ts->vref_delay_usecs; > + spi_message_add_tail(&req.xfer[1], &req.msg); > } > > /* take sample */ > - req->command = (u8) command; > - req->xfer[2].tx_buf = &req->command; > - req->xfer[2].len = 1; > - spi_message_add_tail(&req->xfer[2], &req->msg); > + req.command = (u8) command; > + req.xfer[2].tx_buf = &req.command; > + req.xfer[2].len = 1; > + spi_message_add_tail(&req.xfer[2], &req.msg); > > - req->xfer[3].rx_buf = &req->sample; > - req->xfer[3].len = 2; > - spi_message_add_tail(&req->xfer[3], &req->msg); > + req.xfer[3].rx_buf = req.sample; > + req.xfer[3].len = 2; > + spi_message_add_tail(&req.xfer[3], &req.msg); > > /* REVISIT: take a few more samples, and compare ... */ > > /* converter in low power mode & enable PENIRQ */ > - req->ref_off = PWRDOWN; > - req->xfer[4].tx_buf = &req->ref_off; > - req->xfer[4].len = 1; > - spi_message_add_tail(&req->xfer[4], &req->msg); > + req.ref_off = PWRDOWN; > + req.xfer[4].tx_buf = &req.ref_off; > + req.xfer[4].len = 1; > + spi_message_add_tail(&req.xfer[4], &req.msg); > > - req->xfer[5].rx_buf = &req->scratch; > - req->xfer[5].len = 2; > - CS_CHANGE(req->xfer[5]); > - spi_message_add_tail(&req->xfer[5], &req->msg); > + req.xfer[5].rx_buf = &req.scratch; > + req.xfer[5].len = 2; > + CS_CHANGE(req.xfer[5]); > + spi_message_add_tail(&req.xfer[5], &req.msg); > > mutex_lock(&ts->lock); > ads7846_stop(ts); > - status = spi_sync(spi, &req->msg); > + status = spi_sync(spi, &req.msg); > ads7846_restart(ts); > mutex_unlock(&ts->lock); > > if (status == 0) { > /* on-wire is a must-ignore bit, a BE12 value, then padding */ > - status = be16_to_cpu(req->sample); > + status = be16_to_cpu(*req.sample); > status = status >> 3; > status &= 0x0fff; > } > > - kfree(req); > + kfree(req.sample); > return status; > } > > @@ -370,35 +372,37 @@ static int ads7845_read12_ser(struct device *dev, unsigned command) > { > struct spi_device *spi = to_spi_device(dev); > struct ads7846 *ts = dev_get_drvdata(dev); > - struct ads7845_ser_req *req; > + struct ads7845_ser_req req; > int status; > > - req = kzalloc(sizeof *req, GFP_KERNEL); > - if (!req) > + memset(&req, 0, sizeof(req)); > + > + req.sample = kzalloc(3 * sizeof *(req.sample), GFP_KERNEL); > + if (!req.sample) > return -ENOMEM; > > - spi_message_init(&req->msg); > + spi_message_init(&req.msg); > > - req->command[0] = (u8) command; > - req->xfer[0].tx_buf = req->command; > - req->xfer[0].rx_buf = req->sample; > - req->xfer[0].len = 3; > - spi_message_add_tail(&req->xfer[0], &req->msg); > + req.command[0] = (u8) command; > + req.xfer[0].tx_buf = req.command; > + req.xfer[0].rx_buf = req.sample; > + req.xfer[0].len = 3; > + spi_message_add_tail(&req.xfer[0], &req.msg); > > mutex_lock(&ts->lock); > ads7846_stop(ts); > - status = spi_sync(spi, &req->msg); > + status = spi_sync(spi, &req.msg); > ads7846_restart(ts); > mutex_unlock(&ts->lock); > > if (status == 0) { > /* BE12 value, then padding */ > - status = be16_to_cpu(*((u16 *)&req->sample[1])); > + status = be16_to_cpu(*((u16 *)&req.sample[1])); > status = status >> 3; > status &= 0x0fff; > } > > - kfree(req); > + kfree(req.sample); > return status; > } > -- To unsubscribe from this list: send the line "unsubscribe linux-input" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/04/11 16:49, Jonathan Cameron wrote: > On 05/04/11 16:22, Alexander Stein wrote: >> req.sample needs its own cacheline otherwise accessing req.msg fetches >> it in again. >> Put req onto stack as it doesn't need to be DMA save. >> req.sample is kzalloced itself for DMA access. >> req.command doesn't need own cache line because it will only be written to >> memory on dma_map_single. req.scratch is unsed at all. >> >> Note: This effect doesn't occur if the underlying SPI driver doesn't use >> DMA at all. >> > How about using ____cacheline_aligned having moved 'sample' to the end of the > structure? > > e.g. > > ... > struct spi_transfer xfer[6]; > __be16 sample ____cacheline_aligned; > } hmm. I should probably have glanced further up the thread. Michael beat me to it with the same suggestion! -- To unsubscribe from this list: send the line "unsubscribe linux-input" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, Am Mittwoch, 4. Mai 2011, 17:49:56 schrieb Jonathan Cameron: > On 05/04/11 16:22, Alexander Stein wrote: > > req.sample needs its own cacheline otherwise accessing req.msg fetches > > it in again. > > Put req onto stack as it doesn't need to be DMA save. > > req.sample is kzalloced itself for DMA access. > > req.command doesn't need own cache line because it will only be written > > to memory on dma_map_single. req.scratch is unsed at all. > > > > Note: This effect doesn't occur if the underlying SPI driver doesn't use > > DMA at all. > > How about using ____cacheline_aligned having moved 'sample' to the end of > the structure? > > e.g. > > ... > struct spi_transfer xfer[6]; > __be16 sample ____cacheline_aligned; > } > > Rather smaller patch when you get down to it. Mh, I didn't know I had to reorder the struct elements to use ____cacheline_aligned. I originally had a solution in mind with a smaller memory footprint in mind, but rethinking about it, this might be negligible if there is one at all. So, if ____cacheline_aligned is the more sane approach I'll resend a patch using it. Alexander -- To unsubscribe from this list: send the line "unsubscribe linux-input" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/04/11 16:55, Alexander Stein wrote: > Hello, > > Am Mittwoch, 4. Mai 2011, 17:49:56 schrieb Jonathan Cameron: >> On 05/04/11 16:22, Alexander Stein wrote: >>> req.sample needs its own cacheline otherwise accessing req.msg fetches >>> it in again. >>> Put req onto stack as it doesn't need to be DMA save. >>> req.sample is kzalloced itself for DMA access. >>> req.command doesn't need own cache line because it will only be written >>> to memory on dma_map_single. req.scratch is unsed at all. >>> >>> Note: This effect doesn't occur if the underlying SPI driver doesn't use >>> DMA at all. >> >> How about using ____cacheline_aligned having moved 'sample' to the end of >> the structure? >> >> e.g. >> >> ... >> struct spi_transfer xfer[6]; >> __be16 sample ____cacheline_aligned; >> } >> >> Rather smaller patch when you get down to it. > > Mh, I didn't know I had to reorder the struct elements to use > ____cacheline_aligned. I originally had a solution in mind with a smaller > memory footprint in mind, but rethinking about it, this might be negligible if > there is one at all. > So, if ____cacheline_aligned is the more sane approach I'll resend a patch > using it. It simply enforces that element being aligned. Thus if there is anything after it you may well have things in the same cacheline. Obviously you could use align the following element to ensure its on the next cache line, but thats silly when you can just reorder to make sure nothing is there. All mallocs are guaranteed to get a whole number of cachelines. (would be really fiddly otherwise!) Footprint wise, it should be pretty trivial. -- To unsubscribe from this list: send the line "unsubscribe linux-input" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 04, 2011 at 05:55:23PM +0200, Alexander Stein wrote: > Hello, > > Am Mittwoch, 4. Mai 2011, 17:49:56 schrieb Jonathan Cameron: > > On 05/04/11 16:22, Alexander Stein wrote: > > > req.sample needs its own cacheline otherwise accessing req.msg fetches > > > it in again. > > > Put req onto stack as it doesn't need to be DMA save. > > > req.sample is kzalloced itself for DMA access. > > > req.command doesn't need own cache line because it will only be written > > > to memory on dma_map_single. req.scratch is unsed at all. > > > > > > Note: This effect doesn't occur if the underlying SPI driver doesn't use > > > DMA at all. > > > > How about using ____cacheline_aligned having moved 'sample' to the end of > > the structure? > > > > e.g. > > > > ... > > struct spi_transfer xfer[6]; > > __be16 sample ____cacheline_aligned; > > } > > > > Rather smaller patch when you get down to it. > > Mh, I didn't know I had to reorder the struct elements to use > ____cacheline_aligned. I originally had a solution in mind with a smaller > memory footprint in mind, but rethinking about it, this might be negligible if > there is one at all. > So, if ____cacheline_aligned is the more sane approach I'll resend a patch > using it. > Yes, please, as ____cacheline_aligned introduces the least amount of the churn (and I like it better than separately doing kmalloc anyway) and I should be able to get it in .39. Thanks.
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index c24946f..6157a80 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -281,7 +281,7 @@ struct ser_req { u8 command; u8 ref_off; u16 scratch; - __be16 sample; + __be16 *sample; struct spi_message msg; struct spi_transfer xfer[6]; }; @@ -289,7 +289,7 @@ struct ser_req { struct ads7845_ser_req { u8 command[3]; u8 pwrdown[3]; - u8 sample[3]; + u8 *sample; struct spi_message msg; struct spi_transfer xfer[2]; }; @@ -298,71 +298,73 @@ static int ads7846_read12_ser(struct device *dev, unsigned command) { struct spi_device *spi = to_spi_device(dev); struct ads7846 *ts = dev_get_drvdata(dev); - struct ser_req *req; + struct ser_req req; int status; int use_internal; - req = kzalloc(sizeof *req, GFP_KERNEL); - if (!req) + memset(&req, 0, sizeof(req)); + + req.sample = kzalloc(sizeof *(req.sample), GFP_KERNEL); + if (!req.sample) return -ENOMEM; - spi_message_init(&req->msg); + spi_message_init(&req.msg); /* FIXME boards with ads7846 might use external vref instead ... */ use_internal = (ts->model == 7846); /* maybe turn on internal vREF, and let it settle */ if (use_internal) { - req->ref_on = REF_ON; - req->xfer[0].tx_buf = &req->ref_on; - req->xfer[0].len = 1; - spi_message_add_tail(&req->xfer[0], &req->msg); + req.ref_on = REF_ON; + req.xfer[0].tx_buf = &req.ref_on; + req.xfer[0].len = 1; + spi_message_add_tail(&req.xfer[0], &req.msg); - req->xfer[1].rx_buf = &req->scratch; - req->xfer[1].len = 2; + req.xfer[1].rx_buf = &req.scratch; + req.xfer[1].len = 2; /* for 1uF, settle for 800 usec; no cap, 100 usec. */ - req->xfer[1].delay_usecs = ts->vref_delay_usecs; - spi_message_add_tail(&req->xfer[1], &req->msg); + req.xfer[1].delay_usecs = ts->vref_delay_usecs; + spi_message_add_tail(&req.xfer[1], &req.msg); } /* take sample */ - req->command = (u8) command; - req->xfer[2].tx_buf = &req->command; - req->xfer[2].len = 1; - spi_message_add_tail(&req->xfer[2], &req->msg); + req.command = (u8) command; + req.xfer[2].tx_buf = &req.command; + req.xfer[2].len = 1; + spi_message_add_tail(&req.xfer[2], &req.msg); - req->xfer[3].rx_buf = &req->sample; - req->xfer[3].len = 2; - spi_message_add_tail(&req->xfer[3], &req->msg); + req.xfer[3].rx_buf = req.sample; + req.xfer[3].len = 2; + spi_message_add_tail(&req.xfer[3], &req.msg); /* REVISIT: take a few more samples, and compare ... */ /* converter in low power mode & enable PENIRQ */ - req->ref_off = PWRDOWN; - req->xfer[4].tx_buf = &req->ref_off; - req->xfer[4].len = 1; - spi_message_add_tail(&req->xfer[4], &req->msg); + req.ref_off = PWRDOWN; + req.xfer[4].tx_buf = &req.ref_off; + req.xfer[4].len = 1; + spi_message_add_tail(&req.xfer[4], &req.msg); - req->xfer[5].rx_buf = &req->scratch; - req->xfer[5].len = 2; - CS_CHANGE(req->xfer[5]); - spi_message_add_tail(&req->xfer[5], &req->msg); + req.xfer[5].rx_buf = &req.scratch; + req.xfer[5].len = 2; + CS_CHANGE(req.xfer[5]); + spi_message_add_tail(&req.xfer[5], &req.msg); mutex_lock(&ts->lock); ads7846_stop(ts); - status = spi_sync(spi, &req->msg); + status = spi_sync(spi, &req.msg); ads7846_restart(ts); mutex_unlock(&ts->lock); if (status == 0) { /* on-wire is a must-ignore bit, a BE12 value, then padding */ - status = be16_to_cpu(req->sample); + status = be16_to_cpu(*req.sample); status = status >> 3; status &= 0x0fff; } - kfree(req); + kfree(req.sample); return status; } @@ -370,35 +372,37 @@ static int ads7845_read12_ser(struct device *dev, unsigned command) { struct spi_device *spi = to_spi_device(dev); struct ads7846 *ts = dev_get_drvdata(dev); - struct ads7845_ser_req *req; + struct ads7845_ser_req req; int status; - req = kzalloc(sizeof *req, GFP_KERNEL); - if (!req) + memset(&req, 0, sizeof(req)); + + req.sample = kzalloc(3 * sizeof *(req.sample), GFP_KERNEL); + if (!req.sample) return -ENOMEM; - spi_message_init(&req->msg); + spi_message_init(&req.msg); - req->command[0] = (u8) command; - req->xfer[0].tx_buf = req->command; - req->xfer[0].rx_buf = req->sample; - req->xfer[0].len = 3; - spi_message_add_tail(&req->xfer[0], &req->msg); + req.command[0] = (u8) command; + req.xfer[0].tx_buf = req.command; + req.xfer[0].rx_buf = req.sample; + req.xfer[0].len = 3; + spi_message_add_tail(&req.xfer[0], &req.msg); mutex_lock(&ts->lock); ads7846_stop(ts); - status = spi_sync(spi, &req->msg); + status = spi_sync(spi, &req.msg); ads7846_restart(ts); mutex_unlock(&ts->lock); if (status == 0) { /* BE12 value, then padding */ - status = be16_to_cpu(*((u16 *)&req->sample[1])); + status = be16_to_cpu(*((u16 *)&req.sample[1])); status = status >> 3; status &= 0x0fff; } - kfree(req); + kfree(req.sample); return status; }
req.sample needs its own cacheline otherwise accessing req.msg fetches it in again. Put req onto stack as it doesn't need to be DMA save. req.sample is kzalloced itself for DMA access. req.command doesn't need own cache line because it will only be written to memory on dma_map_single. req.scratch is unsed at all. Note: This effect doesn't occur if the underlying SPI driver doesn't use DMA at all. Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com> --- Anatolij, could you please check the ads7845 parts? Thanks! drivers/input/touchscreen/ads7846.c | 92 ++++++++++++++++++----------------- 1 files changed, 48 insertions(+), 44 deletions(-)