
drm/vmwgfx: switch from ioremap_cache to memremap

Message ID 20151012223527.34143.87313.stgit@dwillia2-desk3.jf.intel.com (mailing list archive)
State New, archived

Commit Message

Dan Williams Oct. 12, 2015, 10:35 p.m. UTC
Per commit 2e586a7e017a "drm/vmwgfx: Map the fifo as cached" the driver
expects the fifo registers to be cacheable.  In preparation for
deprecating ioremap_cache() convert its usage in vmwgfx to memremap().

Cc: David Airlie <airlied@linux.ie>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Sinclair Yeh <syeh@vmware.com>
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 This is part of the tree-wide conversion of ioremap_cache() to
 memremap() targeted for v4.4 [1]
 
 As noted in that cover letter feel free to apply or ack.  These
 individual conversions can go in independently given the base memremap()
 implementation is already upstream in v4.3-rc1.
 
 This passes a clean run of sparse, but I have not tried to run the
 result.
 
 [1]: https://lkml.org/lkml/2015/10/9/699
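
 At its core the conversion is mechanical; a minimal before/after sketch
 (the same change the first hunk below makes, shown without the diff markup):

	/* before: __iomem mapping, accessed via ioread32()/iowrite32() */
	dev_priv->mmio_virt = ioremap_cache(dev_priv->mmio_start,
					    dev_priv->mmio_size);
	...
	iounmap(dev_priv->mmio_virt);

	/* after: plain kernel pointer; MEMREMAP_WB requests an ordinary
	 * write-back cacheable mapping, so the fifo is accessed with normal
	 * loads, stores and memcpy(), and torn down with memunmap() */
	dev_priv->mmio_virt = memremap(dev_priv->mmio_start,
				       dev_priv->mmio_size, MEMREMAP_WB);
	...
	memunmap(dev_priv->mmio_virt);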

 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |    8 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |    2 -
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c |   23 ++++---
 drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c  |  102 ++++++++++++++++-----------------
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c |    9 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_irq.c   |    8 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c   |   24 ++++----
 7 files changed, 84 insertions(+), 92 deletions(-)

Comments

Thomas Hellstrom Oct. 13, 2015, 5:18 a.m. UTC | #1
Hi!

On 10/13/2015 12:35 AM, Dan Williams wrote:
> Per commit 2e586a7e017a "drm/vmwgfx: Map the fifo as cached" the driver
> expects the fifo registers to be cacheable.  In preparation for
> deprecating ioremap_cache() convert its usage in vmwgfx to memremap().
>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Thomas Hellstrom <thellstrom@vmware.com>
> Cc: Sinclair Yeh <syeh@vmware.com>
> Cc: dri-devel@lists.freedesktop.org
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

While I have nothing against the conversion, what's stopping the
compiler from reordering writes on a generic architecture and caching
and reordering reads on x86 in particular? At the very least it looks to
me like the memory accesses of the memremap'd memory needs to be
encapsulated within READ_ONCE and WRITE_ONCE.
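
For illustration, wrapping the accesses as suggested would look something
like this (a sketch only, using the SVGA_FIFO_FENCE accesses from the patch):

	u32 *fifo_mem = dev_priv->mmio_virt;

	/* READ_ONCE()/WRITE_ONCE() make each access a single volatile
	 * load/store that the compiler cannot elide or reorder against
	 * other volatile accesses. */
	u32 seqno = READ_ONCE(fifo_mem[SVGA_FIFO_FENCE]);

	WRITE_ONCE(fifo_mem[SVGA_FIFO_FENCE_GOAL], fence->base.seqno);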

How is this handled in the other conversions?

Thanks,
Thomas





> ---
>
>  This is part of the tree-wide conversion of ioremap_cache() to
>  memremap() targeted for v4.4 [1]
>  
>  As noted in that cover letter feel free to apply or ack.  These
>  individual conversions can go in independently given the base memremap()
>  implementation is already upstream in v4.3-rc1.
>  
>  This passes a clean run of sparse, but I have not tried to run the
>  result.
>  
>  [1]: https://lkml.org/lkml/2015/10/9/699
>
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |    8 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |    2 -
>  drivers/gpu/drm/vmwgfx/vmwgfx_fence.c |   23 ++++---
>  drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c  |  102 ++++++++++++++++-----------------
>  drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c |    9 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_irq.c   |    8 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_kms.c   |   24 ++++----
>  7 files changed, 84 insertions(+), 92 deletions(-)
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index 2c7a25c71af2..d6152cd7c634 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -752,8 +752,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
>  	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
>  	dev_priv->active_master = &dev_priv->fbdev_master;
>  
> -	dev_priv->mmio_virt = ioremap_cache(dev_priv->mmio_start,
> -					    dev_priv->mmio_size);
> +	dev_priv->mmio_virt = memremap(dev_priv->mmio_start,
> +				       dev_priv->mmio_size, MEMREMAP_WB);
>  
>  	if (unlikely(dev_priv->mmio_virt == NULL)) {
>  		ret = -ENOMEM;
> @@ -907,7 +907,7 @@ out_no_irq:
>  out_no_device:
>  	ttm_object_device_release(&dev_priv->tdev);
>  out_err4:
> -	iounmap(dev_priv->mmio_virt);
> +	memunmap(dev_priv->mmio_virt);
>  out_err3:
>  	vmw_ttm_global_release(dev_priv);
>  out_err0:
> @@ -958,7 +958,7 @@ static int vmw_driver_unload(struct drm_device *dev)
>  		pci_release_regions(dev->pdev);
>  
>  	ttm_object_device_release(&dev_priv->tdev);
> -	iounmap(dev_priv->mmio_virt);
> +	memunmap(dev_priv->mmio_virt);
>  	if (dev_priv->ctx.staged_bindings)
>  		vmw_binding_state_free(dev_priv->ctx.staged_bindings);
>  	vmw_ttm_global_release(dev_priv);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> index f19fd39b43e1..7ff1db23de80 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> @@ -375,7 +375,7 @@ struct vmw_private {
>  	uint32_t stdu_max_height;
>  	uint32_t initial_width;
>  	uint32_t initial_height;
> -	u32 __iomem *mmio_virt;
> +	u32 *mmio_virt;
>  	uint32_t capabilities;
>  	uint32_t max_gmr_ids;
>  	uint32_t max_gmr_pages;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 567ddede51d1..97f75adc080d 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -141,9 +141,9 @@ static bool vmw_fence_enable_signaling(struct fence *f)
>  
>  	struct vmw_fence_manager *fman = fman_from_fence(fence);
>  	struct vmw_private *dev_priv = fman->dev_priv;
> +	u32 *fifo_mem = dev_priv->mmio_virt;
> +	u32 seqno = *(fifo_mem + SVGA_FIFO_FENCE);
>  
> -	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
> -	u32 seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
>  	if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
>  		return false;
>  
>
Here, for example.

/Thomas
Dan Williams Oct. 13, 2015, 4:35 p.m. UTC | #2
On Mon, Oct 12, 2015 at 10:18 PM, Thomas Hellstrom
<thellstrom@vmware.com> wrote:
> Hi!
>
> On 10/13/2015 12:35 AM, Dan Williams wrote:
>> Per commit 2e586a7e017a "drm/vmwgfx: Map the fifo as cached" the driver
>> expects the fifo registers to be cacheable.  In preparation for
>> deprecating ioremap_cache() convert its usage in vmwgfx to memremap().
>>
>> Cc: David Airlie <airlied@linux.ie>
>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>> Cc: Sinclair Yeh <syeh@vmware.com>
>> Cc: dri-devel@lists.freedesktop.org
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>
> While I have nothing against the conversion, what's stopping the
> compiler from reordering writes on a generic architecture and caching
> and reordering reads on x86 in particular? At the very least it looks to
> me like the memory accesses of the memremap'd memory needs to be
> encapsulated within READ_ONCE and WRITE_ONCE.

Hmm, currently the code is using ioread32/iowrite32 which only do
volatile accesses, whereas READ_ONCE / WRITE_ONCE have a memory
clobber on entry and exit.  So, I'm assuming all you need is the
guarantee of "no compiler re-ordering" and not the stronger
READ_ONCE/WRITE_ONCE guarantees, but that still seems broken compared
to explicit fencing where it matters.

If the ordering needs to be guaranteed with respect to the agent on
the other side of the fifo, that can only be asserted via real barriers
(mb(), wmb()), which the driver already seems to have in places.
ioread32 and iowrite32 as-is don't help with that case.
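
That pattern is already in the fifo code (abridged from
vmw_fifo_slow_copy() in the patch below): publish the payload word first,
then order it against the command-pointer update with a full barrier:

	*(fifo_mem + (next_cmd >> 2)) = *buffer++;	/* payload word */
	mb();
	*(fifo_mem + SVGA_FIFO_NEXT_CMD) = next_cmd;	/* tell the device */
	mb();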

> How is this handled in the other conversions?

As far as I can see, the vmwgfx conversion is unique in implementing a
device fifo in cached memory.
Thomas Hellstrom Oct. 13, 2015, 6:37 p.m. UTC | #3
On 10/13/2015 06:35 PM, Dan Williams wrote:
> On Mon, Oct 12, 2015 at 10:18 PM, Thomas Hellstrom
> <thellstrom@vmware.com> wrote:
>> Hi!
>>
>> On 10/13/2015 12:35 AM, Dan Williams wrote:
>>> Per commit 2e586a7e017a "drm/vmwgfx: Map the fifo as cached" the driver
>>> expects the fifo registers to be cacheable.  In preparation for
>>> deprecating ioremap_cache() convert its usage in vmwgfx to memremap().
>>>
>>> Cc: David Airlie <airlied@linux.ie>
>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>> Cc: Sinclair Yeh <syeh@vmware.com>
>>> Cc: dri-devel@lists.freedesktop.org
>>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> While I have nothing against the conversion, what's stopping the
>> compiler from reordering writes on a generic architecture and caching
>> and reordering reads on x86 in particular? At the very least it looks to
>> me like the memory accesses of the memremap'd memory needs to be
>> encapsulated within READ_ONCE and WRITE_ONCE.
> Hmm, currently the code is using ioread32/iowrite32 which only do
> volatile accesses, whereas READ_ONCE / WRITE_ONCE have a memory
> clobber on entry and exit.  So, I'm assuming all you need is the
> guarantee of "no compiler re-ordering" and not the stronger
> READ_ONCE/WRITE_ONCE guarantees, but that still seems broken compared
> to explicit fencing where it matters.

I'm not quite sure I follow you here, it looks to me like READ_ONCE()
and WRITE_ONCE() are implemented as
volatile accesses,

http://lxr.free-electrons.com/source/include/linux/compiler.h#L215

just like ioread32 and iowrite32

http://lxr.free-electrons.com/source/include/asm-generic/io.h#L54

which would minimize any potential impact of this change.
IMO optimizing the memory accesses can be done as a later step.
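
For a naturally aligned u32, the two linked implementations indeed reduce
to much the same thing; a simplified sketch (not the literal kernel source,
helper names invented here for illustration):

	/* READ_ONCE(*p) for a 4-byte object: a single volatile load */
	static inline u32 once_read32(const volatile u32 *p)
	{
		return *p;
	}

	/* ioread32()/__raw_readl() on asm-generic: also a single volatile load */
	static inline u32 raw_read32(const volatile void *addr)
	{
		return *(const volatile u32 *)addr;
	}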

/Thomas
Dan Williams Oct. 13, 2015, 6:48 p.m. UTC | #4
On Tue, Oct 13, 2015 at 11:37 AM, Thomas Hellstrom
<thellstrom@vmware.com> wrote:
> On 10/13/2015 06:35 PM, Dan Williams wrote:
>> On Mon, Oct 12, 2015 at 10:18 PM, Thomas Hellstrom
>> <thellstrom@vmware.com> wrote:
>>> Hi!
>>>
>>> On 10/13/2015 12:35 AM, Dan Williams wrote:
>>>> Per commit 2e586a7e017a "drm/vmwgfx: Map the fifo as cached" the driver
>>>> expects the fifo registers to be cacheable.  In preparation for
>>>> deprecating ioremap_cache() convert its usage in vmwgfx to memremap().
>>>>
>>>> Cc: David Airlie <airlied@linux.ie>
>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>> Cc: Sinclair Yeh <syeh@vmware.com>
>>>> Cc: dri-devel@lists.freedesktop.org
>>>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>>> While I have nothing against the conversion, what's stopping the
>>> compiler from reordering writes on a generic architecture and caching
>>> and reordering reads on x86 in particular? At the very least it looks to
>>> me like the memory accesses of the memremap'd memory needs to be
>>> encapsulated within READ_ONCE and WRITE_ONCE.
>> Hmm, currently the code is using ioread32/iowrite32 which only do
>> volatile accesses, whereas READ_ONCE / WRITE_ONCE have a memory
>> clobber on entry and exit.  So, I'm assuming all you need is the
>> guarantee of "no compiler re-ordering" and not the stronger
>> READ_ONCE/WRITE_ONCE guarantees, but that still seems broken compared
>> to explicit fencing where it matters.
>
> I'm not quite sure I follow you here, it looks to me like READ_ONCE()
> and WRITE_ONCE() are implemented as
> volatile accesses,

Ah, sorry, I was looking at the default case...

>
> http://lxr.free-electrons.com/source/include/linux/compiler.h#L215
>
> just like ioread32 and iowrite32
>
> http://lxr.free-electrons.com/source/include/asm-generic/io.h#L54
>
> which would minimize any potential impact of this change.
> IMO optimizing the memory accesses can be done as a later step.
>

Ok, I'll make local read_fifo() and write_fifo() macros to make this
explicit.  Are these names ok with you?
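
A minimal sketch of what such wrappers might look like (names taken from
the message above; the actual follow-up patch may differ):

	/* sketch only: explicit single-access helpers for the memremap()'d fifo */
	#define read_fifo(fifo_mem, off)	READ_ONCE(*((fifo_mem) + (off)))
	#define write_fifo(val, fifo_mem, off)	WRITE_ONCE(*((fifo_mem) + (off)), (val))

	/* usage: seqno = read_fifo(fifo_mem, SVGA_FIFO_FENCE); */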
Thomas Hellstrom Oct. 13, 2015, 6:52 p.m. UTC | #5
On 10/13/2015 08:48 PM, Dan Williams wrote:
> On Tue, Oct 13, 2015 at 11:37 AM, Thomas Hellstrom
> <thellstrom@vmware.com> wrote:
>> On 10/13/2015 06:35 PM, Dan Williams wrote:
>>> On Mon, Oct 12, 2015 at 10:18 PM, Thomas Hellstrom
>>> <thellstrom@vmware.com> wrote:
>>>> Hi!
>>>>
>>>> On 10/13/2015 12:35 AM, Dan Williams wrote:
>>>>> Per commit 2e586a7e017a "drm/vmwgfx: Map the fifo as cached" the driver
>>>>> expects the fifo registers to be cacheable.  In preparation for
>>>>> deprecating ioremap_cache() convert its usage in vmwgfx to memremap().
>>>>>
>>>>> Cc: David Airlie <airlied@linux.ie>
>>>>> Cc: Thomas Hellstrom <thellstrom@vmware.com>
>>>>> Cc: Sinclair Yeh <syeh@vmware.com>
>>>>> Cc: dri-devel@lists.freedesktop.org
>>>>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>>>> While I have nothing against the conversion, what's stopping the
>>>> compiler from reordering writes on a generic architecture and caching
>>>> and reordering reads on x86 in particular? At the very least it looks to
>>>> me like the memory accesses of the memremap'd memory needs to be
>>>> encapsulated within READ_ONCE and WRITE_ONCE.
>>> Hmm, currently the code is using ioread32/iowrite32 which only do
>>> volatile accesses, whereas READ_ONCE / WRITE_ONCE have a memory
>>> clobber on entry and exit.  So, I'm assuming all you need is the
>>> guarantee of "no compiler re-ordering" and not the stronger
>>> READ_ONCE/WRITE_ONCE guarantees, but that still seems broken compared
>>> to explicit fencing where it matters.
>> I'm not quite sure I follow you here, it looks to me like READ_ONCE()
>> and WRITE_ONCE() are implemented as
>> volatile accesses,
> Ah, sorry, I was looking at the default case...
>
>> http://lxr.free-electrons.com/source/include/linux/compiler.h#L215
>>
>> just like ioread32 and iowrite32
>>
>> http://lxr.free-electrons.com/source/include/asm-generic/io.h#L54
>>
>> which would minimize any potential impact of this change.
>> IMO optimizing the memory accesses can be done as a later step.
>>
> Ok, I'll make local read_fifo() and write_fifo() macros to make this
> explicit.  Are these names ok with you?

Sure.

Thanks,
Thomas

Patch

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 2c7a25c71af2..d6152cd7c634 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -752,8 +752,8 @@  static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
 	dev_priv->active_master = &dev_priv->fbdev_master;
 
-	dev_priv->mmio_virt = ioremap_cache(dev_priv->mmio_start,
-					    dev_priv->mmio_size);
+	dev_priv->mmio_virt = memremap(dev_priv->mmio_start,
+				       dev_priv->mmio_size, MEMREMAP_WB);
 
 	if (unlikely(dev_priv->mmio_virt == NULL)) {
 		ret = -ENOMEM;
@@ -907,7 +907,7 @@  out_no_irq:
 out_no_device:
 	ttm_object_device_release(&dev_priv->tdev);
 out_err4:
-	iounmap(dev_priv->mmio_virt);
+	memunmap(dev_priv->mmio_virt);
 out_err3:
 	vmw_ttm_global_release(dev_priv);
 out_err0:
@@ -958,7 +958,7 @@  static int vmw_driver_unload(struct drm_device *dev)
 		pci_release_regions(dev->pdev);
 
 	ttm_object_device_release(&dev_priv->tdev);
-	iounmap(dev_priv->mmio_virt);
+	memunmap(dev_priv->mmio_virt);
 	if (dev_priv->ctx.staged_bindings)
 		vmw_binding_state_free(dev_priv->ctx.staged_bindings);
 	vmw_ttm_global_release(dev_priv);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index f19fd39b43e1..7ff1db23de80 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -375,7 +375,7 @@  struct vmw_private {
 	uint32_t stdu_max_height;
 	uint32_t initial_width;
 	uint32_t initial_height;
-	u32 __iomem *mmio_virt;
+	u32 *mmio_virt;
 	uint32_t capabilities;
 	uint32_t max_gmr_ids;
 	uint32_t max_gmr_pages;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 567ddede51d1..97f75adc080d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -141,9 +141,9 @@  static bool vmw_fence_enable_signaling(struct fence *f)
 
 	struct vmw_fence_manager *fman = fman_from_fence(fence);
 	struct vmw_private *dev_priv = fman->dev_priv;
+	u32 *fifo_mem = dev_priv->mmio_virt;
+	u32 seqno = *(fifo_mem + SVGA_FIFO_FENCE);
 
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
-	u32 seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 	if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
 		return false;
 
@@ -386,14 +386,14 @@  static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
 				      u32 passed_seqno)
 {
 	u32 goal_seqno;
-	u32 __iomem *fifo_mem;
+	u32 *fifo_mem;
 	struct vmw_fence_obj *fence;
 
 	if (likely(!fman->seqno_valid))
 		return false;
 
 	fifo_mem = fman->dev_priv->mmio_virt;
-	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	goal_seqno = *(fifo_mem + SVGA_FIFO_FENCE_GOAL);
 	if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
 		return false;
 
@@ -401,8 +401,7 @@  static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
 	list_for_each_entry(fence, &fman->fence_list, head) {
 		if (!list_empty(&fence->seq_passed_actions)) {
 			fman->seqno_valid = true;
-			iowrite32(fence->base.seqno,
-				  fifo_mem + SVGA_FIFO_FENCE_GOAL);
+			*(fifo_mem + SVGA_FIFO_FENCE_GOAL) = fence->base.seqno;
 			break;
 		}
 	}
@@ -430,18 +429,18 @@  static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
 {
 	struct vmw_fence_manager *fman = fman_from_fence(fence);
 	u32 goal_seqno;
-	u32 __iomem *fifo_mem;
+	u32 *fifo_mem;
 
 	if (fence_is_signaled_locked(&fence->base))
 		return false;
 
 	fifo_mem = fman->dev_priv->mmio_virt;
-	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	goal_seqno = *(fifo_mem + SVGA_FIFO_FENCE_GOAL);
 	if (likely(fman->seqno_valid &&
 		   goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
 		return false;
 
-	iowrite32(fence->base.seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	*(fifo_mem + SVGA_FIFO_FENCE_GOAL) = fence->base.seqno;
 	fman->seqno_valid = true;
 
 	return true;
@@ -453,9 +452,9 @@  static void __vmw_fences_update(struct vmw_fence_manager *fman)
 	struct list_head action_list;
 	bool needs_rerun;
 	uint32_t seqno, new_seqno;
-	u32 __iomem *fifo_mem = fman->dev_priv->mmio_virt;
+	u32 *fifo_mem = fman->dev_priv->mmio_virt;
 
-	seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	seqno = *(fifo_mem + SVGA_FIFO_FENCE);
 rerun:
 	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
 		if (seqno - fence->base.seqno < VMW_FENCE_WRAP) {
@@ -477,7 +476,7 @@  rerun:
 
 	needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
 	if (unlikely(needs_rerun)) {
-		new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+		new_seqno = *(fifo_mem + SVGA_FIFO_FENCE);
 		if (new_seqno != seqno) {
 			seqno = new_seqno;
 			goto rerun;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 80c40c31d4f8..199a8c0a681f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -36,7 +36,7 @@  struct vmw_temp_set_context {
 
 bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	u32 *fifo_mem = dev_priv->mmio_virt;
 	uint32_t fifo_min, hwversion;
 	const struct vmw_fifo_state *fifo = &dev_priv->fifo;
 
@@ -60,15 +60,15 @@  bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
 	if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO))
 		return false;
 
-	fifo_min = ioread32(fifo_mem  + SVGA_FIFO_MIN);
+	fifo_min = *(fifo_mem  + SVGA_FIFO_MIN);
 	if (fifo_min <= SVGA_FIFO_3D_HWVERSION * sizeof(unsigned int))
 		return false;
 
-	hwversion = ioread32(fifo_mem +
-			     ((fifo->capabilities &
-			       SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
-			      SVGA_FIFO_3D_HWVERSION_REVISED :
-			      SVGA_FIFO_3D_HWVERSION));
+	hwversion = *(fifo_mem +
+		      ((fifo->capabilities &
+			SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
+		       SVGA_FIFO_3D_HWVERSION_REVISED :
+		       SVGA_FIFO_3D_HWVERSION));
 
 	if (hwversion == 0)
 		return false;
@@ -85,13 +85,13 @@  bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
 
 bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	u32 *fifo_mem = dev_priv->mmio_virt;
 	uint32_t caps;
 
 	if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO))
 		return false;
 
-	caps = ioread32(fifo_mem + SVGA_FIFO_CAPABILITIES);
+	caps = *(fifo_mem + SVGA_FIFO_CAPABILITIES);
 	if (caps & SVGA_FIFO_CAP_PITCHLOCK)
 		return true;
 
@@ -100,7 +100,7 @@  bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv)
 
 int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	u32 *fifo_mem = dev_priv->mmio_virt;
 	uint32_t max;
 	uint32_t min;
 
@@ -137,19 +137,19 @@  int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 	if (min < PAGE_SIZE)
 		min = PAGE_SIZE;
 
-	iowrite32(min, fifo_mem + SVGA_FIFO_MIN);
-	iowrite32(dev_priv->mmio_size, fifo_mem + SVGA_FIFO_MAX);
+	*(fifo_mem + SVGA_FIFO_MIN) = min;
+	*(fifo_mem + SVGA_FIFO_MAX) = dev_priv->mmio_size;
 	wmb();
-	iowrite32(min,  fifo_mem + SVGA_FIFO_NEXT_CMD);
-	iowrite32(min,  fifo_mem + SVGA_FIFO_STOP);
-	iowrite32(0, fifo_mem + SVGA_FIFO_BUSY);
+	*(fifo_mem + SVGA_FIFO_NEXT_CMD) = min;
+	*(fifo_mem + SVGA_FIFO_STOP) = min;
+	*(fifo_mem + SVGA_FIFO_BUSY) = 0;
 	mb();
 
 	vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, 1);
 
-	max = ioread32(fifo_mem + SVGA_FIFO_MAX);
-	min = ioread32(fifo_mem  + SVGA_FIFO_MIN);
-	fifo->capabilities = ioread32(fifo_mem + SVGA_FIFO_CAPABILITIES);
+	max = *(fifo_mem + SVGA_FIFO_MAX);
+	min = *(fifo_mem  + SVGA_FIFO_MIN);
+	fifo->capabilities = *(fifo_mem + SVGA_FIFO_CAPABILITIES);
 
 	DRM_INFO("Fifo max 0x%08x min 0x%08x cap 0x%08x\n",
 		 (unsigned int) max,
@@ -157,7 +157,7 @@  int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 		 (unsigned int) fifo->capabilities);
 
 	atomic_set(&dev_priv->marker_seq, dev_priv->last_read_seqno);
-	iowrite32(dev_priv->last_read_seqno, fifo_mem + SVGA_FIFO_FENCE);
+	*(fifo_mem + SVGA_FIFO_FENCE) = dev_priv->last_read_seqno;
 	vmw_marker_queue_init(&fifo->marker_queue);
 
 	return 0;
@@ -165,7 +165,7 @@  int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 
 void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	u32 *fifo_mem = dev_priv->mmio_virt;
 	static DEFINE_SPINLOCK(ping_lock);
 	unsigned long irq_flags;
 
@@ -174,8 +174,8 @@  void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
 	 * test-and-set of the SVGA_FIFO_BUSY register.
 	 */
 	spin_lock_irqsave(&ping_lock, irq_flags);
-	if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) {
-		iowrite32(1, fifo_mem + SVGA_FIFO_BUSY);
+	if (unlikely(*(fifo_mem + SVGA_FIFO_BUSY) == 0)) {
+		*(fifo_mem + SVGA_FIFO_BUSY) = 1;
 		vmw_write(dev_priv, SVGA_REG_SYNC, reason);
 	}
 	spin_unlock_irqrestore(&ping_lock, irq_flags);
@@ -183,13 +183,13 @@  void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
 
 void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	u32 *fifo_mem = dev_priv->mmio_virt;
 
 	vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
 	while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0)
 		;
 
-	dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	dev_priv->last_read_seqno = *(fifo_mem + SVGA_FIFO_FENCE);
 
 	vmw_write(dev_priv, SVGA_REG_CONFIG_DONE,
 		  dev_priv->config_done_state);
@@ -213,11 +213,11 @@  void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 
 static bool vmw_fifo_is_full(struct vmw_private *dev_priv, uint32_t bytes)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
-	uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX);
-	uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
-	uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN);
-	uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP);
+	u32 *fifo_mem = dev_priv->mmio_virt;
+	uint32_t max = *(fifo_mem + SVGA_FIFO_MAX);
+	uint32_t next_cmd = *(fifo_mem + SVGA_FIFO_NEXT_CMD);
+	uint32_t min = *(fifo_mem + SVGA_FIFO_MIN);
+	uint32_t stop = *(fifo_mem + SVGA_FIFO_STOP);
 
 	return ((max - next_cmd) + (stop - min) <= bytes);
 }
@@ -321,7 +321,7 @@  static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
 				    uint32_t bytes)
 {
 	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	u32 *fifo_mem = dev_priv->mmio_virt;
 	uint32_t max;
 	uint32_t min;
 	uint32_t next_cmd;
@@ -329,9 +329,9 @@  static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
 	int ret;
 
 	mutex_lock(&fifo_state->fifo_mutex);
-	max = ioread32(fifo_mem + SVGA_FIFO_MAX);
-	min = ioread32(fifo_mem + SVGA_FIFO_MIN);
-	next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
+	max = *(fifo_mem + SVGA_FIFO_MAX);
+	min = *(fifo_mem + SVGA_FIFO_MIN);
+	next_cmd = *(fifo_mem + SVGA_FIFO_NEXT_CMD);
 
 	if (unlikely(bytes >= (max - min)))
 		goto out_err;
@@ -342,7 +342,7 @@  static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
 	fifo_state->reserved_size = bytes;
 
 	while (1) {
-		uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP);
+		uint32_t stop = *(fifo_mem + SVGA_FIFO_STOP);
 		bool need_bounce = false;
 		bool reserve_in_place = false;
 
@@ -376,10 +376,8 @@  static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
 				fifo_state->using_bounce_buffer = false;
 
 				if (reserveable)
-					iowrite32(bytes, fifo_mem +
-						  SVGA_FIFO_RESERVED);
-				return (void __force *) (fifo_mem +
-							 (next_cmd >> 2));
+					*(fifo_mem + SVGA_FIFO_RESERVED) = bytes;
+				return fifo_mem + (next_cmd >> 2);
 			} else {
 				need_bounce = true;
 			}
@@ -427,7 +425,7 @@  void *vmw_fifo_reserve_dx(struct vmw_private *dev_priv, uint32_t bytes,
 }
 
 static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state,
-			      u32 __iomem *fifo_mem,
+			      u32 *fifo_mem,
 			      uint32_t next_cmd,
 			      uint32_t max, uint32_t min, uint32_t bytes)
 {
@@ -439,30 +437,28 @@  static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state,
 	if (bytes < chunk_size)
 		chunk_size = bytes;
 
-	iowrite32(bytes, fifo_mem + SVGA_FIFO_RESERVED);
+	*(fifo_mem + SVGA_FIFO_RESERVED) = bytes;
 	mb();
-	memcpy_toio(fifo_mem + (next_cmd >> 2), buffer, chunk_size);
+	memcpy(fifo_mem + (next_cmd >> 2), buffer, chunk_size);
 	rest = bytes - chunk_size;
 	if (rest)
-		memcpy_toio(fifo_mem + (min >> 2), buffer + (chunk_size >> 2),
-			    rest);
+		memcpy(fifo_mem + (min >> 2), buffer + (chunk_size >> 2), rest);
 }
 
 static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state,
-			       u32 __iomem *fifo_mem,
-			       uint32_t next_cmd,
+			       u32 *fifo_mem, uint32_t next_cmd,
 			       uint32_t max, uint32_t min, uint32_t bytes)
 {
 	uint32_t *buffer = (fifo_state->dynamic_buffer != NULL) ?
 	    fifo_state->dynamic_buffer : fifo_state->static_buffer;
 
 	while (bytes > 0) {
-		iowrite32(*buffer++, fifo_mem + (next_cmd >> 2));
+		*(fifo_mem + (next_cmd >> 2)) = *buffer++;
 		next_cmd += sizeof(uint32_t);
 		if (unlikely(next_cmd == max))
 			next_cmd = min;
 		mb();
-		iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD);
+		*(fifo_mem + SVGA_FIFO_NEXT_CMD) = next_cmd;
 		mb();
 		bytes -= sizeof(uint32_t);
 	}
@@ -471,10 +467,10 @@  static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state,
 static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
 {
 	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
-	uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
-	uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX);
-	uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN);
+	u32 *fifo_mem = dev_priv->mmio_virt;
+	uint32_t next_cmd = *(fifo_mem + SVGA_FIFO_NEXT_CMD);
+	uint32_t max = *(fifo_mem + SVGA_FIFO_MAX);
+	uint32_t min = *(fifo_mem + SVGA_FIFO_MIN);
 	bool reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE;
 
 	if (fifo_state->dx)
@@ -507,11 +503,11 @@  static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
 		if (next_cmd >= max)
 			next_cmd -= max - min;
 		mb();
-		iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD);
+		*(fifo_mem + SVGA_FIFO_NEXT_CMD) = next_cmd;
 	}
 
 	if (reserveable)
-		iowrite32(0, fifo_mem + SVGA_FIFO_RESERVED);
+		*(fifo_mem + SVGA_FIFO_RESERVED) = 0;
 	mb();
 	up_write(&fifo_state->rwsem);
 	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 0a970afed93b..d6d8663dc257 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -64,7 +64,7 @@  int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		break;
 	case DRM_VMW_PARAM_FIFO_HW_VERSION:
 	{
-		u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+		u32 *fifo_mem = dev_priv->mmio_virt;
 		const struct vmw_fifo_state *fifo = &dev_priv->fifo;
 
 		if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) {
@@ -72,8 +72,7 @@  int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 			break;
 		}
 
-		param->value =
-			ioread32(fifo_mem +
+		param->value = *(fifo_mem +
 				 ((fifo->capabilities &
 				   SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
 				  SVGA_FIFO_3D_HWVERSION_REVISED :
@@ -162,7 +161,7 @@  int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 		(struct drm_vmw_get_3d_cap_arg *) data;
 	struct vmw_private *dev_priv = vmw_priv(dev);
 	uint32_t size;
-	u32 __iomem *fifo_mem;
+	u32 *fifo_mem;
 	void __user *buffer = (void __user *)((unsigned long)(arg->buffer));
 	void *bounce;
 	int ret;
@@ -211,7 +210,7 @@  int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 			goto out_err;
 	} else {
 		fifo_mem = dev_priv->mmio_virt;
-		memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size);
+		memcpy(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size);
 	}
 
 	ret = copy_to_user(buffer, bounce, size);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 9498a5e33c12..160121af64d4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -72,8 +72,8 @@  static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno)
 void vmw_update_seqno(struct vmw_private *dev_priv,
 			 struct vmw_fifo_state *fifo_state)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
-	uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	u32 *fifo_mem = dev_priv->mmio_virt;
+	uint32_t seqno = *(fifo_mem + SVGA_FIFO_FENCE);
 
 	if (dev_priv->last_read_seqno != seqno) {
 		dev_priv->last_read_seqno = seqno;
@@ -178,8 +178,8 @@  int vmw_fallback_wait(struct vmw_private *dev_priv,
 	}
 	finish_wait(&dev_priv->fence_queue, &__wait);
 	if (ret == 0 && fifo_idle) {
-		u32 __iomem *fifo_mem = dev_priv->mmio_virt;
-		iowrite32(signal_seq, fifo_mem + SVGA_FIFO_FENCE);
+		u32 *fifo_mem = dev_priv->mmio_virt;
+		*(fifo_mem + SVGA_FIFO_FENCE) = signal_seq;
 	}
 	wake_up_all(&dev_priv->fence_queue);
 out_err:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 15a6c01cd016..8977e32c49c7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -123,14 +123,12 @@  err_unreserve:
 void vmw_cursor_update_position(struct vmw_private *dev_priv,
 				bool show, int x, int y)
 {
-	u32 __iomem *fifo_mem = dev_priv->mmio_virt;
-	uint32_t count;
-
-	iowrite32(show ? 1 : 0, fifo_mem + SVGA_FIFO_CURSOR_ON);
-	iowrite32(x, fifo_mem + SVGA_FIFO_CURSOR_X);
-	iowrite32(y, fifo_mem + SVGA_FIFO_CURSOR_Y);
-	count = ioread32(fifo_mem + SVGA_FIFO_CURSOR_COUNT);
-	iowrite32(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT);
+	u32 *fifo_mem = dev_priv->mmio_virt;
+
+	*(fifo_mem + SVGA_FIFO_CURSOR_ON) = show ? 1 : 0;
+	*(fifo_mem + SVGA_FIFO_CURSOR_X) = x;
+	*(fifo_mem + SVGA_FIFO_CURSOR_Y) = y;
+	*(fifo_mem + SVGA_FIFO_CURSOR_COUNT) += 1;
 }
 
 int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
@@ -1155,7 +1153,7 @@  int vmw_kms_write_svga(struct vmw_private *vmw_priv,
 	if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
 		vmw_write(vmw_priv, SVGA_REG_PITCHLOCK, pitch);
 	else if (vmw_fifo_have_pitchlock(vmw_priv))
-		iowrite32(pitch, vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
+		*(vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK) = pitch;
 	vmw_write(vmw_priv, SVGA_REG_WIDTH, width);
 	vmw_write(vmw_priv, SVGA_REG_HEIGHT, height);
 	vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, bpp);
@@ -1181,8 +1179,8 @@  int vmw_kms_save_vga(struct vmw_private *vmw_priv)
 		vmw_priv->vga_pitchlock =
 		  vmw_read(vmw_priv, SVGA_REG_PITCHLOCK);
 	else if (vmw_fifo_have_pitchlock(vmw_priv))
-		vmw_priv->vga_pitchlock = ioread32(vmw_priv->mmio_virt +
-						   SVGA_FIFO_PITCHLOCK);
+		vmw_priv->vga_pitchlock = *(vmw_priv->mmio_virt +
+					    SVGA_FIFO_PITCHLOCK);
 
 	if (!(vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY))
 		return 0;
@@ -1230,8 +1228,8 @@  int vmw_kms_restore_vga(struct vmw_private *vmw_priv)
 		vmw_write(vmw_priv, SVGA_REG_PITCHLOCK,
 			  vmw_priv->vga_pitchlock);
 	else if (vmw_fifo_have_pitchlock(vmw_priv))
-		iowrite32(vmw_priv->vga_pitchlock,
-			  vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
+		*(vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK) =
+			vmw_priv->vga_pitchlock;
 
 	if (!(vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY))
 		return 0;