diff mbox

[PATCHv4,5/7] iommu/tegra: smmu: Support "mmu-masters" binding

Message ID 1384158718-4756-6-git-send-email-hdoyu@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Hiroshi DOYU Nov. 11, 2013, 8:31 a.m. UTC
Follow arm,smmu's "mmu-masters" binding.

Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
---
Update:
Newly added for v4. In v3, I used "nvidia,swgroups" and
"nvidia,memory-clients" bindings.
---
 .../bindings/iommu/nvidia,tegra30-smmu.txt         |  28 ++++-
 drivers/iommu/tegra-smmu.c                         | 138 +++++++++++++++++----
 2 files changed, 141 insertions(+), 25 deletions(-)

Comments

Will Deacon Nov. 11, 2013, 11:35 a.m. UTC | #1
Hi Hiroshi,

On Mon, Nov 11, 2013 at 08:31:56AM +0000, Hiroshi Doyu wrote:
> Follow arm,smmu's "mmu-masters" binding.
> 
> Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> ---
> Update:
> Newly added for v4. In v3, I used "nvidia,swgroups" and
> "nvidia,memory-clients" bindings.

[...]

> @@ -1151,6 +1152,77 @@ static void tegra_smmu_create_default_map(struct smmu_device *smmu)
>  	}
>  }
>  
> +static struct smmu_client *find_smmu_client(struct smmu_device *smmu,
> +					    struct device_node *dev_node)
> +{
> +	struct rb_node *node = smmu->clients.rb_node;
> +
> +	while (node) {
> +		struct smmu_client *client;
> +
> +		client = container_of(node, struct smmu_client, node);
> +		if (dev_node < client->of_node)
> +			node = node->rb_left;
> +		else if (dev_node > client->of_node)
> +			node = node->rb_right;
> +		else
> +			return client;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int insert_smmu_client(struct smmu_device *smmu,
> +			      struct smmu_client *client)
> +{
> +	struct rb_node **new, *parent;
> +
> +	new = &smmu->clients.rb_node;
> +	parent = NULL;
> +	while (*new) {
> +		struct smmu_client *this;
> +		this = container_of(*new, struct smmu_client, node);
> +
> +		parent = *new;
> +		if (client->of_node < this->of_node)
> +			new = &((*new)->rb_left);
> +		else if (client->of_node > this->of_node)
> +			new = &((*new)->rb_right);
> +		else
> +			return -EEXIST;
> +	}
> +
> +	rb_link_node(&client->node, parent, new);
> +	rb_insert_color(&client->node, &smmu->clients);
> +	return 0;
> +}
> +
> +static int register_smmu_client(struct smmu_device *smmu,
> +				struct device *dev,
> +				struct of_phandle_args *args)
> +{
> +	struct smmu_client *client;
> +	int i;
> +
> +	client = find_smmu_client(smmu, args->np);
> +	if (client) {
> +		dev_err(dev,
> +			"rejecting multiple registrations for client device %s\n",
> +			args->np->full_name);
> +		return -EBUSY;
> +	}
> +
> +	client = devm_kzalloc(dev, sizeof(*client), GFP_KERNEL);
> +	if (!client)
> +		return -ENOMEM;
> +
> +	client->of_node = args->np;
> +	for (i = 0; i < args->args_count; i++)
> +		client->hwgrp |= 1ULL << args->args[i];
> +
> +	return insert_smmu_client(smmu, client);
> +}

This code looks familiar ;)

If this approach makes it past the DT guys, I wonder if there's any scope
for factoring out some of the arm-smmu code so that basic client
parsing/searching/registration can be made into a library?

Will
Hiroshi DOYU Nov. 11, 2013, 12:03 p.m. UTC | #2
Will Deacon <will.deacon@arm.com> wrote @ Mon, 11 Nov 2013 12:35:10 +0100:

> This code looks familiar ;)

;)
 
> If this approach makes it past the DT guys, I wonder if there's any scope
> for factoring out some of the arm-smmu code so that basic client
> parsing/searching/registration can be made into a library?

That's what I thought too. I just didn't proceed further till I get
some feedback on this design first. As you suggested, some of arm,smmu
part can be enough generic so that some other of IOMMU drivers can
make use of that as a library.
Stephen Warren Nov. 13, 2013, 12:17 a.m. UTC | #3
On 11/11/2013 01:31 AM, Hiroshi Doyu wrote:
> Follow arm,smmu's "mmu-masters" binding.

> diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt

> +- mmu-masters   : A list of phandles to device nodes representing bus
> +                  masters for which the SMMU can provide a translation
> +                  and their corresponding StreamIDs (see example below).
> +                  Each device node linked from this list must have a
> +                  "#stream-id-cells" property, indicating the number of
> +                  StreamIDs(swgroup ID) associated with it, which is defined
> +		  in "include/dt-bindings/memory/tegra-swgroup.h".

Some of those lines are indented with TABs, others with spaces.

> +		mmu-masters = <&host1x TEGRA_SWGROUP_HC>,
> +			      <&mpe TEGRA_SWGROUP_MPE>,
> +			      <&vi TEGRA_SWGROUP_VI>,
> +			      <&epp TEGRA_SWGROUP_EPP>,
> +			      <&isp TEGRA_SWGROUP_ISP>,
> +			      <&gr2d TEGRA_SWGROUP_G2>,
> +			      <&gr3d TEGRA_SWGROUP_NV TEGRA_SWGROUP_NV2>,

So right now, the driver is statically assigning clients to a couple of
specific ASIDs. What if we want to configure that mapping from DT; does
that make sense? Instead of mmu-masters being a list of <phandle
streamid*>, should it be <phandle ASID streamid*> or <phandle (streamid
ASID)*>?




> diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c

>  struct smmu_client {
...
> -	u32			hwgrp;
> +	u64			hwgrp;

I think that's used later with for_each_set_bit() etc. Should it be
declared as an explicit bitmap object, or at least an unsigned long to
directly match the bitmap APIs?

Related, what if someone bumps <dt-bindings/memory/tegra-swgroup.h>'s
TEGRA_SWGROUP_MAX to 96 without changing the code?

>  static int smmu_iommu_attach_dev(struct iommu_domain *domain,
>  				 struct device *dev)
...
> -	client = devm_kzalloc(smmu->dev, sizeof(*c), GFP_KERNEL);
> +	client = find_smmu_client(smmu, dev->of_node);
>  	if (!client)
... (deletions of replaced code)
>  		return -EINVAL;

-ENODEV cursorily sounds better? Same in smmu_iommu_add_device().

> @@ -1238,6 +1311,23 @@ static int tegra_smmu_probe(struct platform_device *pdev)

> +	i = 0;
> +	smmu->clients = RB_ROOT;
> +	while (true) {
> +		err = of_parse_phandle_with_args(dev->of_node, "mmu-masters",
> +						 "#stream-id-cells", i, &args);
> +		if (err)
> +			break;

An iterator macro similar to of_property_for_each_u32/string() might be
nicer, which could replace all that with:

of_property_for_each_phandle_with_args(dev->of_node, "mmu-masters",
					"#stream-id-cells") {
Hiroshi DOYU Nov. 13, 2013, 7:45 a.m. UTC | #4
On Wed, 13 Nov 2013 01:17:22 +0100
Stephen Warren <swarren@wwwdotorg.org> wrote:

> > +		mmu-masters = <&host1x TEGRA_SWGROUP_HC>,
> > +			      <&mpe TEGRA_SWGROUP_MPE>,
> > +			      <&vi TEGRA_SWGROUP_VI>,
> > +			      <&epp TEGRA_SWGROUP_EPP>,
> > +			      <&isp TEGRA_SWGROUP_ISP>,
> > +			      <&gr2d TEGRA_SWGROUP_G2>,
> > +			      <&gr3d TEGRA_SWGROUP_NV TEGRA_SWGROUP_NV2>,
> 
> So right now, the driver is statically assigning clients to a couple of
> specific ASIDs. What if we want to configure that mapping from DT; does
> that make sense? Instead of mmu-masters being a list of <phandle
> streamid*>, should it be <phandle ASID streamid*> or <phandle (streamid
> ASID)*>?

That's possible.

Here, swgroup ID == stream ID, and a device is statically bind to a
specific swgroup ID(hard coded). ASID is dynamically assigned to
swgroup(devices). So assigning ASID belongs to a policy, but we can
consider this assigning as board specifc policy since it's hard to
change after kernel boots up in general. So assigning ASID in a board
DT makes sense. The format would be:

  <phandle "swgroup ID" "asid">,

ex:
  <&host1x TEGRA_SWGROUP_HC 0>,

The above depends on the discussion of the standard IOMMU bindings,
but the number of argument can be set by each IOMMU driver.

If we take the the other way,

 smmu: iommu@xxxxxx {
       #iommu-cells = <3>;
       ^^^^^^^^^^^^^^^^^^
   };

   host1x {
           compatible = "nvidia,tegra30-host1x", "simple-bus";
           iommu = <&smmu 0x??????? 0x??????? "asid">;
	   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#######
           gr3d {
                   compatible = "nvidia,tegra30-gr3d";
                   iommu = <&smmu 0x??????? 0x???????>;
           }

I think that this "asid" part can be set 0 in tegra??.dtsi and the
actual value can be overwritten in tegra??-<boardname>.dts file.
Kumar Gala Nov. 13, 2013, 11:15 a.m. UTC | #5
On Nov 11, 2013, at 2:31 AM, Hiroshi Doyu <hdoyu@nvidia.com> wrote:

> Follow arm,smmu's "mmu-masters" binding.
> 
> Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> ---
> Update:
> Newly added for v4. In v3, I used "nvidia,swgroups" and
> "nvidia,memory-clients" bindings.
> ---
> .../bindings/iommu/nvidia,tegra30-smmu.txt         |  28 ++++-
> drivers/iommu/tegra-smmu.c                         | 138 +++++++++++++++++----
> 2 files changed, 141 insertions(+), 25 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
> index 89fb543..51884e9 100644
> --- a/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
> +++ b/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
> @@ -8,9 +8,16 @@ Required properties:
> - nvidia,#asids : # of ASIDs
> - dma-window : IOVA start address and length.
> - nvidia,ahb : phandle to the ahb bus connected to SMMU.
> +- mmu-masters   : A list of phandles to device nodes representing bus
> +                  masters for which the SMMU can provide a translation
> +                  and their corresponding StreamIDs (see example below).
> +                  Each device node linked from this list must have a
> +                  "#stream-id-cells" property, indicating the number of
> +                  StreamIDs(swgroup ID) associated with it, which is defined
> +		  in "include/dt-bindings/memory/tegra-swgroup.h".
> 
> Example:
> -	smmu {
> +	iommu {
> 		compatible = "nvidia,tegra30-smmu";
> 		reg = <0x7000f010 0x02c
> 		       0x7000f1f0 0x010
> @@ -18,4 +25,23 @@ Example:
> 		nvidia,#asids = <4>;		/* # of ASIDs */
> 		dma-window = <0 0x40000000>;	/* IOVA start & length */
> 		nvidia,ahb = <&ahb>;
> +
> +		mmu-masters = <&host1x TEGRA_SWGROUP_HC>,
> +			      <&mpe TEGRA_SWGROUP_MPE>,
> +			      <&vi TEGRA_SWGROUP_VI>,
> +			      <&epp TEGRA_SWGROUP_EPP>,
> +			      <&isp TEGRA_SWGROUP_ISP>,
> +			      <&gr2d TEGRA_SWGROUP_G2>,
> +			      <&gr3d TEGRA_SWGROUP_NV TEGRA_SWGROUP_NV2>,
> +			      <&dc TEGRA_SWGROUP_DC>,
> +			      <&dcb TEGRA_SWGROUP_DCB>,
> +			      <&uarta TEGRA_SWGROUP_PPCS>,
> +			      <&uartb TEGRA_SWGROUP_PPCS>,
> +			      <&uartc TEGRA_SWGROUP_PPCS>,
> +			      <&uartd TEGRA_SWGROUP_PPCS>,
> +			      <&uarte TEGRA_SWGROUP_PPCS>,
> +			      <&sdhci0 TEGRA_SWGROUP_PPCS>,
> +			      <&sdhci1 TEGRA_SWGROUP_PPCS>,
> +			      <&sdhci2 TEGRA_SWGROUP_PPCS>,
> +			      <&sdhci3 TEGRA_SWGROUP_PPCS>;
> 	};

At first glance this seems backward from all other cases we have in which we usually have the device have a property that points back, like interrupt-parent.

- k
Stephen Warren Nov. 13, 2013, 5:58 p.m. UTC | #6
On 11/13/2013 12:45 AM, Hiroshi Doyu wrote:
> On Wed, 13 Nov 2013 01:17:22 +0100
> Stephen Warren <swarren@wwwdotorg.org> wrote:
> 
>>> +		mmu-masters = <&host1x TEGRA_SWGROUP_HC>,
>>> +			      <&mpe TEGRA_SWGROUP_MPE>,
>>> +			      <&vi TEGRA_SWGROUP_VI>,
>>> +			      <&epp TEGRA_SWGROUP_EPP>,
>>> +			      <&isp TEGRA_SWGROUP_ISP>,
>>> +			      <&gr2d TEGRA_SWGROUP_G2>,
>>> +			      <&gr3d TEGRA_SWGROUP_NV TEGRA_SWGROUP_NV2>,
>>
>> So right now, the driver is statically assigning clients to a couple of
>> specific ASIDs. What if we want to configure that mapping from DT; does
>> that make sense? Instead of mmu-masters being a list of <phandle
>> streamid*>, should it be <phandle ASID streamid*> or <phandle (streamid
>> ASID)*>?
> 
> That's possible.
> 
> Here, swgroup ID == stream ID, and a device is statically bind to a
> specific swgroup ID(hard coded). ASID is dynamically assigned to
> swgroup(devices). So assigning ASID belongs to a policy, but we can
> consider this assigning as board specifc policy since it's hard to
> change after kernel boots up in general. So assigning ASID in a board
> DT makes sense. The format would be:
> 
>   <phandle "swgroup ID" "asid">,
> 
> ex:
>   <&host1x TEGRA_SWGROUP_HC 0>,
> 
> The above depends on the discussion of the standard IOMMU bindings,
> but the number of argument can be set by each IOMMU driver.
> 
> If we take the the other way,
> 
>  smmu: iommu@xxxxxx {
>        #iommu-cells = <3>;
>        ^^^^^^^^^^^^^^^^^^
>    };
> 
>    host1x {
>            compatible = "nvidia,tegra30-host1x", "simple-bus";
>            iommu = <&smmu 0x??????? 0x??????? "asid">;
> 	   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#######
>            gr3d {
>                    compatible = "nvidia,tegra30-gr3d";
>                    iommu = <&smmu 0x??????? 0x???????>;
>            }
> 
> I think that this "asid" part can be set 0 in tegra??.dtsi and the
> actual value can be overwritten in tegra??-<boardname>.dts file.

The one issue here is that we can only override entire properties, so
it's not possible for a board file to *just* replace the ASID, it'd have
to duplicate the entire property, just to change the one value.

Is the ASID mapping really likely to be board-specific though? To my
naive thinking, it seems that the chip design (e.g. number of
peripherals, number of available ASIDs) would tend to imply the
device->ASID mapping, since it would have been considered as part of
chip design. Hence, wouldn't soc.dtsi typically specify the expected
ASID mapping, and boards rarely if ever override it?

If the ASID mapping really is likely to vary per board, perhaps it makes
sense to put it into a separate property somehow so it's easier to override?
Hiroshi DOYU Nov. 14, 2013, 6:41 a.m. UTC | #7
Stephen Warren <swarren@wwwdotorg.org> wrote @ Wed, 13 Nov 2013 18:58:23 +0100:

> >  smmu: iommu@xxxxxx {
> >        #iommu-cells = <3>;
> >        ^^^^^^^^^^^^^^^^^^
> >    };
> > 
> >    host1x {
> >            compatible = "nvidia,tegra30-host1x", "simple-bus";
> >            iommu = <&smmu 0x??????? 0x??????? "asid">;
> > 	   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#######
> >            gr3d {
> >                    compatible = "nvidia,tegra30-gr3d";
> >                    iommu = <&smmu 0x??????? 0x???????>;
> >            }
> > 
> > I think that this "asid" part can be set 0 in tegra??.dtsi and the
> > actual value can be overwritten in tegra??-<boardname>.dts file.
> 
> The one issue here is that we can only override entire properties, so
> it's not possible for a board file to *just* replace the ASID, it'd have
> to duplicate the entire property, just to change the one value.
> 
> Is the ASID mapping really likely to be board-specific though? To my
> naive thinking, it seems that the chip design (e.g. number of
> peripherals, number of available ASIDs) would tend to imply the
> device->ASID mapping, since it would have been considered as part of
> chip design. Hence, wouldn't soc.dtsi typically specify the expected
> ASID mapping, and boards rarely if ever override it?
> 
> If the ASID mapping really is likely to vary per board, perhaps it makes
> sense to put it into a separate property somehow so it's easier to override?

  Older Tegra like T30: swgroups > asid(==4)
  Newer Tegra         : swgroups < asid

At newer Tegra, each swgroup should have one asid. In older Tegra,
there aren't many options how to assign ASID since # of AS is 4, and
we know which one should be isolated/protected first. Even we can fix
this assignment for a SoC, not for a board.

Also I think that a newer tegra won't care this mapping since all will
have an own AS. So maybe the binding between "swgroup" and "asid"
should be another one, since this info is only needed for the older
Tegra. For example:

  smmu: iommu@xxxxxx {
        #iommu-cells = <2>;

	/*
	 * This as <-> swgroups mapping is only needed for older tegra.
	 */
	as-swgroups = <0x??????? 0x???????,  /* AS[0]: assigned swgroups bits */
		       0x??????? 0x???????,  /* AS[1]: assigned swgroups bits */
		       0x??????? 0x???????,  /* AS[2]: assigned swgroups bits */
		       0x??????? 0x???????>; /* AS[3]: assigned swgroups bits */
    };
 
    host1x {
            compatible = "nvidia,tegra30-host1x", "simple-bus";
            iommus = <&smmu 0x??????? 0x???????>;
            gr3d {
                    compatible = "nvidia,tegra30-gr3d";
                    iommus = <&smmu 0x??????? 0x???????>;
            }
   };

Then we would push this mapping info into DT only for older
Tegra. Newer Tegra won't care where each swgroup will get its own.
Stephen Warren Nov. 14, 2013, 4:59 p.m. UTC | #8
On 11/13/2013 11:41 PM, Hiroshi Doyu wrote:
> Stephen Warren <swarren@wwwdotorg.org> wrote @ Wed, 13 Nov 2013 18:58:23 +0100:
> 
>>>  smmu: iommu@xxxxxx {
>>>        #iommu-cells = <3>;
>>>        ^^^^^^^^^^^^^^^^^^
>>>    };
>>>
>>>    host1x {
>>>            compatible = "nvidia,tegra30-host1x", "simple-bus";
>>>            iommu = <&smmu 0x??????? 0x??????? "asid">;
>>> 	   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#######
>>>            gr3d {
>>>                    compatible = "nvidia,tegra30-gr3d";
>>>                    iommu = <&smmu 0x??????? 0x???????>;
>>>            }
>>>
>>> I think that this "asid" part can be set 0 in tegra??.dtsi and the
>>> actual value can be overwritten in tegra??-<boardname>.dts file.
>>
>> The one issue here is that we can only override entire properties, so
>> it's not possible for a board file to *just* replace the ASID, it'd have
>> to duplicate the entire property, just to change the one value.
>>
>> Is the ASID mapping really likely to be board-specific though? To my
>> naive thinking, it seems that the chip design (e.g. number of
>> peripherals, number of available ASIDs) would tend to imply the
>> device->ASID mapping, since it would have been considered as part of
>> chip design. Hence, wouldn't soc.dtsi typically specify the expected
>> ASID mapping, and boards rarely if ever override it?
>>
>> If the ASID mapping really is likely to vary per board, perhaps it makes
>> sense to put it into a separate property somehow so it's easier to override?
> 
>   Older Tegra like T30: swgroups > asid(==4)
>   Newer Tegra         : swgroups < asid

In that case, I'd vote for hard-coding the mapping in the driver in all
cases. For older Tegra, we'll have to hard-code some static mapping just
like you've already done in the driver. For newer Tegra, we would just
assign a new AS for each swgroup as you say. If we ever need to tweak
this, we can invent a new DT property to affect the default. That makes
the DT content quite a bit simpler for now:-)
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
index 89fb543..51884e9 100644
--- a/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
@@ -8,9 +8,16 @@  Required properties:
 - nvidia,#asids : # of ASIDs
 - dma-window : IOVA start address and length.
 - nvidia,ahb : phandle to the ahb bus connected to SMMU.
+- mmu-masters   : A list of phandles to device nodes representing bus
+                  masters for which the SMMU can provide a translation
+                  and their corresponding StreamIDs (see example below).
+                  Each device node linked from this list must have a
+                  "#stream-id-cells" property, indicating the number of
+                  StreamIDs(swgroup ID) associated with it, which is defined
+		  in "include/dt-bindings/memory/tegra-swgroup.h".
 
 Example:
-	smmu {
+	iommu {
 		compatible = "nvidia,tegra30-smmu";
 		reg = <0x7000f010 0x02c
 		       0x7000f1f0 0x010
@@ -18,4 +25,23 @@  Example:
 		nvidia,#asids = <4>;		/* # of ASIDs */
 		dma-window = <0 0x40000000>;	/* IOVA start & length */
 		nvidia,ahb = <&ahb>;
+
+		mmu-masters = <&host1x TEGRA_SWGROUP_HC>,
+			      <&mpe TEGRA_SWGROUP_MPE>,
+			      <&vi TEGRA_SWGROUP_VI>,
+			      <&epp TEGRA_SWGROUP_EPP>,
+			      <&isp TEGRA_SWGROUP_ISP>,
+			      <&gr2d TEGRA_SWGROUP_G2>,
+			      <&gr3d TEGRA_SWGROUP_NV TEGRA_SWGROUP_NV2>,
+			      <&dc TEGRA_SWGROUP_DC>,
+			      <&dcb TEGRA_SWGROUP_DCB>,
+			      <&uarta TEGRA_SWGROUP_PPCS>,
+			      <&uartb TEGRA_SWGROUP_PPCS>,
+			      <&uartc TEGRA_SWGROUP_PPCS>,
+			      <&uartd TEGRA_SWGROUP_PPCS>,
+			      <&uarte TEGRA_SWGROUP_PPCS>,
+			      <&sdhci0 TEGRA_SWGROUP_PPCS>,
+			      <&sdhci1 TEGRA_SWGROUP_PPCS>,
+			      <&sdhci2 TEGRA_SWGROUP_PPCS>,
+			      <&sdhci3 TEGRA_SWGROUP_PPCS>;
 	};
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 67252e1..ab198ce 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -206,10 +206,12 @@  enum {
  * Per client for address space
  */
 struct smmu_client {
+	struct device_node	*of_node;
+	struct rb_node		node;
 	struct device		*dev;
 	struct list_head	list;
 	struct smmu_as		*as;
-	u32			hwgrp;
+	u64			hwgrp;
 };
 
 /*
@@ -249,6 +251,7 @@  struct smmu_device {
 	spinlock_t	lock;
 	char		*name;
 	struct device	*dev;
+	struct rb_root	clients;
 	struct page *avp_vector_page;	/* dummy page shared by all AS's */
 
 	/*
@@ -326,23 +329,22 @@  static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs)
  */
 #define FLUSH_SMMU_REGS(smmu)	smmu_read(smmu, SMMU_CONFIG)
 
-#define smmu_client_hwgrp(c) (u32)((c)->dev->platform_data)
-
 static int __smmu_client_set_hwgrp(struct smmu_client *c,
-				   unsigned long map, int on)
+				   u64 map, int on)
 {
 	int i;
 	struct smmu_as *as = c->as;
 	u32 val, offs, mask = SMMU_ASID_ENABLE(as->asid);
 	struct smmu_device *smmu = as->smmu;
+	unsigned long *bitmap = (unsigned long *)&map;
 
 	WARN_ON(!on && map);
 	if (on && !map)
 		return -EINVAL;
 	if (!on)
-		map = smmu_client_hwgrp(c);
+		map = c->hwgrp;
 
-	for_each_set_bit(i, &map, HWGRP_COUNT) {
+	for_each_set_bit(i, bitmap, TEGRA_SWGROUP_MAX) {
 		offs = HWGRP_ASID_REG(i);
 		val = smmu_read(smmu, offs);
 		if (on) {
@@ -360,7 +362,7 @@  static int __smmu_client_set_hwgrp(struct smmu_client *c,
 	return 0;
 
 err_hw_busy:
-	for_each_set_bit(i, &map, HWGRP_COUNT) {
+	for_each_set_bit(i, bitmap, TEGRA_SWGROUP_MAX) {
 		offs = HWGRP_ASID_REG(i);
 		val = smmu_read(smmu, offs);
 		val &= ~mask;
@@ -732,27 +734,26 @@  static int smmu_iommu_domain_has_cap(struct iommu_domain *domain,
 	return 0;
 }
 
+static struct smmu_client *find_smmu_client(struct smmu_device *smmu,
+					    struct device_node *dev_node);
+
 static int smmu_iommu_attach_dev(struct iommu_domain *domain,
 				 struct device *dev)
 {
 	struct smmu_as *as = domain->priv;
 	struct smmu_device *smmu = as->smmu;
 	struct smmu_client *client, *c;
-	u32 map;
 	int err;
 
-	client = devm_kzalloc(smmu->dev, sizeof(*c), GFP_KERNEL);
+	client = find_smmu_client(smmu, dev->of_node);
 	if (!client)
-		return -ENOMEM;
-	client->dev = dev;
-	client->as = as;
-	map = (unsigned long)dev->platform_data;
-	if (!map)
 		return -EINVAL;
 
-	err = smmu_client_enable_hwgrp(client, map);
+	client->dev = dev;
+	client->as = as;
+	err = smmu_client_enable_hwgrp(client, client->hwgrp);
 	if (err)
-		goto err_hwgrp;
+		return -EINVAL;
 
 	spin_lock(&as->client_lock);
 	list_for_each_entry(c, &as->client, list) {
@@ -770,7 +771,7 @@  static int smmu_iommu_attach_dev(struct iommu_domain *domain,
 	 * Reserve "page zero" for AVP vectors using a common dummy
 	 * page.
 	 */
-	if (map & TEGRA_SWGROUP_BIT(AVPC)) {
+	if (client->hwgrp & TEGRA_SWGROUP_BIT(AVPC)) {
 		struct page *page;
 
 		page = as->smmu->avp_vector_page;
@@ -785,8 +786,6 @@  static int smmu_iommu_attach_dev(struct iommu_domain *domain,
 err_client:
 	smmu_client_disable_hwgrp(client);
 	spin_unlock(&as->client_lock);
-err_hwgrp:
-	devm_kfree(smmu->dev, client);
 	return err;
 }
 
@@ -803,7 +802,6 @@  static void smmu_iommu_detach_dev(struct iommu_domain *domain,
 		if (c->dev == dev) {
 			smmu_client_disable_hwgrp(c);
 			list_del(&c->list);
-			devm_kfree(smmu->dev, c);
 			c->as = NULL;
 			dev_dbg(smmu->dev,
 				"%s is detached\n", dev_name(c->dev));
@@ -907,11 +905,14 @@  enum {
 static int smmu_iommu_add_device(struct device *dev)
 {
 	int err = -EPROBE_DEFER;
-	u64 swgroups;
 	struct dma_iommu_mapping *map = NULL;
+	struct smmu_client *client;
+
+	client = find_smmu_client(smmu_handle, dev->of_node);
+	if (!client)
+		return -EINVAL;
 
-	swgroups = smmu_of_get_memory_client(dev);
-	switch (swgroups) {
+	switch (client->hwgrp) {
 	case TEGRA_SWGROUP_BIT(PPCS):
 		map = smmu_handle->map[SYSTEM_PROTECTED];
 		break;
@@ -924,7 +925,7 @@  static int smmu_iommu_add_device(struct device *dev)
 		err = arm_iommu_attach_device(dev, map);
 
 	pr_debug("swgroups=%016llx map=%p err=%d %s\n",
-		 swgroups, map, err, dev_name(dev));
+		 client->hwgrp, map, err, dev_name(dev));
 
 	return err;
 }
@@ -1151,6 +1152,77 @@  static void tegra_smmu_create_default_map(struct smmu_device *smmu)
 	}
 }
 
+static struct smmu_client *find_smmu_client(struct smmu_device *smmu,
+					    struct device_node *dev_node)
+{
+	struct rb_node *node = smmu->clients.rb_node;
+
+	while (node) {
+		struct smmu_client *client;
+
+		client = container_of(node, struct smmu_client, node);
+		if (dev_node < client->of_node)
+			node = node->rb_left;
+		else if (dev_node > client->of_node)
+			node = node->rb_right;
+		else
+			return client;
+	}
+
+	return NULL;
+}
+
+static int insert_smmu_client(struct smmu_device *smmu,
+			      struct smmu_client *client)
+{
+	struct rb_node **new, *parent;
+
+	new = &smmu->clients.rb_node;
+	parent = NULL;
+	while (*new) {
+		struct smmu_client *this;
+		this = container_of(*new, struct smmu_client, node);
+
+		parent = *new;
+		if (client->of_node < this->of_node)
+			new = &((*new)->rb_left);
+		else if (client->of_node > this->of_node)
+			new = &((*new)->rb_right);
+		else
+			return -EEXIST;
+	}
+
+	rb_link_node(&client->node, parent, new);
+	rb_insert_color(&client->node, &smmu->clients);
+	return 0;
+}
+
+static int register_smmu_client(struct smmu_device *smmu,
+				struct device *dev,
+				struct of_phandle_args *args)
+{
+	struct smmu_client *client;
+	int i;
+
+	client = find_smmu_client(smmu, args->np);
+	if (client) {
+		dev_err(dev,
+			"rejecting multiple registrations for client device %s\n",
+			args->np->full_name);
+		return -EBUSY;
+	}
+
+	client = devm_kzalloc(dev, sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return -ENOMEM;
+
+	client->of_node = args->np;
+	for (i = 0; i < args->args_count; i++)
+		client->hwgrp |= 1ULL << args->args[i];
+
+	return insert_smmu_client(smmu, client);
+}
+
 static int tegra_smmu_probe(struct platform_device *pdev)
 {
 	struct smmu_device *smmu;
@@ -1158,6 +1230,7 @@  static int tegra_smmu_probe(struct platform_device *pdev)
 	int i, asids, err = 0;
 	dma_addr_t uninitialized_var(base);
 	size_t bytes, uninitialized_var(size);
+	struct of_phandle_args args;
 
 	if (smmu_handle)
 		return -EIO;
@@ -1238,6 +1311,23 @@  static int tegra_smmu_probe(struct platform_device *pdev)
 		return err;
 	platform_set_drvdata(pdev, smmu);
 
+	i = 0;
+	smmu->clients = RB_ROOT;
+	while (true) {
+		err = of_parse_phandle_with_args(dev->of_node, "mmu-masters",
+						 "#stream-id-cells", i, &args);
+		if (err)
+			break;
+
+		err = register_smmu_client(smmu, dev, &args);
+		if (err) {
+			dev_err(dev, "failed to add client %s\n",
+				args.np->full_name);
+		}
+
+		i++;
+	}
+
 	smmu->avp_vector_page = alloc_page(GFP_KERNEL);
 	if (!smmu->avp_vector_page)
 		return -ENOMEM;