diff mbox

[RFC,dtc] C-based DT schema checker integrated into dtc

Message ID 1382651488-9696-1-git-send-email-swarren@wwwdotorg.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stephen Warren Oct. 24, 2013, 9:51 p.m. UTC
From: Stephen Warren <swarren@nvidia.com>

This is a very quick proof-of-concept re: how a DT schema checker might
look if written in C, and integrated into dtc.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 Makefile.dtc                              |   11 ++-
 checks.c                                  |   11 +++
 schemas/clock/clock.c                     |   16 ++++
 schemas/gpio/gpio.c                       |   13 ++++
 schemas/i2c/i2c.c                         |   17 +++++
 schemas/i2c/nvidia,tegra20-i2c.c          |   20 +++++
 schemas/interrupt-controller/interrupts.c |   14 ++++
 schemas/mmio-bus.c                        |   15 ++++
 schemas/root-node.c                       |   27 +++++++
 schemas/schema.c                          |  119 +++++++++++++++++++++++++++++
 schemas/schema.h                          |   68 +++++++++++++++++
 schemas/sound/wlf,wm8903.c                |   20 +++++
 test-schema.dts                           |   45 +++++++++++
 13 files changed, 395 insertions(+), 1 deletion(-)
 create mode 100644 schemas/clock/clock.c
 create mode 100644 schemas/gpio/gpio.c
 create mode 100644 schemas/i2c/i2c.c
 create mode 100644 schemas/i2c/nvidia,tegra20-i2c.c
 create mode 100644 schemas/interrupt-controller/interrupts.c
 create mode 100644 schemas/mmio-bus.c
 create mode 100644 schemas/root-node.c
 create mode 100644 schemas/schema.c
 create mode 100644 schemas/schema.h
 create mode 100644 schemas/sound/wlf,wm8903.c
 create mode 100644 test-schema.dts

Comments

Grant Likely Oct. 24, 2013, 11:43 p.m. UTC | #1
On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren <swarren@wwwdotorg.org> wrote:
> From: Stephen Warren <swarren@nvidia.com>
> 
> This is a very quick proof-of-concept re: how a DT schema checker might
> look if written in C, and integrated into dtc.

Thanks for looking at this.

Very interesting. Certainly an expedient way to start checking schemas,
and for certain bindings it may be the best approach. The downside is it
forces a recompilation of DTC to bring in new bindings and it isn't a
great meduim for mixing schema with documentation in the bindings.

g.

> 
> Signed-off-by: Stephen Warren <swarren@nvidia.com>
> ---
>  Makefile.dtc                              |   11 ++-
>  checks.c                                  |   11 +++
>  schemas/clock/clock.c                     |   16 ++++
>  schemas/gpio/gpio.c                       |   13 ++++
>  schemas/i2c/i2c.c                         |   17 +++++
>  schemas/i2c/nvidia,tegra20-i2c.c          |   20 +++++
>  schemas/interrupt-controller/interrupts.c |   14 ++++
>  schemas/mmio-bus.c                        |   15 ++++
>  schemas/root-node.c                       |   27 +++++++
>  schemas/schema.c                          |  119 +++++++++++++++++++++++++++++
>  schemas/schema.h                          |   68 +++++++++++++++++
>  schemas/sound/wlf,wm8903.c                |   20 +++++
>  test-schema.dts                           |   45 +++++++++++
>  13 files changed, 395 insertions(+), 1 deletion(-)
>  create mode 100644 schemas/clock/clock.c
>  create mode 100644 schemas/gpio/gpio.c
>  create mode 100644 schemas/i2c/i2c.c
>  create mode 100644 schemas/i2c/nvidia,tegra20-i2c.c
>  create mode 100644 schemas/interrupt-controller/interrupts.c
>  create mode 100644 schemas/mmio-bus.c
>  create mode 100644 schemas/root-node.c
>  create mode 100644 schemas/schema.c
>  create mode 100644 schemas/schema.h
>  create mode 100644 schemas/sound/wlf,wm8903.c
>  create mode 100644 test-schema.dts
> 
> diff --git a/Makefile.dtc b/Makefile.dtc
> index bece49b..824aaaf 100644
> --- a/Makefile.dtc
> +++ b/Makefile.dtc
> @@ -12,7 +12,16 @@ DTC_SRCS = \
>  	livetree.c \
>  	srcpos.c \
>  	treesource.c \
> -	util.c
> +	util.c \
> +	schemas/mmio-bus.c \
> +	schemas/root-node.c \
> +	schemas/schema.c \
> +	schemas/clock/clock.c \
> +	schemas/gpio/gpio.c \
> +	schemas/i2c/i2c.c \
> +	schemas/i2c/nvidia,tegra20-i2c.c \
> +	schemas/interrupt-controller/interrupts.c \
> +	schemas/sound/wlf,wm8903.c
>  
>  DTC_GEN_SRCS = dtc-lexer.lex.c dtc-parser.tab.c
>  DTC_OBJS = $(DTC_SRCS:%.c=%.o) $(DTC_GEN_SRCS:%.c=%.o)
> diff --git a/checks.c b/checks.c
> index ee96a25..49143b3 100644
> --- a/checks.c
> +++ b/checks.c
> @@ -19,6 +19,7 @@
>   */
>  
>  #include "dtc.h"
> +#include "schemas/schema.h"
>  
>  #ifdef TRACE_CHECKS
>  #define TRACE(c, ...) \
> @@ -236,6 +237,14 @@ static void check_is_cell(struct check *c, struct node *root,
>   * Structural check functions
>   */
>  
> +static void check_schema(struct check *c, struct node *dt,
> +				       struct node *node)
> +{
> +	if (schema_check_node(node))
> +		FAIL(c, "Schema check failed for %s", node->fullpath);
> +}
> +NODE_ERROR(schema, NULL);
> +
>  static void check_duplicate_node_names(struct check *c, struct node *dt,
>  				       struct node *node)
>  {
> @@ -652,6 +661,8 @@ static void check_obsolete_chosen_interrupt_controller(struct check *c,
>  TREE_WARNING(obsolete_chosen_interrupt_controller, NULL);
>  
>  static struct check *check_table[] = {
> +	&schema,
> +
>  	&duplicate_node_names, &duplicate_property_names,
>  	&node_name_chars, &node_name_format, &property_name_chars,
>  	&name_is_string, &name_properties,
> diff --git a/schemas/clock/clock.c b/schemas/clock/clock.c
> new file mode 100644
> index 0000000..0b9ca1f
> --- /dev/null
> +++ b/schemas/clock/clock.c
> @@ -0,0 +1,16 @@
> +#include "schema.h"
> +
> +void is_a_clock_provider(struct node *node, int clock_cells)
> +{
> +	required_integer_property(node, "#clock-cells", clock_cells);
> +}
> +
> +void is_a_clock_consumer_by_name(struct node *node, int clock_count)
> +{
> +	required_property(node, "clock-names");
> +	/* FIXME: validate all required names are present */
> +	/* FIXME: validate all names present are in list of valid names */
> +	required_property(node, "clocks");
> +	/* FIXME: validate phandle, specifier list in property */
> +	/* FIXME: validate len(clocks) =~ len(clock-names) * #clock-cells */
> +}
> diff --git a/schemas/gpio/gpio.c b/schemas/gpio/gpio.c
> new file mode 100644
> index 0000000..e52f161
> --- /dev/null
> +++ b/schemas/gpio/gpio.c
> @@ -0,0 +1,13 @@
> +#include "schema.h"
> +
> +void is_a_gpio_provider(struct node *node, int gpio_cells)
> +{
> +	required_boolean_property(node, "gpio-controller");
> +	required_integer_property(node, "#gpio-cells", gpio_cells);
> +}
> +
> +void is_a_gpio_consumer(struct node *node, const char *propname)
> +{
> +	required_property(node, propname);
> +	/* FIXME: validate phandle, specifier list in property */
> +}
> diff --git a/schemas/i2c/i2c.c b/schemas/i2c/i2c.c
> new file mode 100644
> index 0000000..0772ea3
> --- /dev/null
> +++ b/schemas/i2c/i2c.c
> @@ -0,0 +1,17 @@
> +#include "../schema.h"
> +
> +void is_an_i2c_bus(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	is_an_mmio_bus(node, 1, 0);
> +	required_property(node, "#address-cells");
> +	required_property(node, "#size-cells");
> +	optional_property(node, "clock-frequency");
> +	/* FIXME: set internal tag on *node to mark it as an I2C bus */
> +}
> +
> +void is_an_i2c_bus_child(struct node *node)
> +{
> +	/* FIXME: validate that is_an_i2c_bus() was called on node->parent */
> +}
> diff --git a/schemas/i2c/nvidia,tegra20-i2c.c b/schemas/i2c/nvidia,tegra20-i2c.c
> new file mode 100644
> index 0000000..c616f33
> --- /dev/null
> +++ b/schemas/i2c/nvidia,tegra20-i2c.c
> @@ -0,0 +1,20 @@
> +#include "../schema.h"
> +
> +static const char *compats_nvidia_tegra20_i2c[] = {
> +	"nvidia,tegra20-i2c",
> +	"nvidia,tegra30-i2c",
> +	"nvidia,tegra114-i2c",
> +	"nvidia,tegra124-i2c",
> +	NULL,
> +};
> +
> +static void checkfn_nvidia_tegra20_i2c(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	is_an_mmio_bus_child(node, 1);
> +	is_an_i2c_bus(node);
> +	is_an_interrupt_consumer_by_index(node, 1);
> +	is_a_clock_consumer_by_name(node, 2);
> +}
> +SCHEMA_MATCH_COMPATIBLE(nvidia_tegra20_i2c);
> diff --git a/schemas/interrupt-controller/interrupts.c b/schemas/interrupt-controller/interrupts.c
> new file mode 100644
> index 0000000..39191a8
> --- /dev/null
> +++ b/schemas/interrupt-controller/interrupts.c
> @@ -0,0 +1,14 @@
> +#include "schema.h"
> +
> +void is_an_interrupt_provider(struct node *node, int int_cells)
> +{
> +	required_boolean_property(node, "interrupt-controller");
> +	required_integer_property(node, "#interrupt-cells", int_cells);
> +}
> +
> +void is_an_interrupt_consumer_by_index(struct node *node, int int_count)
> +{
> +	required_property(node, "interrupts");
> +	/* FIXME: validate phandle, specifier list in property */
> +	/* FIXME: validate len(interrupts) =~ int_count * #interrupt-cells */
> +}
> diff --git a/schemas/mmio-bus.c b/schemas/mmio-bus.c
> new file mode 100644
> index 0000000..74b5410
> --- /dev/null
> +++ b/schemas/mmio-bus.c
> @@ -0,0 +1,15 @@
> +#include "schema.h"
> +
> +void is_an_mmio_bus(struct node *node, int address_cells, int size_cells)
> +{
> +	required_integer_property(node, "#address-cells", address_cells);
> +	required_integer_property(node, "#size-cells", size_cells);
> +	/* FIXME: set internal tag on *node to mark it as an MMIO bus */
> +}
> +
> +void is_an_mmio_bus_child(struct node *node, int reg_count)
> +{
> +	/* FIXME: validate that is_an_mmio_bus() was called on node->parent */
> +	required_property(node, "reg");
> +	/* FIXME: validate len(reg) == reg_count * (#address-+#size-cells) */
> +}
> diff --git a/schemas/root-node.c b/schemas/root-node.c
> new file mode 100644
> index 0000000..c6ab0c7
> --- /dev/null
> +++ b/schemas/root-node.c
> @@ -0,0 +1,27 @@
> +#include "schema.h"
> +
> +static void checkfn_root_node(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	/*
> +	 * FIXME: Need to allow is_an_mmio_bus() that allows any values of
> +	 * #address-cells/#size-cells
> +	 */
> +	is_an_mmio_bus(node, 1, 1);
> +	/*
> +	 * FIXME: call required_string_property() here instead, or perhaps
> +	 * required_property(node, "compatible", check_propval_string);
> +	 * where check_propval_string() is a function that performs additional
> +	 * checks on the property value.
> +	 */
> +	required_property(node, "compatible");
> +	/*
> +	 * FIXME: call optional_string_property() here instead, or perhaps
> +	 * optional_property(node, "compatible", check_propval_string);
> +	 * where check_propval_string() is a function that performs additional
> +	 * checks on the property value.
> +	 */
> +	optional_property(node, "model");
> +}
> +SCHEMA_MATCH_PATH(root_node, "/");
> diff --git a/schemas/schema.c b/schemas/schema.c
> new file mode 100644
> index 0000000..cb78170
> --- /dev/null
> +++ b/schemas/schema.c
> @@ -0,0 +1,119 @@
> +#include "schema.h"
> +
> +/* FIXME: automate this table... */
> +extern struct schema_checker schema_checker_root_node;
> +extern struct schema_checker schema_checker_nvidia_tegra20_i2c;
> +extern struct schema_checker schema_checker_wlf_wm8903;
> +static const struct schema_checker *schema_checkers[] = {
> +	&schema_checker_root_node,
> +	&schema_checker_nvidia_tegra20_i2c,
> +	&schema_checker_wlf_wm8903,
> +};
> +
> +int schema_check_node(struct node *node)
> +{
> +	int i;
> +	const struct schema_checker *checker, *best_checker = NULL;
> +	int match, best_match = 0;
> +
> +	for (i = 0; i < ARRAY_SIZE(schema_checkers); i++) {
> +		checker = schema_checkers[i];
> +		match = checker->matchfn(node, checker);
> +		if (match) {
> +			printf("INFO: Node %s matches checker %s at level %d\n",
> +				node->fullpath, checker->name, match);
> +			if (!best_checker || (match > best_match)) {
> +				best_match = match;
> +				best_checker = checker;
> +			}
> +		}
> +	}
> +
> +	if (!best_checker) {
> +		printf("WARNING: no schema for node %s\n", node->fullpath);
> +		return 0;
> +	}
> +
> +	printf("INFO: Node %s selected checker %s\n", node->fullpath,
> +		best_checker->name);
> +
> +	best_checker->checkfn(node);
> +
> +	/*
> +	 * FIXME: grab validation state from global somewhere.
> +	 * Using global state avoids having check return values after every
> +	 * function call, thus making the code less verbose and appear more
> +	 * assertion-based.
> +	 */
> +	return 0;
> +}
> +
> +int schema_match_path(struct node *node, const struct schema_checker *checker)
> +{
> +	return !strcmp(node->fullpath, checker->u.path.path);
> +}
> +
> +int schema_match_compatible(struct node *node,
> +				const struct schema_checker *checker)
> +{
> +	struct property *compat_prop;
> +	int index;
> +	const char *node_compat;
> +	const char **test_compats;
> +
> +	compat_prop = get_property(node, "compatible");
> +	if (!compat_prop)
> +		return 0;
> +
> +	/*
> +	 * The best_match logic here is to find the checker entry that matches
> +	 * the first compatible value in the node, then if there's no match,
> +	 * fall back to finding the checker that matches the second compatible
> +	 * value, etc. Perhaps we want to run all checkers instead? Especially,
> +	 * we might want to run all different types of checker (by path name,
> +	 * by compatible).
> +	 */
> +	for (node_compat = compat_prop->val.val, index = 0;
> +			*node_compat;
> +			node_compat += strlen(node_compat) + 1, index++) {
> +		for (test_compats = checker->u.compatible.compats;
> +				*test_compats; test_compats++) {
> +			if (!strcmp(node_compat, *test_compats))
> +				return -(index + 1);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +void required_property(struct node *node, const char *propname)
> +{
> +	struct property *prop;
> +
> +	prop = get_property(node, propname);
> +	if (!prop) {
> +		/*
> +		 * FIXME: set global error state. The same comment applies
> +		 * everywhere.
> +		 */
> +		printf("ERROR: node %s missing property %s\n", node->fullpath,
> +			propname);
> +	}
> +}
> +
> +void required_boolean_property(struct node *node, const char *propname)
> +{
> +	required_property(node, propname);
> +	/* FIXME: Validate it's length is zero if present */
> +}
> +
> +void required_integer_property(struct node *node, const char *propname,
> +				int value)
> +{
> +	required_property(node, propname);
> +	/* FIXME: Validate it's length is 1 cell, and value matches */
> +}
> +
> +void optional_property(struct node *node, const char *propname)
> +{
> +}
> diff --git a/schemas/schema.h b/schemas/schema.h
> new file mode 100644
> index 0000000..74e9931
> --- /dev/null
> +++ b/schemas/schema.h
> @@ -0,0 +1,68 @@
> +#ifndef _SCHEMAS_SCHEMA_H
> +#define _SCHEMAS_SCHEMA_H
> +
> +#include "dtc.h"
> +
> +struct schema_checker;
> +
> +typedef int (schema_matcher_func)(struct node *node,
> +					const struct schema_checker *checker);
> +typedef void (schema_checker_func)(struct node *node);
> +
> +struct schema_checker {
> +	const char *name;
> +	schema_matcher_func *matchfn;
> +	schema_checker_func *checkfn;
> +	union {
> +		struct {
> +			const char *path;
> +		} path;
> +		struct {
> +			const char **compats;
> +		} compatible;
> +	} u;
> +};
> +
> +int schema_check_node(struct node *node);
> +
> +int schema_match_path(struct node *node, const struct schema_checker *checker);
> +int schema_match_compatible(struct node *node,
> +				const struct schema_checker *checker);
> +
> +#define SCHEMA_MATCH_PATH(_name_, _path_) \
> +	struct schema_checker schema_checker_##_name_ = { \
> +		.name = #_name_, \
> +		.matchfn = schema_match_path, \
> +		.checkfn = checkfn_##_name_, \
> +		.u.path.path = _path_, \
> +	};
> +
> +#define SCHEMA_MATCH_COMPATIBLE(_name_) \
> +	struct schema_checker schema_checker_##_name_ = { \
> +		.name = #_name_, \
> +		.matchfn = schema_match_compatible, \
> +		.checkfn = checkfn_##_name_, \
> +		.u.compatible.compats = compats_##_name_, \
> +	};
> +
> +void required_property(struct node *node, const char *propname);
> +void required_boolean_property(struct node *node, const char *propname);
> +void required_integer_property(struct node *node, const char *propname,
> +				int value);
> +void optional_property(struct node *node, const char *propname);
> +void is_an_mmio_bus(struct node *node, int address_cells, int size_cells);
> +void is_an_mmio_bus_child(struct node *node, int reg_count);
> +void is_an_i2c_bus(struct node *node);
> +void is_an_i2c_bus_child(struct node *node);
> +void is_a_gpio_provider(struct node *node, int gpio_cells);
> +void is_a_gpio_consumer(struct node *node, const char *propname);
> +void is_an_interrupt_provider(struct node *node, int int_cells);
> +void is_an_interrupt_consumer_by_index(struct node *node, int int_count);
> +void is_a_clock_provider(struct node *node, int clock_cells);
> +/*
> + * FIXME: pass in a list of required and optional clock names instead of a
> + * count
> + */
> +void is_a_clock_consumer_by_name(struct node *node, int clock_count);
> +
> +#endif
> diff --git a/schemas/sound/wlf,wm8903.c b/schemas/sound/wlf,wm8903.c
> new file mode 100644
> index 0000000..f6ac49d
> --- /dev/null
> +++ b/schemas/sound/wlf,wm8903.c
> @@ -0,0 +1,20 @@
> +#include "../schema.h"
> +
> +static const char *compats_wlf_wm8903[] = {
> +	"wlf,wm8903",
> +	NULL,
> +};
> +
> +static void checkfn_wlf_wm8903(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	is_an_mmio_bus_child(node, 1);
> +	is_an_i2c_bus_child(node);
> +	is_an_interrupt_consumer_by_index(node, 1);
> +	is_a_gpio_provider(node, 2);
> +	optional_property(node, "micdet-cfg");
> +	optional_property(node, "micdet-delay");
> +	optional_property(node, "gpio-cfg");
> +}
> +SCHEMA_MATCH_COMPATIBLE(wlf_wm8903);
> diff --git a/test-schema.dts b/test-schema.dts
> new file mode 100644
> index 0000000..02e1fdc
> --- /dev/null
> +++ b/test-schema.dts
> @@ -0,0 +1,45 @@
> +/dts-v1/;
> +
> +/ {
> +	model = "NVIDIA Tegra20 Harmony evaluation board";
> +	compatible = "nvidia,harmony", "nvidia,tegra20";
> +	#address-cells = <1>;
> +	#size-cells = <1>;
> +
> +	aliases {
> +	};
> +
> +	chosen {
> +	};
> +
> +	memory {
> +		device_type = "memory";
> +		reg = <0x00000000 0x40000000>;
> +	};
> +
> +	i2c@7000c000 {
> +		compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c";
> +		reg = <0x7000c000 0x100>;
> +		interrupts = <0 38 0>;
> +		#address-cells = <1>;
> +		#size-cells = <0>;
> +		clocks = <0 0>, <0 1>;
> +		clock-names = "div-clk", "fast-clk";
> +		status = "okay";
> +		clock-frequency = <400000>;
> +
> +		wm8903: wm8903@1a {
> +			compatible = "wlf,wm8903";
> +			reg = <0x1a>;
> +			interrupt-parent = <0>;
> +			interrupts = <5 0>;
> +
> +			gpio-controller;
> +			#gpio-cells = <2>;
> +
> +			micdet-cfg = <0>;
> +			micdet-delay = <100>;
> +			gpio-cfg = <0xffffffff 0xffffffff 0 0xffffffff 0xffffffff>;
> +		};
> +	};
> +};
> -- 
> 1.7.10.4
>
Kumar Gala Oct. 25, 2013, 4 a.m. UTC | #2
On Oct 24, 2013, at 6:43 PM, Grant Likely wrote:

> On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren <swarren@wwwdotorg.org> wrote:
>> From: Stephen Warren <swarren@nvidia.com>
>> 
>> This is a very quick proof-of-concept re: how a DT schema checker might
>> look if written in C, and integrated into dtc.
> 
> Thanks for looking at this.
> 
> Very interesting. Certainly an expedient way to start checking schemas,
> and for certain bindings it may be the best approach. The downside is it
> forces a recompilation of DTC to bring in new bindings and it isn't a
> great meduim for mixing schema with documentation in the bindings.
> 
> g.

Yeah, I think any solution has to make the Documentation and the schema representation one thing otherwise more error prone and another set of things to review.

- k
Stephen Warren Oct. 25, 2013, 2:44 p.m. UTC | #3
On 10/25/2013 12:43 AM, Grant Likely wrote:
> On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren <swarren@wwwdotorg.org> wrote:
>> From: Stephen Warren <swarren@nvidia.com>
>>
>> This is a very quick proof-of-concept re: how a DT schema checker might
>> look if written in C, and integrated into dtc.
> 
> Thanks for looking at this.
> 
> Very interesting. Certainly an expedient way to start checking schemas,
> and for certain bindings it may be the best approach. The downside is it
> forces a recompilation of DTC to bring in new bindings and it isn't a
> great meduim for mixing schema with documentation in the bindings.

This approach would certainly require recompiling something. I threw the
code into dtc simply because it was the easiest container for the
demonstration. It could be a separate DT validation utility if we
wanted, although we'd need to split the DT parser from dtc into a
library to avoid code duplication. The resultant utility could be part
of the repo containing the DTs, so it didn't end up as a separate
package to manage.

I think the additional documentation could be added as comments in the
validation functions, just like IIRC it was to be represented as
comments even in the .dts-based schema proposals.
Jon Loeliger Oct. 25, 2013, 3:21 p.m. UTC | #4
> On 10/25/2013 12:43 AM, Grant Likely wrote:
> > On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren <swarren@wwwdotorg.org> 
> wrote:
> >> From: Stephen Warren <swarren@nvidia.com>
> >>
> >> This is a very quick proof-of-concept re: how a DT schema checker might
> >> look if written in C, and integrated into dtc.
> > 
> > Thanks for looking at this.
> > 
> > Very interesting. Certainly an expedient way to start checking schemas,
> > and for certain bindings it may be the best approach. The downside is it
> > forces a recompilation of DTC to bring in new bindings and it isn't a
> > great meduim for mixing schema with documentation in the bindings.
> 
> This approach would certainly require recompiling something. I threw the
> code into dtc simply because it was the easiest container for the
> demonstration. It could be a separate DT validation utility if we
> wanted, although we'd need to split the DT parser from dtc into a
> library to avoid code duplication. The resultant utility could be part
> of the repo containing the DTs, so it didn't end up as a separate
> package to manage.
> 
> I think the additional documentation could be added as comments in the
> validation functions, just like IIRC it was to be represented as
> comments even in the .dts-based schema proposals.

DTers,

I think the additional benefit of starting with a direct C
implementation is that it causes us to begin to actually
codify the schema requirements.  Sure, it may not be ideal
at first, but over time it may reveal consistent patterns
that can be extracted.  And it may reveal what a real schema
might look like and how it might be expressed better.  Which
is to say that perhaps we are letting "perfect" get in the
way of "good enough to start"?

In the meantime, someone has shown us the code and we can
get started.  It's a Small Matter of Refactoring later. :-)

Just a notion,
jdl
Rob Herring Oct. 25, 2013, 5:38 p.m. UTC | #5
On 10/25/2013 10:21 AM, Jon Loeliger wrote:
>> On 10/25/2013 12:43 AM, Grant Likely wrote:
>>> On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren <swarren@wwwdotorg.org> 
>> wrote:
>>>> From: Stephen Warren <swarren@nvidia.com>
>>>>
>>>> This is a very quick proof-of-concept re: how a DT schema checker might
>>>> look if written in C, and integrated into dtc.
>>>
>>> Thanks for looking at this.
>>>
>>> Very interesting. Certainly an expedient way to start checking schemas,
>>> and for certain bindings it may be the best approach. The downside is it
>>> forces a recompilation of DTC to bring in new bindings and it isn't a
>>> great meduim for mixing schema with documentation in the bindings.
>>
>> This approach would certainly require recompiling something. I threw the
>> code into dtc simply because it was the easiest container for the
>> demonstration. It could be a separate DT validation utility if we
>> wanted, although we'd need to split the DT parser from dtc into a
>> library to avoid code duplication. The resultant utility could be part
>> of the repo containing the DTs, so it didn't end up as a separate
>> package to manage.
>>
>> I think the additional documentation could be added as comments in the
>> validation functions, just like IIRC it was to be represented as
>> comments even in the .dts-based schema proposals.
> 
> DTers,
> 
> I think the additional benefit of starting with a direct C
> implementation is that it causes us to begin to actually
> codify the schema requirements.  Sure, it may not be ideal
> at first, but over time it may reveal consistent patterns
> that can be extracted.  And it may reveal what a real schema
> might look like and how it might be expressed better.  Which
> is to say that perhaps we are letting "perfect" get in the
> way of "good enough to start"?
> 
> In the meantime, someone has shown us the code and we can
> get started.  It's a Small Matter of Refactoring later. :-)

I have not really looked at whether this would make sense or be low
effort, but what if Grant's run-time checker was converted to run at
kernel compile time. That would fall into the get something working
sooner rather than later, but doesn't solve the documentation problem
either.

Rob
David Gibson Oct. 25, 2013, 11:11 p.m. UTC | #6
On Fri, Oct 25, 2013 at 10:21:22AM -0500, Jon Loeliger wrote:
> > On 10/25/2013 12:43 AM, Grant Likely wrote:
> > > On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren <swarren@wwwdotorg.org> 
> > wrote:
> > >> From: Stephen Warren <swarren@nvidia.com>
> > >>
> > >> This is a very quick proof-of-concept re: how a DT schema checker might
> > >> look if written in C, and integrated into dtc.
> > > 
> > > Thanks for looking at this.
> > > 
> > > Very interesting. Certainly an expedient way to start checking schemas,
> > > and for certain bindings it may be the best approach. The downside is it
> > > forces a recompilation of DTC to bring in new bindings and it isn't a
> > > great meduim for mixing schema with documentation in the bindings.
> > 
> > This approach would certainly require recompiling something. I threw the
> > code into dtc simply because it was the easiest container for the
> > demonstration. It could be a separate DT validation utility if we
> > wanted, although we'd need to split the DT parser from dtc into a
> > library to avoid code duplication. The resultant utility could be part
> > of the repo containing the DTs, so it didn't end up as a separate
> > package to manage.
> > 
> > I think the additional documentation could be added as comments in the
> > validation functions, just like IIRC it was to be represented as
> > comments even in the .dts-based schema proposals.
> 
> DTers,
> 
> I think the additional benefit of starting with a direct C
> implementation is that it causes us to begin to actually
> codify the schema requirements.  Sure, it may not be ideal
> at first, but over time it may reveal consistent patterns
> that can be extracted.  And it may reveal what a real schema
> might look like and how it might be expressed better.  Which
> is to say that perhaps we are letting "perfect" get in the
> way of "good enough to start"?
> 
> In the meantime, someone has shown us the code and we can
> get started.  It's a Small Matter of Refactoring later. :-)

Yes!  This!

Think of this prototype as a mechanism for collating and applying a
bunch of schemas to the tree.  At the moment the schemas are all hard
coded in C, but it can be extended to load some or all of them
dynamically from a description / template / whatever.

That also gives us the flexibility to start out with a simple but
limited schema language which handles common cases, while leaving the
complex cases in C, at least until we understand the requirements well
enough to extend the schema language.
David Gibson Oct. 25, 2013, 11:29 p.m. UTC | #7
On Thu, Oct 24, 2013 at 10:51:28PM +0100, Stephen Warren wrote:
> From: Stephen Warren <swarren@nvidia.com>
> 
> This is a very quick proof-of-concept re: how a DT schema checker might
> look if written in C, and integrated into dtc.

So, this is much closer in outline than previous suggestions to how I
think schema checking should be integrated into dtc.

[snip]
> diff --git a/checks.c b/checks.c
> index ee96a25..49143b3 100644
> --- a/checks.c
> +++ b/checks.c
> @@ -19,6 +19,7 @@
>   */
>  
>  #include "dtc.h"
> +#include "schemas/schema.h"
>  
>  #ifdef TRACE_CHECKS
>  #define TRACE(c, ...) \
> @@ -236,6 +237,14 @@ static void check_is_cell(struct check *c, struct node *root,
>   * Structural check functions
>   */
>  
> +static void check_schema(struct check *c, struct node *dt,
> +				       struct node *node)
> +{
> +	if (schema_check_node(node))
> +		FAIL(c, "Schema check failed for %s", node->fullpath);
> +}
> +NODE_ERROR(schema, NULL);

So, I think the better approach is to pull each schema in as a
seperate check, rather than have a single 'check_schema'.  Way back
when I implemented 'checks' I did put a fair bit of thought into what
"schema checking" would need, even if I didn't think of it in those
terms at the time.

As seperate checks, multiple schemas can be checked, each printing
their own errors.  It tracks which ones failed and which ones passed.
We already have the -W and -E options to control which schemas/checks
are enabled/disabled.  It has the prerequisites mechanism - that can
simplify the checking code, because it lets you right code which might
segfault if the required check didn't succeed.

As/when we implement loading schemas dynamically, we'll need to extend
the checks mechanism from the static table it uses now, but that
should be straightforward enough.

>  static void check_duplicate_node_names(struct check *c, struct node *dt,
>  				       struct node *node)
>  {
> @@ -652,6 +661,8 @@ static void check_obsolete_chosen_interrupt_controller(struct check *c,
>  TREE_WARNING(obsolete_chosen_interrupt_controller, NULL);
>  
>  static struct check *check_table[] = {
> +	&schema,

As a rough guideline, this table is laid out with the most basic,
structural checks first, and the more complex semantic checks lower
down, so the schema check(s) should probably go at the bottom.

>  	&duplicate_node_names, &duplicate_property_names,
>  	&node_name_chars, &node_name_format, &property_name_chars,
>  	&name_is_string, &name_properties,
> diff --git a/schemas/clock/clock.c b/schemas/clock/clock.c
> new file mode 100644
> index 0000000..0b9ca1f
> --- /dev/null
> +++ b/schemas/clock/clock.c
> @@ -0,0 +1,16 @@
> +#include "schema.h"
> +
> +void is_a_clock_provider(struct node *node, int clock_cells)
> +{
> +	required_integer_property(node, "#clock-cells", clock_cells);
> +}
> +
> +void is_a_clock_consumer_by_name(struct node *node, int clock_count)
> +{
> +	required_property(node, "clock-names");
> +	/* FIXME: validate all required names are present */
> +	/* FIXME: validate all names present are in list of valid names */
> +	required_property(node, "clocks");
> +	/* FIXME: validate phandle, specifier list in property */
> +	/* FIXME: validate len(clocks) =~ len(clock-names) * #clock-cells */
> +}
> diff --git a/schemas/gpio/gpio.c b/schemas/gpio/gpio.c
> new file mode 100644
> index 0000000..e52f161
> --- /dev/null
> +++ b/schemas/gpio/gpio.c
> @@ -0,0 +1,13 @@
> +#include "schema.h"
> +
> +void is_a_gpio_provider(struct node *node, int gpio_cells)
> +{
> +	required_boolean_property(node, "gpio-controller");
> +	required_integer_property(node, "#gpio-cells", gpio_cells);
> +}
> +
> +void is_a_gpio_consumer(struct node *node, const char *propname)
> +{
> +	required_property(node, propname);
> +	/* FIXME: validate phandle, specifier list in property */
> +}
> diff --git a/schemas/i2c/i2c.c b/schemas/i2c/i2c.c
> new file mode 100644
> index 0000000..0772ea3
> --- /dev/null
> +++ b/schemas/i2c/i2c.c
> @@ -0,0 +1,17 @@
> +#include "../schema.h"
> +
> +void is_an_i2c_bus(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	is_an_mmio_bus(node, 1, 0);
> +	required_property(node, "#address-cells");
> +	required_property(node, "#size-cells");
> +	optional_property(node, "clock-frequency");
> +	/* FIXME: set internal tag on *node to mark it as an I2C bus */
> +}
> +
> +void is_an_i2c_bus_child(struct node *node)
> +{
> +	/* FIXME: validate that is_an_i2c_bus() was called on node->parent */
> +}
> diff --git a/schemas/i2c/nvidia,tegra20-i2c.c b/schemas/i2c/nvidia,tegra20-i2c.c
> new file mode 100644
> index 0000000..c616f33
> --- /dev/null
> +++ b/schemas/i2c/nvidia,tegra20-i2c.c
> @@ -0,0 +1,20 @@
> +#include "../schema.h"
> +
> +static const char *compats_nvidia_tegra20_i2c[] = {
> +	"nvidia,tegra20-i2c",
> +	"nvidia,tegra30-i2c",
> +	"nvidia,tegra114-i2c",
> +	"nvidia,tegra124-i2c",
> +	NULL,
> +};
> +
> +static void checkfn_nvidia_tegra20_i2c(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	is_an_mmio_bus_child(node, 1);
> +	is_an_i2c_bus(node);
> +	is_an_interrupt_consumer_by_index(node, 1);
> +	is_a_clock_consumer_by_name(node, 2);
> +}
> +SCHEMA_MATCH_COMPATIBLE(nvidia_tegra20_i2c);
> diff --git a/schemas/interrupt-controller/interrupts.c b/schemas/interrupt-controller/interrupts.c
> new file mode 100644
> index 0000000..39191a8
> --- /dev/null
> +++ b/schemas/interrupt-controller/interrupts.c
> @@ -0,0 +1,14 @@
> +#include "schema.h"
> +
> +void is_an_interrupt_provider(struct node *node, int int_cells)
> +{
> +	required_boolean_property(node, "interrupt-controller");
> +	required_integer_property(node, "#interrupt-cells", int_cells);
> +}
> +
> +void is_an_interrupt_consumer_by_index(struct node *node, int int_count)
> +{
> +	required_property(node, "interrupts");
> +	/* FIXME: validate phandle, specifier list in property */
> +	/* FIXME: validate len(interrupts) =~ int_count * #interrupt-cells */
> +}
> diff --git a/schemas/mmio-bus.c b/schemas/mmio-bus.c
> new file mode 100644
> index 0000000..74b5410
> --- /dev/null
> +++ b/schemas/mmio-bus.c
> @@ -0,0 +1,15 @@
> +#include "schema.h"
> +
> +void is_an_mmio_bus(struct node *node, int address_cells, int size_cells)
> +{
> +	required_integer_property(node, "#address-cells", address_cells);
> +	required_integer_property(node, "#size-cells", size_cells);
> +	/* FIXME: set internal tag on *node to mark it as an MMIO bus */
> +}
> +
> +void is_an_mmio_bus_child(struct node *node, int reg_count)
> +{
> +	/* FIXME: validate that is_an_mmio_bus() was called on node->parent */
> +	required_property(node, "reg");
> +	/* FIXME: validate len(reg) == reg_count * (#address-+#size-cells) */
> +}
> diff --git a/schemas/root-node.c b/schemas/root-node.c
> new file mode 100644
> index 0000000..c6ab0c7
> --- /dev/null
> +++ b/schemas/root-node.c
> @@ -0,0 +1,27 @@
> +#include "schema.h"
> +
> +static void checkfn_root_node(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	/*
> +	 * FIXME: Need to allow is_an_mmio_bus() that allows any values of
> +	 * #address-cells/#size-cells
> +	 */
> +	is_an_mmio_bus(node, 1, 1);
> +	/*
> +	 * FIXME: call required_string_property() here instead, or perhaps
> +	 * required_property(node, "compatible", check_propval_string);
> +	 * where check_propval_string() is a function that performs additional
> +	 * checks on the property value.
> +	 */
> +	required_property(node, "compatible");
> +	/*
> +	 * FIXME: call optional_string_property() here instead, or perhaps
> +	 * optional_property(node, "compatible", check_propval_string);
> +	 * where check_propval_string() is a function that performs additional
> +	 * checks on the property value.
> +	 */
> +	optional_property(node, "model");
> +}
> +SCHEMA_MATCH_PATH(root_node, "/");
> diff --git a/schemas/schema.c b/schemas/schema.c
> new file mode 100644
> index 0000000..cb78170
> --- /dev/null
> +++ b/schemas/schema.c
> @@ -0,0 +1,119 @@
> +#include "schema.h"
> +
> +/* FIXME: automate this table... */
> +extern struct schema_checker schema_checker_root_node;
> +extern struct schema_checker schema_checker_nvidia_tegra20_i2c;
> +extern struct schema_checker schema_checker_wlf_wm8903;
> +static const struct schema_checker *schema_checkers[] = {
> +	&schema_checker_root_node,
> +	&schema_checker_nvidia_tegra20_i2c,
> +	&schema_checker_wlf_wm8903,
> +};
> +
> +int schema_check_node(struct node *node)
> +{
> +	int i;
> +	const struct schema_checker *checker, *best_checker = NULL;
> +	int match, best_match = 0;
> +
> +	for (i = 0; i < ARRAY_SIZE(schema_checkers); i++) {
> +		checker = schema_checkers[i];
> +		match = checker->matchfn(node, checker);
> +		if (match) {
> +			printf("INFO: Node %s matches checker %s at level %d\n",
> +				node->fullpath, checker->name, match);
> +			if (!best_checker || (match > best_match)) {
> +				best_match = match;
> +				best_checker = checker;
> +			}
> +		}
> +	}
> +
> +	if (!best_checker) {
> +		printf("WARNING: no schema for node %s\n", node->fullpath);
> +		return 0;
> +	}
> +
> +	printf("INFO: Node %s selected checker %s\n", node->fullpath,
> +		best_checker->name);
> +
> +	best_checker->checkfn(node);

IMO, thinking in terms of "the" schema for a node is a mistake.
Instead, think in terms of a bunch of schemas, which "known" what
nodes they're relevant for.  Often that will be determined by
compatible, but it could also be determined by other things (presence
of 'interrupts', parent node, explicitly implied by another schema).

> +	/*
> +	 * FIXME: grab validation state from global somewhere.
> +	 * Using global state avoids having check return values after every
> +	 * function call, thus making the code less verbose and appear more
> +	 * assertion-based.
> +	 */

So.. this is why the checks mechanism is structured as it is.  The
'check' structure is passed down everywhere as a context where this
global state can be saved to.

> +	return 0;
> +}
> +
> +int schema_match_path(struct node *node, const struct schema_checker *checker)
> +{
> +	return !strcmp(node->fullpath, checker->u.path.path);
> +}
> +
> +int schema_match_compatible(struct node *node,
> +				const struct schema_checker *checker)
> +{
> +	struct property *compat_prop;
> +	int index;
> +	const char *node_compat;
> +	const char **test_compats;
> +
> +	compat_prop = get_property(node, "compatible");
> +	if (!compat_prop)
> +		return 0;
> +
> +	/*
> +	 * The best_match logic here is to find the checker entry that matches
> +	 * the first compatible value in the node, then if there's no match,
> +	 * fall back to finding the checker that matches the second compatible
> +	 * value, etc. Perhaps we want to run all checkers instead? Especially,
> +	 * we might want to run all different types of checker (by path name,
> +	 * by compatible).
> +	 */
> +	for (node_compat = compat_prop->val.val, index = 0;
> +			*node_compat;
> +			node_compat += strlen(node_compat) + 1, index++) {
> +		for (test_compats = checker->u.compatible.compats;
> +				*test_compats; test_compats++) {
> +			if (!strcmp(node_compat, *test_compats))
> +				return -(index + 1);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +void required_property(struct node *node, const char *propname)
> +{
> +	struct property *prop;
> +
> +	prop = get_property(node, propname);
> +	if (!prop) {
> +		/*
> +		 * FIXME: set global error state. The same comment applies
> +		 * everywhere.
> +		 */

Right, this is why you want one-check-per-schema, and you pass the
check structure down everywhere.  Then you can use the FAIL() macro to
set that state.  That's what it was designed for :).

> +		printf("ERROR: node %s missing property %s\n", node->fullpath,
> +			propname);
> +	}
> +}
> +
> +void required_boolean_property(struct node *node, const char *propname)
> +{
> +	required_property(node, propname);
> +	/* FIXME: Validate it's length is zero if present */
> +}
> +
> +void required_integer_property(struct node *node, const char *propname,
> +				int value)
> +{
> +	required_property(node, propname);
> +	/* FIXME: Validate it's length is 1 cell, and value matches */
> +}
> +
> +void optional_property(struct node *node, const char *propname)
> +{
> +}
> diff --git a/schemas/schema.h b/schemas/schema.h
> new file mode 100644
> index 0000000..74e9931
> --- /dev/null
> +++ b/schemas/schema.h
> @@ -0,0 +1,68 @@
> +#ifndef _SCHEMAS_SCHEMA_H
> +#define _SCHEMAS_SCHEMA_H
> +
> +#include "dtc.h"
> +
> +struct schema_checker;
> +
> +typedef int (schema_matcher_func)(struct node *node,
> +					const struct schema_checker *checker);
> +typedef void (schema_checker_func)(struct node *node);
> +
> +struct schema_checker {
> +	const char *name;
> +	schema_matcher_func *matchfn;
> +	schema_checker_func *checkfn;
> +	union {
> +		struct {
> +			const char *path;
> +		} path;
> +		struct {
> +			const char **compats;
> +		} compatible;
> +	} u;
> +};
> +
> +int schema_check_node(struct node *node);
> +
> +int schema_match_path(struct node *node, const struct schema_checker *checker);
> +int schema_match_compatible(struct node *node,
> +				const struct schema_checker *checker);
> +
> +#define SCHEMA_MATCH_PATH(_name_, _path_) \
> +	struct schema_checker schema_checker_##_name_ = { \
> +		.name = #_name_, \
> +		.matchfn = schema_match_path, \
> +		.checkfn = checkfn_##_name_, \
> +		.u.path.path = _path_, \
> +	};
> +
> +#define SCHEMA_MATCH_COMPATIBLE(_name_) \
> +	struct schema_checker schema_checker_##_name_ = { \
> +		.name = #_name_, \
> +		.matchfn = schema_match_compatible, \
> +		.checkfn = checkfn_##_name_, \
> +		.u.compatible.compats = compats_##_name_, \
> +	};
> +
> +void required_property(struct node *node, const char *propname);
> +void required_boolean_property(struct node *node, const char *propname);
> +void required_integer_property(struct node *node, const char *propname,
> +				int value);
> +void optional_property(struct node *node, const char *propname);
> +void is_an_mmio_bus(struct node *node, int address_cells, int size_cells);
> +void is_an_mmio_bus_child(struct node *node, int reg_count);
> +void is_an_i2c_bus(struct node *node);
> +void is_an_i2c_bus_child(struct node *node);
> +void is_a_gpio_provider(struct node *node, int gpio_cells);
> +void is_a_gpio_consumer(struct node *node, const char *propname);
> +void is_an_interrupt_provider(struct node *node, int int_cells);
> +void is_an_interrupt_consumer_by_index(struct node *node, int int_count);
> +void is_a_clock_provider(struct node *node, int clock_cells);
> +/*
> + * FIXME: pass in a list of required and optional clock names instead of a
> + * count
> + */
> +void is_a_clock_consumer_by_name(struct node *node, int clock_count);
> +
> +#endif
> diff --git a/schemas/sound/wlf,wm8903.c b/schemas/sound/wlf,wm8903.c
> new file mode 100644
> index 0000000..f6ac49d
> --- /dev/null
> +++ b/schemas/sound/wlf,wm8903.c
> @@ -0,0 +1,20 @@
> +#include "../schema.h"
> +
> +static const char *compats_wlf_wm8903[] = {
> +	"wlf,wm8903",
> +	NULL,
> +};
> +
> +static void checkfn_wlf_wm8903(struct node *node)
> +{
> +	printf("INFO: %s()\n", __FUNCTION__);
> +
> +	is_an_mmio_bus_child(node, 1);
> +	is_an_i2c_bus_child(node);
> +	is_an_interrupt_consumer_by_index(node, 1);
> +	is_a_gpio_provider(node, 2);
> +	optional_property(node, "micdet-cfg");
> +	optional_property(node, "micdet-delay");
> +	optional_property(node, "gpio-cfg");
> +}
> +SCHEMA_MATCH_COMPATIBLE(wlf_wm8903);
> diff --git a/test-schema.dts b/test-schema.dts
> new file mode 100644
> index 0000000..02e1fdc
> --- /dev/null
> +++ b/test-schema.dts
> @@ -0,0 +1,45 @@
> +/dts-v1/;
> +
> +/ {
> +	model = "NVIDIA Tegra20 Harmony evaluation board";
> +	compatible = "nvidia,harmony", "nvidia,tegra20";
> +	#address-cells = <1>;
> +	#size-cells = <1>;
> +
> +	aliases {
> +	};
> +
> +	chosen {
> +	};
> +
> +	memory {
> +		device_type = "memory";
> +		reg = <0x00000000 0x40000000>;
> +	};
> +
> +	i2c@7000c000 {
> +		compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c";
> +		reg = <0x7000c000 0x100>;
> +		interrupts = <0 38 0>;
> +		#address-cells = <1>;
> +		#size-cells = <0>;
> +		clocks = <0 0>, <0 1>;
> +		clock-names = "div-clk", "fast-clk";
> +		status = "okay";
> +		clock-frequency = <400000>;
> +
> +		wm8903: wm8903@1a {
> +			compatible = "wlf,wm8903";
> +			reg = <0x1a>;
> +			interrupt-parent = <0>;
> +			interrupts = <5 0>;
> +
> +			gpio-controller;
> +			#gpio-cells = <2>;
> +
> +			micdet-cfg = <0>;
> +			micdet-delay = <100>;
> +			gpio-cfg = <0xffffffff 0xffffffff 0 0xffffffff 0xffffffff>;
> +		};
> +	};
> +};
David Gibson Oct. 28, 2013, 10:17 a.m. UTC | #8
On Fri, Oct 25, 2013 at 03:44:09PM +0100, Stephen Warren wrote:
> On 10/25/2013 12:43 AM, Grant Likely wrote:
> > On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren <swarren@wwwdotorg.org> wrote:
> >> From: Stephen Warren <swarren@nvidia.com>
> >>
> >> This is a very quick proof-of-concept re: how a DT schema checker might
> >> look if written in C, and integrated into dtc.
> > 
> > Thanks for looking at this.
> > 
> > Very interesting. Certainly an expedient way to start checking schemas,
> > and for certain bindings it may be the best approach. The downside is it
> > forces a recompilation of DTC to bring in new bindings and it isn't a
> > great meduim for mixing schema with documentation in the bindings.
> 
> This approach would certainly require recompiling something. I threw the
> code into dtc simply because it was the easiest container for the
> demonstration. It could be a separate DT validation utility if we
> wanted, although we'd need to split the DT parser from dtc into a
> library to avoid code duplication. The resultant utility could be part
> of the repo containing the DTs, so it didn't end up as a separate
> package to manage.
> 
> I think the additional documentation could be added as comments in the
> validation functions, just like IIRC it was to be represented as
> comments even in the .dts-based schema proposals.

Fwiw, I've been starting to do some hacking on the checks code, with a
view to making it accomodate the schema stuff better.  Branch
'checking' on the kernel.org tree.  In a state of flux, so expect
rebases.
Stephen Warren Oct. 31, 2013, 9:11 p.m. UTC | #9
On 10/25/2013 05:29 PM, David Gibson wrote:
> On Thu, Oct 24, 2013 at 10:51:28PM +0100, Stephen Warren wrote:
>> From: Stephen Warren <swarren@nvidia.com>
>> 
>> This is a very quick proof-of-concept re: how a DT schema checker
>> might look if written in C, and integrated into dtc.

>> diff --git a/schemas/schema.c b/schemas/schema.c

>> +int schema_check_node(struct node *node)
...
>> +	if (!best_checker) { +		printf("WARNING: no schema for node
>> %s\n", node->fullpath); +		return 0; +	} + +	printf("INFO: Node
>> %s selected checker %s\n", node->fullpath, +
>> best_checker->name); + +	best_checker->checkfn(node);
> 
> IMO, thinking in terms of "the" schema for a node is a mistake. 
> Instead, think in terms of a bunch of schemas, which "known" what 
> nodes they're relevant for.  Often that will be determined by 
> compatible, but it could also be determined by other things
> (presence of 'interrupts', parent node, explicitly implied by
> another schema).

I don't agree here.

Each node represents a single specific type of object, and the
representation uses a single specific overall schema.

I'll admit that sometimes that schema is picked via the compatible
value (most cases), and other times via the node name/path (e.g.
/chosen, /memory).

In particular, I don't think it's correct to say that e.g. both a
"Tegra I2C controller" schema and an "interrupt" schema apply equally
to a "Tegra I2C DT node". Instead, I think that the "interrupt" schema
only applies because the "Tegra I2C controller" schema happens to
inherit from, or aggregate, the "interrupt" schema.

I see two important results from this distinction:

1) We should only allow properties from the "interrupt" schema to
exist within a node /if/ the top-level schema for the node actually
does make use of the "interrupt" schema". This is important since it
disallows e.g.:

                battery: smart-battery@b {
                        compatible = "ti,bq20z45", "sbs,sbs-battery";
                        reg = <0xb>;
                        interrupt-parent = <&gpio>;
                        interrupts = <5 4>;
                };

... since the ti,bq20z45/sbs,sbs-battery don't actually have an
interrupt output (assuming that the current binding doc for that
compatible value accurately reflects the HW here anyway).

If we allowed the "interrupt" schema to match the node simply because
it saw an "interrupts" property there, that'd allow this unused
property to exist in the DT, whereas we really do want to throw an
error here, so the DT author is aware they made a mistake.

2) Some inheritance or aggregation of schemas is parameterized, e.g.
on property name. For example, GPIO properties are named ${xxx}-gpios.
However, I don't believe there's any hard rule that absolutely
mandates that an /unrelated/ property cannot exist that matches the
same naming rule. Admittedly, it would be suboptimal if such a
conflicting property name were used, but if there's no hard rule
against it, I don't think we should design our schema checker to
assume that.

If the GPIO schema checker simply searched for any properties named
according to that pattern, it might end up attempting to check
properties that weren't actually generic GPIO specifiers. Instead, I'd
prefer the node's top-level schema to explicitly state which
properties should be checked according to which inherited schema(s).

In particular, perhaps this conflict could occur in a slightly generic
binding for a series of similar I2C GPIO expander chips, some of which
have 4 GPIOs and others 8. Someone might choose a "count-gpios" or
"number-of-gpios" property to represent that aspect of the HW.

So overall, I believe it's actually very important to first determine
*the* exact schema for a node, then apply that one top-level checker,
with it then invoking various (potentially parameterized) sub-checkers
for any inherited/aggregated schemas.
Stephen Warren Oct. 31, 2013, 9:13 p.m. UTC | #10
On 10/28/2013 04:17 AM, David Gibson wrote:
> On Fri, Oct 25, 2013 at 03:44:09PM +0100, Stephen Warren wrote:
>> On 10/25/2013 12:43 AM, Grant Likely wrote:
>>> On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren
>>> <swarren@wwwdotorg.org> wrote:
>>>> From: Stephen Warren <swarren@nvidia.com>
>>>> 
>>>> This is a very quick proof-of-concept re: how a DT schema
>>>> checker might look if written in C, and integrated into dtc.
>>> 
>>> Thanks for looking at this.
>>> 
>>> Very interesting. Certainly an expedient way to start checking
>>> schemas, and for certain bindings it may be the best approach.
>>> The downside is it forces a recompilation of DTC to bring in
>>> new bindings and it isn't a great meduim for mixing schema with
>>> documentation in the bindings.
>> 
>> This approach would certainly require recompiling something. I
>> threw the code into dtc simply because it was the easiest
>> container for the demonstration. It could be a separate DT
>> validation utility if we wanted, although we'd need to split the
>> DT parser from dtc into a library to avoid code duplication. The
>> resultant utility could be part of the repo containing the DTs,
>> so it didn't end up as a separate package to manage.
>> 
>> I think the additional documentation could be added as comments
>> in the validation functions, just like IIRC it was to be
>> represented as comments even in the .dts-based schema proposals.
> 
> Fwiw, I've been starting to do some hacking on the checks code,
> with a view to making it accomodate the schema stuff better.
> Branch 'checking' on the kernel.org tree.  In a state of flux, so
> expect rebases.

Did you forget to push that? I don't see it in any of:
git://git.kernel.org/pub/scm/linux/kernel/git/jdl/dtc.git
git://git.kernel.org/pub/scm/utils/dtc/dtc.git
git://git.jdl.com/software/dtc.git
David Gibson Nov. 1, 2013, 1:24 p.m. UTC | #11
On Thu, Oct 31, 2013 at 03:13:46PM -0600, Stephen Warren wrote:
> On 10/28/2013 04:17 AM, David Gibson wrote:
> > On Fri, Oct 25, 2013 at 03:44:09PM +0100, Stephen Warren wrote:
> >> On 10/25/2013 12:43 AM, Grant Likely wrote:
> >>> On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren
> >>> <swarren@wwwdotorg.org> wrote:
> >>>> From: Stephen Warren <swarren@nvidia.com>
> >>>> 
> >>>> This is a very quick proof-of-concept re: how a DT schema
> >>>> checker might look if written in C, and integrated into dtc.
> >>> 
> >>> Thanks for looking at this.
> >>> 
> >>> Very interesting. Certainly an expedient way to start checking
> >>> schemas, and for certain bindings it may be the best approach.
> >>> The downside is it forces a recompilation of DTC to bring in
> >>> new bindings and it isn't a great meduim for mixing schema with
> >>> documentation in the bindings.
> >> 
> >> This approach would certainly require recompiling something. I
> >> threw the code into dtc simply because it was the easiest
> >> container for the demonstration. It could be a separate DT
> >> validation utility if we wanted, although we'd need to split the
> >> DT parser from dtc into a library to avoid code duplication. The
> >> resultant utility could be part of the repo containing the DTs,
> >> so it didn't end up as a separate package to manage.
> >> 
> >> I think the additional documentation could be added as comments
> >> in the validation functions, just like IIRC it was to be
> >> represented as comments even in the .dts-based schema proposals.
> > 
> > Fwiw, I've been starting to do some hacking on the checks code,
> > with a view to making it accomodate the schema stuff better.
> > Branch 'checking' on the kernel.org tree.  In a state of flux, so
> > expect rebases.
> 
> Did you forget to push that? I don't see it in any of:
> git://git.kernel.org/pub/scm/linux/kernel/git/jdl/dtc.git
> git://git.kernel.org/pub/scm/utils/dtc/dtc.git
> git://git.jdl.com/software/dtc.git

Oops.  Thought I'd pushed, but apparently not.  Should be there now
on:
	git://git.kernel.org/pub/scm/utils/dtc/dtc.git
Tomasz Figa Nov. 3, 2013, 11:15 p.m. UTC | #12
On Saturday 26 of October 2013 10:11:06 David Gibson wrote:
> On Fri, Oct 25, 2013 at 10:21:22AM -0500, Jon Loeliger wrote:
> > > On 10/25/2013 12:43 AM, Grant Likely wrote:
> > > > On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren
> > > > <swarren@wwwdotorg.org>> > 
> > > wrote:
> > > >> From: Stephen Warren <swarren@nvidia.com>
> > > >> 
> > > >> This is a very quick proof-of-concept re: how a DT schema checker
> > > >> might
> > > >> look if written in C, and integrated into dtc.
> > > > 
> > > > Thanks for looking at this.
> > > > 
> > > > Very interesting. Certainly an expedient way to start checking
> > > > schemas,
> > > > and for certain bindings it may be the best approach. The downside
> > > > is it forces a recompilation of DTC to bring in new bindings and
> > > > it isn't a great meduim for mixing schema with documentation in
> > > > the bindings.> > 
> > > This approach would certainly require recompiling something. I threw
> > > the code into dtc simply because it was the easiest container for
> > > the demonstration. It could be a separate DT validation utility if
> > > we wanted, although we'd need to split the DT parser from dtc into
> > > a library to avoid code duplication. The resultant utility could be
> > > part of the repo containing the DTs, so it didn't end up as a
> > > separate package to manage.
> > > 
> > > I think the additional documentation could be added as comments in
> > > the
> > > validation functions, just like IIRC it was to be represented as
> > > comments even in the .dts-based schema proposals.
> > 
> > DTers,
> > 
> > I think the additional benefit of starting with a direct C
> > implementation is that it causes us to begin to actually
> > codify the schema requirements.  Sure, it may not be ideal
> > at first, but over time it may reveal consistent patterns
> > that can be extracted.  And it may reveal what a real schema
> > might look like and how it might be expressed better.  Which
> > is to say that perhaps we are letting "perfect" get in the
> > way of "good enough to start"?
> > 
> > In the meantime, someone has shown us the code and we can
> > get started.  It's a Small Matter of Refactoring later. :-)
> 
> Yes!  This!
> 
> Think of this prototype as a mechanism for collating and applying a
> bunch of schemas to the tree.  At the moment the schemas are all hard
> coded in C, but it can be extended to load some or all of them
> dynamically from a description / template / whatever.
> 
> That also gives us the flexibility to start out with a simple but
> limited schema language which handles common cases, while leaving the
> complex cases in C, at least until we understand the requirements well
> enough to extend the schema language.

This is fine as an intermediate step, but I'm afraid that the overhead of 
work needed to describe all the bindings using C language directly will be 
pretty big. C language isn't really best fitted for such purposes.

If we agree to base on this, but solely as a mechanism and a base for 
further work, my idea would be to introduce a schema description language 
anyway and then some code generator that would generate C code from 
schemas described using such language (and possibly also something to 
generate textual documentation from schemas, so we could have a central 
repository of indexed DT bindings, for example on [1] - maybe kerneldoc 
could be reused here).

Such design would allow for describing a lot of cases using a simple 
description language, while leaving the possibility of adding inline C 
snippets, like PHP in HTML, to handle those super complex ones.

[1] https://www.kernel.org/doc/htmldocs/

Best regards,
Tomasz
Tomasz Figa Nov. 3, 2013, 11:26 p.m. UTC | #13
On Monday 04 of November 2013 00:15:58 Tomasz Figa wrote:
> On Saturday 26 of October 2013 10:11:06 David Gibson wrote:
> > On Fri, Oct 25, 2013 at 10:21:22AM -0500, Jon Loeliger wrote:
> > > > On 10/25/2013 12:43 AM, Grant Likely wrote:
> > > > > On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren
> > > > > <swarren@wwwdotorg.org>> >
> > > > 
> > > > wrote:
> > > > >> From: Stephen Warren <swarren@nvidia.com>
> > > > >> 
> > > > >> This is a very quick proof-of-concept re: how a DT schema
> > > > >> checker
> > > > >> might
> > > > >> look if written in C, and integrated into dtc.
> > > > > 
> > > > > Thanks for looking at this.
> > > > > 
> > > > > Very interesting. Certainly an expedient way to start checking
> > > > > schemas,
> > > > > and for certain bindings it may be the best approach. The
> > > > > downside
> > > > > is it forces a recompilation of DTC to bring in new bindings and
> > > > > it isn't a great meduim for mixing schema with documentation in
> > > > > the bindings.> >
> > > > 
> > > > This approach would certainly require recompiling something. I
> > > > threw
> > > > the code into dtc simply because it was the easiest container for
> > > > the demonstration. It could be a separate DT validation utility if
> > > > we wanted, although we'd need to split the DT parser from dtc into
> > > > a library to avoid code duplication. The resultant utility could
> > > > be
> > > > part of the repo containing the DTs, so it didn't end up as a
> > > > separate package to manage.
> > > > 
> > > > I think the additional documentation could be added as comments in
> > > > the
> > > > validation functions, just like IIRC it was to be represented as
> > > > comments even in the .dts-based schema proposals.
> > > 
> > > DTers,
> > > 
> > > I think the additional benefit of starting with a direct C
> > > implementation is that it causes us to begin to actually
> > > codify the schema requirements.  Sure, it may not be ideal
> > > at first, but over time it may reveal consistent patterns
> > > that can be extracted.  And it may reveal what a real schema
> > > might look like and how it might be expressed better.  Which
> > > is to say that perhaps we are letting "perfect" get in the
> > > way of "good enough to start"?
> > > 
> > > In the meantime, someone has shown us the code and we can
> > > get started.  It's a Small Matter of Refactoring later. :-)
> > 
> > Yes!  This!
> > 
> > Think of this prototype as a mechanism for collating and applying a
> > bunch of schemas to the tree.  At the moment the schemas are all hard
> > coded in C, but it can be extended to load some or all of them
> > dynamically from a description / template / whatever.
> > 
> > That also gives us the flexibility to start out with a simple but
> > limited schema language which handles common cases, while leaving the
> > complex cases in C, at least until we understand the requirements well
> > enough to extend the schema language.
> 
> This is fine as an intermediate step, but I'm afraid that the overhead
> of work needed to describe all the bindings using C language directly
> will be pretty big. C language isn't really best fitted for such
> purposes.
> 
> If we agree to base on this, but solely as a mechanism and a base for
> further work, my idea would be to introduce a schema description
> language anyway and then some code generator that would generate C code
> from schemas described using such language (and possibly also something
> to generate textual documentation from schemas, so we could have a
> central repository of indexed DT bindings, for example on [1] - maybe
> kerneldoc could be reused here).
> 
> Such design would allow for describing a lot of cases using a simple
> description language, while leaving the possibility of adding inline C
> snippets, like PHP in HTML, to handle those super complex ones.
> 
> [1] https://www.kernel.org/doc/htmldocs/

Aha, one more thing. As it was mentioned on ARM Mini Summit, as a long 
term goal, we will eventually have to introduce another validation tool 
that checks device drivers against device tree bindings. We should keep 
this in mind when considering schema design.
 
Best regards,
Tomasz
Arnd Bergmann Nov. 4, 2013, 9:28 a.m. UTC | #14
On Monday 04 November 2013 00:26:29 Tomasz Figa wrote:
> 
> Aha, one more thing. As it was mentioned on ARM Mini Summit, as a long 
> term goal, we will eventually have to introduce another validation tool 
> that checks device drivers against device tree bindings. We should keep 
> this in mind when considering schema design.

Incidentally I have just had an idea for a new driver-level API that
should let us do this much easier. I hope to find some time in the
next few days to come up with draft kernel patches. If you don't hear
back from me soon but want to work on validating the drivers against
the bindings, please contact me again.

The basic idea is to extend 'devres' to automatically register
all the resources (registers, irq, dma, gpio, pinctrl, clk, regulator, ...)
and simple properties before the ->probe() callback is even called,
based on a per-driver data structure that describes them, and that
can be easily parsed by an external tool.

	Arnd
Tomasz Figa Nov. 4, 2013, 12:31 p.m. UTC | #15
On Monday 04 of November 2013 10:28:49 Arnd Bergmann wrote:
> On Monday 04 November 2013 00:26:29 Tomasz Figa wrote:
> > 
> > Aha, one more thing. As it was mentioned on ARM Mini Summit, as a long 
> > term goal, we will eventually have to introduce another validation tool 
> > that checks device drivers against device tree bindings. We should keep 
> > this in mind when considering schema design.
> 
> Incidentally I have just had an idea for a new driver-level API that
> should let us do this much easier. I hope to find some time in the
> next few days to come up with draft kernel patches. If you don't hear
> back from me soon but want to work on validating the drivers against
> the bindings, please contact me again.

I'd prefer to focus on the schema language itself and tools for DTS
validation. However the former needs driver validation to be considered
as well and that's why I mentioned it.

> 
> The basic idea is to extend 'devres' to automatically register
> all the resources (registers, irq, dma, gpio, pinctrl, clk, regulator, ...)
> and simple properties before the ->probe() callback is even called,
> based on a per-driver data structure that describes them, and that
> can be easily parsed by an external tool.

That would be nice and may even be helpful for solving the
dependency/deferred probe hell (or at least optimizing probe deferring).

Best regards,
Tomasz
David Gibson Nov. 4, 2013, 2:28 p.m. UTC | #16
On Mon, Nov 04, 2013 at 12:15:58AM +0100, Tomasz Figa wrote:
> On Saturday 26 of October 2013 10:11:06 David Gibson wrote:
> > On Fri, Oct 25, 2013 at 10:21:22AM -0500, Jon Loeliger wrote:
> > > > On 10/25/2013 12:43 AM, Grant Likely wrote:
> > > > > On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren
> > > > > <swarren@wwwdotorg.org>> > 
> > > > wrote:
> > > > >> From: Stephen Warren <swarren@nvidia.com>
> > > > >> 
> > > > >> This is a very quick proof-of-concept re: how a DT schema checker
> > > > >> might
> > > > >> look if written in C, and integrated into dtc.
> > > > > 
> > > > > Thanks for looking at this.
> > > > > 
> > > > > Very interesting. Certainly an expedient way to start checking
> > > > > schemas,
> > > > > and for certain bindings it may be the best approach. The downside
> > > > > is it forces a recompilation of DTC to bring in new bindings and
> > > > > it isn't a great meduim for mixing schema with documentation in
> > > > > the bindings.> > 
> > > > This approach would certainly require recompiling something. I threw
> > > > the code into dtc simply because it was the easiest container for
> > > > the demonstration. It could be a separate DT validation utility if
> > > > we wanted, although we'd need to split the DT parser from dtc into
> > > > a library to avoid code duplication. The resultant utility could be
> > > > part of the repo containing the DTs, so it didn't end up as a
> > > > separate package to manage.
> > > > 
> > > > I think the additional documentation could be added as comments in
> > > > the
> > > > validation functions, just like IIRC it was to be represented as
> > > > comments even in the .dts-based schema proposals.
> > > 
> > > DTers,
> > > 
> > > I think the additional benefit of starting with a direct C
> > > implementation is that it causes us to begin to actually
> > > codify the schema requirements.  Sure, it may not be ideal
> > > at first, but over time it may reveal consistent patterns
> > > that can be extracted.  And it may reveal what a real schema
> > > might look like and how it might be expressed better.  Which
> > > is to say that perhaps we are letting "perfect" get in the
> > > way of "good enough to start"?
> > > 
> > > In the meantime, someone has shown us the code and we can
> > > get started.  It's a Small Matter of Refactoring later. :-)
> > 
> > Yes!  This!
> > 
> > Think of this prototype as a mechanism for collating and applying a
> > bunch of schemas to the tree.  At the moment the schemas are all hard
> > coded in C, but it can be extended to load some or all of them
> > dynamically from a description / template / whatever.
> > 
> > That also gives us the flexibility to start out with a simple but
> > limited schema language which handles common cases, while leaving the
> > complex cases in C, at least until we understand the requirements well
> > enough to extend the schema language.
> 
> This is fine as an intermediate step, but I'm afraid that the overhead of 
> work needed to describe all the bindings using C language directly will be 
> pretty big. C language isn't really best fitted for such purposes.

I don't disagree.

> If we agree to base on this, but solely as a mechanism and a base for 
> further work, my idea would be to introduce a schema description language 
> anyway and then some code generator that would generate C code from 
> schemas described using such language

I suspect interpreting the schemas rather than compiling them into C
code will be more convenient, but that's a detail, really.

> (and possibly also something to 
> generate textual documentation from schemas, so we could have a central 
> repository of indexed DT bindings, for example on [1] - maybe kerneldoc 
> could be reused here).

Hrm.  I think trying to generate documentation from a schema is a bit
backwards.  I think it makes more sense to think of the binding as the
documentation, some parts of which are machine parseable to generate
the formal schema.  Literate schema programming, if you want to think
of it that way.

> Such design would allow for describing a lot of cases using a simple 
> description language, while leaving the possibility of adding inline C 
> snippets, like PHP in HTML, to handle those super complex ones.
> 
> [1] https://www.kernel.org/doc/htmldocs/
> 
> Best regards,
> Tomasz
>
Stephen Warren Nov. 4, 2013, 4:37 p.m. UTC | #17
On 11/04/2013 02:28 AM, Arnd Bergmann wrote:
> On Monday 04 November 2013 00:26:29 Tomasz Figa wrote:
>>
>> Aha, one more thing. As it was mentioned on ARM Mini Summit, as a long 
>> term goal, we will eventually have to introduce another validation tool 
>> that checks device drivers against device tree bindings. We should keep 
>> this in mind when considering schema design.
> 
> Incidentally I have just had an idea for a new driver-level API that
> should let us do this much easier. I hope to find some time in the
> next few days to come up with draft kernel patches. If you don't hear
> back from me soon but want to work on validating the drivers against
> the bindings, please contact me again.
> 
> The basic idea is to extend 'devres' to automatically register
> all the resources (registers, irq, dma, gpio, pinctrl, clk, regulator, ...)
> and simple properties before the ->probe() callback is even called,
> based on a per-driver data structure that describes them, and that
> can be easily parsed by an external tool.

I had suggested that while talking to someone at the kernel summit,
basically each driver supplies functions like:

1) ACPI -> platform data or resources converter
2) DT -> platform or resources data converter
3) anything else -> platform or resources data converter
4) probe()

One of (1)..(3) (depending on device instantiation source) is called to
translate the data source to platform data or resources, before probe
(and could indeed handle much of deferred probe I suppose).

(4) is called to actually initialize the device, and always has complete
pre-parsed platform data/resources available, and does no DT/ACPI/...
parsing.

I forget who I was talking to, but it was asserted something like this
had been proposed before, and wasn't accepted. Unfortunately, I don't
entirely recall why...

It may have been due to the fact I proposed (1) and (2) above being
separate, rather than identical, due to using some unified API to read
data from ACPI and DT. That wasn't the most interesting aspect of the
proposal though. Still, I think conceptually there could be other data
sources in the future, so we may need to allow different types of
conversion function at some point, even if the ACPI and DT
implementations can end up being the same.
Stephen Warren Nov. 4, 2013, 4:42 p.m. UTC | #18
On 11/03/2013 04:15 PM, Tomasz Figa wrote:
> On Saturday 26 of October 2013 10:11:06 David Gibson wrote:
>> On Fri, Oct 25, 2013 at 10:21:22AM -0500, Jon Loeliger wrote:
>>>> On 10/25/2013 12:43 AM, Grant Likely wrote:
>>>>> On Thu, 24 Oct 2013 22:51:28 +0100, Stephen Warren
>>>>> <swarren@wwwdotorg.org>> > 
>>>> wrote:
>>>>>> From: Stephen Warren <swarren@nvidia.com>
>>>>>>
>>>>>> This is a very quick proof-of-concept re: how a DT schema checker
>>>>>> might
>>>>>> look if written in C, and integrated into dtc.
>>>>>
>>>>> Thanks for looking at this.
>>>>>
>>>>> Very interesting. Certainly an expedient way to start checking
>>>>> schemas,
>>>>> and for certain bindings it may be the best approach. The downside
>>>>> is it forces a recompilation of DTC to bring in new bindings and
>>>>> it isn't a great meduim for mixing schema with documentation in
>>>>> the bindings.> > 
>>>> This approach would certainly require recompiling something. I threw
>>>> the code into dtc simply because it was the easiest container for
>>>> the demonstration. It could be a separate DT validation utility if
>>>> we wanted, although we'd need to split the DT parser from dtc into
>>>> a library to avoid code duplication. The resultant utility could be
>>>> part of the repo containing the DTs, so it didn't end up as a
>>>> separate package to manage.
>>>>
>>>> I think the additional documentation could be added as comments in
>>>> the
>>>> validation functions, just like IIRC it was to be represented as
>>>> comments even in the .dts-based schema proposals.
>>>
>>> DTers,
>>>
>>> I think the additional benefit of starting with a direct C
>>> implementation is that it causes us to begin to actually
>>> codify the schema requirements.  Sure, it may not be ideal
>>> at first, but over time it may reveal consistent patterns
>>> that can be extracted.  And it may reveal what a real schema
>>> might look like and how it might be expressed better.  Which
>>> is to say that perhaps we are letting "perfect" get in the
>>> way of "good enough to start"?
>>>
>>> In the meantime, someone has shown us the code and we can
>>> get started.  It's a Small Matter of Refactoring later. :-)
>>
>> Yes!  This!
>>
>> Think of this prototype as a mechanism for collating and applying a
>> bunch of schemas to the tree.  At the moment the schemas are all hard
>> coded in C, but it can be extended to load some or all of them
>> dynamically from a description / template / whatever.
>>
>> That also gives us the flexibility to start out with a simple but
>> limited schema language which handles common cases, while leaving the
>> complex cases in C, at least until we understand the requirements well
>> enough to extend the schema language.
> 
> This is fine as an intermediate step, but I'm afraid that the overhead of 
> work needed to describe all the bindings using C language directly will be 
> pretty big. C language isn't really best fitted for such purposes.

My opinion is the exact opposite.

If you create a custom schema language, you have to develop:

* An entire new language specification, and perhaps syntax to represent
the schema.

* A new parser (if not lexer).

* Keep extending that as new schemas require more and more different
validation that wasn't originally considered.

Once we've specified every schema, we'll end up having had to invent a
completely new Turing-complete language.

However, if schemas are simply C code, you simply write code to validate
the data. In my opinion, nothing could be simpler. Everyone working on
the kernel is already familiar with C. We can refactor common patterns
into function calls so people don't have to re-implement it.

I don't see any reason why the C schema checker couldn't be linked into
the kernel and applied at run-time, assuming we take a little care to
write is as a library we call from dtc, rather than something closely
coupled with dtc's internals. (Or implement it as a separate app).

(Note that there's a bit more information in a .dts that a DTB, so some
schema checking might be harder on a DTB). For example, phandle
references are just another cell in the binary, but have a unique syntax
in the source.
Olof Johansson Nov. 4, 2013, 6:57 p.m. UTC | #19
On Mon, Nov 4, 2013 at 8:37 AM, Stephen Warren <swarren@wwwdotorg.org> wrote:

> I had suggested that while talking to someone at the kernel summit,
> basically each driver supplies functions like:
>
> 1) ACPI -> platform data or resources converter
> 2) DT -> platform or resources data converter
> 3) anything else -> platform or resources data converter
> 4) probe()
>
> One of (1)..(3) (depending on device instantiation source) is called to
> translate the data source to platform data or resources, before probe
> (and could indeed handle much of deferred probe I suppose).
>
> (4) is called to actually initialize the device, and always has complete
> pre-parsed platform data/resources available, and does no DT/ACPI/...
> parsing.
>
> I forget who I was talking to, but it was asserted something like this
> had been proposed before, and wasn't accepted. Unfortunately, I don't
> entirely recall why...
>
> It may have been due to the fact I proposed (1) and (2) above being
> separate, rather than identical, due to using some unified API to read
> data from ACPI and DT. That wasn't the most interesting aspect of the
> proposal though. Still, I think conceptually there could be other data
> sources in the future, so we may need to allow different types of
> conversion function at some point, even if the ACPI and DT
> implementations can end up being the same.

I think it might have been me you were talking to. I know Grant
considered something like that when first doing the device tree
enablement, and chose not to do it then but I am not sure what the
motivations were. Given that we have cleaned up a lot of driver
probing already by now, it might be suitable to try again.


-Olof
Arnd Bergmann Nov. 4, 2013, 8:43 p.m. UTC | #20
On Monday 04 November 2013 09:37:07 Stephen Warren wrote:
> > The basic idea is to extend 'devres' to automatically register
> > all the resources (registers, irq, dma, gpio, pinctrl, clk, regulator, ...)
> > and simple properties before the ->probe() callback is even called,
> > based on a per-driver data structure that describes them, and that
> > can be easily parsed by an external tool.
> 
> I had suggested that while talking to someone at the kernel summit,
> basically each driver supplies functions like:
> 
> 1) ACPI -> platform data or resources converter
> 2) DT -> platform or resources data converter
> 3) anything else -> platform or resources data converter
> 4) probe()

FWIW, here is a very early draft of the interfaces I have in mind.
At the moment the implementation is DT focused, but that should
be extensible to ACPI if necessary.

At the end, you can see how a probe function could end up looking.
I'm sure this is full of bugs at the moment, incomplete and needs
to be moved into actual header and C files, but it should be enough
to get across where I'm getting with this, and to see if anyone
thinks it's a really bad idea or won't actually work.

	Arnd

#if 0
/* allocate drvdata */
DEVM_ALLOC,

/* request hardware properties */
DEVM_IRQ,
DEVM_IOMAP,
DEVM_GPIO,
DEVM_DMA_SLAVE,
DEVM_PINCTRL,
DEVM_REGULATOR,
DEVM_CLK,
DEVM_PWM,
DEVM_USBPHY,

/* auxiliary information */
DEVM_PROP_BOOL
DEVM_PROP_U32
DEVM_PROP_STRING
#endif

#error don't bother compiling this file, it's only proof-of-concept

struct device;

struct devm_probe {
	int (*initfunc)(struct device *dev, const struct devm_probe *probe);
	ptrdiff_t offset;
	unsigned int index;
	const char *name;
	void *arg;
	unsigned int flags;
};

/* like offsetof(), but ensures that MEMBER is of type MEMBERTYPE */
#define offsetof_t(TYPE, MEMBER, MEMBERTYPE) \
	((typeof(MEMBER)*)0 - typeof(MEMBERTYPE*)0 + offsetof((TYPE), (MEMBER)))

/* cast 'ptr' to void* and cause a build warning if it wasn't of 'type' before */
#define typecheck_ptr(ptr, type) \
	(void *)(ptr - (type)NULL + (type)NULL)

/*
 * This is the main entry point for drivers: 
 *
 * Given an zero-terminated array of 'struct devm_probe' entries,
 * this calls all initfunc pointers in the given order. Each initfunc
 * may manipulate a field in the driver_data, as pointed to by
 * the 'offset' member.
 */
int devm_probe(struct device *dev, const struct devm_probe *probe)
{
	int ret;

	for (ret = 0; !ret && probe->initfunc, probe++)
		ret = probe->initfunc(dev, probe);

	return ret;
}
EXPORT_SYMBOL_GPL(devm_probe);

/*
 * this must be the called before any of the others, or not at
 * all, if dev_set_drvdata() has already been called.
 */
static void devm_probe_alloc_release(struct device *dev, void *res)
{
	dev_set_drvdata(dev, NULL);
}
int devm_probe_alloc(struct device *dev, const struct devm_probe *probe)
{
	void *dr;

	if (dev_get_drvdata)
		return -EBUSY;

	dr = alloc_dr(devm_probe_alloc_release, probe->offset, GFP_KERNEL);
	if (unlikely(!dr))
		return -ENOMEM;

	dev_set_drvdata(dev, dr);
	set_node_dbginfo(&dr->node, "devm_probe_alloc", size);
	devres_add(dev, dr->data);
}
EXPORT_SYMBOL_GPL(devm_probe_alloc);


#define DEVM_ALLOC(_struct) \
	{ .initfunc = devm_probe_alloc, .offset = sizeof(struct _struct), }

int devm_probe_irq(struct device *dev, const struct devm_probe *probe)
{
	int ret;
	int irq;
	int *irqp;
	int index;
	const char *name;

	/* come up with a reasonable string for the irq name,
	   maybe create devm_kasprintf() to allow arbitrary length? */
	name = devm_kmalloc(dev, 32, GFP_KERNEL);
	if (!name)
		return -ENOMEM;
	if (probe->name)
		snprintf(name, 32 - 1, "%s:%s", dev_name(dev), probe->name);
	else
		snprintf(name, 32 - 1, "%s:%n", dev_name(dev), probe->index);

	/* find IRQ number from resource if platform device */
	irq = 0;
	if (dev->bus_type == &platform_bus) {
		struct platform_device *pdev = to_platform_device(dev);

		if (probe->name)
			irq = platform_get_irq_byname(pdev, probe->name);
		else
			irq = platform_get_irq(pdev, probe->index);
	}

	/* try getting irq number from device tree if that failed */
	if (!irq && dev->of_node) {
		if (probe->name)
			index = of_property_match_string(dev->of_node,
							 "interrupt-names",
							 probe->name);
		else
			index = probe->index;

		irq = irq_of_parse_and_map(dev->of_node, index);
	}

	/* copy irq number to driver data */
	irqp = dev_get_drvdata(dev) + probe->offset;
	*irqp = irq;
	
	return devm_request_irq(dev, irq, probe->arg, probe->flags, name, probe->dev);
}
EXPORT_SYMBOL_GPL(devm_probe_irq);

#define DEVM_IRQ(_struct, _member, _index, _handler, _flags) {	\
	.initfunc = devm_probe_irq,				\
	.offset = offsetof_t(struct _struct, _member, int),	\
	.index = _index,					\
	.arg = typecheck_ptr((_handler), irq_handler_t),	\
	.flags = _flags,					\
}	

#define DEVM_IRQ_NAMED(_struct, _member, _name, _handler, _flags) { \
	.initfunc = devm_probe_irq,				\
	.offset = offsetof_t(struct _struct, _member, int),	\
	.name = _name,						\
	.arg = typecheck_ptr((_handler), irq_handler_t),	\
	.flags = _flags,					\
}	


#define DEVM_IOMAP_NOREQUEST 1

int devm_probe_iomap(struct device *dev, const struct devm_probe *probe)
{
	struct resource res, *resp;
	void __iomem *vaddr;
	void * __iomem *vaddrp;
	int ret;

	/* find mem resource from platform device */
	if (dev->bus_type == &platform_bus) {
		struct platform_device *pdev = to_platform_device(dev);

		if (probe->name)
			resp = platform_get_resource_byname(pdev,
					IORESOURCE_MEM, probe->name);
		else
			resp = platform_get_resource(pdev,
					IORESOURCE_MEM, probe->index);
	}

	/* try getting resource from device tree if that failed */
	if (!resp && dev->of_node) {
		if (probe->name)
			index = of_property_match_string(dev->of_node,
							 "reg-names",
							 probe->name);
		else
			index = probe->index;

		ret = of_address_to_resource(dev->of_node, index, &res);
		if (ret)
			return ret;
		resp = &res;
	}


	if (probe->flags & DEVM_IOMAP_NOREQUEST) {
		vaddr = devm_ioremap(dev, resp->start, resource_size(resp));
		if (!vaddr)
			return -EINVAL;
	} else {
		vaddr = devm_ioremap_resource(dev, resp);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	vaddrp = dev_get_drvdata(dev) + probe->offset;
	*vaddrp = vaddr;

	return 0;
}
EXPORT_SYMBOL_GPL(devm_probe_iomap);

#define DEVM_IOMAP(_struct, _member, _index, _flags) {			\
	.initfunc = devm_probe_iomap,					\
	.offset = offsetof_t(struct _struct, _member, void __iomem *),	\
	.index = _index,						\
	.flags = _flags,						\
}	

#define DEVM_IOMAP_NAMED(_struct, _member, _name, _flags) { 		\
	.initfunc = devm_probe_iomap,					\
	.offset = offsetof_t(struct _struct, _member, void __iomem *),	\
	.name = _name,							\
	.flags = _flags,						\
}	

int devm_probe_gpio(struct device *dev, const struct devm_probe *probe)
{
	struct gpio_desc *desc, **descp;

	desc = devm_gpiod_get_index(dev, probe->name, probe->index);
	if (IS_ERR(desc)) {
		/* FIXME: this looks wrong */
		desc = of_get_named_gpiod_flags(dev->of_node, probe->name,
						probe->index, NULL);
		if (IS_ERR(desc))
			return PTR_ERR(desc);
		devm_gpio_request(dev, desc_to_gpio(desc), probe->name);
	}

	descp = dev_get_drvdata(dev) + probe->offset;
	*descp = desc;

	return 0;
}

#define DEVM_GPIO(_struct, _member, _index) {				 \
	.initfunc = devm_probe_iomap,					 \
	.offset = offsetof_t(struct _struct, _member, struct gpio_desc*),\
	.name = "gpios",						 \
	.index = _index,						 \
}	

#define DEVM_GPIO_NAMED(_struct, _member, _name) { 			 \
	.initfunc = devm_probe_iomap,					 \
	.offset = offsetof_t(struct _struct, _member, struct gpio_desc*),\
	.name = _name,							 \
}	

static void devm_probe_dma_release(struct device *dev, void *chanp)
{
	dma_release_channel(*(struct dma_chan**)chanp);
}

int devm_probe_dma(struct device *dev, const struct devm_probe *probe)
{
	struct dma_chan *chan, **chanp;

	/* there is no devm_dma_request_channel yet, so build our own */
	chanp = devres_alloc(devm_probe_dma_release, sizeof(chanp), GFP_KERNEL);
	if (!chanp)
		return -ENOMEM;

	chan = dma_request_slave_channel(dev, probe->name);
	if (!chan) {
		devres_free(chanp);
		return -EINVAL;
	}

	*chanp = chan;
	devres_add(dev, chanp);

	/* add pointer to the private data */
	chanp = dev_get_drvdata(dev) + probe->offset;
	*chanp = chan;

	return 0;
}

#define DEVM_DMA_SLAVE(_struct, _member, _name)				\
	.offset = offsetof_t(struct _struct, _member, struct dma_chan*),\
	.name = _name,							\
}

/*
 * simple properties: bool, u32, string
 * no actual need for managed interfaces, just a way to abstract the
 * access to DT or other information source
 */
int devm_probe_prop_bool(struct device *dev, struct devm_probe *probe)
{
	bool *val;

	val = dev_get_drvdata(dev) + probe->offset;
	*val = of_property_read_bool(dev->of_node, probe->name);

	return 0;
}
EXPORT_SYMBOL_GPL(devm_probe_prop_bool);

#define DEVM_PROP_BOOL(_struct, _member, _name)			\
	.offset = offsetof_t(struct _struct, _member, bool),	\
	.name = _name,						\
}

int devm_probe_prop_u32(struct device *dev, struct devm_probe *probe)
{
	u32 *val;
	int ret;

	val = dev_get_drvdata(dev) + probe->offset;
	ret = of_property_read_u32(dev->of_node, probe->name, val);

	return ret;
}
EXPORT_SYMBOL_GPL(devm_probe_prop_bool);

#define DEVM_PROP_U32(_struct, _member, _name)			\
	.offset = offsetof_t(struct _struct, _member, u32),	\
	.name = _name,						\
}

int devm_probe_prop_string(struct device *dev, struct devm_probe *probe)
{
	const char **val;
	int ret;

	val = dev_get_drvdata(dev) + probe->offset;
	ret = of_property_read_string(dev->of_node, probe->name, val);

	return ret;
}
EXPORT_SYMBOL_GPL(devm_probe_prop_bool);

#define DEVM_PROP_STRING(_struct, _member, _name)			\
	.offset = offsetof_t(struct _struct, _member, const char *),	\
	.name = _name,							\
}

/* example driver */
struct foo_priv {
	spinlock_t lock;
	void __iomem *regs;
	int irq;
	struct gpio_desc *gpio;
	struct dma_chan *rxdma;
	struct dma_chan *txdma;
	bool oldstyle_dma;
};

static irqreturn_t foo_irq_handler(int irq, void *dev);

/*
 * this lists all properties we access from the driver. The list
 * is interpreted by devm_probe() and can be programmatically
 * verified to match the binding.
 */
static const struct devm_probe foo_probe_list[] = {
	DEVM_ALLOC(foo_priv),
	DEVM_IOMAP(foo_priv, regs, 0, 0),
	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),
	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
	DEVM_GPIO(foo_priv, gpio, 0);
	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),
	{},
};

static int foo_probe(struct platform_device *dev)
{
	int ret;

	ret = devm_probe(dev->dev, foo_probe_list);
	if (ret)
		return ret;

	return bar_subsystem_register(&foo_bar_ops, dev);
}
Jason Gunthorpe Nov. 4, 2013, 9:29 p.m. UTC | #21
On Mon, Nov 04, 2013 at 09:43:22PM +0100, Arnd Bergmann wrote:

> /*
>  * this lists all properties we access from the driver. The list
>  * is interpreted by devm_probe() and can be programmatically
>  * verified to match the binding.
>  */
> static const struct devm_probe foo_probe_list[] = {
> 	DEVM_ALLOC(foo_priv),
> 	DEVM_IOMAP(foo_priv, regs, 0, 0),
> 	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),
> 	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
> 	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
> 	DEVM_GPIO(foo_priv, gpio, 0);
> 	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),
> 	{},
> };

Drivers are required to gain control of, and disable the device before
they bind and enable things like DMA or interrupts.

At the very least the action list above needs an explicit callback to
do that step..

> static int foo_probe(struct platform_device *dev)
> {
> 	int ret;
> 
> 	ret = devm_probe(dev->dev, foo_probe_list);

Some subsystems (like net) have the core system allocate the private
data, and some subsystem (like tpm) steal the drvdata for use in the
core, drivers can't touch it.

Regards,
Jason
Stephen Warren Nov. 4, 2013, 9:43 p.m. UTC | #22
On 11/04/2013 02:29 PM, Jason Gunthorpe wrote:
> On Mon, Nov 04, 2013 at 09:43:22PM +0100, Arnd Bergmann wrote:
> 
>> /*
>>  * this lists all properties we access from the driver. The list
>>  * is interpreted by devm_probe() and can be programmatically
>>  * verified to match the binding.
>>  */
>> static const struct devm_probe foo_probe_list[] = {
>> 	DEVM_ALLOC(foo_priv),
>> 	DEVM_IOMAP(foo_priv, regs, 0, 0),
>> 	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),
>> 	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
>> 	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
>> 	DEVM_GPIO(foo_priv, gpio, 0);
>> 	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),
>> 	{},
>> };
> 
> Drivers are required to gain control of, and disable the device before
> they bind and enable things like DMA or interrupts.
> 
> At the very least the action list above needs an explicit callback to
> do that step..

For IRQs, it looks like Arnd's code was simply parsing the IRQ
specifier, and converting it to the Linux-internal number. The actual
request of the IRQ was presumably left to probe(). I think theren's no
issue here.

For DMA, it does look like Arnd's code was requesting it too, but that
should also be fine; as long as no transactions are actually issued
against that DMA slave channel, then the HW state shouldn't matter?
Stephen Warren Nov. 4, 2013, 9:50 p.m. UTC | #23
On 11/04/2013 01:43 PM, Arnd Bergmann wrote:
> On Monday 04 November 2013 09:37:07 Stephen Warren wrote:
>>> The basic idea is to extend 'devres' to automatically register
>>> all the resources (registers, irq, dma, gpio, pinctrl, clk, regulator, ...)
>>> and simple properties before the ->probe() callback is even called,
>>> based on a per-driver data structure that describes them, and that
>>> can be easily parsed by an external tool.
>>
>> I had suggested that while talking to someone at the kernel summit,
>> basically each driver supplies functions like:
>>
>> 1) ACPI -> platform data or resources converter
>> 2) DT -> platform or resources data converter
>> 3) anything else -> platform or resources data converter
>> 4) probe()
> 
> FWIW, here is a very early draft of the interfaces I have in mind.
> At the moment the implementation is DT focused, but that should
> be extensible to ACPI if necessary.
> 
> At the end, you can see how a probe function could end up looking.
> I'm sure this is full of bugs at the moment, incomplete and needs
> to be moved into actual header and C files, but it should be enough
> to get across where I'm getting with this, and to see if anyone
> thinks it's a really bad idea or won't actually work.

This looks interesting. The simple cases end up quite simple. I wonder
what's the best way to handle the more complex cases; more on that below.

> int devm_probe_gpio(struct device *dev, const struct devm_probe *probe)
...
> #define DEVM_GPIO(_struct, _member, _index) {				 \
...
> #define DEVM_GPIO_NAMED(_struct, _member, _name) { 			 \

Should those save the GPIO flags too? Or, do we assume that gpiod-based
GPIOs already handle the flags inside the gpiod API?

> /*
>  * this lists all properties we access from the driver. The list
>  * is interpreted by devm_probe() and can be programmatically
>  * verified to match the binding.
>  */
> static const struct devm_probe foo_probe_list[] = {
> 	DEVM_ALLOC(foo_priv),
> 	DEVM_IOMAP(foo_priv, regs, 0, 0),
> 	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),

I wonder if it makes sense to handle pure data here, or whether this
only makes sense for resources/services that are provided by some other
device?

I guess it's nice for all resources and configuration to come through to
probe() in the same way, but I rather wonder how we'd handle bindings
that have large custom (driver-specific) tables of data. Would be simply
defer such things to custom code in probe()? If so, it feels slightly
inconsistent to handle some data in the probe_list[] and some in code in
probe(). Still, I guess there's something to be said for keeping the
simple cases simple.

> 	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
> 	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
> 	DEVM_GPIO(foo_priv, gpio, 0);
> 	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),

What about the case where some resources are optional, or only required
based on the values of certain other properties? I suppose that probe()
could call devm_probe() multiple times, with different probe_list[]s,
based on whatever algorithm they need.

> 	{},
> };
> 
> static int foo_probe(struct platform_device *dev)
> {
> 	int ret;
> 
> 	ret = devm_probe(dev->dev, foo_probe_list);
> 	if (ret)
> 		return ret;
> 
> 	return bar_subsystem_register(&foo_bar_ops, dev);
> }
Jason Gunthorpe Nov. 4, 2013, 10:21 p.m. UTC | #24
On Mon, Nov 04, 2013 at 02:43:00PM -0700, Stephen Warren wrote:
> On 11/04/2013 02:29 PM, Jason Gunthorpe wrote:
> > On Mon, Nov 04, 2013 at 09:43:22PM +0100, Arnd Bergmann wrote:
> > 
> >> /*
> >>  * this lists all properties we access from the driver. The list
> >>  * is interpreted by devm_probe() and can be programmatically
> >>  * verified to match the binding.
> >>  */
> >> static const struct devm_probe foo_probe_list[] = {
> >> 	DEVM_ALLOC(foo_priv),
> >> 	DEVM_IOMAP(foo_priv, regs, 0, 0),
> >> 	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),
> >> 	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
> >> 	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
> >> 	DEVM_GPIO(foo_priv, gpio, 0);
> >> 	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),
> >> 	{},
> >> };
> > 
> > Drivers are required to gain control of, and disable the device before
> > they bind and enable things like DMA or interrupts.
> > 
> > At the very least the action list above needs an explicit callback to
> > do that step..
> 
> For IRQs, it looks like Arnd's code was simply parsing the IRQ
> specifier, and converting it to the Linux-internal number. The actual
> request of the IRQ was presumably left to probe(). I think theren's no
> issue here.

The handler is an argument to DEMV_IRQ_* so it can be passed to
request_irq..

int devm_probe_irq(struct device *dev, const struct devm_probe *probe)
{
[..]
	return devm_request_irq(dev, irq, probe->arg, probe->flags, name, probe->dev);
}

Which is nice because it gives a chance to centralize the error
handling printks as well.

> For DMA, it does look like Arnd's code was requesting it too, but that
> should also be fine; as long as no transactions are actually issued
> against that DMA slave channel, then the HW state shouldn't matter?

TBH, I'm not really familiar with how the DMA slave API works.

As long as the API and HW guarantees that the channel cannot do any
DMAs no matter what the connected IP does, it is obviously fine..

But not all DMA is like that, eg bus master DMA in PCI requires
drivers to call pci_enable_device only after they disable DMA in the
device, which is why I mentioned it..

It would be nice to see a general API like this unambiguously make
clear the steps required:

- Gain access to registers
- Gain control of the device
- Enable DMA, bind interrupts, etc
- Finalize device setup
- Make the device visible to the rest of the system

I've seen lots of drivers where the above is just not done right.

Regards,
Jason
Arnd Bergmann Nov. 5, 2013, 8:22 a.m. UTC | #25
On Monday 04 November 2013, Stephen Warren wrote:
> On 11/04/2013 01:43 PM, Arnd Bergmann wrote:
> > int devm_probe_gpio(struct device *dev, const struct devm_probe *probe)
> ...
> > #define DEVM_GPIO(_struct, _member, _index) {				 \
> ...
> > #define DEVM_GPIO_NAMED(_struct, _member, _name) { 			 \
> 
> Should those save the GPIO flags too? Or, do we assume that gpiod-based
> GPIOs already handle the flags inside the gpiod API?

The latter. I would certainly hope to get away with assigning just one
structure field per callback function.
 
> > /*
> >  * this lists all properties we access from the driver. The list
> >  * is interpreted by devm_probe() and can be programmatically
> >  * verified to match the binding.
> >  */
> > static const struct devm_probe foo_probe_list[] = {
> > 	DEVM_ALLOC(foo_priv),
> > 	DEVM_IOMAP(foo_priv, regs, 0, 0),
> > 	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),
> 
> I wonder if it makes sense to handle pure data here, or whether this
> only makes sense for resources/services that are provided by some other
> device?
> 
> I guess it's nice for all resources and configuration to come through to
> probe() in the same way, but I rather wonder how we'd handle bindings
> that have large custom (driver-specific) tables of data. Would be simply
> defer such things to custom code in probe()? If so, it feels slightly
> inconsistent to handle some data in the probe_list[] and some in code in
> probe(). Still, I guess there's something to be said for keeping the
> simple cases simple.

Right, and I didn't intend to solve all cases in a completely generic way.
Thinking about it some more, a driver could actually add a custom callback
for some properties, but I don't know if that adds any value over just
interpreting them from the regular probe() function.

> > 	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
> > 	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
> > 	DEVM_GPIO(foo_priv, gpio, 0);
> > 	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),
> 
> What about the case where some resources are optional, or only required
> based on the values of certain other properties? I suppose that probe()
> could call devm_probe() multiple times, with different probe_list[]s,
> based on whatever algorithm they need.

That's one open question that I haven't found the best solution for yet.
My first take on that was to add another field in struct devm_probe to
mark a property as optional, but that would increase the overall
complexity. I'd first want to do a survey of what kinds of properties
are typically optional. If it's only a few of them, we could have
something like

DEVM_DMA_SLAVE_OPTIONAL()

for the cases that need it, with a variant of the callback function.

	Arnd
Arnd Bergmann Nov. 5, 2013, 8:39 a.m. UTC | #26
On Monday 04 November 2013, Jason Gunthorpe wrote:
> On Mon, Nov 04, 2013 at 09:43:22PM +0100, Arnd Bergmann wrote:
> 
> > /*
> >  * this lists all properties we access from the driver. The list
> >  * is interpreted by devm_probe() and can be programmatically
> >  * verified to match the binding.
> >  */
> > static const struct devm_probe foo_probe_list[] = {
> > 	DEVM_ALLOC(foo_priv),
> > 	DEVM_IOMAP(foo_priv, regs, 0, 0),
> > 	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),
> > 	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
> > 	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
> > 	DEVM_GPIO(foo_priv, gpio, 0);
> > 	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),
> > 	{},
> > };
> 
> Drivers are required to gain control of, and disable the device before
> they bind and enable things like DMA or interrupts.
> 
> At the very least the action list above needs an explicit callback to
> do that step..

I was aware of this for the interrupts, my plan for this was to either
leave the interrupt disabled when requesting it and leave it up to the
probe function to call enable_irq(), or to have the irq request function
last in the list, and preceded by a custom per-driver callback. AFAIK
this is only required for some drivers anyway, while other drivers
would function just fine when the irq is enabled early, because they
never raise an IRQ without first having something touch their registers.
Another option would be to have a flag in the driver data that lets
the irq handler know it should ignore the interrupt (or disable the source)
if the probe function has not completed yet.

What is the requirement for DMA channels? I did not expect the
dma_request_slave_channel step to have any ordering requirements.

> > static int foo_probe(struct platform_device *dev)
> > {
> > 	int ret;
> > 
> > 	ret = devm_probe(dev->dev, foo_probe_list);
> 
> Some subsystems (like net) have the core system allocate the private
> data, and some subsystem (like tpm) steal the drvdata for use in the
> core, drivers can't touch it.

I think we can handle the first case by adding a per-subsystem DEV_ALLOC
variant. For the second case, I don't see a solution other than than
changing the subsystem not to steal the driver_data pointer. I need
to find out how common this case is. If a lot of subsystems do it,
we probably want a different solution.

Thanks a lot for your insights!

	Arnd
Arnd Bergmann Nov. 5, 2013, 12:14 p.m. UTC | #27
On Monday 04 November 2013, Jason Gunthorpe wrote:
> On Mon, Nov 04, 2013 at 02:43:00PM -0700, Stephen Warren wrote:

> > For DMA, it does look like Arnd's code was requesting it too, but that
> > should also be fine; as long as no transactions are actually issued
> > against that DMA slave channel, then the HW state shouldn't matter?
> 
> TBH, I'm not really familiar with how the DMA slave API works.
> 
> As long as the API and HW guarantees that the channel cannot do any
> DMAs no matter what the connected IP does, it is obviously fine..

Right, to initiate a DMA, you need to at least request a channel,
configure it, and submit a descriptor. This does only the first
step, so I'm pretty it's ok.

> But not all DMA is like that, eg bus master DMA in PCI requires
> drivers to call pci_enable_device only after they disable DMA in the
> device, which is why I mentioned it..

Yes, a DMA bus master device is different here and needs to be
handled as you exlain.

> It would be nice to see a general API like this unambiguously make
> clear the steps required:
> 
> - Gain access to registers
> - Gain control of the device
> - Enable DMA, bind interrupts, etc
> - Finalize device setup
> - Make the device visible to the rest of the system
> 
> I've seen lots of drivers where the above is just not done right.

I agree this would be nice, but it's probably out of scope of what
we're trying to do here, unless you have better ideas for how to
structure it.

	Arnd
Jason Gunthorpe Nov. 5, 2013, 6:03 p.m. UTC | #28
On Tue, Nov 05, 2013 at 09:39:20AM +0100, Arnd Bergmann wrote:

> I was aware of this for the interrupts, my plan for this was to either
> leave the interrupt disabled when requesting it and leave it up to the
> probe function to call enable_irq(), or to have the irq request function
> last in the list, and preceded by a custom per-driver callback. AFAIK
> this is only required for some drivers anyway, while other drivers
> would function just fine when the irq is enabled early, because they
> never raise an IRQ without first having something touch their registers.
> Another option would be to have a flag in the driver data that lets
> the irq handler know it should ignore the interrupt (or disable the source)
> if the probe function has not completed yet.

Requiring enable_irq() would be symmetric with the DMA channels at
least.

I would not assume that most drivers don't need this - anything but
the simplest cases have a problem. Eg even a simple RTC driver
could have the driver attach while the RTC is configured to generate
an alarm IRQ. An I2C driver could attach while the HW is executing an
I2C transaction, etc.

The mistake in drivers is assuming that the device is already
quiescent at probe..

I like the idea of having a DEVM_QUIESCE_DEVICE(foo_reset)
action. That at least documents the need, and makes it clearer to
reviewers.

> I think we can handle the first case by adding a per-subsystem DEV_ALLOC
> variant. For the second case, I don't see a solution other than than
> changing the subsystem not to steal the driver_data pointer. I need
> to find out how common this case is. If a lot of subsystems do it,
> we probably want a different solution.

I've been brewing a patch set to fix TPM, but it is a large effort. I
would think other legacy subsystems that are not fully class device
enabled will have similar challenges?

How about an alternate entry point:

 net = alloc_etherdev(sizeof(foo_priv));
 priv = netdev_priv(net);
 devm_probe_priv(priv, probes);

Don't touch the drvdata for that flow.

Regards,
Jason
Arnd Bergmann Nov. 5, 2013, 6:48 p.m. UTC | #29
On Tuesday 05 November 2013, Jason Gunthorpe wrote:
> I've been brewing a patch set to fix TPM, but it is a large effort. I
> would think other legacy subsystems that are not fully class device
> enabled will have similar challenges?

I think a common pattern is to have the subsystem data be referenced from
the driver data using a pointer, and contain a back reference to the struct
device, which should not be a problem.

> How about an alternate entry point:
> 
>  net = alloc_etherdev(sizeof(foo_priv));
>  priv = netdev_priv(net);
>  devm_probe_priv(priv, probes);
> 
> Don't touch the drvdata for that flow.

Yes, that would work, but it would prevent another idea I had that I haven't
mentioned here: If we add a pointer to the 'struct devm_probe' array to
'struct device_driver', those can actually be called by the driver core
code before entering the probe() callback, or potentially replace the
probe() function entirely for simple drivers (provided we add a few more
bits that we haven't talked about here.

I was originally thinking of something like

int devm_probe_alloc_netdev(struct device *dev, struct devm_probe *probe)
{
	struct net_device *netdev, **netdevp;

	/* open-coded devm_alloc_ethernet */
	netdevp = devres_alloc(devm_free_netdev, sizeof (*netdevp), GFP_KERNEL);
	if (!netdevp)
		return -ENOMEM;

	netdev = alloc_etherdev(probe->size);
	if (!netdev) {
		devres_free(netdevp);
		return -ENOMEM;
	}

	*netdevp = netdev;
	devres_add(dev, netdevp);

	dev_set_drvdata(dev, netdev_priv(netdev));

        netdevp = dev_get_drvdata(dev) + probe->offset;
        *netdevp = netdev;
}
#define DEVM_ALLOC_NETDEV(_struct, _member) {				\
	.initfunc = devm_probe_alloc_netdev,				\
	.size = sizeof(struct _struct),					\
	.offset = offsetof_t(struct _struct, _member, struct netdev *), \
}

which would fit in nicely with the rest of the design, but now I'm no longer
sure if that would actually work with the lifetime rules of the netdev, which
would pin the refcount on the 'struct device', which in turn could prevent
the devres cleanup to be executed.

	Arnd
Jason Gunthorpe Nov. 5, 2013, 7:12 p.m. UTC | #30
On Tue, Nov 05, 2013 at 07:48:11PM +0100, Arnd Bergmann wrote:
> On Tuesday 05 November 2013, Jason Gunthorpe wrote:
> > I've been brewing a patch set to fix TPM, but it is a large effort. I
> > would think other legacy subsystems that are not fully class device
> > enabled will have similar challenges?
> 
> I think a common pattern is to have the subsystem data be referenced from
> the driver data using a pointer, and contain a back reference to the struct
> device, which should not be a problem.

The issue is sysfs, the TPM core attaches attributes to the driver's
struct device, which means it has to convert from struct device * to
its own private data. It is totally wrong, but that is the way
it has been for ever. :(

> Yes, that would work, but it would prevent another idea I had that I
> haven't mentioned here: If we add a pointer to the 'struct
> devm_probe' array to

Seems quiet reasonable, but having two entry points just means drivers
that need to use the 2nd can't use the pointer in struct devm_probe.

The other thing that occured to me is your method could reduce the
devm overhead. If you keep a pointer to the probe list and to the
private data structure then you can unwide all the allocations just by
running backwards along the probe list. You don't need to do any
per-resource devm allocations..

> which would fit in nicely with the rest of the design, but now I'm no longer
> sure if that would actually work with the lifetime rules of the netdev, which
> would pin the refcount on the 'struct device', which in turn could prevent
> the devres cleanup to be executed.

I looked into this question recently, while trying to fixup TPM..

I concluded that devm doesn't actually really follow the refcount on
struct device. devm cleanup is called unconditionally after remove()
is called, and remove is called unconditionally when the device_driver
is deleted, which is called unconditionally on module unload.

So anything that used to be in the remove() function can be safely
moved into devm, including netdev deallocation. This makes sense,
considering what devm is for..

This process is separate from the refcount driven release, which
happens once all krefs to the struct device are dropped (eg, sysfs
open files, open char devices, etc)

Which creates the nasty bit, a sysfs callback/etc can be run even
after the device release function has finished and after the private
data has been deleted..

At least, that is where my search ended up, would appreciate it if
anyone knows different :)

Regards,
Jason
Arnd Bergmann Nov. 5, 2013, 7:34 p.m. UTC | #31
On Tuesday 05 November 2013, Jason Gunthorpe wrote:
> The issue is sysfs, the TPM core attaches attributes to the driver's
> struct device, which means it has to convert from struct device * to
> its own private data. It is totally wrong, but that is the way
> it has been for ever. :(

I've just had a very brief look at that subsystem, and the first driver
I looked at (tpm_atmel) actually creates a child platform_device for
the sole purpose of registering that with the tpm subsystem, which seems
to avoid this problem. Maybe the same can be done for the other drivers
as well. Unfortunately, that will change the sysfs structure, which might
break user space relying on the current path to the device. The TPM
subsystem definely seems a bit unusual in this regard, so I hope not too
many other parts of the kernel have this particular problem.

(side note: another unusual aspect of the TPM subsystem is the use of a
 custom 'release' function override. Seems harmless, but very weird).	

	Arnd
Jason Gunthorpe Nov. 5, 2013, 7:58 p.m. UTC | #32
On Tue, Nov 05, 2013 at 08:34:00PM +0100, Arnd Bergmann wrote:
> On Tuesday 05 November 2013, Jason Gunthorpe wrote:
> > The issue is sysfs, the TPM core attaches attributes to the driver's
> > struct device, which means it has to convert from struct device * to
> > its own private data. It is totally wrong, but that is the way
> > it has been for ever. :(
> 
> I've just had a very brief look at that subsystem, and the first driver
> I looked at (tpm_atmel) actually creates a child platform_device for
> the sole purpose of registering that with the tpm subsystem, which seems
> to avoid this problem. 

The TPM subsystem requires a struct device to bind onto (for sysfs at
least), so drivers that don't have one are required to create one :|

atmel is unusual as most drivers have a pre-existing device.

> Maybe the same can be done for the other drivers as
> well. Unfortunately, that will change the sysfs structure, which
> might break user space relying on the current path to the
> device. 

AFAIK the correct fix is to have the TPM subsystem core create a
struct device for its own use (eg tpm0), under its own class, which is
what other subsystems I looked at do. This way the subsystem can have
access to a release function that is properly tied to the actual
object lifetime, and it can use container_of to retrieve its own
private data from, eg sysfs. Combined with get_ops/put_ops style
access I think you can solve the unload lifetime issues with sysfs..

However, even if all that is done, compatibility sysfs's under the
driver's struct device will still be needed, which AFAICT will still
require the drvdata be owned by the subsystem not the driver :(

> The TPM subsystem definely seems a bit unusual in this regard, so I
> hope not too many other parts of the kernel have this particular
> problem.

It is just old, it was never updated to modern standards, hopefully that
is rare :|

> (side note: another unusual aspect of the TPM subsystem is the use of a
>  custom 'release' function override. Seems harmless, but very weird).	

Near as I can tell that is a hack that was put in to avoid panics
around unload lifetime issues - it doesn't solve any problems, just
makes them less common..

This is why I looked so closely at devm, I thought 'lets just remove
that release function hack and use devm', but they are not
equivalent. :)

Thanks for looking at this, a good chunk of the first round of
my cleanups will hit 3.13, next on the list are these problems.
https://lkml.org/lkml/2013/9/23/444

Jason
Arnd Bergmann Nov. 5, 2013, 8:17 p.m. UTC | #33
On Tuesday 05 November 2013, Jason Gunthorpe wrote:
> On Tue, Nov 05, 2013 at 08:34:00PM +0100, Arnd Bergmann wrote:
> > On Tuesday 05 November 2013, Jason Gunthorpe wrote:
> > > The issue is sysfs, the TPM core attaches attributes to the driver's
> > > struct device, which means it has to convert from struct device * to
> > > its own private data. It is totally wrong, but that is the way
> > > it has been for ever. :(
> > 
> > I've just had a very brief look at that subsystem, and the first driver
> > I looked at (tpm_atmel) actually creates a child platform_device for
> > the sole purpose of registering that with the tpm subsystem, which seems
> > to avoid this problem. 
> 
> The TPM subsystem requires a struct device to bind onto (for sysfs at
> least), so drivers that don't have one are required to create one :|
> 
> atmel is unusual as most drivers have a pre-existing device.

Eeek, yes I see now. OTOH, it would still be able to do the same thing
by binding to the actual platform_device. Judging from the copyright
notice, this driver predates the move to creating 'struct device'
instances for (most) device nodes, and it uses the old-style
method of searching the DT from the module_init function, which is
not how we do things today.

Creating a platform_device (or a plain device, for that matter)
from a proper probe() function would have a similar effect from
the perspective of the TPM subsystem (or other subsystems using
the same drvdata trick), and make it possible to use my proposed
devm_probe method from any tpm driver. The main downside I can
see is that it requires duplicating some code in each TPM driver
that wants to convert to devm_probe, but on the upside it doesn't
require any changes to the TPM subsystem or to other drivers the
way that a proper fix would.

> > Maybe the same can be done for the other drivers as
> > well. Unfortunately, that will change the sysfs structure, which
> > might break user space relying on the current path to the
> > device. 
> 
> AFAIK the correct fix is to have the TPM subsystem core create a
> struct device for its own use (eg tpm0), under its own class, which is
> what other subsystems I looked at do.

Yes. Note that the use of new 'class'es is not recommended any more,
nowadays we are supposed to use 'bus_type' for this, which is
traditionally something slightly different, but technically almost
the same implementation-wise.

> This way the subsystem can have
> access to a release function that is properly tied to the actual
> object lifetime, and it can use container_of to retrieve its own
> private data from, eg sysfs. Combined with get_ops/put_ops style
> access I think you can solve the unload lifetime issues with sysfs..
> 
> However, even if all that is done, compatibility sysfs's under the
> driver's struct device will still be needed, which AFAICT will still
> require the drvdata be owned by the subsystem not the driver :(

That depends: the requirement is that no user space breaks after
a change. I assume that there is a fairly limited set of packages
accessing the tpm sysfs files. If all of them are written properly,
they should be able to deal with looking at the files in a different
place in sysfs.

Another option is to change the TPM core sysfs attributes not to
look at drvdata, but to use devres_find() to find the data they
need.

	Arnd
Jason Gunthorpe Nov. 5, 2013, 8:36 p.m. UTC | #34
On Tue, Nov 05, 2013 at 09:17:32PM +0100, Arnd Bergmann wrote:
> Creating a platform_device (or a plain device, for that matter)
> from a proper probe() function would have a similar effect from
> the perspective of the TPM subsystem (or other subsystems using

I'd rather see the core get fixed than more muck in the drivers,
especially since things are going in that direction :)

> Yes. Note that the use of new 'class'es is not recommended any more,
> nowadays we are supposed to use 'bus_type' for this, which is
> traditionally something slightly different, but technically almost
> the same implementation-wise.

Thanks, I will look into that!
 
> That depends: the requirement is that no user space breaks after
> a change. I assume that there is a fairly limited set of packages
> accessing the tpm sysfs files. If all of them are written properly,
> they should be able to deal with looking at the files in a different
> place in sysfs.

The other issue is that none of the sysfs files are even close to
following the one value per file rule, they are all wrong. I actually
think that nothing in userspace ever uses them and they were only ever
done for debugfs-like purposes.

My dark hope is to move them all to debugfs and entirely out of
sysfs.. But I think it will be through a CONFIG_TPM_SYSFS_COMPAT=y +
printk_once("Stop using these sysfs attributes!") kind of scheme.
 
> Another option is to change the TPM core sysfs attributes not to
> look at drvdata, but to use devres_find() to find the data they
> need.

Ooh, that looks like it could work well, I will keep that in mind!

So there is a path forward, it is just long :)

BTW, in the final form TPM will require a 2 stage init much like net,
the sequence will be:
 - Allocate the core and driver memory and resources
 - Setup the driver fully
 - Perform TPM commands on the fully active driver to setup the
   TPM itself
 - Register the TPM with the system

Just remarking,I don't see a problem with this and your devm_probe
plan.

Thanks
Jason
Thierry Reding Nov. 6, 2013, 12:17 p.m. UTC | #35
On Mon, Nov 04, 2013 at 09:43:22PM +0100, Arnd Bergmann wrote:
> On Monday 04 November 2013 09:37:07 Stephen Warren wrote:
> > > The basic idea is to extend 'devres' to automatically register
> > > all the resources (registers, irq, dma, gpio, pinctrl, clk, regulator, ...)
> > > and simple properties before the ->probe() callback is even called,
> > > based on a per-driver data structure that describes them, and that
> > > can be easily parsed by an external tool.
> > 
> > I had suggested that while talking to someone at the kernel summit,
> > basically each driver supplies functions like:
> > 
> > 1) ACPI -> platform data or resources converter
> > 2) DT -> platform or resources data converter
> > 3) anything else -> platform or resources data converter
> > 4) probe()
> 
> FWIW, here is a very early draft of the interfaces I have in mind.
> At the moment the implementation is DT focused, but that should
> be extensible to ACPI if necessary.
> 
> At the end, you can see how a probe function could end up looking.
> I'm sure this is full of bugs at the moment, incomplete and needs
> to be moved into actual header and C files, but it should be enough
> to get across where I'm getting with this, and to see if anyone
> thinks it's a really bad idea or won't actually work.

I know that this is completely unconstructive, but the below gives me
the creeps.

Thierry

> 
> 	Arnd
> 
> #if 0
> /* allocate drvdata */
> DEVM_ALLOC,
> 
> /* request hardware properties */
> DEVM_IRQ,
> DEVM_IOMAP,
> DEVM_GPIO,
> DEVM_DMA_SLAVE,
> DEVM_PINCTRL,
> DEVM_REGULATOR,
> DEVM_CLK,
> DEVM_PWM,
> DEVM_USBPHY,
> 
> /* auxiliary information */
> DEVM_PROP_BOOL
> DEVM_PROP_U32
> DEVM_PROP_STRING
> #endif
> 
> #error don't bother compiling this file, it's only proof-of-concept
> 
> struct device;
> 
> struct devm_probe {
> 	int (*initfunc)(struct device *dev, const struct devm_probe *probe);
> 	ptrdiff_t offset;
> 	unsigned int index;
> 	const char *name;
> 	void *arg;
> 	unsigned int flags;
> };
> 
> /* like offsetof(), but ensures that MEMBER is of type MEMBERTYPE */
> #define offsetof_t(TYPE, MEMBER, MEMBERTYPE) \
> 	((typeof(MEMBER)*)0 - typeof(MEMBERTYPE*)0 + offsetof((TYPE), (MEMBER)))
> 
> /* cast 'ptr' to void* and cause a build warning if it wasn't of 'type' before */
> #define typecheck_ptr(ptr, type) \
> 	(void *)(ptr - (type)NULL + (type)NULL)
> 
> /*
>  * This is the main entry point for drivers: 
>  *
>  * Given an zero-terminated array of 'struct devm_probe' entries,
>  * this calls all initfunc pointers in the given order. Each initfunc
>  * may manipulate a field in the driver_data, as pointed to by
>  * the 'offset' member.
>  */
> int devm_probe(struct device *dev, const struct devm_probe *probe)
> {
> 	int ret;
> 
> 	for (ret = 0; !ret && probe->initfunc, probe++)
> 		ret = probe->initfunc(dev, probe);
> 
> 	return ret;
> }
> EXPORT_SYMBOL_GPL(devm_probe);
> 
> /*
>  * this must be the called before any of the others, or not at
>  * all, if dev_set_drvdata() has already been called.
>  */
> static void devm_probe_alloc_release(struct device *dev, void *res)
> {
> 	dev_set_drvdata(dev, NULL);
> }
> int devm_probe_alloc(struct device *dev, const struct devm_probe *probe)
> {
> 	void *dr;
> 
> 	if (dev_get_drvdata)
> 		return -EBUSY;
> 
> 	dr = alloc_dr(devm_probe_alloc_release, probe->offset, GFP_KERNEL);
> 	if (unlikely(!dr))
> 		return -ENOMEM;
> 
> 	dev_set_drvdata(dev, dr);
> 	set_node_dbginfo(&dr->node, "devm_probe_alloc", size);
> 	devres_add(dev, dr->data);
> }
> EXPORT_SYMBOL_GPL(devm_probe_alloc);
> 
> 
> #define DEVM_ALLOC(_struct) \
> 	{ .initfunc = devm_probe_alloc, .offset = sizeof(struct _struct), }
> 
> int devm_probe_irq(struct device *dev, const struct devm_probe *probe)
> {
> 	int ret;
> 	int irq;
> 	int *irqp;
> 	int index;
> 	const char *name;
> 
> 	/* come up with a reasonable string for the irq name,
> 	   maybe create devm_kasprintf() to allow arbitrary length? */
> 	name = devm_kmalloc(dev, 32, GFP_KERNEL);
> 	if (!name)
> 		return -ENOMEM;
> 	if (probe->name)
> 		snprintf(name, 32 - 1, "%s:%s", dev_name(dev), probe->name);
> 	else
> 		snprintf(name, 32 - 1, "%s:%n", dev_name(dev), probe->index);
> 
> 	/* find IRQ number from resource if platform device */
> 	irq = 0;
> 	if (dev->bus_type == &platform_bus) {
> 		struct platform_device *pdev = to_platform_device(dev);
> 
> 		if (probe->name)
> 			irq = platform_get_irq_byname(pdev, probe->name);
> 		else
> 			irq = platform_get_irq(pdev, probe->index);
> 	}
> 
> 	/* try getting irq number from device tree if that failed */
> 	if (!irq && dev->of_node) {
> 		if (probe->name)
> 			index = of_property_match_string(dev->of_node,
> 							 "interrupt-names",
> 							 probe->name);
> 		else
> 			index = probe->index;
> 
> 		irq = irq_of_parse_and_map(dev->of_node, index);
> 	}
> 
> 	/* copy irq number to driver data */
> 	irqp = dev_get_drvdata(dev) + probe->offset;
> 	*irqp = irq;
> 	
> 	return devm_request_irq(dev, irq, probe->arg, probe->flags, name, probe->dev);
> }
> EXPORT_SYMBOL_GPL(devm_probe_irq);
> 
> #define DEVM_IRQ(_struct, _member, _index, _handler, _flags) {	\
> 	.initfunc = devm_probe_irq,				\
> 	.offset = offsetof_t(struct _struct, _member, int),	\
> 	.index = _index,					\
> 	.arg = typecheck_ptr((_handler), irq_handler_t),	\
> 	.flags = _flags,					\
> }	
> 
> #define DEVM_IRQ_NAMED(_struct, _member, _name, _handler, _flags) { \
> 	.initfunc = devm_probe_irq,				\
> 	.offset = offsetof_t(struct _struct, _member, int),	\
> 	.name = _name,						\
> 	.arg = typecheck_ptr((_handler), irq_handler_t),	\
> 	.flags = _flags,					\
> }	
> 
> 
> #define DEVM_IOMAP_NOREQUEST 1
> 
> int devm_probe_iomap(struct device *dev, const struct devm_probe *probe)
> {
> 	struct resource res, *resp;
> 	void __iomem *vaddr;
> 	void * __iomem *vaddrp;
> 	int ret;
> 
> 	/* find mem resource from platform device */
> 	if (dev->bus_type == &platform_bus) {
> 		struct platform_device *pdev = to_platform_device(dev);
> 
> 		if (probe->name)
> 			resp = platform_get_resource_byname(pdev,
> 					IORESOURCE_MEM, probe->name);
> 		else
> 			resp = platform_get_resource(pdev,
> 					IORESOURCE_MEM, probe->index);
> 	}
> 
> 	/* try getting resource from device tree if that failed */
> 	if (!resp && dev->of_node) {
> 		if (probe->name)
> 			index = of_property_match_string(dev->of_node,
> 							 "reg-names",
> 							 probe->name);
> 		else
> 			index = probe->index;
> 
> 		ret = of_address_to_resource(dev->of_node, index, &res);
> 		if (ret)
> 			return ret;
> 		resp = &res;
> 	}
> 
> 
> 	if (probe->flags & DEVM_IOMAP_NOREQUEST) {
> 		vaddr = devm_ioremap(dev, resp->start, resource_size(resp));
> 		if (!vaddr)
> 			return -EINVAL;
> 	} else {
> 		vaddr = devm_ioremap_resource(dev, resp);
> 		if (IS_ERR(vaddr))
> 			return PTR_ERR(vaddr);
> 	}
> 
> 	vaddrp = dev_get_drvdata(dev) + probe->offset;
> 	*vaddrp = vaddr;
> 
> 	return 0;
> }
> EXPORT_SYMBOL_GPL(devm_probe_iomap);
> 
> #define DEVM_IOMAP(_struct, _member, _index, _flags) {			\
> 	.initfunc = devm_probe_iomap,					\
> 	.offset = offsetof_t(struct _struct, _member, void __iomem *),	\
> 	.index = _index,						\
> 	.flags = _flags,						\
> }	
> 
> #define DEVM_IOMAP_NAMED(_struct, _member, _name, _flags) { 		\
> 	.initfunc = devm_probe_iomap,					\
> 	.offset = offsetof_t(struct _struct, _member, void __iomem *),	\
> 	.name = _name,							\
> 	.flags = _flags,						\
> }	
> 
> int devm_probe_gpio(struct device *dev, const struct devm_probe *probe)
> {
> 	struct gpio_desc *desc, **descp;
> 
> 	desc = devm_gpiod_get_index(dev, probe->name, probe->index);
> 	if (IS_ERR(desc)) {
> 		/* FIXME: this looks wrong */
> 		desc = of_get_named_gpiod_flags(dev->of_node, probe->name,
> 						probe->index, NULL);
> 		if (IS_ERR(desc))
> 			return PTR_ERR(desc);
> 		devm_gpio_request(dev, desc_to_gpio(desc), probe->name);
> 	}
> 
> 	descp = dev_get_drvdata(dev) + probe->offset;
> 	*descp = desc;
> 
> 	return 0;
> }
> 
> #define DEVM_GPIO(_struct, _member, _index) {				 \
> 	.initfunc = devm_probe_iomap,					 \
> 	.offset = offsetof_t(struct _struct, _member, struct gpio_desc*),\
> 	.name = "gpios",						 \
> 	.index = _index,						 \
> }	
> 
> #define DEVM_GPIO_NAMED(_struct, _member, _name) { 			 \
> 	.initfunc = devm_probe_iomap,					 \
> 	.offset = offsetof_t(struct _struct, _member, struct gpio_desc*),\
> 	.name = _name,							 \
> }	
> 
> static void devm_probe_dma_release(struct device *dev, void *chanp)
> {
> 	dma_release_channel(*(struct dma_chan**)chanp);
> }
> 
> int devm_probe_dma(struct device *dev, const struct devm_probe *probe)
> {
> 	struct dma_chan *chan, **chanp;
> 
> 	/* there is no devm_dma_request_channel yet, so build our own */
> 	chanp = devres_alloc(devm_probe_dma_release, sizeof(chanp), GFP_KERNEL);
> 	if (!chanp)
> 		return -ENOMEM;
> 
> 	chan = dma_request_slave_channel(dev, probe->name);
> 	if (!chan) {
> 		devres_free(chanp);
> 		return -EINVAL;
> 	}
> 
> 	*chanp = chan;
> 	devres_add(dev, chanp);
> 
> 	/* add pointer to the private data */
> 	chanp = dev_get_drvdata(dev) + probe->offset;
> 	*chanp = chan;
> 
> 	return 0;
> }
> 
> #define DEVM_DMA_SLAVE(_struct, _member, _name)				\
> 	.offset = offsetof_t(struct _struct, _member, struct dma_chan*),\
> 	.name = _name,							\
> }
> 
> /*
>  * simple properties: bool, u32, string
>  * no actual need for managed interfaces, just a way to abstract the
>  * access to DT or other information source
>  */
> int devm_probe_prop_bool(struct device *dev, struct devm_probe *probe)
> {
> 	bool *val;
> 
> 	val = dev_get_drvdata(dev) + probe->offset;
> 	*val = of_property_read_bool(dev->of_node, probe->name);
> 
> 	return 0;
> }
> EXPORT_SYMBOL_GPL(devm_probe_prop_bool);
> 
> #define DEVM_PROP_BOOL(_struct, _member, _name)			\
> 	.offset = offsetof_t(struct _struct, _member, bool),	\
> 	.name = _name,						\
> }
> 
> int devm_probe_prop_u32(struct device *dev, struct devm_probe *probe)
> {
> 	u32 *val;
> 	int ret;
> 
> 	val = dev_get_drvdata(dev) + probe->offset;
> 	ret = of_property_read_u32(dev->of_node, probe->name, val);
> 
> 	return ret;
> }
> EXPORT_SYMBOL_GPL(devm_probe_prop_bool);
> 
> #define DEVM_PROP_U32(_struct, _member, _name)			\
> 	.offset = offsetof_t(struct _struct, _member, u32),	\
> 	.name = _name,						\
> }
> 
> int devm_probe_prop_string(struct device *dev, struct devm_probe *probe)
> {
> 	const char **val;
> 	int ret;
> 
> 	val = dev_get_drvdata(dev) + probe->offset;
> 	ret = of_property_read_string(dev->of_node, probe->name, val);
> 
> 	return ret;
> }
> EXPORT_SYMBOL_GPL(devm_probe_prop_bool);
> 
> #define DEVM_PROP_STRING(_struct, _member, _name)			\
> 	.offset = offsetof_t(struct _struct, _member, const char *),	\
> 	.name = _name,							\
> }
> 
> /* example driver */
> struct foo_priv {
> 	spinlock_t lock;
> 	void __iomem *regs;
> 	int irq;
> 	struct gpio_desc *gpio;
> 	struct dma_chan *rxdma;
> 	struct dma_chan *txdma;
> 	bool oldstyle_dma;
> };
> 
> static irqreturn_t foo_irq_handler(int irq, void *dev);
> 
> /*
>  * this lists all properties we access from the driver. The list
>  * is interpreted by devm_probe() and can be programmatically
>  * verified to match the binding.
>  */
> static const struct devm_probe foo_probe_list[] = {
> 	DEVM_ALLOC(foo_priv),
> 	DEVM_IOMAP(foo_priv, regs, 0, 0),
> 	DEVM_PROP_BOOL(foo_priv, oldstyle_dma, "foo,oldstyle-dma"),
> 	DEVM_DMA_SLAVE(foo_priv, rxdma, "rx");
> 	DEVM_DMA_SLAVE(foo_priv, txdma, "tx");
> 	DEVM_GPIO(foo_priv, gpio, 0);
> 	DEVM_IRQ_NAMED(foo_priv, irq, foo_irq_handler, "fifo", IRQF_SHARED),
> 	{},
> };
> 
> static int foo_probe(struct platform_device *dev)
> {
> 	int ret;
> 
> 	ret = devm_probe(dev->dev, foo_probe_list);
> 	if (ret)
> 		return ret;
> 
> 	return bar_subsystem_register(&foo_bar_ops, dev);
> }
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
David Gibson Nov. 10, 2013, 11 a.m. UTC | #36
On Thu, Oct 31, 2013 at 03:11:15PM -0600, Stephen Warren wrote:
> On 10/25/2013 05:29 PM, David Gibson wrote:
> > On Thu, Oct 24, 2013 at 10:51:28PM +0100, Stephen Warren wrote:
> >> From: Stephen Warren <swarren@nvidia.com>
> >> 
> >> This is a very quick proof-of-concept re: how a DT schema checker
> >> might look if written in C, and integrated into dtc.
> 
> >> diff --git a/schemas/schema.c b/schemas/schema.c
> 
> >> +int schema_check_node(struct node *node)
> ...
> >> +	if (!best_checker) { +		printf("WARNING: no schema for node
> >> %s\n", node->fullpath); +		return 0; +	} + +	printf("INFO: Node
> >> %s selected checker %s\n", node->fullpath, +
> >> best_checker->name); + +	best_checker->checkfn(node);
> > 
> > IMO, thinking in terms of "the" schema for a node is a mistake. 
> > Instead, think in terms of a bunch of schemas, which "known" what 
> > nodes they're relevant for.  Often that will be determined by 
> > compatible, but it could also be determined by other things
> > (presence of 'interrupts', parent node, explicitly implied by
> > another schema).
> 
> I don't agree here.
> 
> Each node represents a single specific type of object, and the
> representation uses a single specific overall schema.
> 
> I'll admit that sometimes that schema is picked via the compatible
> value (most cases), and other times via the node name/path (e.g.
> /chosen, /memory).
> 
> In particular, I don't think it's correct to say that e.g. both a
> "Tegra I2C controller" schema and an "interrupt" schema apply equally
> to a "Tegra I2C DT node".

Why not?  Both are mandatory.

> Instead, I think that the "interrupt" schema
> only applies because the "Tegra I2C controller" schema happens to
> inherit from, or aggregate, the "interrupt" schema.

So, explicit inheritence makes sense in many cases, but I don't like
seeing these universal rules as inheritence based.  They should be
just that - universal - not opt-in for any particular device binding.

> I see two important results from this distinction:
> 
> 1) We should only allow properties from the "interrupt" schema to
> exist within a node /if/ the top-level schema for the node actually
> does make use of the "interrupt" schema". This is important since it
> disallows e.g.:
> 
>                 battery: smart-battery@b {
>                         compatible = "ti,bq20z45", "sbs,sbs-battery";
>                         reg = <0xb>;
>                         interrupt-parent = <&gpio>;
>                         interrupts = <5 4>;
>                 };
> 
> ... since the ti,bq20z45/sbs,sbs-battery don't actually have an
> interrupt output (assuming that the current binding doc for that
> compatible value accurately reflects the HW here anyway).
> 
> If we allowed the "interrupt" schema to match the node simply because
> it saw an "interrupts" property there, that'd allow this unused
> property to exist in the DT, whereas we really do want to throw an
> error here, so the DT author is aware they made a mistake.

So.  To clarify my proposal of multiple applied schemas: in order for
the node to "pass" the checks, it must satisfy all constraints of all
applicable schemas - they don't override each other.  Furthermore, I'm
not seeing any specific property as being owned by a particular schema
- multiple schemas applying to a node can place overlapping
constraints on the same property.

So in this case, the interrupt schema describes what the interrupts
property needs to look like if it exists, but the device schema says
that there should be no interrupts.  The only way to satisfy both is
to have no interrupts property, which is the right answer.

Actually, rather than a global "interrupts" schema here, more useful
would be a schema provided by the interrupt controller (and so
selected by the interrupt-parent property) which could validate the
internal format of the interrupt descriptors.  But, again, this
doesn't prevent the device schema from validating the number of
interrupt descriptors for this device.

> 2) Some inheritance or aggregation of schemas is parameterized, e.g.
> on property name. For example, GPIO properties are named ${xxx}-gpios.
> However, I don't believe there's any hard rule that absolutely
> mandates that an /unrelated/ property cannot exist that matches the
> same naming rule. Admittedly, it would be suboptimal if such a
> conflicting property name were used, but if there's no hard rule
> against it, I don't think we should design our schema checker to
> assume that.

The schema checker, no.  The schemas themselves, maybe.  At least for
really well established things, like interrupts, I think it's
reasonble that the schemas enforce best practice, not merely minimal
correctness.

> If the GPIO schema checker simply searched for any properties named
> according to that pattern, it might end up attempting to check
> properties that weren't actually generic GPIO specifiers. Instead, I'd
> prefer the node's top-level schema to explicitly state which
> properties should be checked according to which inherited schema(s).

So, in the case of GPIOs, I tend to agree.  For the case of
interrupts, not so much.

> In particular, perhaps this conflict could occur in a slightly generic
> binding for a series of similar I2C GPIO expander chips, some of which
> have 4 GPIOs and others 8. Someone might choose a "count-gpios" or
> "number-of-gpios" property to represent that aspect of the HW.
> 
> So overall, I believe it's actually very important to first determine
> *the* exact schema for a node, then apply that one top-level checker,
> with it then invoking various (potentially parameterized) sub-checkers
> for any inherited/aggregated schemas.

So, I certainly think we want explicitly invoked subcheckers.  But I
think we want multiple independently applied schemas for a single node
too.
Stephen Warren Nov. 12, 2013, 10:06 p.m. UTC | #37
On 11/10/2013 04:00 AM, David Gibson wrote:
> On Thu, Oct 31, 2013 at 03:11:15PM -0600, Stephen Warren wrote:
>> On 10/25/2013 05:29 PM, David Gibson wrote:
>>> On Thu, Oct 24, 2013 at 10:51:28PM +0100, Stephen Warren
>>> wrote:
>>>> From: Stephen Warren <swarren@nvidia.com>
>>>> 
>>>> This is a very quick proof-of-concept re: how a DT schema
>>>> checker might look if written in C, and integrated into dtc.
>> 
>>>> diff --git a/schemas/schema.c b/schemas/schema.c
>> 
>>>> +int schema_check_node(struct node *node)
>> ...
>>>> +	if (!best_checker) { +		printf("WARNING: no schema for
>>>> node %s\n", node->fullpath); +		return 0; +	} + +
>>>> printf("INFO: Node %s selected checker %s\n", node->fullpath,
>>>> + best_checker->name); + +	best_checker->checkfn(node);
>>> 
>>> IMO, thinking in terms of "the" schema for a node is a mistake.
>>>  Instead, think in terms of a bunch of schemas, which "known"
>>> what nodes they're relevant for.  Often that will be determined
>>> by compatible, but it could also be determined by other things 
>>> (presence of 'interrupts', parent node, explicitly implied by 
>>> another schema).
>> 
>> I don't agree here.
>> 
>> Each node represents a single specific type of object, and the 
>> representation uses a single specific overall schema.
>> 
>> I'll admit that sometimes that schema is picked via the
>> compatible value (most cases), and other times via the node
>> name/path (e.g. /chosen, /memory).
>> 
>> In particular, I don't think it's correct to say that e.g. both
>> a "Tegra I2C controller" schema and an "interrupt" schema apply
>> equally to a "Tegra I2C DT node".
> 
> Why not?  Both are mandatory.

No. The interrupt schema isn't mandatory unless the Tegra I2C schema
actively opts in to interrupt usage.

>> Instead, I think that the "interrupt" schema only applies because
>> the "Tegra I2C controller" schema happens to inherit from, or
>> aggregate, the "interrupt" schema.
> 
> So, explicit inheritence makes sense in many cases, but I don't
> like seeing these universal rules as inheritence based.  They
> should be just that - universal - not opt-in for any particular
> device binding.

There isn't /an/ interrupt schema, there's a framework for interrupt
schemas, where the specific "final" schema parameterizes the interrupt
schema. It's impossible to fully evaluate whether an interrupts
property conforms to the requirements unless you evaluate it in the
context of a particular "final" schema.

Also, while it would be silly, I've never seen a rule specifically
stated that common properties such as interrupts, reg, *-gpios,
*-supply, pinctrl-* are absolutely reserved for their typical meaning,
and that no binding can ever use them for other local purposes. It
would make sense to explicitly state this though.

In practical terms, what this means is that, without any knowledge of
the final binding, you can check that an interrupts property conforms
to some general rules such as: it's a list of phandle + specifier, and
the specifier length must match the interrupt parent, and the number
of entries in interrupts an interrupt-names (if present) must match.

However, you can't check any of the following without specific
knowledge of the final binding:

* Some bindings don't use interrupts, and hence
interrupts/interrupt-names/interrupt-parent must /not/ be present in
the node.

* The specific (range of) number of entries that must be present in
interrupts.

* The specific set of entries that must exist in interrupt-names.

* Any ordering requirements on the names in interrupt-names (say the
binding was first defined to use interrupts by index, then later
extended to look up additional entries by name via interrupt-names).

I don't think it makes sense to check the base interrupts syntax
alone, then those other rules later. Instead, it makes sense to check
all the interrupt-related rules at once, when you have enough
knowledge of all the requirements.

Also, interrupts is an easy case because the property name is static.
What about GPIOs or regulators. Do the base GPIO/regulator schema
checkers search for all properties name *-gpios and *-supply and check
those? How would one avoid collisions with other bindings that happen
to use those same property names. Explicitly disallowing property name
collisions for single property names like interrupts seems reasonable.
Disallowing whole patterns of property names seems a bit harder, and
less reasonable.

>> I see two important results from this distinction:
>> 
>> 1) We should only allow properties from the "interrupt" schema
>> to exist within a node /if/ the top-level schema for the node
>> actually does make use of the "interrupt" schema". This is
>> important since it disallows e.g.:
>> 
>> battery: smart-battery@b { compatible = "ti,bq20z45",
>> "sbs,sbs-battery"; reg = <0xb>; interrupt-parent = <&gpio>; 
>> interrupts = <5 4>; };
>> 
>> ... since the ti,bq20z45/sbs,sbs-battery don't actually have an 
>> interrupt output (assuming that the current binding doc for that 
>> compatible value accurately reflects the HW here anyway).
>> 
>> If we allowed the "interrupt" schema to match the node simply
>> because it saw an "interrupts" property there, that'd allow this
>> unused property to exist in the DT, whereas we really do want to
>> throw an error here, so the DT author is aware they made a
>> mistake.
> 
> So.  To clarify my proposal of multiple applied schemas: in order
> for the node to "pass" the checks, it must satisfy all constraints
> of all applicable schemas - they don't override each other.
> Furthermore, I'm not seeing any specific property as being owned by
> a particular schema - multiple schemas applying to a node can place
> overlapping constraints on the same property.
> 
> So in this case, the interrupt schema describes what the
> interrupts property needs to look like if it exists, but the device
> schema says that there should be no interrupts.  The only way to
> satisfy both is to have no interrupts property, which is the right
> answer.

A big issue here is that if a node actually has an interrupts
property, but the final schema says it shouldn't, the only way to
check for that is for every schema that doesn't use interrupts to
specifically check that no interrupts property is present. That
explicit negative check has to be added for every common/generic/base
schema. That doesn't seem scalable.

Going back to the GPIOs/regulators case, if a base GPIO/regulator
schema has "blessed" property xxx-gpios as being OK, how does the
final schema for a node undo that; must it explicitly enumerate every
property that's present in the node and check it against the list of
valid properties? I had envisaged something more like:

for each node:
    mark each property as invalid
    run all the schema checks
      (marking properties as valid as they're checked)
    for all properties in node:
        if property is not marked valid, error out

If we run the various schema checkers at unrelated times, because
they're separate checks, then we end up with either:

for each node:
    run lots of sub-checkers
    mark each property as invalid
    run just the final schema checker
      (marking properties as valid as they're checked)
    for all properties in node:
        if property is not marked valid, error out

I don't like the special case for the final checker there.

If the schema checkers start to run on the same node in parallel, or
in any undefined order, then things start getting worse.

> Actually, rather than a global "interrupts" schema here, more
> useful would be a schema provided by the interrupt controller (and
> so selected by the interrupt-parent property) which could validate
> the internal format of the interrupt descriptors.  But, again,
> this doesn't prevent the device schema from validating the number
> of interrupt descriptors for this device.

interrupts is a bit special here because there's historically only
been one parent. However, not all "base" schemas have this benefit,
and even with interrupts, the interrupts-extend property breaks this
assumption. This means that n different "parent" (provider/supplier)
nodes end up defining the format of a single property in a consumer
node, which feels a bit messy. Having a single "check the entire
interrupts property" function/schema, which goes and gets a little
information from the provider/supplier (i.e. #interrupt-cells) feels a
lot cleaner.

>> 2) Some inheritance or aggregation of schemas is parameterized,
>> e.g. on property name. For example, GPIO properties are named
>> ${xxx}-gpios. However, I don't believe there's any hard rule that
>> absolutely mandates that an /unrelated/ property cannot exist
>> that matches the same naming rule. Admittedly, it would be
>> suboptimal if such a conflicting property name were used, but if
>> there's no hard rule against it, I don't think we should design
>> our schema checker to assume that.
> 
> The schema checker, no.  The schemas themselves, maybe.  At least
> for really well established things, like interrupts, I think it's 
> reasonable that the schemas enforce best practice, not merely
> minimal correctness.

True. It sounds like we want a schema checker for the schemas, so that
schema X can't (re-)define a property that schema Y defines the global
meaning of!

>> If the GPIO schema checker simply searched for any properties
>> named according to that pattern, it might end up attempting to
>> check properties that weren't actually generic GPIO specifiers.
>> Instead, I'd prefer the node's top-level schema to explicitly
>> state which properties should be checked according to which
>> inherited schema(s).
> 
> So, in the case of GPIOs, I tend to agree.  For the case of 
> interrupts, not so much.

Surely we want to deal with everything in a uniform way though, rather
than having some techniques that work for interrupts, and a different
set of techniques that work for GPIOs.

>> In particular, perhaps this conflict could occur in a slightly
>> generic binding for a series of similar I2C GPIO expander chips,
>> some of which have 4 GPIOs and others 8. Someone might choose a
>> "count-gpios" or "number-of-gpios" property to represent that
>> aspect of the HW.
>> 
>> So overall, I believe it's actually very important to first
>> determine *the* exact schema for a node, then apply that one
>> top-level checker, with it then invoking various (potentially
>> parameterized) sub-checkers for any inherited/aggregated
>> schemas.
> 
> So, I certainly think we want explicitly invoked subcheckers.  But
> I think we want multiple independently applied schemas for a single
> node too.
David Gibson Nov. 13, 2013, 12:33 a.m. UTC | #38
On Tue, Nov 12, 2013 at 03:06:03PM -0700, Stephen Warren wrote:
> On 11/10/2013 04:00 AM, David Gibson wrote:
> > On Thu, Oct 31, 2013 at 03:11:15PM -0600, Stephen Warren wrote:
> >> On 10/25/2013 05:29 PM, David Gibson wrote:
> >>> On Thu, Oct 24, 2013 at 10:51:28PM +0100, Stephen Warren
> >>> wrote:
> >>>> From: Stephen Warren <swarren@nvidia.com>
> >>>> 
> >>>> This is a very quick proof-of-concept re: how a DT schema
> >>>> checker might look if written in C, and integrated into dtc.
> >> 
> >>>> diff --git a/schemas/schema.c b/schemas/schema.c
> >> 
> >>>> +int schema_check_node(struct node *node)
> >> ...
> >>>> +	if (!best_checker) { +		printf("WARNING: no schema for
> >>>> node %s\n", node->fullpath); +		return 0; +	} + +
> >>>> printf("INFO: Node %s selected checker %s\n", node->fullpath,
> >>>> + best_checker->name); + +	best_checker->checkfn(node);
> >>> 
> >>> IMO, thinking in terms of "the" schema for a node is a mistake.
> >>>  Instead, think in terms of a bunch of schemas, which "known"
> >>> what nodes they're relevant for.  Often that will be determined
> >>> by compatible, but it could also be determined by other things 
> >>> (presence of 'interrupts', parent node, explicitly implied by 
> >>> another schema).
> >> 
> >> I don't agree here.
> >> 
> >> Each node represents a single specific type of object, and the 
> >> representation uses a single specific overall schema.
> >> 
> >> I'll admit that sometimes that schema is picked via the
> >> compatible value (most cases), and other times via the node
> >> name/path (e.g. /chosen, /memory).
> >> 
> >> In particular, I don't think it's correct to say that e.g. both
> >> a "Tegra I2C controller" schema and an "interrupt" schema apply
> >> equally to a "Tegra I2C DT node".
> > 
> > Why not?  Both are mandatory.
> 
> No. The interrupt schema isn't mandatory unless the Tegra I2C schema
> actively opts in to interrupt usage.

It really is.  What I'm thinking of as the "interrupt schema"
specifies (in part) what the interrupts and related properties need to
look like if they exist.  Those constraints must *always* be met,
regardless of whether the device schema additionally specifies the
number of interrupts or other constraints on interrupt format.

> >> Instead, I think that the "interrupt" schema only applies because
> >> the "Tegra I2C controller" schema happens to inherit from, or
> >> aggregate, the "interrupt" schema.
> > 
> > So, explicit inheritence makes sense in many cases, but I don't
> > like seeing these universal rules as inheritence based.  They
> > should be just that - universal - not opt-in for any particular
> > device binding.
> 
> There isn't /an/ interrupt schema, there's a framework for interrupt
> schemas, where the specific "final" schema parameterizes the interrupt
> schema.

That all depends on how broad your notion of "schema" is.

And if you assume there's a "final" schema, then yes, you need to have
something defining the "final" schema, but that's a circular argument.

> It's impossible to fully evaluate whether an interrupts
> property conforms to the requirements unless you evaluate it in the
> context of a particular "final" schema.

So?

> Also, while it would be silly, I've never seen a rule specifically
> stated that common properties such as interrupts, reg, *-gpios,
> *-supply, pinctrl-* are absolutely reserved for their typical meaning,
> and that no binding can ever use them for other local purposes. It
> would make sense to explicitly state this though.

Yes it would.  And as I said enforcing best practice, not just minimal
constraints makes sense for a schema checker.

> In practical terms, what this means is that, without any knowledge of
> the final binding, you can check that an interrupts property conforms
> to some general rules such as: it's a list of phandle + specifier, and
> the specifier length must match the interrupt parent, and the number
> of entries in interrupts an interrupt-names (if present) must match.

Yes.  And..?  Those things can still be checked by the device schema.

> However, you can't check any of the following without specific
> knowledge of the final binding:
> 
> * Some bindings don't use interrupts, and hence
> interrupts/interrupt-names/interrupt-parent must /not/ be present in
> the node.
> 
> * The specific (range of) number of entries that must be present in
> interrupts.
> 
> * The specific set of entries that must exist in interrupt-names.
> 
> * Any ordering requirements on the names in interrupt-names (say the
> binding was first defined to use interrupts by index, then later
> extended to look up additional entries by name via interrupt-names).
> 
> I don't think it makes sense to check the base interrupts syntax
> alone, then those other rules later.

Why not?

> Instead, it makes sense to check
> all the interrupt-related rules at once, when you have enough
> knowledge of all the requirements.

Knowledge of the requirements is distributed.  What's wrong with
checking those distributedly?

> Also, interrupts is an easy case because the property name is static.
> What about GPIOs or regulators. Do the base GPIO/regulator schema
> checkers search for all properties name *-gpios and *-supply and check
> those? How would one avoid collisions with other bindings that happen
> to use those same property names. Explicitly disallowing property name
> collisions for single property names like interrupts seems reasonable.
> Disallowing whole patterns of property names seems a bit harder, and
> less reasonable.

Sure.  And for those properties, I'd agree with you.  But there are
universal properties with universal constraints which can and should
be checked universally.

> >> I see two important results from this distinction:
> >> 
> >> 1) We should only allow properties from the "interrupt" schema
> >> to exist within a node /if/ the top-level schema for the node
> >> actually does make use of the "interrupt" schema". This is
> >> important since it disallows e.g.:
> >> 
> >> battery: smart-battery@b { compatible = "ti,bq20z45",
> >> "sbs,sbs-battery"; reg = <0xb>; interrupt-parent = <&gpio>; 
> >> interrupts = <5 4>; };
> >> 
> >> ... since the ti,bq20z45/sbs,sbs-battery don't actually have an 
> >> interrupt output (assuming that the current binding doc for that 
> >> compatible value accurately reflects the HW here anyway).
> >> 
> >> If we allowed the "interrupt" schema to match the node simply
> >> because it saw an "interrupts" property there, that'd allow this
> >> unused property to exist in the DT, whereas we really do want to
> >> throw an error here, so the DT author is aware they made a
> >> mistake.
> > 
> > So.  To clarify my proposal of multiple applied schemas: in order
> > for the node to "pass" the checks, it must satisfy all constraints
> > of all applicable schemas - they don't override each other.
> > Furthermore, I'm not seeing any specific property as being owned by
> > a particular schema - multiple schemas applying to a node can place
> > overlapping constraints on the same property.
> > 
> > So in this case, the interrupt schema describes what the
> > interrupts property needs to look like if it exists, but the device
> > schema says that there should be no interrupts.  The only way to
> > satisfy both is to have no interrupts property, which is the right
> > answer.
> 
> A big issue here is that if a node actually has an interrupts
> property, but the final schema says it shouldn't, the only way to
> check for that is for every schema that doesn't use interrupts to
> specifically check that no interrupts property is present. That
> explicit negative check has to be added for every common/generic/base
> schema. That doesn't seem scalable.

Um.. but your proposal is that every node which does use interrupts
(i.e. most devices) must explicitly say that interrupts are present.
How's that any more scalable?

> Going back to the GPIOs/regulators case, if a base GPIO/regulator
> schema has "blessed" property xxx-gpios as being OK, how does the
> final schema for a node undo that; must it explicitly enumerate every
> property that's present in the node and check it against the list of
> valid properties? I had envisaged something more like:

So as I've said, I think a scheme like yours makes more sense for
GPIOs, because of the more complex binding with multiple possible
properties.

> for each node:
>     mark each property as invalid
>     run all the schema checks
>       (marking properties as valid as they're checked)

So.. here I think is the central point of our disagreement.  You're
thinking of "valid" as an absolute, boolean attribute of a property.
I'm only thinking in terms of "valid w.r.t. schema X".  Valid overall
just means valid w.r.t. every schema we know about - a set which will
grow over time.

>     for all properties in node:
>         if property is not marked valid, error out

Right, so my proposed flow is instead:

for each schema:
    for each node:
        if schema is applicable:
            if schema fails:
                print error for this node and schema
                mark node as failing this schema

I think this approach allows a broader concept of schemas with more
flexible constraints e.g.:
 * Node must have property-X OR property-Y
 * If machine type X has node /foo/bar, it must also have node
   /baz/qux

> If we run the various schema checkers at unrelated times, because
> they're separate checks, then we end up with either:
> 
> for each node:
>     run lots of sub-checkers
>     mark each property as invalid
>     run just the final schema checker
>       (marking properties as valid as they're checked)
>     for all properties in node:
>         if property is not marked valid, error out
> 
> I don't like the special case for the final checker there.

Yeah, no.  I have no concept of "final" checker.

> If the schema checkers start to run on the same node in parallel, or
> in any undefined order, then things start getting worse.

Why?  We'd need some order dependencies between schemas - the check
structure in dtc already supports this.

> > Actually, rather than a global "interrupts" schema here, more
> > useful would be a schema provided by the interrupt controller (and
> > so selected by the interrupt-parent property) which could validate
> > the internal format of the interrupt descriptors.  But, again,
> > this doesn't prevent the device schema from validating the number
> > of interrupt descriptors for this device.
> 
> interrupts is a bit special here because there's historically only
> been one parent. However, not all "base" schemas have this benefit,
> and even with interrupts, the interrupts-extend property breaks this
> assumption. This means that n different "parent" (provider/supplier)
> nodes end up defining the format of a single property in a consumer
> node, which feels a bit messy.

Only if you think of them as "defining" the property.  I'm just
thinking of them as constraining the property.

> Having a single "check the entire
> interrupts property" function/schema, which goes and gets a little
> information from the provider/supplier (i.e. #interrupt-cells) feels a
> lot cleaner.

Not to me.

> >> 2) Some inheritance or aggregation of schemas is parameterized,
> >> e.g. on property name. For example, GPIO properties are named
> >> ${xxx}-gpios. However, I don't believe there's any hard rule that
> >> absolutely mandates that an /unrelated/ property cannot exist
> >> that matches the same naming rule. Admittedly, it would be
> >> suboptimal if such a conflicting property name were used, but if
> >> there's no hard rule against it, I don't think we should design
> >> our schema checker to assume that.
> > 
> > The schema checker, no.  The schemas themselves, maybe.  At least
> > for really well established things, like interrupts, I think it's 
> > reasonable that the schemas enforce best practice, not merely
> > minimal correctness.
> 
> True. It sounds like we want a schema checker for the schemas, so that
> schema X can't (re-)define a property that schema Y defines the global
> meaning of!

Again, thinking of schemas as constraining, rather than defining,
makes things much more flexible.  Schemas which heavily constrain, and
may also have semantic information attached - like nearly all device
schemas - can be thought of informally as "defining", but that doesn't
make a difference for the mechanics of the validation.

> >> If the GPIO schema checker simply searched for any properties
> >> named according to that pattern, it might end up attempting to
> >> check properties that weren't actually generic GPIO specifiers.
> >> Instead, I'd prefer the node's top-level schema to explicitly
> >> state which properties should be checked according to which
> >> inherited schema(s).
> > 
> > So, in the case of GPIOs, I tend to agree.  For the case of 
> > interrupts, not so much.
> 
> Surely we want to deal with everything in a uniform way though, rather
> than having some techniques that work for interrupts, and a different
> set of techniques that work for GPIOs.

Only if what we were checking itself was uniform, which it's not.
Different bindings impose different constraints for which different
techniques are appropriate.

> >> In particular, perhaps this conflict could occur in a slightly
> >> generic binding for a series of similar I2C GPIO expander chips,
> >> some of which have 4 GPIOs and others 8. Someone might choose a
> >> "count-gpios" or "number-of-gpios" property to represent that
> >> aspect of the HW.
> >> 
> >> So overall, I believe it's actually very important to first
> >> determine *the* exact schema for a node, then apply that one
> >> top-level checker, with it then invoking various (potentially
> >> parameterized) sub-checkers for any inherited/aggregated
> >> schemas.
> > 
> > So, I certainly think we want explicitly invoked subcheckers.  But
> > I think we want multiple independently applied schemas for a single
> > node too.
>
diff mbox

Patch

diff --git a/Makefile.dtc b/Makefile.dtc
index bece49b..824aaaf 100644
--- a/Makefile.dtc
+++ b/Makefile.dtc
@@ -12,7 +12,16 @@  DTC_SRCS = \
 	livetree.c \
 	srcpos.c \
 	treesource.c \
-	util.c
+	util.c \
+	schemas/mmio-bus.c \
+	schemas/root-node.c \
+	schemas/schema.c \
+	schemas/clock/clock.c \
+	schemas/gpio/gpio.c \
+	schemas/i2c/i2c.c \
+	schemas/i2c/nvidia,tegra20-i2c.c \
+	schemas/interrupt-controller/interrupts.c \
+	schemas/sound/wlf,wm8903.c
 
 DTC_GEN_SRCS = dtc-lexer.lex.c dtc-parser.tab.c
 DTC_OBJS = $(DTC_SRCS:%.c=%.o) $(DTC_GEN_SRCS:%.c=%.o)
diff --git a/checks.c b/checks.c
index ee96a25..49143b3 100644
--- a/checks.c
+++ b/checks.c
@@ -19,6 +19,7 @@ 
  */
 
 #include "dtc.h"
+#include "schemas/schema.h"
 
 #ifdef TRACE_CHECKS
 #define TRACE(c, ...) \
@@ -236,6 +237,14 @@  static void check_is_cell(struct check *c, struct node *root,
  * Structural check functions
  */
 
+static void check_schema(struct check *c, struct node *dt,
+				       struct node *node)
+{
+	if (schema_check_node(node))
+		FAIL(c, "Schema check failed for %s", node->fullpath);
+}
+NODE_ERROR(schema, NULL);
+
 static void check_duplicate_node_names(struct check *c, struct node *dt,
 				       struct node *node)
 {
@@ -652,6 +661,8 @@  static void check_obsolete_chosen_interrupt_controller(struct check *c,
 TREE_WARNING(obsolete_chosen_interrupt_controller, NULL);
 
 static struct check *check_table[] = {
+	&schema,
+
 	&duplicate_node_names, &duplicate_property_names,
 	&node_name_chars, &node_name_format, &property_name_chars,
 	&name_is_string, &name_properties,
diff --git a/schemas/clock/clock.c b/schemas/clock/clock.c
new file mode 100644
index 0000000..0b9ca1f
--- /dev/null
+++ b/schemas/clock/clock.c
@@ -0,0 +1,16 @@ 
+#include "schema.h"
+
+void is_a_clock_provider(struct node *node, int clock_cells)
+{
+	required_integer_property(node, "#clock-cells", clock_cells);
+}
+
+void is_a_clock_consumer_by_name(struct node *node, int clock_count)
+{
+	required_property(node, "clock-names");
+	/* FIXME: validate all required names are present */
+	/* FIXME: validate all names present are in list of valid names */
+	required_property(node, "clocks");
+	/* FIXME: validate phandle, specifier list in property */
+	/* FIXME: validate len(clocks) =~ len(clock-names) * #clock-cells */
+}
diff --git a/schemas/gpio/gpio.c b/schemas/gpio/gpio.c
new file mode 100644
index 0000000..e52f161
--- /dev/null
+++ b/schemas/gpio/gpio.c
@@ -0,0 +1,13 @@ 
+#include "schema.h"
+
+void is_a_gpio_provider(struct node *node, int gpio_cells)
+{
+	required_boolean_property(node, "gpio-controller");
+	required_integer_property(node, "#gpio-cells", gpio_cells);
+}
+
+void is_a_gpio_consumer(struct node *node, const char *propname)
+{
+	required_property(node, propname);
+	/* FIXME: validate phandle, specifier list in property */
+}
diff --git a/schemas/i2c/i2c.c b/schemas/i2c/i2c.c
new file mode 100644
index 0000000..0772ea3
--- /dev/null
+++ b/schemas/i2c/i2c.c
@@ -0,0 +1,17 @@ 
+#include "../schema.h"
+
+void is_an_i2c_bus(struct node *node)
+{
+	printf("INFO: %s()\n", __FUNCTION__);
+
+	is_an_mmio_bus(node, 1, 0);
+	required_property(node, "#address-cells");
+	required_property(node, "#size-cells");
+	optional_property(node, "clock-frequency");
+	/* FIXME: set internal tag on *node to mark it as an I2C bus */
+}
+
+void is_an_i2c_bus_child(struct node *node)
+{
+	/* FIXME: validate that is_an_i2c_bus() was called on node->parent */
+}
diff --git a/schemas/i2c/nvidia,tegra20-i2c.c b/schemas/i2c/nvidia,tegra20-i2c.c
new file mode 100644
index 0000000..c616f33
--- /dev/null
+++ b/schemas/i2c/nvidia,tegra20-i2c.c
@@ -0,0 +1,20 @@ 
+#include "../schema.h"
+
+static const char *compats_nvidia_tegra20_i2c[] = {
+	"nvidia,tegra20-i2c",
+	"nvidia,tegra30-i2c",
+	"nvidia,tegra114-i2c",
+	"nvidia,tegra124-i2c",
+	NULL,
+};
+
+static void checkfn_nvidia_tegra20_i2c(struct node *node)
+{
+	printf("INFO: %s()\n", __FUNCTION__);
+
+	is_an_mmio_bus_child(node, 1);
+	is_an_i2c_bus(node);
+	is_an_interrupt_consumer_by_index(node, 1);
+	is_a_clock_consumer_by_name(node, 2);
+}
+SCHEMA_MATCH_COMPATIBLE(nvidia_tegra20_i2c);
diff --git a/schemas/interrupt-controller/interrupts.c b/schemas/interrupt-controller/interrupts.c
new file mode 100644
index 0000000..39191a8
--- /dev/null
+++ b/schemas/interrupt-controller/interrupts.c
@@ -0,0 +1,14 @@ 
+#include "schema.h"
+
+void is_an_interrupt_provider(struct node *node, int int_cells)
+{
+	required_boolean_property(node, "interrupt-controller");
+	required_integer_property(node, "#interrupt-cells", int_cells);
+}
+
+void is_an_interrupt_consumer_by_index(struct node *node, int int_count)
+{
+	required_property(node, "interrupts");
+	/* FIXME: validate phandle, specifier list in property */
+	/* FIXME: validate len(interrupts) =~ int_count * #interrupt-cells */
+}
diff --git a/schemas/mmio-bus.c b/schemas/mmio-bus.c
new file mode 100644
index 0000000..74b5410
--- /dev/null
+++ b/schemas/mmio-bus.c
@@ -0,0 +1,15 @@ 
+#include "schema.h"
+
+void is_an_mmio_bus(struct node *node, int address_cells, int size_cells)
+{
+	required_integer_property(node, "#address-cells", address_cells);
+	required_integer_property(node, "#size-cells", size_cells);
+	/* FIXME: set internal tag on *node to mark it as an MMIO bus */
+}
+
+void is_an_mmio_bus_child(struct node *node, int reg_count)
+{
+	/* FIXME: validate that is_an_mmio_bus() was called on node->parent */
+	required_property(node, "reg");
+	/* FIXME: validate len(reg) == reg_count * (#address-+#size-cells) */
+}
diff --git a/schemas/root-node.c b/schemas/root-node.c
new file mode 100644
index 0000000..c6ab0c7
--- /dev/null
+++ b/schemas/root-node.c
@@ -0,0 +1,27 @@ 
+#include "schema.h"
+
+static void checkfn_root_node(struct node *node)
+{
+	printf("INFO: %s()\n", __FUNCTION__);
+
+	/*
+	 * FIXME: Need to allow is_an_mmio_bus() that allows any values of
+	 * #address-cells/#size-cells
+	 */
+	is_an_mmio_bus(node, 1, 1);
+	/*
+	 * FIXME: call required_string_property() here instead, or perhaps
+	 * required_property(node, "compatible", check_propval_string);
+	 * where check_propval_string() is a function that performs additional
+	 * checks on the property value.
+	 */
+	required_property(node, "compatible");
+	/*
+	 * FIXME: call optional_string_property() here instead, or perhaps
+	 * optional_property(node, "compatible", check_propval_string);
+	 * where check_propval_string() is a function that performs additional
+	 * checks on the property value.
+	 */
+	optional_property(node, "model");
+}
+SCHEMA_MATCH_PATH(root_node, "/");
diff --git a/schemas/schema.c b/schemas/schema.c
new file mode 100644
index 0000000..cb78170
--- /dev/null
+++ b/schemas/schema.c
@@ -0,0 +1,119 @@ 
+#include "schema.h"
+
+/* FIXME: automate this table... */
+extern struct schema_checker schema_checker_root_node;
+extern struct schema_checker schema_checker_nvidia_tegra20_i2c;
+extern struct schema_checker schema_checker_wlf_wm8903;
+static const struct schema_checker *schema_checkers[] = {
+	&schema_checker_root_node,
+	&schema_checker_nvidia_tegra20_i2c,
+	&schema_checker_wlf_wm8903,
+};
+
+int schema_check_node(struct node *node)
+{
+	int i;
+	const struct schema_checker *checker, *best_checker = NULL;
+	int match, best_match = 0;
+
+	for (i = 0; i < ARRAY_SIZE(schema_checkers); i++) {
+		checker = schema_checkers[i];
+		match = checker->matchfn(node, checker);
+		if (match) {
+			printf("INFO: Node %s matches checker %s at level %d\n",
+				node->fullpath, checker->name, match);
+			if (!best_checker || (match > best_match)) {
+				best_match = match;
+				best_checker = checker;
+			}
+		}
+	}
+
+	if (!best_checker) {
+		printf("WARNING: no schema for node %s\n", node->fullpath);
+		return 0;
+	}
+
+	printf("INFO: Node %s selected checker %s\n", node->fullpath,
+		best_checker->name);
+
+	best_checker->checkfn(node);
+
+	/*
+	 * FIXME: grab validation state from global somewhere.
+	 * Using global state avoids having check return values after every
+	 * function call, thus making the code less verbose and appear more
+	 * assertion-based.
+	 */
+	return 0;
+}
+
+int schema_match_path(struct node *node, const struct schema_checker *checker)
+{
+	return !strcmp(node->fullpath, checker->u.path.path);
+}
+
+int schema_match_compatible(struct node *node,
+				const struct schema_checker *checker)
+{
+	struct property *compat_prop;
+	int index;
+	const char *node_compat;
+	const char **test_compats;
+
+	compat_prop = get_property(node, "compatible");
+	if (!compat_prop)
+		return 0;
+
+	/*
+	 * The best_match logic here is to find the checker entry that matches
+	 * the first compatible value in the node, then if there's no match,
+	 * fall back to finding the checker that matches the second compatible
+	 * value, etc. Perhaps we want to run all checkers instead? Especially,
+	 * we might want to run all different types of checker (by path name,
+	 * by compatible).
+	 */
+	for (node_compat = compat_prop->val.val, index = 0;
+			*node_compat;
+			node_compat += strlen(node_compat) + 1, index++) {
+		for (test_compats = checker->u.compatible.compats;
+				*test_compats; test_compats++) {
+			if (!strcmp(node_compat, *test_compats))
+				return -(index + 1);
+		}
+	}
+
+	return 0;
+}
+
+void required_property(struct node *node, const char *propname)
+{
+	struct property *prop;
+
+	prop = get_property(node, propname);
+	if (!prop) {
+		/*
+		 * FIXME: set global error state. The same comment applies
+		 * everywhere.
+		 */
+		printf("ERROR: node %s missing property %s\n", node->fullpath,
+			propname);
+	}
+}
+
+void required_boolean_property(struct node *node, const char *propname)
+{
+	required_property(node, propname);
+	/* FIXME: Validate it's length is zero if present */
+}
+
+void required_integer_property(struct node *node, const char *propname,
+				int value)
+{
+	required_property(node, propname);
+	/* FIXME: Validate it's length is 1 cell, and value matches */
+}
+
+void optional_property(struct node *node, const char *propname)
+{
+}
diff --git a/schemas/schema.h b/schemas/schema.h
new file mode 100644
index 0000000..74e9931
--- /dev/null
+++ b/schemas/schema.h
@@ -0,0 +1,68 @@ 
+#ifndef _SCHEMAS_SCHEMA_H
+#define _SCHEMAS_SCHEMA_H
+
+#include "dtc.h"
+
+struct schema_checker;
+
+typedef int (schema_matcher_func)(struct node *node,
+					const struct schema_checker *checker);
+typedef void (schema_checker_func)(struct node *node);
+
+struct schema_checker {
+	const char *name;
+	schema_matcher_func *matchfn;
+	schema_checker_func *checkfn;
+	union {
+		struct {
+			const char *path;
+		} path;
+		struct {
+			const char **compats;
+		} compatible;
+	} u;
+};
+
+int schema_check_node(struct node *node);
+
+int schema_match_path(struct node *node, const struct schema_checker *checker);
+int schema_match_compatible(struct node *node,
+				const struct schema_checker *checker);
+
+#define SCHEMA_MATCH_PATH(_name_, _path_) \
+	struct schema_checker schema_checker_##_name_ = { \
+		.name = #_name_, \
+		.matchfn = schema_match_path, \
+		.checkfn = checkfn_##_name_, \
+		.u.path.path = _path_, \
+	};
+
+#define SCHEMA_MATCH_COMPATIBLE(_name_) \
+	struct schema_checker schema_checker_##_name_ = { \
+		.name = #_name_, \
+		.matchfn = schema_match_compatible, \
+		.checkfn = checkfn_##_name_, \
+		.u.compatible.compats = compats_##_name_, \
+	};
+
+void required_property(struct node *node, const char *propname);
+void required_boolean_property(struct node *node, const char *propname);
+void required_integer_property(struct node *node, const char *propname,
+				int value);
+void optional_property(struct node *node, const char *propname);
+void is_an_mmio_bus(struct node *node, int address_cells, int size_cells);
+void is_an_mmio_bus_child(struct node *node, int reg_count);
+void is_an_i2c_bus(struct node *node);
+void is_an_i2c_bus_child(struct node *node);
+void is_a_gpio_provider(struct node *node, int gpio_cells);
+void is_a_gpio_consumer(struct node *node, const char *propname);
+void is_an_interrupt_provider(struct node *node, int int_cells);
+void is_an_interrupt_consumer_by_index(struct node *node, int int_count);
+void is_a_clock_provider(struct node *node, int clock_cells);
+/*
+ * FIXME: pass in a list of required and optional clock names instead of a
+ * count
+ */
+void is_a_clock_consumer_by_name(struct node *node, int clock_count);
+
+#endif
diff --git a/schemas/sound/wlf,wm8903.c b/schemas/sound/wlf,wm8903.c
new file mode 100644
index 0000000..f6ac49d
--- /dev/null
+++ b/schemas/sound/wlf,wm8903.c
@@ -0,0 +1,20 @@ 
+#include "../schema.h"
+
+static const char *compats_wlf_wm8903[] = {
+	"wlf,wm8903",
+	NULL,
+};
+
+static void checkfn_wlf_wm8903(struct node *node)
+{
+	printf("INFO: %s()\n", __FUNCTION__);
+
+	is_an_mmio_bus_child(node, 1);
+	is_an_i2c_bus_child(node);
+	is_an_interrupt_consumer_by_index(node, 1);
+	is_a_gpio_provider(node, 2);
+	optional_property(node, "micdet-cfg");
+	optional_property(node, "micdet-delay");
+	optional_property(node, "gpio-cfg");
+}
+SCHEMA_MATCH_COMPATIBLE(wlf_wm8903);
diff --git a/test-schema.dts b/test-schema.dts
new file mode 100644
index 0000000..02e1fdc
--- /dev/null
+++ b/test-schema.dts
@@ -0,0 +1,45 @@ 
+/dts-v1/;
+
+/ {
+	model = "NVIDIA Tegra20 Harmony evaluation board";
+	compatible = "nvidia,harmony", "nvidia,tegra20";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+	};
+
+	chosen {
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x40000000>;
+	};
+
+	i2c@7000c000 {
+		compatible = "nvidia,tegra30-i2c", "nvidia,tegra20-i2c";
+		reg = <0x7000c000 0x100>;
+		interrupts = <0 38 0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <0 0>, <0 1>;
+		clock-names = "div-clk", "fast-clk";
+		status = "okay";
+		clock-frequency = <400000>;
+
+		wm8903: wm8903@1a {
+			compatible = "wlf,wm8903";
+			reg = <0x1a>;
+			interrupt-parent = <0>;
+			interrupts = <5 0>;
+
+			gpio-controller;
+			#gpio-cells = <2>;
+
+			micdet-cfg = <0>;
+			micdet-delay = <100>;
+			gpio-cfg = <0xffffffff 0xffffffff 0 0xffffffff 0xffffffff>;
+		};
+	};
+};