diff mbox

[1/5] plugins: implement format_template attribute

Message ID 20171112211911.27779-1-linux@rasmusvillemoes.dk (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Rasmus Villemoes Nov. 12, 2017, 9:19 p.m. UTC
Most format strings in the kernel are string literals, so the compiler
and other static analyzers can do type checking. This plugin covers a
few of the remaining cases, by introducing a format_template
attribute.

Consider struct usb_class_driver. Its member 'name' is used as a
format string in usb_register_dev(), and that use obviously expects
that the format string contains a single "%d" (or maybe %u). So the
idea is that we simply attach __format_template("%d") to the
declaration of the name member of struct usb_class_driver. We can then
check that any static initialization of that member is with a string
literal with the same set of specifiers. This is what the plugin
currently does. There are a number of things it also could/should do:

- Check run-time assignments of literals to a __format_template
annotated struct member.

- Use this at call sites of *printf functions, where the format string
argument is a __format_template annotated struct member - that is, use
the template in lieu of a string literal. For now, this is not
implemented - mostly because I'm lazy and don't want to write my own
format checking code (again), and I suppose there should be an internal
gcc function I could (ab)use to say "check this variadic function call,
but use _this_ as format string".

- It should also be possible to attach it to function parameters -
e.g. the namefmt parameter to kthread_create_on_cpu should have
__format_template("%u"); its only caller is __smpboot_create_thread
which passes struct smp_hotplug_thread->thread_comm, which in turn
should also have that attribute. Combined with the above, this would
check the entire chain from initializer to sprintf().

While strictly speaking "%*s" and "%d %s" both expect (int, const
char*), they're morally distinct, so I don't want to treat them as
equivalent. If this is ever a problem, I think one should let the
attribute take an optional flag argument, which could then control how
strict or lax the checking should be.

I'm not sure how much this affects compilation time, but there's not
really any point in building with this all the time - it should
suffice that the various build bots do it once in a while. Even
without the plugin, the __format_template(...) in headers serve
as concise documentation.

Applying this attribute to smp_hotplug_thread::thread_comm and modifying
kernel/watchdog.c slightly, an example of the error message produced for
violations is:

kernel/watchdog.c:528:1: error: initializer string 'watchdog/%u %d' contains extra format specifier '%d' compared to format template 'foobar/%u'

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
 arch/Kconfig                                 |  18 ++
 include/linux/compiler.h                     |   6 +
 scripts/Makefile.gcc-plugins                 |   2 +
 scripts/gcc-plugins/format_template_plugin.c | 331 +++++++++++++++++++++++++++
 4 files changed, 357 insertions(+)
 create mode 100644 scripts/gcc-plugins/format_template_plugin.c
diff mbox

Patch

diff --git a/arch/Kconfig b/arch/Kconfig
index 057370a0ac4e..71c582eaeb69 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -517,6 +517,24 @@  config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
 	  in structures.  This reduces the performance hit of RANDSTRUCT
 	  at the cost of weakened randomization.
 
+config GCC_PLUGIN_FORMAT_TEMPLATE
+	bool "Enable format_template attribute"
+	depends on GCC_PLUGINS
+	help
+	  This plugin implements a format_template attribute which can
+	  be attached to struct members which are supposed to hold a
+	  (printf) format string. This allows the compiler to check
+	  that (a) any string statically assigned to such a struct
+	  member has format specifiers compatible with those in the
+	  template and (b) when such a struct member is used as the
+	  format argument to a printf function, use the template in
+	  lieu of a string literal to do type checking of the variadic
+	  arguments.
+
+	  Even without using the plugin, attaching the format_template
+	  attribute can be beneficial, since it serves as
+	  documentation.
+
 config HAVE_CC_STACKPROTECTOR
 	bool
 	help
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 202710420d6d..478914ad280b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -625,4 +625,10 @@  static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	(_________p1); \
 })
 
+#ifdef HAVE_ATTRIBUTE_FORMAT_TEMPLATE
+#define __format_template(x) __attribute__((__format_template__(x)))
+#else
+#define __format_template(x)
+#endif
+
 #endif /* __LINUX_COMPILER_H */
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index b2a95af7df18..2f9bc96aab90 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -35,6 +35,8 @@  ifdef CONFIG_GCC_PLUGINS
   gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)	+= -DRANDSTRUCT_PLUGIN
   gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE)	+= -fplugin-arg-randomize_layout_plugin-performance-mode
 
+  gcc-plugin-$(CONFIG_GCC_PLUGIN_FORMAT_TEMPLATE)	+= format_template_plugin.so
+
   GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
 
   export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR
diff --git a/scripts/gcc-plugins/format_template_plugin.c b/scripts/gcc-plugins/format_template_plugin.c
new file mode 100644
index 000000000000..09a798773cfd
--- /dev/null
+++ b/scripts/gcc-plugins/format_template_plugin.c
@@ -0,0 +1,331 @@ 
+#include <string.h>
+#include <assert.h>
+
+#include "gcc-common.h"
+#include "c-family/c-pragma.h"
+
+int plugin_is_GPL_compatible;
+
+static struct plugin_info format_template_plugin_info = {
+	.version	= "20151209",
+	.help		= "format_template_plugin\n",
+};
+
+static tree handle_format_template_attribute(tree *node, tree name, tree args, int __unused flags, bool *no_add_attrs)
+{
+	tree tmpl, orig_attr;
+
+	*no_add_attrs = true;
+	switch (TREE_CODE(*node)) {
+	case FIELD_DECL:
+		break;
+	default:
+		error("%qE attribute only applies to struct members", name);
+		return NULL_TREE;
+	}
+
+	tmpl = TREE_VALUE(args);
+	if (TREE_CODE(tmpl) != STRING_CST) {
+		error("%qE parameter of the %qE attribute is not a string constant", args, name);
+		return NULL_TREE;
+	}
+
+	orig_attr = lookup_attribute("format_template", DECL_ATTRIBUTES(*node));
+	if (orig_attr) {
+		error("%qE attribute applied twice", name);
+		return NULL_TREE;
+	}
+	else
+		*no_add_attrs = false;
+
+	return NULL_TREE;
+}
+
+static struct attribute_spec format_template_attr = {
+	.name				= "format_template",
+	.min_length			= 1,
+	.max_length			= 1,
+	.decl_required			= true,
+	.type_required			= false,
+	.function_type_required		= false,
+	.handler			= handle_format_template_attribute,
+#if BUILDING_GCC_VERSION >= 4007
+	.affects_type_identity		= false
+#endif
+};
+
+static void register_attributes(void __unused *event_data, void __unused *data)
+{
+	register_attribute(&format_template_attr);
+}
+
+static void define_feature_macro(void __unused *event_data, void __unused *data)
+{
+	cpp_define(parse_in, "HAVE_ATTRIBUTE_FORMAT_TEMPLATE");
+}
+
+enum {
+	QUAL_NONE,
+	QUAL_SHORT, /* h */
+	QUAL_BYTE, /* hh, == QUAL_SHORT+1*/
+	QUAL_LONG, /* l */
+	QUAL_LONGLONG, /* ll, == QUAL_LONG+1 */
+	QUAL_MAX, /* j */
+	QUAL_SIZE, /* z */
+	QUAL_PTRDIFF, /* t */
+};
+#define FW_P_NONE (-1)
+#define FW_P_ARG (-2)
+
+struct spec_iter {
+	const char *spec;
+	int len;
+
+	int field_width;
+	int precision;
+	int qual;
+	char type;
+
+};
+
+static inline void
+spec_iter_init(struct spec_iter *spec, const char *s)
+{
+	spec->spec = s;
+	spec->len = 0;
+}
+
+static void get_fw_p(const char **c, int *dst, int prec)
+{
+	*dst = FW_P_NONE;
+	if (prec) {
+		if (**c != '.')
+			return;
+		++(*c);
+		*dst = 0;
+	}
+	if (**c == '*') {
+		++(*c);
+		*dst = FW_P_ARG;
+		return;
+	}
+	if (!ISDIGIT(**c))
+		return;
+	*dst = **c - '0';
+	++(*c);
+	while (ISDIGIT(**c)) {
+		/* should do if (*dst > 10000) warn("insane explicit field width/precision"); */
+		*dst *= 10;
+		*dst += **c - '0';
+		++(*c);
+	}
+}
+
+static int spec_next(struct spec_iter *spec)
+{
+	const char *c;
+	int slen = 0;
+
+	spec->spec += spec->len;
+again:
+	c = strchrnul(spec->spec, '%');
+	slen += c - spec->spec;
+	if (!c[0]) {
+		spec->spec = NULL;
+		return slen;
+	}
+	assert(c[0] == '%');
+	if (c[1] == '%') {
+		slen++;
+		spec->spec = c+2;
+		goto again;
+	}
+
+	spec->spec = c;
+	++c;
+	/* skip flags */
+	while (strchr("#0- +", *c))
+		++c;
+
+	get_fw_p(&c, &spec->field_width, 0);
+	get_fw_p(&c, &spec->precision, 1);
+
+	spec->qual = QUAL_NONE;
+	switch (*c) {
+	case 'h': spec->qual = QUAL_SHORT; break;
+	case 'l': spec->qual = QUAL_LONG; break;
+#if 0 /* The kernel doesn't grok the j qualifier */
+	case 'j': spec->qual = QUAL_MAX; break;
+#endif
+	case 'z': spec->qual = QUAL_SIZE; break;
+	case 't': spec->qual = QUAL_PTRDIFF; break;
+	}
+	if (spec->qual) {
+		++c;
+		if (*c == c[-1] && (*c == 'h' || *c == 'l')) {
+			spec->qual++;
+			++c;
+		}
+	}
+
+	spec->type = *c++;
+	spec->len = c - spec->spec;
+
+	switch (spec->type) {
+	case 'd':
+	case 'u':
+	case 'x':
+	case 'X':
+	case 'o':
+	case 'c':
+	case 's':
+		break;
+	/*
+	 * Disallowing %p is the safe and sane thing to do, given the
+	 * different interpretations based on following alphanumerics.
+	 */
+	case 'p':
+	/*
+	 * Why are there two with the same meaning? %i is the lesser
+	 * used one and should just die.
+	 */
+	case 'i':
+		error("unsupported specifier '%.*s' in template or initializer", spec->len, spec->spec);
+	default:
+		error("invalid specifier '%.*s' in template or initializer", spec->len, spec->spec);
+	}
+
+	return slen;
+}
+
+static bool specs_compatible(const struct spec_iter *a, const struct spec_iter *b)
+{
+	if (a->qual != b->qual)
+		return false;
+	if (a->field_width != b->field_width)
+		return false;
+	if (a->precision != b->precision)
+		return false;
+	if (a->type != b->type)
+		return false;
+	return true;
+}
+
+static void check_literal(tree attr, const char *str)
+{
+	const char *pattern = TREE_STRING_POINTER(TREE_VALUE(TREE_VALUE(attr)));
+	struct spec_iter sp, ss;
+	int i;
+
+	spec_iter_init(&sp, pattern);
+	spec_iter_init(&ss, str);
+
+	/*
+	 * Walk over the pattern and string in lockstep.
+	 */
+	for (i = 1; ; ++i) {
+		spec_next(&sp);
+		spec_next(&ss);
+		/*
+		 * It's ok for the template to have more specifiers
+		 * than the actual string. But issue warning(s)
+		 * anyway, conditional on -Wformat-extra-args.
+		 */
+		if (ss.spec == NULL) {
+			while (sp.spec != NULL) {
+			       warning(OPT_Wformat_extra_args,
+				       "format template '%s' contains extra specifier '%.*s' compared to initializer string '%s'",
+				       pattern, sp.len, sp.spec, str);
+			       spec_next(&sp);
+			}
+			return;
+		}
+		/*
+		 * It's absolutely not ok for the actual string to
+		 * have more specifiers than the template.
+		 */
+		if (!sp.spec) {
+			do {
+				error("initializer string '%s' contains extra format specifier '%.*s' compared to format template '%s'",
+					str, ss.len, ss.spec, pattern);
+				spec_next(&ss);
+			} while (ss.spec != NULL);
+			return;
+		}
+		if (!specs_compatible(&ss, &sp)) {
+			error("specifier %d in '%s' ('%.*s') incompatible with format template '%s'",
+			      i, str, ss.len, ss.spec, pattern);
+		}
+	}
+}
+
+static void check_declaration(void *event_data, void *data __unused)
+{
+	tree decl = (tree)event_data;
+	tree ini, type;
+	unsigned idx;
+	tree field, value;
+
+	switch (TREE_CODE(decl)) {
+	case VAR_DECL:
+		break;
+	default:
+		return;
+	}
+
+	ini = DECL_INITIAL(decl);
+	if (!ini)
+		return;
+
+	type = TREE_TYPE(decl);
+	if (TREE_CODE(type) != RECORD_TYPE)
+		return;
+
+	if (TREE_CODE(ini) != CONSTRUCTOR) {
+		// warning(0, "weird, initializer is not a CONSTRUCTOR, tree_code=%d", TREE_CODE(ini));
+		return;
+	}
+
+	FOR_EACH_CONSTRUCTOR_ELT(CONSTRUCTOR_ELTS(ini), idx, field, value) {
+		tree attr;
+
+		/* if (TREE_CODE(value) != STRING_CST) */
+		/* 	continue; */
+
+		attr = lookup_attribute("format_template", DECL_ATTRIBUTES(field));
+		if (!attr)
+			continue;
+
+		/*
+		 * Hm, apparently the string literal is hidden behind
+		 * a NOP_EXPR and a ADDR_EXPR.
+		 */
+		STRIP_NOPS(value);
+		if (TREE_CODE(value) == ADDR_EXPR)
+			value = TREE_OPERAND(value, 0);
+
+		if (TREE_CODE(value) != STRING_CST)
+			continue;
+
+		check_literal(attr, TREE_STRING_POINTER(value));
+
+	}
+
+}
+
+int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
+{
+	const char *const plugin_name = plugin_info->base_name;
+
+	if (!plugin_default_version_check(version, &gcc_version)) {
+		error(G_("incompatible gcc/plugin versions"));
+		return 1;
+	}
+
+	register_callback(plugin_name, PLUGIN_INFO, NULL, &format_template_plugin_info);
+	register_callback(plugin_name, PLUGIN_START_UNIT, &define_feature_macro, NULL);
+	register_callback(plugin_name, PLUGIN_FINISH_DECL, &check_declaration, NULL);
+	register_callback(plugin_name, PLUGIN_ATTRIBUTES, &register_attributes, NULL);
+
+	return 0;
+}