From patchwork Wed Jan 18 12:32:04 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 13106383 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id F34E9C38147 for ; Wed, 18 Jan 2023 13:09:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230280AbjARNJW (ORCPT ); Wed, 18 Jan 2023 08:09:22 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:39590 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231162AbjARNJB (ORCPT ); Wed, 18 Jan 2023 08:09:01 -0500 Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc [IPv6:2a0a:51c0:0:237:300::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 114985C0F2; Wed, 18 Jan 2023 04:32:54 -0800 (PST) Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92) (envelope-from ) id 1pI7cU-00075G-Oy; Wed, 18 Jan 2023 13:32:46 +0100 From: Florian Westphal To: Cc: Jakub Kicinski , Eric Dumazet , Paolo Abeni , "David S. Miller" , , Florian Westphal Subject: [PATCH net-next 5/9] netfilter: ip_tables: remove clusterip target Date: Wed, 18 Jan 2023 13:32:04 +0100 Message-Id: <20230118123208.17167-6-fw@strlen.de> X-Mailer: git-send-email 2.38.2 In-Reply-To: <20230118123208.17167-1-fw@strlen.de> References: <20230118123208.17167-1-fw@strlen.de> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org Marked as 'to be removed soon' since kernel 4.1 (2015). Functionality was superseded by the 'cluster' match, added in kernel 2.6.30 (2009). clusterip_tg_check still has races that can give proc_dir_entry 'ipt_CLUSTERIP/10.1.1.2' already registered followed by a WARN splat. Remove it instead of trying to fix this up again. clusterip uapi header is left as-is for now. Signed-off-by: Florian Westphal --- net/ipv4/netfilter/Kconfig | 14 - net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_CLUSTERIP.c | 929 ----------------------------- 3 files changed, 944 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_CLUSTERIP.c diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index aab384126f61..f71a7e9a7de6 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -259,20 +259,6 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support" - depends on IP_NF_MANGLE - depends on NF_CONNTRACK - depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - select NETFILTER_FAMILY_ARP - help - The CLUSTERIP target allows you to build load-balancing clusters of - network servers without having a dedicated load-balancing - router/server/switch. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_ECN tristate "ECN target support" depends on IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 93bad1184251..5a26f9de1ab9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -39,7 +39,6 @@ obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets -obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c deleted file mode 100644 index b3cc416ed292..000000000000 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ /dev/null @@ -1,929 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Cluster IP hashmark target - * (C) 2003-2004 by Harald Welte - * based on ideas of Fabio Olive Leite - * - * Development of this code funded by SuSE Linux AG, https://www.suse.com/ - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CLUSTERIP_VERSION "0.8" - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); - -struct clusterip_config { - struct list_head list; /* list of all configs */ - refcount_t refcount; /* reference count */ - refcount_t entries; /* number of entries/rules - * referencing us */ - - __be32 clusterip; /* the IP address */ - u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ - int ifindex; /* device ifindex */ - u_int16_t num_total_nodes; /* total number of nodes */ - unsigned long local_nodes; /* node number array */ - -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; /* proc dir entry */ -#endif - enum clusterip_hashmode hash_mode; /* which hashing mode */ - u_int32_t hash_initval; /* hash initialization */ - struct rcu_head rcu; /* for call_rcu */ - struct net *net; /* netns for pernet list */ - char ifname[IFNAMSIZ]; /* device ifname */ -}; - -#ifdef CONFIG_PROC_FS -static const struct proc_ops clusterip_proc_ops; -#endif - -struct clusterip_net { - struct list_head configs; - /* lock protects the configs list */ - spinlock_t lock; - - bool clusterip_deprecated_warning; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *procdir; - /* mutex protects the config->pde*/ - struct mutex mutex; -#endif - unsigned int hook_users; -}; - -static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); - -static const struct nf_hook_ops cip_arp_ops = { - .hook = clusterip_arp_mangle, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_OUT, - .priority = -1 -}; - -static unsigned int clusterip_net_id __read_mostly; -static inline struct clusterip_net *clusterip_pernet(struct net *net) -{ - return net_generic(net, clusterip_net_id); -} - -static inline void -clusterip_config_get(struct clusterip_config *c) -{ - refcount_inc(&c->refcount); -} - -static void clusterip_config_rcu_free(struct rcu_head *head) -{ - struct clusterip_config *config; - struct net_device *dev; - - config = container_of(head, struct clusterip_config, rcu); - dev = dev_get_by_name(config->net, config->ifname); - if (dev) { - dev_mc_del(dev, config->clustermac); - dev_put(dev); - } - kfree(config); -} - -static inline void -clusterip_config_put(struct clusterip_config *c) -{ - if (refcount_dec_and_test(&c->refcount)) - call_rcu(&c->rcu, clusterip_config_rcu_free); -} - -/* decrease the count of entries using/referencing this config. If last - * entry(rule) is removed, remove the config from lists, but don't free it - * yet, since proc-files could still be holding references */ -static inline void -clusterip_config_entry_put(struct clusterip_config *c) -{ - struct clusterip_net *cn = clusterip_pernet(c->net); - - local_bh_disable(); - if (refcount_dec_and_lock(&c->entries, &cn->lock)) { - list_del_rcu(&c->list); - spin_unlock(&cn->lock); - local_bh_enable(); - /* In case anyone still accesses the file, the open/close - * functions are also incrementing the refcount on their own, - * so it's safe to remove the entry even if it's in use. */ -#ifdef CONFIG_PROC_FS - mutex_lock(&cn->mutex); - if (cn->procdir) - proc_remove(c->pde); - mutex_unlock(&cn->mutex); -#endif - return; - } - local_bh_enable(); -} - -static struct clusterip_config * -__clusterip_config_find(struct net *net, __be32 clusterip) -{ - struct clusterip_config *c; - struct clusterip_net *cn = clusterip_pernet(net); - - list_for_each_entry_rcu(c, &cn->configs, list) { - if (c->clusterip == clusterip) - return c; - } - - return NULL; -} - -static inline struct clusterip_config * -clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) -{ - struct clusterip_config *c; - - rcu_read_lock_bh(); - c = __clusterip_config_find(net, clusterip); - if (c) { -#ifdef CONFIG_PROC_FS - if (!c->pde) - c = NULL; - else -#endif - if (unlikely(!refcount_inc_not_zero(&c->refcount))) - c = NULL; - else if (entry) { - if (unlikely(!refcount_inc_not_zero(&c->entries))) { - clusterip_config_put(c); - c = NULL; - } - } - } - rcu_read_unlock_bh(); - - return c; -} - -static void -clusterip_config_init_nodelist(struct clusterip_config *c, - const struct ipt_clusterip_tgt_info *i) -{ - int n; - - for (n = 0; n < i->num_local_nodes; n++) - set_bit(i->local_nodes[n] - 1, &c->local_nodes); -} - -static int -clusterip_netdev_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net *net = dev_net(dev); - struct clusterip_net *cn = clusterip_pernet(net); - struct clusterip_config *c; - - spin_lock_bh(&cn->lock); - list_for_each_entry_rcu(c, &cn->configs, list) { - switch (event) { - case NETDEV_REGISTER: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } - break; - case NETDEV_UNREGISTER: - if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; - } - break; - case NETDEV_CHANGENAME: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } else if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; - } - break; - } - } - spin_unlock_bh(&cn->lock); - - return NOTIFY_DONE; -} - -static struct clusterip_config * -clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, - __be32 ip, const char *iniface) -{ - struct clusterip_net *cn = clusterip_pernet(net); - struct clusterip_config *c; - struct net_device *dev; - int err; - - if (iniface[0] == '\0') { - pr_info("Please specify an interface name\n"); - return ERR_PTR(-EINVAL); - } - - c = kzalloc(sizeof(*c), GFP_ATOMIC); - if (!c) - return ERR_PTR(-ENOMEM); - - dev = dev_get_by_name(net, iniface); - if (!dev) { - pr_info("no such interface %s\n", iniface); - kfree(c); - return ERR_PTR(-ENOENT); - } - c->ifindex = dev->ifindex; - strcpy(c->ifname, dev->name); - memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); - dev_mc_add(dev, c->clustermac); - dev_put(dev); - - c->clusterip = ip; - c->num_total_nodes = i->num_total_nodes; - clusterip_config_init_nodelist(c, i); - c->hash_mode = i->hash_mode; - c->hash_initval = i->hash_initval; - c->net = net; - refcount_set(&c->refcount, 1); - - spin_lock_bh(&cn->lock); - if (__clusterip_config_find(net, ip)) { - err = -EBUSY; - goto out_config_put; - } - - list_add_rcu(&c->list, &cn->configs); - spin_unlock_bh(&cn->lock); - -#ifdef CONFIG_PROC_FS - { - char buffer[16]; - - /* create proc dir entry */ - sprintf(buffer, "%pI4", &ip); - mutex_lock(&cn->mutex); - c->pde = proc_create_data(buffer, 0600, - cn->procdir, - &clusterip_proc_ops, c); - mutex_unlock(&cn->mutex); - if (!c->pde) { - err = -ENOMEM; - goto err; - } - } -#endif - - refcount_set(&c->entries, 1); - return c; - -#ifdef CONFIG_PROC_FS -err: -#endif - spin_lock_bh(&cn->lock); - list_del_rcu(&c->list); -out_config_put: - spin_unlock_bh(&cn->lock); - clusterip_config_put(c); - return ERR_PTR(err); -} - -#ifdef CONFIG_PROC_FS -static int -clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) -{ - - if (nodenum == 0 || - nodenum > c->num_total_nodes) - return 1; - - /* check if we already have this number in our bitfield */ - if (test_and_set_bit(nodenum - 1, &c->local_nodes)) - return 1; - - return 0; -} - -static bool -clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) -{ - if (nodenum == 0 || - nodenum > c->num_total_nodes) - return true; - - if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) - return false; - - return true; -} -#endif - -static inline u_int32_t -clusterip_hashfn(const struct sk_buff *skb, - const struct clusterip_config *config) -{ - const struct iphdr *iph = ip_hdr(skb); - unsigned long hashval; - u_int16_t sport = 0, dport = 0; - int poff; - - poff = proto_ports_offset(iph->protocol); - if (poff >= 0) { - const u_int16_t *ports; - u16 _ports[2]; - - ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports); - if (ports) { - sport = ports[0]; - dport = ports[1]; - } - } else { - net_info_ratelimited("unknown protocol %u\n", iph->protocol); - } - - switch (config->hash_mode) { - case CLUSTERIP_HASHMODE_SIP: - hashval = jhash_1word(ntohl(iph->saddr), - config->hash_initval); - break; - case CLUSTERIP_HASHMODE_SIP_SPT: - hashval = jhash_2words(ntohl(iph->saddr), sport, - config->hash_initval); - break; - case CLUSTERIP_HASHMODE_SIP_SPT_DPT: - hashval = jhash_3words(ntohl(iph->saddr), sport, dport, - config->hash_initval); - break; - default: - /* to make gcc happy */ - hashval = 0; - /* This cannot happen, unless the check function wasn't called - * at rule load time */ - pr_info("unknown mode %u\n", config->hash_mode); - BUG(); - break; - } - - /* node numbers are 1..n, not 0..n */ - return reciprocal_scale(hashval, config->num_total_nodes) + 1; -} - -static inline int -clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) -{ - return test_bit(hash - 1, &config->local_nodes); -} - -/*********************************************************************** - * IPTABLES TARGET - ***********************************************************************/ - -static unsigned int -clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - u_int32_t hash; - - /* don't need to clusterip_config_get() here, since refcount - * is only decremented by destroy() - and ip_tables guarantees - * that the ->target() function isn't called after ->destroy() */ - - ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) - return NF_DROP; - - /* special case: ICMP error handling. conntrack distinguishes between - * error messages (RELATED) and information requests (see below) */ - if (ip_hdr(skb)->protocol == IPPROTO_ICMP && - (ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)) - return XT_CONTINUE; - - /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO, - * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here - * on, which all have an ID field [relevant for hashing]. */ - - hash = clusterip_hashfn(skb, cipinfo->config); - - switch (ctinfo) { - case IP_CT_NEW: - WRITE_ONCE(ct->mark, hash); - break; - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - /* FIXME: we don't handle expectations at the moment. - * They can arrive on a different node than - * the master connection (e.g. FTP passive mode) */ - case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED_REPLY: - break; - default: /* Prevent gcc warnings */ - break; - } - -#ifdef DEBUG - nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); -#endif - pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); - if (!clusterip_responsible(cipinfo->config, hash)) { - pr_debug("not responsible\n"); - return NF_DROP; - } - pr_debug("responsible\n"); - - /* despite being received via linklayer multicast, this is - * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ - skb->pkt_type = PACKET_HOST; - - return XT_CONTINUE; -} - -static int clusterip_tg_check(const struct xt_tgchk_param *par) -{ - struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; - struct clusterip_net *cn = clusterip_pernet(par->net); - const struct ipt_entry *e = par->entryinfo; - struct clusterip_config *config; - int ret, i; - - if (par->nft_compat) { - pr_err("cannot use CLUSTERIP target from nftables compat\n"); - return -EOPNOTSUPP; - } - - if (cn->hook_users == UINT_MAX) - return -EOVERFLOW; - - if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && - cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && - cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { - pr_info("unknown mode %u\n", cipinfo->hash_mode); - return -EINVAL; - - } - if (e->ip.dmsk.s_addr != htonl(0xffffffff) || - e->ip.dst.s_addr == 0) { - pr_info("Please specify destination IP\n"); - return -EINVAL; - } - if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) { - pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes); - return -EINVAL; - } - for (i = 0; i < cipinfo->num_local_nodes; i++) { - if (cipinfo->local_nodes[i] - 1 >= - sizeof(config->local_nodes) * 8) { - pr_info("bad local_nodes[%d] %u\n", - i, cipinfo->local_nodes[i]); - return -EINVAL; - } - } - - config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); - if (!config) { - if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { - pr_info("no config found for %pI4, need 'new'\n", - &e->ip.dst.s_addr); - return -EINVAL; - } else { - config = clusterip_config_init(par->net, cipinfo, - e->ip.dst.s_addr, - e->ip.iniface); - if (IS_ERR(config)) - return PTR_ERR(config); - } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { - clusterip_config_entry_put(config); - clusterip_config_put(config); - return -EINVAL; - } - - ret = nf_ct_netns_get(par->net, par->family); - if (ret < 0) { - pr_info("cannot load conntrack support for proto=%u\n", - par->family); - clusterip_config_entry_put(config); - clusterip_config_put(config); - return ret; - } - - if (cn->hook_users == 0) { - ret = nf_register_net_hook(par->net, &cip_arp_ops); - - if (ret < 0) { - clusterip_config_entry_put(config); - clusterip_config_put(config); - nf_ct_netns_put(par->net, par->family); - return ret; - } - } - - cn->hook_users++; - - if (!cn->clusterip_deprecated_warning) { - pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " - "use xt_cluster instead\n"); - cn->clusterip_deprecated_warning = true; - } - - cipinfo->config = config; - return ret; -} - -/* drop reference count of cluster config when rule is deleted */ -static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) -{ - const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; - struct clusterip_net *cn = clusterip_pernet(par->net); - - /* if no more entries are referencing the config, remove it - * from the list and destroy the proc entry */ - clusterip_config_entry_put(cipinfo->config); - - clusterip_config_put(cipinfo->config); - - nf_ct_netns_put(par->net, par->family); - cn->hook_users--; - - if (cn->hook_users == 0) - nf_unregister_net_hook(par->net, &cip_arp_ops); -} - -#ifdef CONFIG_NETFILTER_XTABLES_COMPAT -struct compat_ipt_clusterip_tgt_info -{ - u_int32_t flags; - u_int8_t clustermac[6]; - u_int16_t num_total_nodes; - u_int16_t num_local_nodes; - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; - u_int32_t hash_mode; - u_int32_t hash_initval; - compat_uptr_t config; -}; -#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */ - -static struct xt_target clusterip_tg_reg __read_mostly = { - .name = "CLUSTERIP", - .family = NFPROTO_IPV4, - .target = clusterip_tg, - .checkentry = clusterip_tg_check, - .destroy = clusterip_tg_destroy, - .targetsize = sizeof(struct ipt_clusterip_tgt_info), - .usersize = offsetof(struct ipt_clusterip_tgt_info, config), -#ifdef CONFIG_NETFILTER_XTABLES_COMPAT - .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), -#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */ - .me = THIS_MODULE -}; - - -/*********************************************************************** - * ARP MANGLING CODE - ***********************************************************************/ - -/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ -struct arp_payload { - u_int8_t src_hw[ETH_ALEN]; - __be32 src_ip; - u_int8_t dst_hw[ETH_ALEN]; - __be32 dst_ip; -} __packed; - -#ifdef DEBUG -static void arp_print(struct arp_payload *payload) -{ -#define HBUFFERLEN 30 - char hbuffer[HBUFFERLEN]; - int j, k; - - for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) { - hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); - hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); - hbuffer[k++] = ':'; - } - hbuffer[--k] = '\0'; - - pr_debug("src %pI4@%s, dst %pI4\n", - &payload->src_ip, hbuffer, &payload->dst_ip); -} -#endif - -static unsigned int -clusterip_arp_mangle(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct arphdr *arp = arp_hdr(skb); - struct arp_payload *payload; - struct clusterip_config *c; - struct net *net = state->net; - - /* we don't care about non-ethernet and non-ipv4 ARP */ - if (arp->ar_hrd != htons(ARPHRD_ETHER) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) - return NF_ACCEPT; - - /* we only want to mangle arp requests and replies */ - if (arp->ar_op != htons(ARPOP_REPLY) && - arp->ar_op != htons(ARPOP_REQUEST)) - return NF_ACCEPT; - - payload = (void *)(arp+1); - - /* if there is no clusterip configuration for the arp reply's - * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(net, payload->src_ip, 0); - if (!c) - return NF_ACCEPT; - - /* normally the linux kernel always replies to arp queries of - * addresses on different interfacs. However, in the CLUSTERIP case - * this wouldn't work, since we didn't subscribe the mcast group on - * other interfaces */ - if (c->ifindex != state->out->ifindex) { - pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n", - c->ifindex, state->out->ifindex); - clusterip_config_put(c); - return NF_ACCEPT; - } - - /* mangle reply hardware address */ - memcpy(payload->src_hw, c->clustermac, arp->ar_hln); - -#ifdef DEBUG - pr_debug("mangled arp reply: "); - arp_print(payload); -#endif - - clusterip_config_put(c); - - return NF_ACCEPT; -} - -/*********************************************************************** - * PROC DIR HANDLING - ***********************************************************************/ - -#ifdef CONFIG_PROC_FS - -struct clusterip_seq_position { - unsigned int pos; /* position */ - unsigned int weight; /* number of bits set == size */ - unsigned int bit; /* current bit */ - unsigned long val; /* current value */ -}; - -static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) -{ - struct clusterip_config *c = s->private; - unsigned int weight; - u_int32_t local_nodes; - struct clusterip_seq_position *idx; - - /* FIXME: possible race */ - local_nodes = c->local_nodes; - weight = hweight32(local_nodes); - if (*pos >= weight) - return NULL; - - idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); - if (!idx) - return ERR_PTR(-ENOMEM); - - idx->pos = *pos; - idx->weight = weight; - idx->bit = ffs(local_nodes); - idx->val = local_nodes; - clear_bit(idx->bit - 1, &idx->val); - - return idx; -} - -static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct clusterip_seq_position *idx = v; - - *pos = ++idx->pos; - if (*pos >= idx->weight) { - kfree(v); - return NULL; - } - idx->bit = ffs(idx->val); - clear_bit(idx->bit - 1, &idx->val); - return idx; -} - -static void clusterip_seq_stop(struct seq_file *s, void *v) -{ - if (!IS_ERR(v)) - kfree(v); -} - -static int clusterip_seq_show(struct seq_file *s, void *v) -{ - struct clusterip_seq_position *idx = v; - - if (idx->pos != 0) - seq_putc(s, ','); - - seq_printf(s, "%u", idx->bit); - - if (idx->pos == idx->weight - 1) - seq_putc(s, '\n'); - - return 0; -} - -static const struct seq_operations clusterip_seq_ops = { - .start = clusterip_seq_start, - .next = clusterip_seq_next, - .stop = clusterip_seq_stop, - .show = clusterip_seq_show, -}; - -static int clusterip_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &clusterip_seq_ops); - - if (!ret) { - struct seq_file *sf = file->private_data; - struct clusterip_config *c = pde_data(inode); - - sf->private = c; - - clusterip_config_get(c); - } - - return ret; -} - -static int clusterip_proc_release(struct inode *inode, struct file *file) -{ - struct clusterip_config *c = pde_data(inode); - int ret; - - ret = seq_release(inode, file); - - if (!ret) - clusterip_config_put(c); - - return ret; -} - -static ssize_t clusterip_proc_write(struct file *file, const char __user *input, - size_t size, loff_t *ofs) -{ - struct clusterip_config *c = pde_data(file_inode(file)); -#define PROC_WRITELEN 10 - char buffer[PROC_WRITELEN+1]; - unsigned long nodenum; - int rc; - - if (size > PROC_WRITELEN) - return -EIO; - if (copy_from_user(buffer, input, size)) - return -EFAULT; - buffer[size] = 0; - - if (*buffer == '+') { - rc = kstrtoul(buffer+1, 10, &nodenum); - if (rc) - return rc; - if (clusterip_add_node(c, nodenum)) - return -ENOMEM; - } else if (*buffer == '-') { - rc = kstrtoul(buffer+1, 10, &nodenum); - if (rc) - return rc; - if (clusterip_del_node(c, nodenum)) - return -ENOENT; - } else - return -EIO; - - return size; -} - -static const struct proc_ops clusterip_proc_ops = { - .proc_open = clusterip_proc_open, - .proc_read = seq_read, - .proc_write = clusterip_proc_write, - .proc_lseek = seq_lseek, - .proc_release = clusterip_proc_release, -}; - -#endif /* CONFIG_PROC_FS */ - -static int clusterip_net_init(struct net *net) -{ - struct clusterip_net *cn = clusterip_pernet(net); - - INIT_LIST_HEAD(&cn->configs); - - spin_lock_init(&cn->lock); - -#ifdef CONFIG_PROC_FS - cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); - if (!cn->procdir) { - pr_err("Unable to proc dir entry\n"); - return -ENOMEM; - } - mutex_init(&cn->mutex); -#endif /* CONFIG_PROC_FS */ - - return 0; -} - -static void clusterip_net_exit(struct net *net) -{ -#ifdef CONFIG_PROC_FS - struct clusterip_net *cn = clusterip_pernet(net); - - mutex_lock(&cn->mutex); - proc_remove(cn->procdir); - cn->procdir = NULL; - mutex_unlock(&cn->mutex); -#endif -} - -static struct pernet_operations clusterip_net_ops = { - .init = clusterip_net_init, - .exit = clusterip_net_exit, - .id = &clusterip_net_id, - .size = sizeof(struct clusterip_net), -}; - -static struct notifier_block cip_netdev_notifier = { - .notifier_call = clusterip_netdev_event -}; - -static int __init clusterip_tg_init(void) -{ - int ret; - - ret = register_pernet_subsys(&clusterip_net_ops); - if (ret < 0) - return ret; - - ret = xt_register_target(&clusterip_tg_reg); - if (ret < 0) - goto cleanup_subsys; - - ret = register_netdevice_notifier(&cip_netdev_notifier); - if (ret < 0) - goto unregister_target; - - pr_info("ClusterIP Version %s loaded successfully\n", - CLUSTERIP_VERSION); - - return 0; - -unregister_target: - xt_unregister_target(&clusterip_tg_reg); -cleanup_subsys: - unregister_pernet_subsys(&clusterip_net_ops); - return ret; -} - -static void __exit clusterip_tg_exit(void) -{ - pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); - - unregister_netdevice_notifier(&cip_netdev_notifier); - xt_unregister_target(&clusterip_tg_reg); - unregister_pernet_subsys(&clusterip_net_ops); - - /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */ - rcu_barrier(); -} - -module_init(clusterip_tg_init); -module_exit(clusterip_tg_exit);