@@ -37,10 +37,15 @@
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/poll.h>
+#include <linux/sched/mm.h>
#include <net/sock.h>
#include "rds.h"
+static bool rds_force_noio;
+module_param_named(force_noio, rds_force_noio, bool, 0444);
+MODULE_PARM_DESC(force_noio, "Force the use of GFP_NOIO (Y/N)");
+
/* this is just used for stats gathering :/ */
static DEFINE_SPINLOCK(rds_sock_lock);
static unsigned long rds_sock_count;
@@ -59,8 +64,12 @@ DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);
static int rds_release(struct socket *sock)
{
struct sock *sk = sock->sk;
+ unsigned int noio_flags;
struct rds_sock *rs;
+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
if (!sk)
goto out;
@@ -90,6 +99,8 @@ static int rds_release(struct socket *sock)
sock->sk = NULL;
sock_put(sk);
out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return 0;
}
@@ -214,9 +225,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct rds_sock *rs = rds_sk_to_rs(sk);
+ unsigned int noio_flags;
__poll_t mask = 0;
unsigned long flags;
+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
poll_wait(file, sk_sleep(sk), wait);
if (rs->rs_seen_congestion)
@@ -249,6 +264,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
if (mask)
rs->rs_seen_congestion = 0;
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return mask;
}
@@ -293,9 +310,13 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)
{
struct sockaddr_in6 sin6;
+ unsigned int noio_flags;
struct sockaddr_in sin;
int ret = 0;
+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
/* racing with another thread binding seems ok here */
if (ipv6_addr_any(&rs->rs_bound_addr)) {
ret = -ENOTCONN; /* XXX not a great errno */
@@ -324,6 +345,8 @@ static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)
rds_send_drop_to(rs, &sin6);
out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;
}
@@ -485,8 +508,12 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret = -ENOPROTOOPT, len;
+ unsigned int noio_flags;
int trans;
+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
if (level != SOL_RDS)
goto out;
@@ -529,6 +556,8 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
}
out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;
}
@@ -538,12 +567,16 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
{
struct sock *sk = sock->sk;
struct sockaddr_in *sin;
+ unsigned int noio_flags;
struct rds_sock *rs = rds_sk_to_rs(sk);
int ret = 0;
if (addr_len < offsetofend(struct sockaddr, sa_family))
return -EINVAL;
+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
lock_sock(sk);
switch (uaddr->sa_family) {
@@ -626,6 +659,8 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
}
release_sock(sk);
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;
}
@@ -697,16 +732,28 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
static int rds_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ unsigned int noio_flags;
struct sock *sk;
+ int ret;
if (sock->type != SOCK_SEQPACKET || protocol)
return -ESOCKTNOSUPPORT;
+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
- if (!sk)
- return -ENOMEM;
+ if (!sk) {
+ ret = -ENOMEM;
+ goto out;
+ }
- return __rds_create(sock, sk, protocol);
+ ret = __rds_create(sock, sk, protocol);
+out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
+
+ return ret;
}
void rds_sock_addref(struct rds_sock *rs)
@@ -895,8 +942,12 @@ u32 rds_gen_num;
static int __init rds_init(void)
{
+ unsigned int noio_flags;
int ret;
+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));
ret = rds_bind_lock_init();
@@ -947,6 +998,8 @@ static int __init rds_init(void)
out_bind:
rds_bind_lock_destroy();
out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;
}
module_init(rds_init);
For most entry points to RDS, we bracket the function body with calls to memalloc_noio_{save,restore} when enabled by the module parameter force_noio. We skip the calls to memalloc_noio_{save,restore} in rds_ioctl(), as no memory allocations are executed in this function or its callees. We call memalloc_noio_{save,restore} in rds_poll() because of the following call chain: rds_poll() -> poll_wait() -> __pollwait() -> poll_get_entry() -> __get_free_page(GFP_KERNEL). The function rds_setsockopt() allocates memory in its callees rds_get_mr() and rds_get_mr_for_dest(). Hence, we need memalloc_noio_{save,restore} in rds_setsockopt(). In rds_getsockopt(), we have rds_info_getsockopt(), which allocates memory. Hence, we need memalloc_noio_{save,restore} in rds_getsockopt(). All of the above is done in order to conditionally enable RDS to become a block I/O device. Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com> --- v1 -> v2: * s/EXPORT_SYMBOL/static/ for the rds_force_noio variable, as pointed out by Simon * Straightened the reverse xmas tree in two places * Fixed copy/paste error in rds_cancel_sent_to() where I had two _save()s and no _restore(), as reported by Simon --- net/rds/af_rds.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-)