From 77b228276c0c160b3be23cac758817df1a2ca2ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ole=20Andr=C3=A9=20Vadla=20Ravn=C3=A5s?= Date: Tue, 10 May 2022 00:47:37 +0200 Subject: [PATCH] Make it possible to build IP stack as a module This is not pretty, but it works. --- include/net/ip.h | 4 +- net/.gitignore | 4 + net/Kconfig | 2 +- net/core/secure_seq.c | 2 +- net/ipv4/Makefile | 30 +- net/ipv4/af_inet.c | 35 + net/ipv4/inet_hashtables.c | 1 + net/ipv4/inet_timewait_sock.c | 1 + net/ipv4/module_lib.c | 1370 +++++++++++++++++++++++++++++++++ net/ipv4/proc.c | 2 + net/ipv4/sysctl_net_ipv4.c | 6 + net/ipv4/sysfs_net_ipv4.c | 6 + net/ipv4/tcp.c | 8 + net/ipv4/tcp_cong.c | 7 + net/ipv4/tcp_fastopen.c | 6 + net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_memcontrol.c | 6 + net/ipv4/udp.c | 2 + net/ipv6/Makefile | 7 + net/ipv6/af_inet6.c | 4 + net/ipv6/exthdrs_offload.c | 2 +- net/ipv6/ip6_offload.c | 5 +- net/ipv6/ip6_offload.h | 1 + net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/tcpv6_offload.c | 2 +- net/ipv6/udp_offload.c | 2 +- 26 files changed, 1494 insertions(+), 25 deletions(-) create mode 100644 net/.gitignore create mode 100644 net/ipv4/module_lib.c diff --git a/include/net/ip.h b/include/net/ip.h index 1f6794b2..65ac5fab 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -234,7 +234,7 @@ static inline bool ip_is_fragment(const struct iphdr *iph) return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; } -#ifdef CONFIG_INET +#if IS_ENABLED(CONFIG_INET) #include /* The function in 2.2 was invalid, producing wrong result for @@ -404,7 +404,7 @@ enum ip_defrag_users { }; int ip_defrag(struct sk_buff *skb, u32 user); -#ifdef CONFIG_INET +#if IS_ENABLED(CONFIG_INET) struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user); #else static inline struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) diff --git a/net/.gitignore b/net/.gitignore new file mode 100644 index 00000000..b3f74bdb --- /dev/null +++ b/net/.gitignore @@ -0,0 +1,4 @@ +# +# Generated files +# +Module.symvers diff --git a/net/Kconfig b/net/Kconfig index 2a680dad..4e986e85 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -51,7 +51,7 @@ source "net/xfrm/Kconfig" source "net/iucv/Kconfig" config INET - bool "TCP/IP networking" + tristate "TCP/IP networking" select CRYPTO select CRYPTO_AES ---help--- diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index d0afc322..533ebfc8 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -32,7 +32,7 @@ static void net_secret_init(void) } #endif -#ifdef CONFIG_INET +#if IS_ENABLED(CONFIG_INET) static u32 seq_scale(u32 seq) { /* diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 5a9af0a9..e6188ede 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -2,7 +2,9 @@ # Makefile for the Linux TCP/IP (INET) layer. 
# -obj-y := route.o inetpeer.o protocol.o \ +obj-$(CONFIG_INET) += ipv4.o + +ipv4-y := route.o inetpeer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ inet_timewait_sock.o inet_connection_sock.o \ @@ -13,17 +15,21 @@ obj-y := route.o inetpeer.o protocol.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o +ifeq ($(CONFIG_INET),m) +ipv4-y += module_lib.o +endif + obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o -obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o -obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o -obj-$(CONFIG_PROC_FS) += proc.o -obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o -obj-$(CONFIG_IP_MROUTE) += ipmr.o +ipv4-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o +ipv4-$(CONFIG_SYSFS) += sysfs_net_ipv4.o +ipv4-$(CONFIG_PROC_FS) += proc.o +ipv4-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o +ipv4-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o -obj-$(CONFIG_SYN_COOKIES) += syncookies.o +ipv4-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o @@ -33,8 +39,8 @@ obj-$(CONFIG_INET_LRO) += inet_lro.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o -obj-$(CONFIG_IP_PNP) += ipconfig.o -obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ +ipv4-$(CONFIG_IP_PNP) += ipconfig.o +ipv4-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o @@ -51,8 +57,8 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o -obj-$(CONFIG_NETLABEL) += cipso_ipv4.o +ipv4-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o +ipv4-$(CONFIG_NETLABEL) += cipso_ipv4.o -obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ +ipv4-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f022e0e9..ca900ac8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,6 +119,10 @@ #include #endif +MODULE_AUTHOR("Cast of dozens"); +MODULE_DESCRIPTION("IPv4 protocol stack for Linux"); +MODULE_LICENSE("GPL"); + #ifdef CONFIG_ANDROID_PARANOID_NETWORK #include @@ -1702,13 +1706,23 @@ static int __init ipv4_offload_init(void) return 0; } +#ifndef CONFIG_INET_MODULE fs_initcall(ipv4_offload_init); +#endif static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, }; +#ifdef CONFIG_INET_MODULE +int tcp_congestion_init(void); +int tcp_fastopen_init(void); +int sysctl_ipv4_init(void); +int sysfs_ipv4_init(void); +int tcp_memcontrol_init(void); +#endif + static int __init inet_init(void) { struct inet_protosw *q; @@ -1717,6 +1731,12 @@ static int __init inet_init(void) BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); +#ifdef CONFIG_INET_MODULE + tcp_congestion_init(); + tcp_fastopen_init(); + ipv4_offload_init(); +#endif + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); if (!sysctl_local_reserved_ports) goto out; @@ -1823,6 +1843,21 @@ static int __init inet_init(void) dev_add_pack(&ip_packet_type); 
+#ifdef CONFIG_INET_MODULE +#ifdef CONFIG_SYSCTL + sysctl_ipv4_init(); +#endif +#ifdef CONFIG_SYSFS + sysfs_ipv4_init(); +#endif +#ifdef CONFIG_MEMCG_KMEM + tcp_memcontrol_init(); +#endif + + /* TODO: Implement unload logic */ + try_module_get(THIS_MODULE); +#endif + rc = 0; out: return rc; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c95848d0..1e321e2b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -582,6 +582,7 @@ out: return ret; } } +EXPORT_SYMBOL(__inet_hash_connect); /* * Bind a port for a connect operation and hash it. diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1f27c9f4..0f599b21 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -65,6 +65,7 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, */ return 1; } +EXPORT_SYMBOL(inet_twsk_unhash); /* Must be called with locally disabled BHs. */ static void __inet_twsk_kill(struct inet_timewait_sock *tw, diff --git a/net/ipv4/module_lib.c b/net/ipv4/module_lib.c new file mode 100644 index 00000000..3972ee3f --- /dev/null +++ b/net/ipv4/module_lib.c @@ -0,0 +1,1370 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Bits from fs/pipe.c + */ + +void pipe_wait(struct pipe_inode_info *pipe) +{ + DEFINE_WAIT(wait); + + /* + * Pipes are system-local resources, so sleeping on them + * is considered a noninteractive wait: + */ + prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); + pipe_unlock(pipe); + schedule(); + finish_wait(&pipe->wait, &wait); + pipe_lock(pipe); +} + + +/* + * Bits from fs/splice.c + */ + +static void wakeup_pipe_readers(struct pipe_inode_info *pipe) +{ + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); +} + +ssize_t splice_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) +{ + unsigned int spd_pages = spd->nr_pages; + int ret, do_wakeup, page_nr; + + ret = 0; + do_wakeup = 0; + page_nr = 0; + + pipe_lock(pipe); + + for (;;) { + if (!pipe->readers) { + send_sig(SIGPIPE, current, 0); + if (!ret) + ret = -EPIPE; + break; + } + + if (pipe->nrbufs < pipe->buffers) { + int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); + struct pipe_buffer *buf = pipe->bufs + newbuf; + + buf->page = spd->pages[page_nr]; + buf->offset = spd->partial[page_nr].offset; + buf->len = spd->partial[page_nr].len; + buf->private = spd->partial[page_nr].private; + buf->ops = spd->ops; + if (spd->flags & SPLICE_F_GIFT) + buf->flags |= PIPE_BUF_FLAG_GIFT; + + pipe->nrbufs++; + page_nr++; + ret += buf->len; + + if (pipe->files) + do_wakeup = 1; + + if (!--spd->nr_pages) + break; + if (pipe->nrbufs < pipe->buffers) + continue; + + break; + } + + if (spd->flags & SPLICE_F_NONBLOCK) { + if (!ret) + ret = -EAGAIN; + break; + } + + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; + break; + } + + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + do_wakeup = 0; + } + + pipe->waiting_writers++; + pipe_wait(pipe); + pipe->waiting_writers--; + } + + pipe_unlock(pipe); + + if (do_wakeup) + wakeup_pipe_readers(pipe); + + while (page_nr < spd_pages) + spd->spd_release(spd, page_nr++); + + return ret; +} + + +/* + * Bits from kernel/res_counter.c + */ + +void res_counter_init(struct 
res_counter *counter, struct res_counter *parent) +{ + spin_lock_init(&counter->lock); + counter->limit = RESOURCE_MAX; + counter->soft_limit = RESOURCE_MAX; + counter->parent = parent; +} + +static inline unsigned long long * +res_counter_member(struct res_counter *counter, int member) +{ + switch (member) { + case RES_USAGE: + return &counter->usage; + case RES_MAX_USAGE: + return &counter->max_usage; + case RES_LIMIT: + return &counter->limit; + case RES_FAILCNT: + return &counter->failcnt; + case RES_SOFT_LIMIT: + return &counter->soft_limit; + }; + + BUG(); + return NULL; +} + +#if BITS_PER_LONG == 32 +u64 res_counter_read_u64(struct res_counter *counter, int member) +{ + unsigned long flags; + u64 ret; + + spin_lock_irqsave(&counter->lock, flags); + ret = *res_counter_member(counter, member); + spin_unlock_irqrestore(&counter->lock, flags); + + return ret; +} +#else +u64 res_counter_read_u64(struct res_counter *counter, int member) +{ + return *res_counter_member(counter, member); +} +#endif + +int res_counter_memparse_write_strategy(const char *buf, + unsigned long long *res) +{ + char *end; + + /* return RESOURCE_MAX(unlimited) if "-1" is specified */ + if (*buf == '-') { + *res = simple_strtoull(buf + 1, &end, 10); + if (*res != 1 || *end != '\0') + return -EINVAL; + *res = RESOURCE_MAX; + return 0; + } + + *res = memparse(buf, &end); + if (*end != '\0') + return -EINVAL; + + *res = PAGE_ALIGN(*res); + return 0; +} + + +/* + * Bits from kernel/sysctl.c + */ + +static void proc_skip_char(char **buf, size_t *size, const char v) +{ + while (*size) { + if (**buf != v) + break; + (*size)--; + (*buf)++; + } +} + +#define TMPBUFLEN 22 +static int proc_get_long(char **buf, size_t *size, + unsigned long *val, bool *neg, + const char *perm_tr, unsigned perm_tr_len, char *tr) +{ + int len; + char *p, tmp[TMPBUFLEN]; + + if (!*size) + return -EINVAL; + + len = *size; + if (len > TMPBUFLEN - 1) + len = TMPBUFLEN - 1; + + memcpy(tmp, *buf, len); + + tmp[len] = 0; + p = tmp; + if (*p == '-' && *size > 1) { + *neg = true; + p++; + } else + *neg = false; + if (!isdigit(*p)) + return -EINVAL; + + *val = simple_strtoul(p, &p, 0); + + len = p - tmp; + + /* We don't know if the next char is whitespace thus we may accept + * invalid integers (e.g. 1234...a) or two integers instead of one + * (e.g. 123...1). So lets not allow such large numbers. */ + if (len == TMPBUFLEN - 1) + return -EINVAL; + + if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len)) + return -EINVAL; + + if (tr && (len < *size)) + *tr = *p; + + *buf += len; + *size -= len; + + return 0; +} + +static int proc_put_long(void __user **buf, size_t *size, unsigned long val, + bool neg) +{ + int len; + char tmp[TMPBUFLEN], *p = tmp; + + sprintf(p, "%s%lu", neg ? 
"-" : "", val); + len = strlen(tmp); + if (len > *size) + len = *size; + if (copy_to_user(*buf, tmp, len)) + return -EFAULT; + *size -= len; + *buf += len; + return 0; +} +#undef TMPBUFLEN + +static int proc_put_char(void __user **buf, size_t *size, char c) +{ + if (*size) { + char __user **buffer = (char __user **)buf; + if (put_user(c, *buffer)) + return -EFAULT; + (*size)--, (*buffer)++; + *buf = *buffer; + } + return 0; +} + +int proc_do_large_bitmap(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int err = 0; + bool first = 1; + size_t left = *lenp; + unsigned long bitmap_len = table->maxlen; + unsigned long *bitmap = (unsigned long *) table->data; + unsigned long *tmp_bitmap = NULL; + char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; + + if (!bitmap_len || !left || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + unsigned long page = 0; + char *kbuf; + + if (left > PAGE_SIZE - 1) + left = PAGE_SIZE - 1; + + page = __get_free_page(GFP_TEMPORARY); + kbuf = (char *) page; + if (!kbuf) + return -ENOMEM; + if (copy_from_user(kbuf, buffer, left)) { + free_page(page); + return -EFAULT; + } + kbuf[left] = 0; + + tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long), + GFP_KERNEL); + if (!tmp_bitmap) { + free_page(page); + return -ENOMEM; + } + proc_skip_char(&kbuf, &left, '\n'); + while (!err && left) { + unsigned long val_a, val_b; + bool neg; + + err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a, + sizeof(tr_a), &c); + if (err) + break; + if (val_a >= bitmap_len || neg) { + err = -EINVAL; + break; + } + + val_b = val_a; + if (left) { + kbuf++; + left--; + } + + if (c == '-') { + err = proc_get_long(&kbuf, &left, &val_b, + &neg, tr_b, sizeof(tr_b), + &c); + if (err) + break; + if (val_b >= bitmap_len || neg || + val_a > val_b) { + err = -EINVAL; + break; + } + if (left) { + kbuf++; + left--; + } + } + + bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1); + first = 0; + proc_skip_char(&kbuf, &left, '\n'); + } + free_page(page); + } else { + unsigned long bit_a, bit_b = 0; + + while (left) { + bit_a = find_next_bit(bitmap, bitmap_len, bit_b); + if (bit_a >= bitmap_len) + break; + bit_b = find_next_zero_bit(bitmap, bitmap_len, + bit_a + 1) - 1; + + if (!first) { + err = proc_put_char(&buffer, &left, ','); + if (err) + break; + } + err = proc_put_long(&buffer, &left, bit_a, false); + if (err) + break; + if (bit_a != bit_b) { + err = proc_put_char(&buffer, &left, '-'); + if (err) + break; + err = proc_put_long(&buffer, &left, bit_b, false); + if (err) + break; + } + + first = 0; bit_b++; + } + if (!err) + err = proc_put_char(&buffer, &left, '\n'); + } + + if (!err) { + if (write) { + if (*ppos) + bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); + else + bitmap_copy(bitmap, tmp_bitmap, bitmap_len); + } + kfree(tmp_bitmap); + *lenp -= left; + *ppos += *lenp; + return 0; + } else { + kfree(tmp_bitmap); + return err; + } +} + + +/* + * Bits from mm/memcontrol.c + */ + +enum mem_cgroup_stat_index { + /* + * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. 
+ */ + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ + MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ + MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ + MEM_CGROUP_STAT_NSTATS, +}; + +enum mem_cgroup_events_index { + MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ + MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ + MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ + MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ + MEM_CGROUP_EVENTS_NSTATS, +}; + +enum mem_cgroup_events_target { + MEM_CGROUP_TARGET_THRESH, + MEM_CGROUP_TARGET_SOFTLIMIT, + MEM_CGROUP_TARGET_NUMAINFO, + MEM_CGROUP_NTARGETS, +}; + +struct mem_cgroup_stat_cpu { + long count[MEM_CGROUP_STAT_NSTATS]; + unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; + unsigned long nr_page_events; + unsigned long targets[MEM_CGROUP_NTARGETS]; +}; + +struct mem_cgroup_reclaim_iter { + /* + * last scanned hierarchy member. Valid only if last_dead_count + * matches memcg->dead_count of the hierarchy root group. + */ + struct mem_cgroup *last_visited; + unsigned long last_dead_count; + + /* scan generation, increased every round-trip */ + unsigned int generation; +}; + +struct mem_cgroup_per_zone { + struct lruvec lruvec; + unsigned long lru_size[NR_LRU_LISTS]; + + struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; + + struct rb_node tree_node; /* RB tree node */ + unsigned long long usage_in_excess;/* Set to the value by which */ + /* the soft limit is exceeded*/ + bool on_tree; + struct mem_cgroup *memcg; /* Back pointer, we cannot */ + /* use container_of */ +}; + +struct mem_cgroup_per_node { + struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; +}; + +struct mem_cgroup_lru_info { + struct mem_cgroup_per_node *nodeinfo[0]; +}; + +struct mem_cgroup_thresholds { + /* Primary thresholds array */ + struct mem_cgroup_threshold_ary *primary; + /* + * Spare threshold array. + * This is needed to make mem_cgroup_unregister_event() "never fail". + * It must be able to store at least primary->size - 1 entries. + */ + struct mem_cgroup_threshold_ary *spare; +}; + +struct mem_cgroup { + struct cgroup_subsys_state css; + /* + * the counter to account for memory usage + */ + struct res_counter res; + + /* vmpressure notifications */ + struct vmpressure vmpressure; + + union { + /* + * the counter to account for mem+swap usage. + */ + struct res_counter memsw; + + /* + * rcu_freeing is used only when freeing struct mem_cgroup, + * so put it into a union to avoid wasting more memory. + * It must be disjoint from the css field. It could be + * in a union with the res field, but res plays a much + * larger part in mem_cgroup life than memsw, and might + * be of interest, even at time of free, when debugging. + * So share rcu_head with the less interesting memsw. + */ + struct rcu_head rcu_freeing; + /* + * We also need some space for a worker in deferred freeing. + * By the time we call it, rcu_freeing is no longer in use. + */ + struct work_struct work_freeing; + }; + + /* + * the counter to account for kernel memory usage. + */ + struct res_counter kmem; + /* + * Should the accounting and control be hierarchical, per subtree? 
+ */ + bool use_hierarchy; + unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */ + + bool oom_lock; + atomic_t under_oom; + atomic_t oom_wakeups; + + atomic_t refcnt; + + int swappiness; + /* OOM-Killer disable */ + int oom_kill_disable; + + /* set when res.limit == memsw.limit */ + bool memsw_is_minimum; + + /* protect arrays of thresholds */ + struct mutex thresholds_lock; + + /* thresholds for memory usage. RCU-protected */ + struct mem_cgroup_thresholds thresholds; + + /* thresholds for mem+swap usage. RCU-protected */ + struct mem_cgroup_thresholds memsw_thresholds; + + /* For oom notifier event fd */ + struct list_head oom_notify; + + /* + * Should we move charges of a task when a task is moved into this + * mem_cgroup ? And what type of charges should we move ? + */ + unsigned long move_charge_at_immigrate; + /* + * set > 0 if pages under this cgroup are moving to other cgroup. + */ + atomic_t moving_account; + /* taken only while moving_account > 0 */ + spinlock_t move_lock; + /* + * percpu counter. + */ + struct mem_cgroup_stat_cpu __percpu *stat; + /* + * used when a cpu is offlined or other synchronizations + * See mem_cgroup_read_stat(). + */ + struct mem_cgroup_stat_cpu nocpu_base; + spinlock_t pcp_counter_lock; + + atomic_t dead_count; +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) + struct tcp_memcontrol tcp_mem; +#endif +#if defined(CONFIG_MEMCG_KMEM) + /* analogous to slab_common's slab_caches list. per-memcg */ + struct list_head memcg_slab_caches; + /* Not a spinlock, we can take a lot of time walking the list */ + struct mutex slab_caches_mutex; + /* Index in the kmem_cache->memcg_params->memcg_caches array */ + int kmemcg_id; +#endif + + int last_scanned_node; +#if MAX_NUMNODES > 1 + nodemask_t scan_nodes; + atomic_t numainfo_events; + atomic_t numainfo_updating; +#endif + + /* + * Per cgroup active and inactive list, similar to the + * per zone LRU lists. + * + * WARNING: This has to be the last element of the struct. Don't + * add new fields after this point. + */ + struct mem_cgroup_lru_info info; +}; + +static inline +struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s) +{ + return container_of(s, struct mem_cgroup, css); +} + +struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) +{ + return mem_cgroup_from_css( + cgroup_subsys_state(cont, mem_cgroup_subsys_id)); +} + +struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) +{ + /* + * mm_update_next_owner() may clear mm->owner to NULL + * if it races with swapoff, page migration, etc. + * So this can be called with p == NULL. + */ + if (unlikely(!p)) + return NULL; + + return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id)); +} + + +/* + * Bits from mm/page_alloc.c + * + * Slightly simplified to avoid depending on nr_kernel_pages, nr_all_pages, + * and alloc_bootmem_nopanic(). 
+ */ + +void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long low_limit, + unsigned long high_limit) +{ + unsigned long long max = high_limit; + unsigned long log2qty, size; + void *table = NULL; + + /* allow the kernel cmdline to have a say */ + if (!numentries) + numentries = PAGE_SIZE / bucketsize; + numentries = roundup_pow_of_two(numentries); + + /* limit allocation size */ + if (max == 0) { + max = 64 * 1024; + do_div(max, bucketsize); + } + max = min(max, 0x80000000ULL); + + if (numentries < low_limit) + numentries = low_limit; + if (numentries > max) + numentries = max; + + log2qty = ilog2(numentries); + + do { + size = bucketsize << log2qty; + /* + * If bucketsize is not a power-of-two, we may free + * some pages at the end of hash table which + * alloc_pages_exact() automatically does + */ + if (get_order(size) < MAX_ORDER) { + table = alloc_pages_exact(size, GFP_ATOMIC); + kmemleak_alloc(table, size, 1, GFP_ATOMIC); + } + } while (!table && size > PAGE_SIZE && --log2qty); + + if (!table) + panic("Failed to allocate %s hash table\n", tablename); + + printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n", + tablename, + (1UL << log2qty), + ilog2(size) - PAGE_SHIFT, + size); + + if (_hash_shift) + *_hash_shift = log2qty; + if (_hash_mask) + *_hash_mask = (1 << log2qty) - 1; + + return table; +} + + +/* + * Bits from net/core/dst.c + */ + +const u32 dst_default_metrics[RTAX_MAX + 1] = { + /* This initializer is needed to force linker to place this variable + * into const section. Otherwise it might end into bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. 
+ */ + [RTAX_MAX] = 0xdeadbeef, +}; + + +/* + * Bits from net/core/neighbour.c + */ + +#define PNEIGH_HASHMASK 0xF + +static u32 pneigh_hash(const void *pkey, int key_len) +{ + u32 hash_val = *(u32 *)(pkey + key_len - 4); + hash_val ^= (hash_val >> 16); + hash_val ^= hash_val >> 8; + hash_val ^= hash_val >> 4; + hash_val &= PNEIGH_HASHMASK; + return hash_val; +} + +int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, + struct net_device *dev) +{ + struct pneigh_entry *n, **np; + int key_len = tbl->key_len; + u32 hash_val = pneigh_hash(pkey, key_len); + + write_lock_bh(&tbl->lock); + for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; + np = &n->next) { + if (!memcmp(n->key, pkey, key_len) && n->dev == dev && + net_eq(pneigh_net(n), net)) { + *np = n->next; + write_unlock_bh(&tbl->lock); + if (tbl->pdestructor) + tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); + release_net(pneigh_net(n)); + kfree(n); + return 0; + } + } + write_unlock_bh(&tbl->lock); + return -ENOENT; +} + + +/* + * Bits from net/core/request_sock.c + */ + +int reqsk_queue_alloc(struct request_sock_queue *queue, + unsigned int nr_table_entries) +{ + size_t lopt_size = sizeof(struct listen_sock); + struct listen_sock *lopt; + + nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); + nr_table_entries = max_t(u32, nr_table_entries, 8); + nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); + lopt_size += nr_table_entries * sizeof(struct request_sock *); + if (lopt_size > PAGE_SIZE) + lopt = vzalloc(lopt_size); + else + lopt = kzalloc(lopt_size, GFP_KERNEL); + if (lopt == NULL) + return -ENOMEM; + + for (lopt->max_qlen_log = 3; + (1 << lopt->max_qlen_log) < nr_table_entries; + lopt->max_qlen_log++); + + get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); + rwlock_init(&queue->syn_wait_lock); + queue->rskq_accept_head = NULL; + lopt->nr_table_entries = nr_table_entries; + + write_lock_bh(&queue->syn_wait_lock); + queue->listen_opt = lopt; + write_unlock_bh(&queue->syn_wait_lock); + + return 0; +} + +void __reqsk_queue_destroy(struct request_sock_queue *queue) +{ + struct listen_sock *lopt; + size_t lopt_size; + + /* + * this is an error recovery path only + * no locking needed and the lopt is not NULL + */ + + lopt = queue->listen_opt; + lopt_size = sizeof(struct listen_sock) + + lopt->nr_table_entries * sizeof(struct request_sock *); + + if (lopt_size > PAGE_SIZE) + vfree(lopt); + else + kfree(lopt); +} + +static inline struct listen_sock *reqsk_queue_yank_listen_sk( + struct request_sock_queue *queue) +{ + struct listen_sock *lopt; + + write_lock_bh(&queue->syn_wait_lock); + lopt = queue->listen_opt; + queue->listen_opt = NULL; + write_unlock_bh(&queue->syn_wait_lock); + + return lopt; +} + +void reqsk_queue_destroy(struct request_sock_queue *queue) +{ + /* make all the listen_opt local to us */ + struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); + size_t lopt_size = sizeof(struct listen_sock) + + lopt->nr_table_entries * sizeof(struct request_sock *); + + if (lopt->qlen != 0) { + unsigned int i; + + for (i = 0; i < lopt->nr_table_entries; i++) { + struct request_sock *req; + + while ((req = lopt->syn_table[i]) != NULL) { + lopt->syn_table[i] = req->dl_next; + lopt->qlen--; + reqsk_free(req); + } + } + } + + WARN_ON(lopt->qlen != 0); + if (lopt_size > PAGE_SIZE) + vfree(lopt); + else + kfree(lopt); +} + +void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, + bool reset) +{ + struct sock *lsk = tcp_rsk(req)->listener; + struct 
fastopen_queue *fastopenq = + inet_csk(lsk)->icsk_accept_queue.fastopenq; + + tcp_sk(sk)->fastopen_rsk = NULL; + spin_lock_bh(&fastopenq->lock); + fastopenq->qlen--; + tcp_rsk(req)->listener = NULL; + if (req->sk) /* the child socket hasn't been accepted yet */ + goto out; + + if (!reset || lsk->sk_state != TCP_LISTEN) { + /* If the listener has been closed don't bother with the + * special RST handling below. + */ + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + reqsk_free(req); + return; + } + /* Wait for 60secs before removing a req that has triggered RST. + * This is a simple defense against TFO spoofing attack - by + * counting the req against fastopen.max_qlen, and disabling + * TFO when the qlen exceeds max_qlen. + * + * For more details see CoNext'11 "TCP Fast Open" paper. + */ + req->expires = jiffies + 60*HZ; + if (fastopenq->rskq_rst_head == NULL) + fastopenq->rskq_rst_head = req; + else + fastopenq->rskq_rst_tail->dl_next = req; + + req->dl_next = NULL; + fastopenq->rskq_rst_tail = req; + fastopenq->qlen++; +out: + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + return; +} + + +/* + * Bits from net/core/secure_seq.c + */ + +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) + +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) +{ + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } +} + +static u32 seq_scale(u32 seq) +{ + /* + * As close as possible to RFC 793, which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. 
(period of 274 s) + */ + return seq + (ktime_to_ns(ktime_get_real()) >> 6); +} + +__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + net_secret_init(); + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return seq_scale(hash[0]); +} + +u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + net_secret_init(); + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = (__force u32)dport ^ net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} + +#if IS_ENABLED(CONFIG_IPV6) +__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + net_secret_init(); + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32)daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return seq_scale(hash[0]); +} +EXPORT_SYMBOL(secure_tcpv6_sequence_number); + +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + net_secret_init(); + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32) daddr[i]; + secret[4] = net_secret[4] + (__force u32)dport; + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return hash[0]; +} +EXPORT_SYMBOL(secure_ipv6_port_ephemeral); +#endif + + +/* + * Bits from net/core/skbuff.c + */ + +static struct page *linear_to_page(struct page *page, unsigned int *len, + unsigned int *offset, + struct sock *sk) +{ + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + return NULL; + + *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); + + memcpy(page_address(pfrag->page) + pfrag->offset, + page_address(page) + *offset, *len); + *offset = pfrag->offset; + pfrag->offset += *len; + + return pfrag->page; +} + +static bool spd_can_coalesce(const struct splice_pipe_desc *spd, + struct page *page, + unsigned int offset) +{ + return spd->nr_pages && + spd->pages[spd->nr_pages - 1] == page && + (spd->partial[spd->nr_pages - 1].offset + + spd->partial[spd->nr_pages - 1].len == offset); +} + +static bool spd_fill_page(struct splice_pipe_desc *spd, + struct pipe_inode_info *pipe, struct page *page, + unsigned int *len, unsigned int offset, + bool linear, + struct sock *sk) +{ + if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) + return true; + + if (linear) { + page = linear_to_page(page, len, &offset, sk); + if (!page) + return true; + } + if (spd_can_coalesce(spd, page, offset)) { + spd->partial[spd->nr_pages - 1].len += *len; + return false; + } + get_page(page); + spd->pages[spd->nr_pages] = page; + spd->partial[spd->nr_pages].len = *len; + spd->partial[spd->nr_pages].offset = offset; + spd->nr_pages++; + + return false; +} + +static bool __splice_segment(struct page *page, unsigned int poff, + unsigned int plen, unsigned int *off, + unsigned int *len, + struct splice_pipe_desc *spd, bool linear, + 
struct sock *sk, + struct pipe_inode_info *pipe) +{ + if (!*len) + return true; + + /* skip this segment if already processed */ + if (*off >= plen) { + *off -= plen; + return false; + } + + /* ignore any bits we already processed */ + poff += *off; + plen -= *off; + *off = 0; + + do { + unsigned int flen = min(*len, plen); + + if (spd_fill_page(spd, pipe, page, &flen, poff, + linear, sk)) + return true; + poff += flen; + plen -= flen; + *len -= flen; + } while (*len && plen); + + return false; +} + +static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, + unsigned int *offset, unsigned int *len, + struct splice_pipe_desc *spd, struct sock *sk) +{ + int seg; + + /* map the linear part : + * If skb->head_frag is set, this 'linear' part is backed by a + * fragment, and if the head is not shared with any clones then + * we can avoid a copy since we own the head portion of this page. + */ + if (__splice_segment(virt_to_page(skb->data), + (unsigned long) skb->data & (PAGE_SIZE - 1), + skb_headlen(skb), + offset, len, spd, + skb_head_is_locked(skb), + sk, pipe)) + return true; + + /* + * then map the fragments + */ + for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { + const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; + + if (__splice_segment(skb_frag_page(f), + f->page_offset, skb_frag_size(f), + offset, len, spd, false, sk, pipe)) + return true; + } + + return false; +} + +static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) +{ + put_page(spd->pages[i]); +} + +int skb_splice_bits(struct sk_buff *skb, unsigned int offset, + struct pipe_inode_info *pipe, unsigned int tlen, + unsigned int flags) +{ + struct partial_page partial[MAX_SKB_FRAGS]; + struct page *pages[MAX_SKB_FRAGS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .nr_pages_max = MAX_SKB_FRAGS, + .flags = flags, + .ops = &nosteal_pipe_buf_ops, + .spd_release = sock_spd_release, + }; + struct sk_buff *frag_iter; + struct sock *sk = skb->sk; + int ret = 0; + + /* + * __skb_splice_bits() only fails if the output has no room left, + * so no point in going over the frag_list for the error case. + */ + if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) + goto done; + else if (!tlen) + goto done; + + /* + * now see if we have a frag_list to map + */ + skb_walk_frags(skb, frag_iter) { + if (!tlen) + break; + if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) + break; + } + +done: + if (spd.nr_pages) { + /* + * Drop the socket lock, otherwise we have reverse + * locking dependencies between sk_lock and i_mutex + * here as compared to sendfile(). We enter here + * with the socket lock held, and splice_to_pipe() will + * grab the pipe inode lock. For sendfile() emulation, + * we call into ->sendpage() with the i_mutex lock held + * and networking will grab the socket lock. 
+ */ + release_sock(sk); + ret = splice_to_pipe(pipe, &spd); + lock_sock(sk); + } + + return ret; +} + +static int skb_prepare_for_shift(struct sk_buff *skb) +{ + return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} + +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) +{ + int from, to, merge, todo; + struct skb_frag_struct *fragfrom, *fragto; + + BUG_ON(shiftlen > skb->len); + BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ + + todo = shiftlen; + from = 0; + to = skb_shinfo(tgt)->nr_frags; + fragfrom = &skb_shinfo(skb)->frags[from]; + + /* Actual merge is delayed until the point when we know we can + * commit all, so that we don't have to undo partial changes + */ + if (!to || + !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), + fragfrom->page_offset)) { + merge = -1; + } else { + merge = to - 1; + + todo -= skb_frag_size(fragfrom); + if (todo < 0) { + if (skb_prepare_for_shift(skb) || + skb_prepare_for_shift(tgt)) + return 0; + + /* All previous frag pointers might be stale! */ + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + skb_frag_size_add(fragto, shiftlen); + skb_frag_size_sub(fragfrom, shiftlen); + fragfrom->page_offset += shiftlen; + + goto onlymerged; + } + + from++; + } + + /* Skip full, not-fitting skb to avoid expensive operations */ + if ((shiftlen == skb->len) && + (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) + return 0; + + if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) + return 0; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + if (to == MAX_SKB_FRAGS) + return 0; + + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= skb_frag_size(fragfrom)) { + *fragto = *fragfrom; + todo -= skb_frag_size(fragfrom); + from++; + to++; + + } else { + __skb_frag_ref(fragfrom); + fragto->page = fragfrom->page; + fragto->page_offset = fragfrom->page_offset; + skb_frag_size_set(fragto, todo); + + fragfrom->page_offset += todo; + skb_frag_size_sub(fragfrom, todo); + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + if (merge >= 0) { + fragfrom = &skb_shinfo(skb)->frags[0]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + skb_frag_size_add(fragto, skb_frag_size(fragfrom)); + __skb_frag_unref(fragfrom); + } + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + skb_shinfo(skb)->nr_frags = to; + + BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + +onlymerged: + /* Most likely the tgt won't ever need its checksum anymore, skb on + * the other hand might need it if it needs to be resent + */ + tgt->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_PARTIAL; + + /* Yak, is it really working this way? Some helper please? 
*/ + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + + +/* + * Bits from net/core/sock.c + */ + +#define _SK_MEM_PACKETS 256 +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) +#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) + +__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5bf86d..9764bf37 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,7 +59,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) sockets = proto_sockets_allocated_sum_positive(&tcp_prot); local_bh_enable(); +#ifndef CONFIG_INET_MODULE socket_seq_show(seq); +#endif seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, tcp_death_row.tw_count, sockets, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2c707a91..7215c207 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -958,7 +958,11 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { .exit = ipv4_sysctl_exit_net, }; +#ifdef CONFIG_INET_MODULE +int sysctl_ipv4_init(void) +#else static __init int sysctl_ipv4_init(void) +#endif { struct ctl_table_header *hdr; struct ctl_table *i; @@ -984,4 +988,6 @@ static __init int sysctl_ipv4_init(void) return 0; } +#ifndef CONFIG_INET_MODULE __initcall(sysctl_ipv4_init); +#endif diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c index 0cbbf100..2d441831 100644 --- a/net/ipv4/sysfs_net_ipv4.c +++ b/net/ipv4/sysfs_net_ipv4.c @@ -67,7 +67,11 @@ static struct attribute_group ipv4_attr_group = { .attrs = ipv4_attrs, }; +#ifdef CONFIG_INET_MODULE +int sysfs_ipv4_init(void) +#else static __init int sysfs_ipv4_init(void) +#endif { struct kobject *ipv4_kobject; int ret; @@ -85,4 +89,6 @@ static __init int sysfs_ipv4_init(void) return 0; } +#ifndef CONFIG_INET_MODULE subsys_initcall(sysfs_ipv4_init); +#endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8cc9b549..d9a929fb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1245,8 +1245,10 @@ out: out_nopush: release_sock(sk); +#ifndef CONFIG_INET_MODULE if (copied + copied_syn) uid_stat_tcp_snd(current_uid(), copied + copied_syn); +#endif return copied + copied_syn; do_fault: @@ -1551,7 +1553,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (copied > 0) { tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); +#ifndef CONFIG_INET_MODULE uid_stat_tcp_rcv(current_uid(), copied); +#endif } return copied; } @@ -1957,8 +1961,10 @@ skip_copy: release_sock(sk); +#ifndef CONFIG_INET_MODULE if (copied > 0) uid_stat_tcp_rcv(current_uid(), copied); +#endif return copied; out: @@ -1967,8 +1973,10 @@ out: recv_urg: err = tcp_recv_urg(sk, msg, len, flags); +#ifndef CONFIG_INET_MODULE if (err > 0) uid_stat_tcp_rcv(current_uid(), err); +#endif goto out; recv_sndq: diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 019c2389..49d655c0 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -137,12 +137,19 @@ int tcp_set_default_congestion_control(const char *name) return ret; } +#ifdef CONFIG_INET_MODULE +int tcp_congestion_init(void) +#else /* Set default value from kernel configuration at bootup */ static int __init tcp_congestion_default(void) +#endif { return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); } + +#ifndef CONFIG_INET_MODULE late_initcall(tcp_congestion_default); +#endif /* 
Build string with list of available congestion control values */ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8f7ef0ad..3ce673b9 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -80,7 +80,11 @@ void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) rcu_read_unlock(); } +#ifdef CONFIG_INET_MODULE +int tcp_fastopen_init(void) +#else static int __init tcp_fastopen_init(void) +#endif { __u8 key[TCP_FASTOPEN_KEY_LENGTH]; @@ -89,4 +93,6 @@ static int __init tcp_fastopen_init(void) return 0; } +#ifndef CONFIG_INET_MODULE late_initcall(tcp_fastopen_init); +#endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ef47406d..32bac336 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2891,7 +2891,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_MEMCG_KMEM +#if defined(CONFIG_MEMCG_KMEM) && !defined(CONFIG_INET_MODULE) .init_cgroup = tcp_init_cgroup, .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index da14436c..377a6f8e 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -280,9 +280,15 @@ static struct cftype tcp_files[] = { { } /* terminate */ }; +#ifdef CONFIG_INET_MODULE +int tcp_memcontrol_init(void) +#else static int __init tcp_memcontrol_init(void) +#endif { WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files)); return 0; } +#ifndef CONFIG_INET_MODULE __initcall(tcp_memcontrol_init); +#endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 58c5dbd5..94910949 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1413,7 +1413,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) is_udplite); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); +#ifndef CONFIG_INET_MODULE trace_udp_fail_queue_rcv_skb(rc, sk); +#endif return -1; } diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 470a9c00..da4db0e7 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,14 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o +ifeq ($(CONFIG_INET),m) +ipv6-y += ip6_icmp.o +ipv6-y += output_core.o protocol.o $(ipv6-offload) + +ipv6-y += inet6_hashtables.o +else obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o +endif diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d29ae19a..08a98f0e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -77,6 +77,8 @@ static inline int current_has_network(void) } #endif +#include "ip6_offload.h" + MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); @@ -850,6 +852,8 @@ static int __init inet6_init(void) BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); + ipv6_offload_init(); + /* Register the socket-side information for inet6_create. 
*/ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index 447a7fbd..9e95c0fc 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -20,7 +20,7 @@ static const struct net_offload dstopt_offload = { .flags = INET6_PROTO_GSO_EXTHDR, }; -int __init ipv6_exthdrs_offload_init(void) +int ipv6_exthdrs_offload_init(void) { int ret; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 71b766ee..e0130f35 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -264,7 +264,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { }, }; -static int __init ipv6_offload_init(void) +void ipv6_offload_init(void) { if (tcpv6_offload_init() < 0) @@ -275,7 +275,4 @@ static int __init ipv6_offload_init(void) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); dev_add_offload(&ipv6_packet_offload); - return 0; } - -fs_initcall(ipv6_offload_init); diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index 2e155c65..e474f886 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -11,6 +11,7 @@ #ifndef __ip6_offload_h #define __ip6_offload_h +void ipv6_offload_init(void); int ipv6_exthdrs_offload_init(void); int udp_offload_init(void); int tcpv6_offload_init(void); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0acad490..70f40a25 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1945,7 +1945,7 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_MEMCG_KMEM +#if defined(CONFIG_MEMCG_KMEM) && !defined(CONFIG_INET_MODULE) .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 2ec6bf6a..8642491c 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -89,7 +89,7 @@ static const struct net_offload tcpv6_offload = { }, }; -int __init tcpv6_offload_init(void) +int tcpv6_offload_init(void) { return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 2f65b022..3f66f0ac 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -125,7 +125,7 @@ static const struct net_offload udpv6_offload = { }, }; -int __init udp_offload_init(void) +int udp_offload_init(void) { return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); }
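
For reference, the af_inet.c hunks above follow a common pattern for code that must build either built-in or as a module: the initcall registrations (fs_initcall, late_initcall, __initcall, subsys_initcall) are compiled out under CONFIG_INET_MODULE, and the corresponding setup functions are instead called explicitly from the module's init path, with try_module_get(THIS_MODULE) pinning the module because unload is not implemented yet. Below is a minimal, self-contained sketch of that pattern, not the patch's actual code: it reuses the init helpers the patch declares (tcp_congestion_init(), tcp_fastopen_init(), sysctl_ipv4_init()), while the surrounding module boilerplate and error handling are illustrative assumptions.

/* Illustrative sketch only -- not part of the patch. Shows the general
 * shape of folding former initcalls into a module init function, as
 * af_inet.c does when CONFIG_INET=m (CONFIG_INET_MODULE defined).
 */
#include <linux/module.h>
#include <linux/init.h>

int tcp_congestion_init(void);	/* was late_initcall() when built in */
int tcp_fastopen_init(void);	/* was late_initcall() when built in */
int sysctl_ipv4_init(void);	/* was __initcall() when built in */

static int __init inet_sketch_init(void)
{
	int err;

	/* Run what the initcall machinery would have run for a
	 * built-in IP stack, in roughly the same order.
	 */
	err = tcp_congestion_init();
	if (err)
		return err;

	err = tcp_fastopen_init();
	if (err)
		return err;

	return sysctl_ipv4_init();
}

static void __exit inet_sketch_exit(void)
{
	/* Unreachable in practice: the patch takes an extra module
	 * reference at init time ("TODO: Implement unload logic" in
	 * af_inet.c), so the module cannot be removed.
	 */
}

module_init(inet_sketch_init);
module_exit(inet_sketch_exit);
MODULE_LICENSE("GPL");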