@@ -1,3 +1,22 @@
+linux (3.16.7-ckt20-1+deb8u2) jessie-security; urgency=medium
+
+ * [xen] Fix race conditions in back-end drivers (CVE-2015-8550, XSA-155)
+ * [xen] pciback: Fix state validation in MSI control operations
+ (CVE-2015-8551, CVE-2015-8552, XSA-157)
+ * pptp: verify sockaddr_len in pptp_bind() and pptp_connect() (CVE-2015-8569)
+ * bluetooth: Validate socket address length in sco_sock_bind() (CVE-2015-8575)
+ * ptrace: being capable wrt a process requires mapped uids/gids
+ (CVE-2015-8709)
+ * KEYS: Fix race between read and revoke (CVE-2015-7550)
+ * [x86] KVM: Reload pit counters for all channels when restoring state
+ (CVE-2015-7513)
+ * udp: properly support MSG_PEEK with truncated buffers
+ (Closes: #808293, regression in 3.16.7-ckt17)
+ * Revert "xhci: don't finish a TD if we get a short transfer event mid TD"
+ (Closes: #808602, #808953, regression in 3.16.7-ckt20)
+
+ -- Ben Hutchings <ben@decadent.org.uk> Sat, 02 Jan 2016 03:31:39 +0000
+
linux (3.16.7-ckt20-1+deb8u1) jessie-security; urgency=medium
[ Salvatore Bonaccorso ]
new file mode 100644
@@ -0,0 +1,22 @@
+From: "David S. Miller" <davem@davemloft.net>
+Date: Tue, 15 Dec 2015 15:39:08 -0500
+Subject: bluetooth: Validate socket address length in sco_sock_bind().
+Origin: https://git.kernel.org/linus/5233252fce714053f0151680933571a2da9cbfb4
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ net/bluetooth/sco.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/bluetooth/sco.c
++++ b/net/bluetooth/sco.c
+@@ -459,6 +459,9 @@ static int sco_sock_bind(struct socket *
+ if (!addr || addr->sa_family != AF_BLUETOOTH)
+ return -EINVAL;
+
++ if (addr_len < sizeof(struct sockaddr_sco))
++ return -EINVAL;
++
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_OPEN) {
new file mode 100644
@@ -0,0 +1,110 @@
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 18 Dec 2015 01:34:26 +0000
+Subject: KEYS: Fix race between read and revoke
+Origin: https://git.kernel.org/linus/b4a1b4f5047e4f54e194681125c74c0aa64d637d
+
+This fixes CVE-2015-7550.
+
+There's a race between keyctl_read() and keyctl_revoke(). If the revoke
+happens between keyctl_read() checking the validity of a key and the key's
+semaphore being taken, then the key type read method will see a revoked key.
+
+This causes a problem for the user-defined key type because it assumes in
+its read method that there will always be a payload in a non-revoked key
+and doesn't check for a NULL pointer.
+
+Fix this by making keyctl_read() check the validity of a key after taking
+semaphore instead of before.
+
+I think the bug was introduced with the original keyrings code.
+
+This was discovered by a multithreaded test program generated by syzkaller
+(http://github.com/google/syzkaller). Here's a cleaned up version:
+
+ #include <sys/types.h>
+ #include <keyutils.h>
+ #include <pthread.h>
+ void *thr0(void *arg)
+ {
+ key_serial_t key = (unsigned long)arg;
+ keyctl_revoke(key);
+ return 0;
+ }
+ void *thr1(void *arg)
+ {
+ key_serial_t key = (unsigned long)arg;
+ char buffer[16];
+ keyctl_read(key, buffer, 16);
+ return 0;
+ }
+ int main()
+ {
+ key_serial_t key = add_key("user", "%", "foo", 3, KEY_SPEC_USER_KEYRING);
+ pthread_t th[5];
+ pthread_create(&th[0], 0, thr0, (void *)(unsigned long)key);
+ pthread_create(&th[1], 0, thr1, (void *)(unsigned long)key);
+ pthread_create(&th[2], 0, thr0, (void *)(unsigned long)key);
+ pthread_create(&th[3], 0, thr1, (void *)(unsigned long)key);
+ pthread_join(th[0], 0);
+ pthread_join(th[1], 0);
+ pthread_join(th[2], 0);
+ pthread_join(th[3], 0);
+ return 0;
+ }
+
+Build as:
+
+ cc -o keyctl-race keyctl-race.c -lkeyutils -lpthread
+
+Run as:
+
+ while keyctl-race; do :; done
+
+as it may need several iterations to crash the kernel. The crash can be
+summarised as:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
+ IP: [<ffffffff81279b08>] user_read+0x56/0xa3
+ ...
+ Call Trace:
+ [<ffffffff81276aa9>] keyctl_read_key+0xb6/0xd7
+ [<ffffffff81277815>] SyS_keyctl+0x83/0xe0
+ [<ffffffff815dbb97>] entry_SYSCALL_64_fastpath+0x12/0x6f
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: James Morris <james.l.morris@oracle.com>
+---
+ security/keys/keyctl.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/security/keys/keyctl.c
++++ b/security/keys/keyctl.c
+@@ -744,16 +744,16 @@ long keyctl_read_key(key_serial_t keyid,
+
+ /* the key is probably readable - now try to read it */
+ can_read_key:
+- ret = key_validate(key);
+- if (ret == 0) {
+- ret = -EOPNOTSUPP;
+- if (key->type->read) {
+- /* read the data with the semaphore held (since we
+- * might sleep) */
+- down_read(&key->sem);
++ ret = -EOPNOTSUPP;
++ if (key->type->read) {
++ /* Read the data with the semaphore held (since we might sleep)
++ * to protect against the key being updated or revoked.
++ */
++ down_read(&key->sem);
++ ret = key_validate(key);
++ if (ret == 0)
+ ret = key->type->read(key, buffer, buflen);
+- up_read(&key->sem);
+- }
++ up_read(&key->sem);
+ }
+
+ error2:
new file mode 100644
@@ -0,0 +1,34 @@
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Mon, 14 Dec 2015 13:48:36 -0800
+Subject: pptp: verify sockaddr_len in pptp_bind() and pptp_connect()
+Origin: https://git.kernel.org/linus/09ccfd238e5a0e670d8178cf50180ea81ae09ae1
+
+Reported-by: Dmitry Vyukov <dvyukov@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ppp/pptp.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/ppp/pptp.c
++++ b/drivers/net/ppp/pptp.c
+@@ -420,6 +420,9 @@ static int pptp_bind(struct socket *sock
+ struct pptp_opt *opt = &po->proto.pptp;
+ int error = 0;
+
++ if (sockaddr_len < sizeof(struct sockaddr_pppox))
++ return -EINVAL;
++
+ lock_sock(sk);
+
+ opt->src_addr = sp->sa_addr.pptp;
+@@ -441,6 +444,9 @@ static int pptp_connect(struct socket *s
+ struct flowi4 fl4;
+ int error = 0;
+
++ if (sockaddr_len < sizeof(struct sockaddr_pppox))
++ return -EINVAL;
++
+ if (sp->sa_protocol != PX_PROTO_PPTP)
+ return -EINVAL;
+
new file mode 100644
@@ -0,0 +1,102 @@
+From: Jann Horn <jann@thejh.net>
+Subject: ptrace: being capable wrt a process requires mapped uids/gids
+Date: Sat, 26 Dec 2015 03:52:31 +0100
+Origin: https://lkml.org/lkml/2015/12/25/71
+
+ptrace_has_cap() checks whether the current process should be
+treated as having a certain capability for ptrace checks
+against another process. Until now, this was equivalent to
+has_ns_capability(current, target_ns, CAP_SYS_PTRACE).
+
+However, if a root-owned process wants to enter a user
+namespace for some reason without knowing who owns it and
+therefore can't change to the namespace owner's uid and gid
+before entering, as soon as it has entered the namespace,
+the namespace owner can attach to it via ptrace and thereby
+gain access to its uid and gid.
+
+While it is possible for the entering process to switch to
+the uid of a claimed namespace owner before entering,
+causing the attempt to enter to fail if the claimed uid is
+wrong, this doesn't solve the problem of determining an
+appropriate gid.
+
+With this change, the entering process can first enter the
+namespace and then safely inspect the namespace's
+properties, e.g. through /proc/self/{uid_map,gid_map},
+assuming that the namespace owner doesn't have access to
+uid 0.
+Changed in v2: The caller needs to be capable in the
+namespace into which tcred's uids/gids can be mapped.
+
+Signed-off-by: Jann Horn <jann@thejh.net>
+---
+ kernel/ptrace.c | 33 ++++++++++++++++++++++++++++-----
+ 1 file changed, 28 insertions(+), 5 deletions(-)
+
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -20,6 +20,7 @@
+ #include <linux/uio.h>
+ #include <linux/audit.h>
+ #include <linux/pid_namespace.h>
++#include <linux/user_namespace.h>
+ #include <linux/syscalls.h>
+ #include <linux/uaccess.h>
+ #include <linux/regset.h>
+@@ -213,12 +214,34 @@ static int ptrace_check_attach(struct ta
+ return ret;
+ }
+
+-static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
++static bool ptrace_has_cap(const struct cred *tcred, unsigned int mode)
+ {
++ struct user_namespace *tns = tcred->user_ns;
++
++ /* When a root-owned process enters a user namespace created by a
++ * malicious user, the user shouldn't be able to execute code under
++ * uid 0 by attaching to the root-owned process via ptrace.
++ * Therefore, similar to the capable_wrt_inode_uidgid() check,
++ * verify that all the uids and gids of the target process are
++ * mapped into a namespace below the current one in which the caller
++ * is capable.
++ * No fsuid/fsgid check because __ptrace_may_access doesn't do it
++ * either.
++ */
++ while (
++ !kuid_has_mapping(tns, tcred->euid) ||
++ !kuid_has_mapping(tns, tcred->suid) ||
++ !kuid_has_mapping(tns, tcred->uid) ||
++ !kgid_has_mapping(tns, tcred->egid) ||
++ !kgid_has_mapping(tns, tcred->sgid) ||
++ !kgid_has_mapping(tns, tcred->gid)) {
++ tns = tns->parent;
++ }
++
+ if (mode & PTRACE_MODE_NOAUDIT)
+- return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
++ return has_ns_capability_noaudit(current, tns, CAP_SYS_PTRACE);
+ else
+- return has_ns_capability(current, ns, CAP_SYS_PTRACE);
++ return has_ns_capability(current, tns, CAP_SYS_PTRACE);
+ }
+
+ /* Returns 0 on success, -errno on denial. */
+@@ -247,7 +270,7 @@ static int __ptrace_may_access(struct ta
+ gid_eq(cred->gid, tcred->sgid) &&
+ gid_eq(cred->gid, tcred->gid))
+ goto ok;
+- if (ptrace_has_cap(tcred->user_ns, mode))
++ if (ptrace_has_cap(tcred, mode))
+ goto ok;
+ rcu_read_unlock();
+ return -EPERM;
+@@ -258,7 +281,7 @@ ok:
+ dumpable = get_dumpable(task->mm);
+ rcu_read_lock();
+ if (dumpable != SUID_DUMP_USER &&
+- !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
++ !ptrace_has_cap(__task_cred(task), mode)) {
+ rcu_read_unlock();
+ return -EPERM;
+ }
new file mode 100644
@@ -0,0 +1,95 @@
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sat, 02 Jan 2016 01:11:55 +0000
+Subject: Revert "net: add length argument to skb_copy_and_csum_datagram_iovec"
+Bug-Debian: https://bugs.debian.org/808293
+
+This reverts commit fa89ae5548ed282f0ceb4660b3b93e4e2ee875f3. That fixed
+the problem of buffer over-reads introduced by backporting commit
+89c22d8c3b27 ("net: Fix skb csum races when peeking"), but resulted in
+incorrect checksumming for short reads. It will be replaced with a
+complete fix.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2525,7 +2525,7 @@
+ int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
+ struct iovec *to, int size);
+ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
+- struct iovec *iov, int len);
++ struct iovec *iov);
+ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+ const struct iovec *from, int from_offset,
+ int len);
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -818,7 +818,6 @@
+ * @skb: skbuff
+ * @hlen: hardware length
+ * @iov: io vector
+- * @len: amount of data to copy from skb to iov
+ *
+ * Caller _must_ check that skb will fit to this iovec.
+ *
+@@ -828,14 +827,11 @@
+ * can be modified!
+ */
+ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
+- int hlen, struct iovec *iov, int len)
++ int hlen, struct iovec *iov)
+ {
+ __wsum csum;
+ int chunk = skb->len - hlen;
+
+- if (chunk > len)
+- chunk = len;
+-
+ if (!chunk)
+ return 0;
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4906,7 +4906,7 @@
+ err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
+ else
+ err = skb_copy_and_csum_datagram_iovec(skb, hlen,
+- tp->ucopy.iov, chunk);
++ tp->ucopy.iov);
+
+ if (!err) {
+ tp->ucopy.len -= chunk;
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1307,7 +1307,7 @@
+ else {
+ err = skb_copy_and_csum_datagram_iovec(skb,
+ sizeof(struct udphdr),
+- msg->msg_iov, copied);
++ msg->msg_iov);
+
+ if (err == -EINVAL)
+ goto csum_copy_err;
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -492,7 +492,7 @@
+ goto csum_copy_err;
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ } else {
+- err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov, copied);
++ err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
+ if (err == -EINVAL)
+ goto csum_copy_err;
+ }
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -428,8 +428,7 @@
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
+ msg->msg_iov, copied);
+ else {
+- err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr),
+- msg->msg_iov, copied);
++ err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
+ if (err == -EINVAL)
+ goto csum_copy_err;
+ }
new file mode 100644
@@ -0,0 +1,37 @@
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sat, 02 Jan 2016 03:03:27 +0000
+Subject: Revert "xhci: don't finish a TD if we get a short transfer event mid TD"
+Bug-Debian: https://bugs.debian.org/808602
+Bug-Debian: https://bugs.debian.org/808953
+
+This reverts commit dbd81f75b991c972970764ba75287cbbc8f066be, which
+was commit e210c422b6fdd2dc123bedc588f399aefd8bf9de upstream. It
+caused serious regressions as referenced above.
+
+---
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -2191,10 +2191,6 @@ static int process_bulk_intr_td(struct x
+ EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)));
+ /* Fast path - was this the last TRB in the TD for this URB? */
+ if (event_trb == td->last_trb) {
+- if (td->urb_length_set && trb_comp_code == COMP_SHORT_TX)
+- return finish_td(xhci, td, event_trb, event, ep,
+- status, false);
+-
+ if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
+ td->urb->actual_length =
+ td->urb->transfer_buffer_length -
+@@ -2246,12 +2242,6 @@ static int process_bulk_intr_td(struct x
+ td->urb->actual_length +=
+ TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
+ EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
+-
+- if (trb_comp_code == COMP_SHORT_TX) {
+- xhci_dbg(xhci, "mid bulk/intr SP, wait for last TRB event\n");
+- td->urb_length_set = true;
+- return 0;
+- }
+ }
+
+ return finish_td(xhci, td, event_trb, event, ep, status, false);
new file mode 100644
@@ -0,0 +1,88 @@
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 30 Dec 2015 08:51:12 -0500
+Subject: udp: properly support MSG_PEEK with truncated buffers
+Bug-Debian: https://bugs.debian.org/808293
+Origin: http://article.gmane.org/gmane.linux.kernel.stable/159132
+
+Backport of this upstream commit into stable kernels :
+89c22d8c3b27 ("net: Fix skb csum races when peeking")
+exposed a bug in udp stack vs MSG_PEEK support, when user provides
+a buffer smaller than skb payload.
+
+In this case,
+skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr),
+ msg->msg_iov);
+returns -EFAULT.
+
+This bug does not happen in upstream kernels since Al Viro did a great
+job to replace this into :
+skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg);
+This variant is safe vs short buffers.
+
+For the time being, instead of reverting Herbert Xu's patch and adding back
+skb->ip_summed invalid changes, simply store the result of
+udp_lib_checksum_complete() so that we avoid computing the checksum a
+second time, and avoid the problematic
+skb_copy_and_csum_datagram_iovec() call.
+
+This patch can be applied on recent kernels as it avoids a double
+checksumming, then backported to stable kernels as a bug fix.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ net/ipv4/udp.c | 6 ++++--
+ net/ipv6/udp.c | 6 ++++--
+ 2 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1274,6 +1274,7 @@ int udp_recvmsg(struct kiocb *iocb, stru
+ int peeked, off = 0;
+ int err;
+ int is_udplite = IS_UDPLITE(sk);
++ bool checksum_valid = false;
+ bool slow;
+
+ if (flags & MSG_ERRQUEUE)
+@@ -1299,11 +1300,12 @@ try_again:
+ */
+
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+- if (udp_lib_checksum_complete(skb))
++ checksum_valid = !udp_lib_checksum_complete(skb);
++ if (!checksum_valid)
+ goto csum_copy_err;
+ }
+
+- if (skb_csum_unnecessary(skb))
++ if (checksum_valid || skb_csum_unnecessary(skb))
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
+ msg->msg_iov, copied);
+ else {
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -389,6 +389,7 @@ int udpv6_recvmsg(struct kiocb *iocb, st
+ int peeked, off = 0;
+ int err;
+ int is_udplite = IS_UDPLITE(sk);
++ bool checksum_valid = false;
+ int is_udp4;
+ bool slow;
+
+@@ -420,11 +421,12 @@ try_again:
+ */
+
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+- if (udp_lib_checksum_complete(skb))
++ checksum_valid = !udp_lib_checksum_complete(skb);
++ if (!checksum_valid)
+ goto csum_copy_err;
+ }
+
+- if (skb_csum_unnecessary(skb))
++ if (checksum_valid || skb_csum_unnecessary(skb))
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
+ msg->msg_iov, copied);
+ else {
new file mode 100644
@@ -0,0 +1,52 @@
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Fri, 30 Oct 2015 14:58:08 +0000
+Subject: [1/7] xen: Add RING_COPY_REQUEST()
+Origin: https://git.kernel.org/linus/454d5d882c7e412b840e3c99010fe81a9862f6fb
+
+Using RING_GET_REQUEST() on a shared ring is easy to use incorrectly
+(i.e., by not considering that the other end may alter the data in the
+shared ring while it is being inspected). Safe usage of a request
+generally requires taking a local copy.
+
+Provide a RING_COPY_REQUEST() macro to use instead of
+RING_GET_REQUEST() and an open-coded memcpy(). This takes care of
+ensuring that the copy is done correctly regardless of any possible
+compiler optimizations.
+
+Use a volatile source to prevent the compiler from reordering or
+omitting the copy.
+
+This is part of XSA155.
+
+CC: stable@vger.kernel.org
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ include/xen/interface/io/ring.h | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
+index 7d28aff..7dc685b4 100644
+--- a/include/xen/interface/io/ring.h
++++ b/include/xen/interface/io/ring.h
+@@ -181,6 +181,20 @@ struct __name##_back_ring { \
+ #define RING_GET_REQUEST(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
++/*
++ * Get a local copy of a request.
++ *
++ * Use this in preference to RING_GET_REQUEST() so all processing is
++ * done on a local copy that cannot be modified by the other end.
++ *
++ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
++ * to be ineffective where _req is a struct which consists of only bitfields.
++ */
++#define RING_COPY_REQUEST(_r, _idx, _req) do { \
++ /* Use volatile to force the copy into _req. */ \
++ *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \
++} while (0)
++
+ #define RING_GET_RESPONSE(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
new file mode 100644
@@ -0,0 +1,48 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 3 Nov 2015 16:34:09 +0000
+Subject: [4/7] xen-blkback: only read request operation from shared ring once
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Origin: https://git.kernel.org/linus/1f13d75ccb806260079e0679d55d9253e370ec8a
+
+A compiler may load a switch statement value multiple times, which could
+be bad when the value is in memory shared with the frontend.
+
+When converting a non-native request to a native one, ensure that
+src->operation is only loaded once by using READ_ONCE().
+
+This is part of XSA155.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/block/xen-blkback/common.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/block/xen-blkback/common.h
++++ b/drivers/block/xen-blkback/common.h
+@@ -391,8 +391,8 @@ static inline void blkif_get_x86_32_req(
+ struct blkif_x86_32_request *src)
+ {
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
+- dst->operation = src->operation;
+- switch (src->operation) {
++ dst->operation = READ_ONCE(src->operation);
++ switch (dst->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+@@ -439,8 +439,8 @@ static inline void blkif_get_x86_64_req(
+ struct blkif_x86_64_request *src)
+ {
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
+- dst->operation = src->operation;
+- switch (src->operation) {
++ dst->operation = READ_ONCE(src->operation);
++ switch (dst->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
new file mode 100644
@@ -0,0 +1,63 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 3 Nov 2015 16:40:43 +0000
+Subject: [5/7] xen-blkback: read from indirect descriptors only once
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Origin: https://git.kernel.org/linus/18779149101c0dd43ded43669ae2a92d21b6f9cb
+
+Since indirect descriptors are in memory shared with the frontend, the
+frontend could alter the first_sect and last_sect values after they have
+been validated but before they are recorded in the request. This may
+result in I/O requests that overflow the foreign page, possibly
+overwriting local pages when the I/O request is executed.
+
+When parsing indirect descriptors, only read first_sect and last_sect
+once.
+
+This is part of XSA155.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+[bwh: For 4.3, s/XEN_PAGE_SIZE/PAGE_SIZE/]
+---
+ drivers/block/xen-blkback/blkback.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/drivers/block/xen-blkback/blkback.c
++++ b/drivers/block/xen-blkback/blkback.c
+@@ -861,6 +861,8 @@ static int xen_blkbk_parse_indirect(stru
+ goto unmap;
+
+ for (n = 0, i = 0; n < nseg; n++) {
++ uint8_t first_sect, last_sect;
++
+ if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
+ /* Map indirect segments */
+ if (segments)
+@@ -868,15 +870,18 @@ static int xen_blkbk_parse_indirect(stru
+ segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
+ }
+ i = n % SEGS_PER_INDIRECT_FRAME;
++
+ pending_req->segments[n]->gref = segments[i].gref;
+- seg[n].nsec = segments[i].last_sect -
+- segments[i].first_sect + 1;
+- seg[n].offset = (segments[i].first_sect << 9);
+- if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
+- (segments[i].last_sect < segments[i].first_sect)) {
++
++ first_sect = READ_ONCE(segments[i].first_sect);
++ last_sect = READ_ONCE(segments[i].last_sect);
++ if (last_sect >= (PAGE_SIZE >> 9) || last_sect < first_sect) {
+ rc = -EINVAL;
+ goto unmap;
+ }
++
++ seg[n].nsec = last_sect - first_sect + 1;
++ seg[n].offset = first_sect << 9;
+ preq->nr_sects += seg[n].nsec;
+ }
+
new file mode 100644
@@ -0,0 +1,35 @@
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Fri, 30 Oct 2015 15:16:01 +0000
+Subject: [2/7] xen-netback: don't use last request to determine minimum Tx
+ credit
+Origin: https://git.kernel.org/linus/0f589967a73f1f30ab4ac4dd9ce0bb399b4d6357
+
+The last request transmitted by the guest gives no indication about the
+minimum amount of credit that the guest might need to send a packet
+since the last packet might have been a small one.
+
+Instead allow for the worst case 128 KiB packet.
+
+This is part of XSA155.
+
+CC: stable@vger.kernel.org
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/net/xen-netback/netback.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -809,9 +809,7 @@ static void tx_add_credit(struct xenvif_
+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+ * Otherwise the interface can seize up due to insufficient credit.
+ */
+- max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
+- max_burst = min(max_burst, 131072UL);
+- max_burst = max(max_burst, queue->credit_bytes);
++ max_burst = max(131072UL, queue->credit_bytes);
+
+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
+ max_credit = queue->remaining_credit + queue->credit_bytes;
new file mode 100644
@@ -0,0 +1,126 @@
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Fri, 30 Oct 2015 15:17:06 +0000
+Subject: [3/7] xen-netback: use RING_COPY_REQUEST() throughout
+Origin: https://git.kernel.org/linus/68a33bfd8403e4e22847165d149823a2e0e67c9c
+
+Instead of open-coding memcpy()s and directly accessing Tx and Rx
+requests, use the new RING_COPY_REQUEST() that ensures the local copy
+is correct.
+
+This is more than is strictly necessary for guest Rx requests since
+only the id and gref fields are used and it is harmless if the
+frontend modifies these.
+
+This is part of XSA155.
+
+CC: stable@vger.kernel.org
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/net/xen-netback/netback.c | 30 ++++++++++++++----------------
+ 1 file changed, 14 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -288,18 +288,18 @@ static struct xenvif_rx_meta *get_next_r
+ struct netrx_pending_operations *npo)
+ {
+ struct xenvif_rx_meta *meta;
+- struct xen_netif_rx_request *req;
++ struct xen_netif_rx_request req;
+
+- req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
++ RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
+ meta->gso_size = 0;
+ meta->size = 0;
+- meta->id = req->id;
++ meta->id = req.id;
+
+ npo->copy_off = 0;
+- npo->copy_gref = req->gref;
++ npo->copy_gref = req.gref;
+
+ return meta;
+ }
+@@ -450,7 +450,7 @@ static int xenvif_gop_skb(struct sk_buff
+ struct xenvif *vif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+- struct xen_netif_rx_request *req;
++ struct xen_netif_rx_request req;
+ struct xenvif_rx_meta *meta;
+ unsigned char *data;
+ int head = 1;
+@@ -471,15 +471,15 @@ static int xenvif_gop_skb(struct sk_buff
+
+ /* Set up a GSO prefix descriptor, if necessary */
+ if ((1 << gso_type) & vif->gso_prefix_mask) {
+- req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
++ RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_type = gso_type;
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ meta->size = 0;
+- meta->id = req->id;
++ meta->id = req.id;
+ }
+
+- req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
++ RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+ meta = npo->meta + npo->meta_prod++;
+
+ if ((1 << gso_type) & vif->gso_mask) {
+@@ -491,9 +491,9 @@ static int xenvif_gop_skb(struct sk_buff
+ }
+
+ meta->size = 0;
+- meta->id = req->id;
++ meta->id = req.id;
+ npo->copy_off = 0;
+- npo->copy_gref = req->gref;
++ npo->copy_gref = req.gref;
+
+ data = skb->data;
+ while (data < skb_tail_pointer(skb)) {
+@@ -838,7 +838,7 @@ static void xenvif_tx_err(struct xenvif_
+ spin_unlock_irqrestore(&queue->response_lock, flags);
+ if (cons == end)
+ break;
+- txp = RING_GET_REQUEST(&queue->tx, cons++);
++ RING_COPY_REQUEST(&queue->tx, cons++, txp);
+ } while (1);
+ queue->tx.req_cons = cons;
+ }
+@@ -905,8 +905,7 @@ static int xenvif_count_requests(struct
+ if (drop_err)
+ txp = &dropped_tx;
+
+- memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
+- sizeof(*txp));
++ RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
+
+ /* If the guest submitted a frame >= 64 KiB then
+ * first->size overflowed and following slots will
+@@ -1258,8 +1257,7 @@ static int xenvif_get_extras(struct xenv
+ return -EBADR;
+ }
+
+- memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
+- sizeof(extra));
++ RING_COPY_REQUEST(&queue->tx, cons, &extra);
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ queue->tx.req_cons = ++cons;
+@@ -1395,7 +1393,7 @@ static void xenvif_tx_build_gops(struct
+
+ idx = queue->tx.req_cons;
+ rmb(); /* Ensure that we see the request before we copy it. */
+- memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));
++ RING_COPY_REQUEST(&queue->tx, idx, &txreq);
+
+ /* Credit-based scheduling. */
+ if (txreq.size > queue->remaining_credit &&
new file mode 100644
@@ -0,0 +1,77 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Mon, 2 Nov 2015 17:24:08 -0500
+Subject: [3/5] xen/pciback: Do not install an IRQ handler for MSI interrupts.
+Origin: https://git.kernel.org/linus/a396f3a210c3a61e94d6b87ec05a75d0be2a60d0
+
+Otherwise a guest can subvert the generic MSI code to trigger
+a BUG_ON condition during MSI interrupt freeing:
+
+ for (i = 0; i < entry->nvec_used; i++)
+ BUG_ON(irq_has_action(entry->irq + i));
+
+Xen PCI backend installs an IRQ handler (request_irq) for
+the dev->irq whenever the guest writes PCI_COMMAND_MEMORY
+(or PCI_COMMAND_IO) to the PCI_COMMAND register. This is
+done in case the device has legacy interrupts the GSI line
+is shared by the backend devices.
+
+To subvert the backend the guest needs to make the backend
+to change the dev->irq from the GSI to the MSI interrupt line,
+make the backend allocate an interrupt handler, and then command
+the backend to free the MSI interrupt and hit the BUG_ON.
+
+Since the backend only calls 'request_irq' when the guest
+writes to the PCI_COMMAND register the guest needs to call
+XEN_PCI_OP_enable_msi before any other operation. This will
+cause the generic MSI code to setup an MSI entry and
+populate dev->irq with the new PIRQ value.
+
+Then the guest can write to PCI_COMMAND PCI_COMMAND_MEMORY
+and cause the backend to setup an IRQ handler for dev->irq
+(which instead of the GSI value has the MSI pirq). See
+'xen_pcibk_control_isr'.
+
+Then the guest disables the MSI: XEN_PCI_OP_disable_msi
+which ends up triggering the BUG_ON condition in 'free_msi_irqs'
+as there is an IRQ handler for the entry->irq (dev->irq).
+
+Note that this cannot be done using MSI-X as the generic
+code does not over-write dev->irq with the MSI-X PIRQ values.
+
+The patch inhibits setting up the IRQ handler if MSI or
+MSI-X (for symmetry reasons) code had been called successfully.
+
+P.S.
+Xen PCIBack when it sets up the device for the guest consumption
+ends up writing 0 to the PCI_COMMAND (see xen_pcibk_reset_device).
+XSA-120 addendum patch removed that - however when upstreaming said
+addendum we found that it caused issues with qemu upstream. That
+has now been fixed in qemu upstream.
+
+This is part of XSA-157
+
+CC: stable@vger.kernel.org
+Reviewed-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/xen/xen-pciback/pciback_ops.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
+index 029f33d..d0696ce 100644
+--- a/drivers/xen/xen-pciback/pciback_ops.c
++++ b/drivers/xen/xen-pciback/pciback_ops.c
+@@ -70,6 +70,13 @@ static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
+ enable ? "enable" : "disable");
+
+ if (enable) {
++ /*
++ * The MSI or MSI-X should not have an IRQ handler. Otherwise
++ * if the guest terminates we BUG_ON in free_msi_irqs.
++ */
++ if (dev->msi_enabled || dev->msix_enabled)
++ goto out;
++
+ rc = request_irq(dev_data->irq,
+ xen_pcibk_guest_interrupt, IRQF_SHARED,
+ dev_data->irq_name, dev);
new file mode 100644
@@ -0,0 +1,61 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Mon, 2 Nov 2015 18:13:27 -0500
+Subject: [5/5] xen/pciback: Don't allow MSI-X ops if PCI_COMMAND_MEMORY is not
+ set.
+Origin: https://git.kernel.org/linus/408fb0e5aa7fda0059db282ff58c3b2a4278baa0
+
+commit f598282f51 ("PCI: Fix the NIU MSI-X problem in a better way")
+teaches us that dealing with MSI-X can be troublesome.
+
+Further checks in the MSI-X architecture show that if the
+PCI_COMMAND_MEMORY bit is turned off in the PCI_COMMAND we
+may not be able to access the BARs (since they are memory regions).
+
+Since the MSI-X tables are located in there.. that can lead
+to us causing PCIe errors. Inhibit us performing any
+operation on the MSI-X unless the MEMORY bit is set.
+
+Note that Xen hypervisor with:
+"x86/MSI-X: access MSI-X table only after having enabled MSI-X"
+will return:
+xen_pciback: 0000:0a:00.1: error -6 enabling MSI-X for guest 3!
+
+When the generic MSI code tries to setup the PIRQ without
+MEMORY bit set. Which means with later versions of Xen
+(4.6) this patch is not necessary.
+
+This is part of XSA-157
+
+CC: stable@vger.kernel.org
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/xen/xen-pciback/pciback_ops.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
+index 4ee5fc0..73dafdc 100644
+--- a/drivers/xen/xen-pciback/pciback_ops.c
++++ b/drivers/xen/xen-pciback/pciback_ops.c
+@@ -212,6 +212,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
+ struct xen_pcibk_dev_data *dev_data;
+ int i, result;
+ struct msix_entry *entries;
++ u16 cmd;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
+@@ -223,7 +224,12 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
+ if (dev->msix_enabled)
+ return -EALREADY;
+
+- if (dev->msi_enabled)
++ /*
++ * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
++ * to access the BARs where the MSI-X entries reside.
++ */
++ pci_read_config_word(dev, PCI_COMMAND, &cmd);
++ if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
+ return -ENXIO;
+
+ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
new file mode 100644
@@ -0,0 +1,102 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Wed, 1 Apr 2015 10:49:47 -0400
+Subject: [4/5] xen/pciback: For XEN_PCI_OP_disable_msi[|x] only disable if
+ device has MSI(X) enabled.
+Origin: https://git.kernel.org/linus/7cfb905b9638982862f0331b36ccaaca5d383b49
+
+Otherwise just continue on, returning the same values as
+previously (return of 0, and op->result has the PIRQ value).
+
+This does not change the behavior of XEN_PCI_OP_disable_msi[|x].
+
+The pci_disable_msi or pci_disable_msix have the checks for
+msi_enabled or msix_enabled so they will error out immediately.
+
+However the guest can still call these operations and cause
+us to disable the 'ack_intr'. That means the backend IRQ handler
+for the legacy interrupt will not respond to interrupts anymore.
+
+This will lead (if the device is causing an interrupt storm)
+to the Linux generic code disabling the interrupt line.
+
+Naturally this will only happen if the device in question
+is plugged in on the motherboard on shared level interrupt GSI.
+
+This is part of XSA-157
+
+CC: stable@vger.kernel.org
+Reviewed-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/xen/xen-pciback/pciback_ops.c | 33 ++++++++++++++++++++-------------
+ 1 file changed, 20 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
+index d0696ce..4ee5fc0 100644
+--- a/drivers/xen/xen-pciback/pciback_ops.c
++++ b/drivers/xen/xen-pciback/pciback_ops.c
+@@ -185,20 +185,23 @@ static
+ int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
+- struct xen_pcibk_dev_data *dev_data;
+-
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
+ pci_name(dev));
+- pci_disable_msi(dev);
+
++ if (dev->msi_enabled) {
++ struct xen_pcibk_dev_data *dev_data;
++
++ pci_disable_msi(dev);
++
++ dev_data = pci_get_drvdata(dev);
++ if (dev_data)
++ dev_data->ack_intr = 1;
++ }
+ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
+ op->value);
+- dev_data = pci_get_drvdata(dev);
+- if (dev_data)
+- dev_data->ack_intr = 1;
+ return 0;
+ }
+
+@@ -264,23 +267,27 @@ static
+ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
+- struct xen_pcibk_dev_data *dev_data;
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
+ pci_name(dev));
+- pci_disable_msix(dev);
+
++ if (dev->msix_enabled) {
++ struct xen_pcibk_dev_data *dev_data;
++
++ pci_disable_msix(dev);
++
++ dev_data = pci_get_drvdata(dev);
++ if (dev_data)
++ dev_data->ack_intr = 1;
++ }
+ /*
+ * SR-IOV devices (which don't have any legacy IRQ) have
+ * an undefined IRQ value of zero.
+ */
+ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+ if (unlikely(verbose_request))
+- printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
+- op->value);
+- dev_data = pci_get_drvdata(dev);
+- if (dev_data)
+- dev_data->ack_intr = 1;
++ printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
++ pci_name(dev), op->value);
+ return 0;
+ }
+ #endif
new file mode 100644
@@ -0,0 +1,58 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Fri, 3 Apr 2015 11:08:22 -0400
+Subject: [1/5] xen/pciback: Return error on XEN_PCI_OP_enable_msi when device
+ has MSI or MSI-X enabled
+Origin: https://git.kernel.org/linus/56441f3c8e5bd45aab10dd9f8c505dd4bec03b0d
+
+The guest sequence of:
+
+ a) XEN_PCI_OP_enable_msi
+ b) XEN_PCI_OP_enable_msi
+ c) XEN_PCI_OP_disable_msi
+
+results in hitting a BUG_ON condition in the msi.c code.
+
+The MSI code uses a dev->msi_list to which it adds MSI entries.
+Under the above conditions a BUG_ON() can be hit. The device
+passed in the guest MUST have MSI capability.
+
+The a) adds the entry to the dev->msi_list and sets msi_enabled.
+The b) adds a second entry but adding it to SysFS fails (duplicate entry)
+and deletes all of the entries from msi_list and returns (with msi_enabled
+still set). c) pci_disable_msi passes the msi_enabled checks and hits:
+
+BUG_ON(list_empty(dev_to_msi_list(&dev->dev)));
+
+and blows up.
+
+The patch adds a simple check in the XEN_PCI_OP_enable_msi to guard
+against that. The check for msix_enabled is not strictly necessary.
+
+This is part of XSA-157.
+
+CC: stable@vger.kernel.org
+Reviewed-by: David Vrabel <david.vrabel@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/xen/xen-pciback/pciback_ops.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
+index a0e0e3e..8bfb87c 100644
+--- a/drivers/xen/xen-pciback/pciback_ops.c
++++ b/drivers/xen/xen-pciback/pciback_ops.c
+@@ -144,7 +144,12 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
+
+- status = pci_enable_msi(dev);
++ if (dev->msi_enabled)
++ status = -EALREADY;
++ else if (dev->msix_enabled)
++ status = -ENXIO;
++ else
++ status = pci_enable_msi(dev);
+
+ if (status) {
+ pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
new file mode 100644
@@ -0,0 +1,60 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Mon, 2 Nov 2015 18:07:44 -0500
+Subject: [2/5] xen/pciback: Return error on XEN_PCI_OP_enable_msix when device
+ has MSI or MSI-X enabled
+Origin: https://git.kernel.org/linus/5e0ce1455c09dd61d029b8ad45d82e1ac0b6c4c9
+
+The guest sequence of:
+
+ a) XEN_PCI_OP_enable_msix
+ b) XEN_PCI_OP_enable_msix
+
+results in hitting a NULL pointer due to using freed pointers.
+
+The device passed in the guest MUST have MSI-X capability.
+
+The a) constructs a SysFS representation of MSI and MSI groups.
+The b) adds a second set of them but adding them to SysFS fails (duplicate entry).
+'populate_msi_sysfs' frees the newly allocated msi_irq_groups (note that
+in a) pdev->msi_irq_groups is still set) and also frees ALL of the
+MSI-X entries of the device (the ones allocated in step a) and b)).
+
+The unwind code: 'free_msi_irqs' deletes all the entries and tries to
+delete the pdev->msi_irq_groups (which hasn't been set to NULL).
+However the pointers in the SysFS are already freed and we hit a
+NULL pointer further on when 'strlen' is attempted on a freed pointer.
+
+The patch adds a simple check in the XEN_PCI_OP_enable_msix to guard
+against that. The check for msi_enabled is not strictly necessary.
+
+This is part of XSA-157
+
+CC: stable@vger.kernel.org
+Reviewed-by: David Vrabel <david.vrabel@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/xen/xen-pciback/pciback_ops.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
+index 8bfb87c..029f33d 100644
+--- a/drivers/xen/xen-pciback/pciback_ops.c
++++ b/drivers/xen/xen-pciback/pciback_ops.c
+@@ -206,9 +206,16 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
+ pci_name(dev));
++
+ if (op->value > SH_INFO_MAX_VEC)
+ return -EINVAL;
+
++ if (dev->msix_enabled)
++ return -EALREADY;
++
++ if (dev->msi_enabled)
++ return -ENXIO;
++
+ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
+ if (entries == NULL)
+ return -ENOMEM;
new file mode 100644
@@ -0,0 +1,77 @@
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Mon, 16 Nov 2015 12:40:48 -0500
+Subject: [7/7] xen/pciback: Save xen_pci_op commands before processing it
+Origin: https://git.kernel.org/linus/8135cf8b092723dbfcc611fe6fdcb3a36c9951c5
+
+Double fetch vulnerabilities happen when a variable is
+fetched twice from shared memory but a security check is only
+performed the first time.
+
+The xen_pcibk_do_op function performs a switch statement on the op->cmd
+value which is stored in shared memory. Interestingly this can result
+in a double fetch vulnerability depending on the performed compiler
+optimization.
+
+This patch fixes it by saving the xen_pci_op command before
+processing it. We also use 'barrier' to make sure that the
+compiler does not perform any optimization.
+
+This is part of XSA-155.
+
+CC: stable@vger.kernel.org
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Jan Beulich <JBeulich@suse.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+---
+ drivers/xen/xen-pciback/pciback.h | 1 +
+ drivers/xen/xen-pciback/pciback_ops.c | 15 ++++++++++++++-
+ 2 files changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
+index 58e38d5..4d529f3 100644
+--- a/drivers/xen/xen-pciback/pciback.h
++++ b/drivers/xen/xen-pciback/pciback.h
+@@ -37,6 +37,7 @@ struct xen_pcibk_device {
+ struct xen_pci_sharedinfo *sh_info;
+ unsigned long flags;
+ struct work_struct op_work;
++ struct xen_pci_op op;
+ };
+
+ struct xen_pcibk_dev_data {
+diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
+index c4a0666..a0e0e3e 100644
+--- a/drivers/xen/xen-pciback/pciback_ops.c
++++ b/drivers/xen/xen-pciback/pciback_ops.c
+@@ -298,9 +298,11 @@ void xen_pcibk_do_op(struct work_struct *data)
+ container_of(data, struct xen_pcibk_device, op_work);
+ struct pci_dev *dev;
+ struct xen_pcibk_dev_data *dev_data = NULL;
+- struct xen_pci_op *op = &pdev->sh_info->op;
++ struct xen_pci_op *op = &pdev->op;
+ int test_intx = 0;
+
++ *op = pdev->sh_info->op;
++ barrier();
+ dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
+
+ if (dev == NULL)
+@@ -342,6 +344,17 @@ void xen_pcibk_do_op(struct work_struct *data)
+ if ((dev_data->enable_intx != test_intx))
+ xen_pcibk_control_isr(dev, 0 /* no reset */);
+ }
++ pdev->sh_info->op.err = op->err;
++ pdev->sh_info->op.value = op->value;
++#ifdef CONFIG_PCI_MSI
++ if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
++ unsigned int i;
++
++ for (i = 0; i < op->value; i++)
++ pdev->sh_info->op.msix_entries[i].vector =
++ op->msix_entries[i].vector;
++ }
++#endif
+ /* Tell the driver domain that we're done. */
+ wmb();
+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
new file mode 100644
@@ -0,0 +1,53 @@
+From: Andrew Honig <ahonig@google.com>
+Date: Wed, 18 Nov 2015 14:50:23 -0800
+Subject: KVM: x86: Reload pit counters for all channels when restoring state
+Origin: https://git.kernel.org/linus/0185604c2d82c560dab2f2933a18f797e74ab5a8
+
+Currently if userspace restores the pit counters with a count of 0
+on channels 1 or 2 and the guest attempts to read the count on those
+channels, then KVM will perform a mod of 0 and crash. This will ensure
+that 0 values are converted to 65536 as per the spec.
+
+This is CVE-2015-7513.
+
+Signed-off-by: Andy Honig <ahonig@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[carnil: Backport to 4.3.3: context]
+---
+ arch/x86/kvm/x86.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3559,10 +3559,11 @@ static int kvm_vm_ioctl_get_pit(struct k
+ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
+ {
+ int r = 0;
+-
++ int i;
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
+- kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
++ for (i = 0; i < 3; i++)
++ kvm_pit_load_count(kvm, i, ps->channels[i].count, 0);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ return r;
+ }
+@@ -3583,6 +3584,7 @@ static int kvm_vm_ioctl_get_pit2(struct
+ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+ {
+ int r = 0, start = 0;
++ int i;
+ u32 prev_legacy, cur_legacy;
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
+@@ -3592,7 +3594,8 @@ static int kvm_vm_ioctl_set_pit2(struct
+ memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
+ sizeof(kvm->arch.vpit->pit_state.channels));
+ kvm->arch.vpit->pit_state.flags = ps->flags;
+- kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
++ for (i = 0; i < 3; i++)
++ kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, start);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ return r;
+ }
new file mode 100644
@@ -0,0 +1,19 @@
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sun, 27 Dec 2015 21:11:09 +0000
+Subject: ptrace: Fix ABI change for priv-esc fix
+Forwarded: not-needed
+
+Hide the new #include from genksyms.
+---
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -20,7 +20,9 @@
+ #include <linux/uio.h>
+ #include <linux/audit.h>
+ #include <linux/pid_namespace.h>
++#ifndef __GENKSYMS__
+ #include <linux/user_namespace.h>
++#endif
+ #include <linux/syscalls.h>
+ #include <linux/uaccess.h>
+ #include <linux/regset.h>
@@ -650,3 +650,23 @@ bugfix/all/unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
debian/af_unix-avoid-abi-changes.patch
bugfix/all/btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
bugfix/all/net-add-validation-for-the-socket-syscall-protocol.patch
+bugfix/all/xen-add-ring_copy_request.patch
+bugfix/all/xen-netback-don-t-use-last-request-to-determine-mini.patch
+bugfix/all/xen-netback-use-ring_copy_request-throughout.patch
+bugfix/all/xen-blkback-only-read-request-operation-from-shared-.patch
+bugfix/all/xen-blkback-read-from-indirect-descriptors-only-once.patch
+bugfix/all/xen-pciback-save-xen_pci_op-commands-before-processi.patch
+bugfix/all/xen-pciback-return-error-on-xen_pci_op_enable_msi-wh.patch
+bugfix/all/xen-pciback-return-error-on-xen_pci_op_enable_msix-w.patch
+bugfix/all/xen-pciback-do-not-install-an-irq-handler-for-msi-in.patch
+bugfix/all/xen-pciback-for-xen_pci_op_disable_msi-x-only-disabl.patch
+bugfix/all/xen-pciback-don-t-allow-msi-x-ops-if-pci_command_mem.patch
+bugfix/all/pptp-verify-sockaddr_len-in-pptp_bind-and-pptp_conne.patch
+bugfix/all/bluetooth-validate-socket-address-length-in-sco_sock.patch
+bugfix/all/ptrace-being-capable-wrt-a-process-requires-mapped-uids-gids.patch
+debian/ptrace-fix-abi-change-for-priv-esc-fix.patch
+bugfix/all/keys-fix-race-between-read-and-revoke.patch
+bugfix/x86/KVM-x86-Reload-pit-counters-for-all-channels-when-re.patch
+bugfix/all/revert-net-add-length-argument-to-skb_copy_and_csum_datagram_iovec.patch
+bugfix/all/udp-properly-support-msg_peek-with-truncated-buffers.patch
+bugfix/all/revert-xhci-don-t-finish-a-td-if-we-get-a-short-transfer.patch