From patchwork Mon Dec 6 16:23:08 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alex Williamson X-Patchwork-Id: 378752 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oB6GNEvd028066 for ; Mon, 6 Dec 2010 16:23:14 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753785Ab0LFQXK (ORCPT ); Mon, 6 Dec 2010 11:23:10 -0500 Received: from mx1.redhat.com ([209.132.183.28]:1027 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753782Ab0LFQXJ (ORCPT ); Mon, 6 Dec 2010 11:23:09 -0500 Received: from int-mx02.intmail.prod.int.phx2.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id oB6GN9Wq025267 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Mon, 6 Dec 2010 11:23:09 -0500 Received: from s20.home (ovpn01.gateway.prod.ext.phx2.redhat.com [10.5.9.1]) by int-mx02.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id oB6GN8kA028687; Mon, 6 Dec 2010 11:23:08 -0500 From: Alex Williamson Subject: [PATCH v2 5/5] device-assignment: pass through and stub more PCI caps To: kvm@vger.kernel.org Cc: alex.williamson@redhat.com, ddutile@redhat.com, mst@redhat.com, chrisw@redhat.com Date: Mon, 06 Dec 2010 09:23:08 -0700 Message-ID: <20101206162302.4648.62666.stgit@s20.home> In-Reply-To: <20101206161810.4648.45658.stgit@s20.home> References: <20101206161810.4648.45658.stgit@s20.home> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.67 on 10.5.11.12 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Mon, 06 Dec 2010 16:23:14 +0000 (UTC) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 0ae04de..50c6408 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -67,6 +67,9 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len); +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len); + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -370,11 +373,32 @@ static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) return (uint8_t)assigned_dev_pci_read(d, pos, 1); } -static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap) +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) +{ + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + ssize_t ret; + int fd = pci_dev->real_device.config_fd; + +again: + ret = pwrite(fd, &val, len, pos); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) + goto again; + + fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", + __func__, ret, errno); + + exit(1); + } + + return; +} + +static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) { int id; int max_cap = 48; - int pos = PCI_CAPABILITY_LIST; + int pos = start ? start : PCI_CAPABILITY_LIST; int status; status = assigned_dev_pci_read_byte(d, PCI_STATUS); @@ -453,10 +477,16 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, ssize_t ret; AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { + val = assigned_device_pci_cap_read_config(d, address, len); + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); + return val; + } + if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) || (address >= 0x10 && address <= 0x24) || address == 0x30 || - address == 0x34 || address == 0x3c || address == 0x3d || - (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address])) { + address == 0x34 || address == 0x3c || address == 0x3d) { val = pci_default_read_config(d, address, len); DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); @@ -1251,7 +1281,70 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) #endif #endif -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, +/* There can be multiple VNDR capabilities per device, we need to find the + * one that starts closet to the given address without going over. */ +static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address) +{ + uint8_t cap, pos; + + for (cap = pos = 0; + (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); + pos += PCI_CAP_LIST_NEXT) { + if (pos <= address) { + cap = MAX(pos, cap); + } + } + return cap; +} + +/* Merge the bits set in mask from mval into val. Both val and mval are + * at the same addr offset, pos is the starting offset of the mask. */ +static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr, + int len, uint8_t pos, uint32_t mask) +{ + if (!ranges_overlap(addr, len, pos, 4)) { + return val; + } + + if (addr >= pos) { + mask >>= (addr - pos) * 8; + } else { + mask <<= (pos - addr) * 8; + } + mask &= 0xffffffffU >> (4 - len) * 8; + + val &= ~mask; + val |= (mval & mask); + + return val; +} + +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + uint8_t cap, cap_id = pci_dev->config_map[address]; + uint32_t val; + + switch (cap_id) { + + case PCI_CAP_ID_VPD: + cap = pci_find_capability(pci_dev, cap_id); + val = assigned_dev_pci_read(pci_dev, address, len); + return merge_bits(val, pci_get_long(pci_dev->config + address), + address, len, cap + PCI_CAP_LIST_NEXT, 0xff); + + case PCI_CAP_ID_VNDR: + cap = find_vndr_start(pci_dev, address); + val = assigned_dev_pci_read(pci_dev, address, len); + return merge_bits(val, pci_get_long(pci_dev->config + address), + address, len, cap + PCI_CAP_LIST_NEXT, 0xff); + } + + return pci_default_read_config(pci_dev, address, len); +} + +static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len) { uint8_t cap_id = pci_dev->config_map[address]; @@ -1281,6 +1374,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t ad #endif break; #endif + + case PCI_CAP_ID_VPD: + case PCI_CAP_ID_VNDR: + assigned_dev_pci_write(pci_dev, address, val, len); + break; } } @@ -1300,7 +1398,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #ifdef KVM_CAP_DEVICE_MSI /* Expose MSI capability * MSI capability is the 1st capability in capability config */ - if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI))) { + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0))) { dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI; /* Only 32-bit/no-mask currently supported */ if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10)) < 0) { @@ -1322,7 +1420,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #endif #ifdef KVM_CAP_DEVICE_MSIX /* Expose MSI-X capability */ - if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX))) { + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0))) { int bar_nr; uint32_t msix_table_entry; @@ -1347,6 +1445,157 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #endif #endif + /* Minimal PM support, nothing writable, device appears to NAK changes */ + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0))) { + uint16_t pmc; + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, + PCI_PM_SIZEOF)) < 0) { + return ret; + } + + pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); + pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); + pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); + + /* assign_device will bring the device up to D0, so we don't need + * to worry about doing that ourselves here. */ + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, + PCI_PM_CTRL_NO_SOFT_RST); + + pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0); + pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0))) { + uint8_t version; + uint16_t type, devctl, lnkcap, lnksta; + uint32_t devcap; + int size = 0x3c; /* version 2 size */ + + version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS); + version &= PCI_EXP_FLAGS_VERS; + if (version == 1) { + size = 0x14; + } else if (version > 2) { + fprintf(stderr, "Unsupported PCI express capability version %d\n", + version); + return -EINVAL; + } + + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_EXP, + pos, size)) < 0) { + return ret; + } + + type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); + type = (type & PCI_EXP_FLAGS_TYPE) >> 8; + if (type != PCI_EXP_TYPE_ENDPOINT && + type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) { + fprintf(stderr, + "Device assignment only supports endpoint assignment, " + "device type %d\n", type); + return -EINVAL; + } + + /* capabilities, pass existing read-only copy + * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */ + + /* device capabilities: hide FLR */ + devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP); + devcap &= ~PCI_EXP_DEVCAP_FLR; + pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap); + + /* device control: clear all error reporting enable bits, leaving + * leaving only a few host values. Note, these are + * all writable, but not passed to hw. + */ + devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); + devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | + PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); + devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; + pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); + + /* Clear device status */ + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0); + + /* Link capabilities, expose links and latencues, clear reporting */ + lnkcap = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKCAP); + lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | + PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | + PCI_EXP_LNKCAP_L1EL); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); + pci_set_word(pci_dev->wmask + pos + PCI_EXP_LNKCAP, + PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_RCB | + PCI_EXP_LNKCTL_CCC | PCI_EXP_LNKCTL_ES | + PCI_EXP_LNKCTL_CLKREQ_EN | PCI_EXP_LNKCTL_HAWD); + + /* Link control, pass existing read-only copy. Should be writable? */ + + /* Link status, only expose current speed and width */ + lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); + lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); + + if (version >= 2) { + /* Slot capabilities, control, status - not needed for endpoints */ + pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0); + + /* Root control, capabilities, status - not needed for endpoints */ + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0); + pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0); + + /* Device capabilities/control 2, pass existing read-only copy */ + /* Link control 2, pass existing read-only copy */ + } + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0))) { + uint16_t cmd; + uint32_t status; + + /* Only expose the minimum, 8 byte capability */ + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8)) < 0) { + return ret; + } + + /* Command register, clear upper bits, including extended modes */ + cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); + cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | + PCI_X_CMD_MAX_SPLIT); + pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); + + /* Status register, update with emulated PCI bus location, clear + * error bits, leave the rest. */ + status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); + status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); + status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn; + status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | + PCI_X_STATUS_SPL_ERR); + pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0))) { + /* Direct R/W passthrough */ + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) { + return ret; + } + } + + /* Devices can have multiple vendor capabilities, get them all */ + for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); + pos += PCI_CAP_LIST_NEXT) { + uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); + /* Direct R/W passthrough */ + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, + pos, len)) < 0) { + return ret; + } + } + return 0; }