From patchwork Fri Nov 19 23:21:11 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alex Williamson X-Patchwork-Id: 343601 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oAJNLJqT011000 for ; Fri, 19 Nov 2010 23:21:19 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932416Ab0KSXVP (ORCPT ); Fri, 19 Nov 2010 18:21:15 -0500 Received: from mx1.redhat.com ([209.132.183.28]:12724 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932372Ab0KSXVP (ORCPT ); Fri, 19 Nov 2010 18:21:15 -0500 Received: from int-mx02.intmail.prod.int.phx2.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id oAJNLDP9021785 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Fri, 19 Nov 2010 18:21:13 -0500 Received: from s20.home (ovpn01.gateway.prod.ext.phx2.redhat.com [10.5.9.1]) by int-mx02.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id oAJNLBXG007366; Fri, 19 Nov 2010 18:21:12 -0500 From: Alex Williamson Subject: [PATCH v3 9/9] device-assignment: pass through and stub more PCI caps To: kvm@vger.kernel.org, mst@redhat.com Cc: qemu-devel@nongnu.org, alex.williamson@redhat.com, chrisw@redhat.com Date: Fri, 19 Nov 2010 16:21:11 -0700 Message-ID: <20101119232102.22162.24871.stgit@s20.home> In-Reply-To: <20101119231138.22162.93647.stgit@s20.home> References: <20101119231138.22162.93647.stgit@s20.home> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.67 on 10.5.11.12 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Fri, 19 Nov 2010 23:21:19 +0000 (UTC) diff --git a/hw/device-assignment.c 
b/hw/device-assignment.c index 832c236..cd62b5a 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -67,6 +67,9 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len); +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len); + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -370,6 +373,27 @@ static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) return (uint8_t)assigned_dev_pci_read(d, pos, 1); } +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) +{ + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + ssize_t ret; + int fd = pci_dev->real_device.config_fd; + +again: + ret = pwrite(fd, &val, len, pos); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) + goto again; + + fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", + __func__, ret, errno); + + exit(1); + } + + return; +} + static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap) { int id; @@ -453,10 +477,13 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, ssize_t ret; AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + if (address > PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { + return assigned_device_pci_cap_read_config(d, address, len); + } + if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) || (address >= 0x10 && address <= 0x24) || address == 0x30 || - address == 0x34 || address == 0x3c || address == 0x3d || - (address > PCI_CONFIG_HEADER_SIZE && d->config_map[address])) { + address == 0x34 || address == 0x3c || address == 0x3d) { val = pci_default_read_config(d, address, len); DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); @@ -1251,36 +1278,72 @@ static void assigned_dev_update_msix(PCIDevice 
*pci_dev, unsigned int ctrl_pos) #endif #endif -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + uint8_t cap, cap_id = pci_dev->config_map[address]; + + switch (cap_id) { + + case PCI_CAP_ID_VPD: + cap = pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS)) { + return assigned_dev_pci_read(pci_dev, address, len); + } + break; + + case PCI_CAP_ID_VNDR: + cap = pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS + 1)) { + return assigned_dev_pci_read(pci_dev, address, len); + } + break; + } + + return pci_default_read_config(pci_dev, address, len); +} + +static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len) { - uint8_t cap_id = pci_dev->config_map[address]; + uint8_t cap, cap_id = pci_dev->config_map[address]; pci_default_write_config(pci_dev, address, val, len); switch (cap_id) { #ifdef KVM_CAP_IRQ_ROUTING case PCI_CAP_ID_MSI: #ifdef KVM_CAP_DEVICE_MSI - { - uint8_t cap = pci_find_capability(pci_dev, cap_id); - if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { - assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); - } + cap = pci_find_capability(pci_dev, cap_id); + if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { + assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); } #endif break; case PCI_CAP_ID_MSIX: #ifdef KVM_CAP_DEVICE_MSIX - { - uint8_t cap = pci_find_capability(pci_dev, cap_id); - if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { - assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); - } + cap = pci_find_capability(pci_dev, cap_id); + if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { + assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); } #endif break; #endif + + case PCI_CAP_ID_VPD: + cap = 
pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS)) { + assigned_dev_pci_write(pci_dev, address, val, len); + } + break; + + case PCI_CAP_ID_VNDR: + cap = pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS + 1)) { + assigned_dev_pci_write(pci_dev, address, val, len); + } + break; } } @@ -1345,6 +1408,124 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #endif #endif + /* Minimal PM support, nothing writable, device appears to NAK changes */ + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM))) { + uint16_t pmc, pmcsr; + pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF); + + pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); + pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); + pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); + + pmcsr = pci_get_word(pci_dev->config + pos + PCI_PM_CTRL); + pmcsr &= (PCI_PM_CTRL_STATE_MASK); + pmcsr |= PCI_PM_CTRL_NO_SOFT_RST; + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, pmcsr); + + pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0); + pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP))) { + uint16_t type, devctl, lnksta; + uint32_t devcap, lnkcap; + + pci_add_capability(pci_dev, PCI_CAP_ID_EXP, pos, 0x40); + + type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); + type = (type & PCI_EXP_FLAGS_TYPE) >> 4; /* type is bits 7:4 */ + if (type != PCI_EXP_TYPE_ENDPOINT && + type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) { + fprintf(stderr, + "Device assignment only supports endpoint assignment, " + "device type %d\n", type); + return -EINVAL; + } + + /* capabilities, pass existing read-only copy + * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */ + + /* device capabilities: hide FLR */ + devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP); + devcap &= ~PCI_EXP_DEVCAP_FLR; + 
pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap); + + /* device control: clear all error reporting enable bits, + * leaving only a few host values. Note, these are + * all writable, but not passed to hw. + */ + devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); + devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | + PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); + devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; + pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); + + /* Clear device status */ + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0); + + /* Link capabilities (32-bit): expose links and latencies, clear rest */ + lnkcap = pci_get_long(pci_dev->config + pos + PCI_EXP_LNKCAP); + lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | + PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | + PCI_EXP_LNKCAP_L1EL); + pci_set_long(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); + + /* Link control, pass existing read-only copy. Should be writable? 
*/ + + /* Link status, only expose current speed and width */ + lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); + lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); + + /* Slot capabilities, control, status - not needed for endpoints */ + pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0); + + /* Root control, capabilities, status - not needed for endpoints */ + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0); + pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0); + + /* Device capabilities/control 2, pass existing read-only copy */ + /* Link control 2, pass existing read-only copy */ + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX))) { + uint16_t cmd; + uint32_t status; + + /* Only expose the minimum, 8 byte capability */ + pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8); + + /* Command register, clear upper bits, including extended modes */ + cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); + cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | + PCI_X_CMD_MAX_SPLIT); + pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); + + /* Status register, update with emulated PCI bus location, clear + * error bits, leave the rest. 
*/ + status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); + status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); + status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn; + status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | + PCI_X_STATUS_SPL_ERR); + pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD))) { + /* Direct R/W passthrough */ + pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR))) { + uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); + /* Direct R/W passthrough */ + pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, pos, len); + } + return 0; }