new file mode 100644
@@ -0,0 +1,129 @@
+Xen virtual IOMMU
+
+Motivation
+==========
+*) Enable support for more than 255 vcpus
+HPC cloud services require VMs that provide high-performance parallel
+computing, so we want to create a huge VM with >255 vcpus on one machine
+to meet this requirement, with each vcpu pinned to a separate pcpu.
+
+To support >255 vcpus, X2APIC mode in the guest is necessary because
+legacy APIC (XAPIC) only supports 8-bit APIC IDs and can therefore support
+at most 255 vcpus. X2APIC mode supports 32-bit APIC IDs, and it requires
+the interrupt remapping function of the vIOMMU.
+
+The reason for this is that existing PCI MSI and IOAPIC were not modified
+when X2APIC was introduced. PCI MSI/IOAPIC can only send interrupt
+messages containing an 8-bit APIC ID, which cannot address >255 cpus.
+Interrupt remapping supports 32-bit APIC IDs, so it is necessary for
+enabling >255 cpus in x2apic mode.
+
+
+vIOMMU Architecture
+===================
+The vIOMMU device model is inside the Xen hypervisor for the following reasons:
+ 1) Avoid round trips between Qemu and Xen hypervisor
+ 2) Ease of integration with the rest of hypervisor
+ 3) HVMlite/PVH doesn't use Qemu
+
+* Interrupt remapping overview.
+Interrupts from virtual devices and physical devices are delivered
+to the vLAPIC from the vIOAPIC and vMSI. The vIOMMU needs to remap
+interrupts during this procedure.
+
++---------------------------------------------------+
+|Qemu |VM |
+| | +----------------+ |
+| | | Device driver | |
+| | +--------+-------+ |
+| | ^ |
+| +----------------+ | +--------+-------+ |
+| | Virtual device | | | IRQ subsystem | |
+| +-------+--------+ | +--------+-------+ |
+| | | ^ |
+| | | | |
++---------------------------+-----------------------+
+|hypervisor | | VIRQ |
+| | +---------+--------+ |
+| | | vLAPIC | |
+| |VIRQ +---------+--------+ |
+| | ^ |
+| | | |
+| | +---------+--------+ |
+| | | vIOMMU | |
+| | +---------+--------+ |
+| | ^ |
+| | | |
+| | +---------+--------+ |
+| | | vIOAPIC/vMSI | |
+| | +----+----+--------+ |
+| | ^ ^ |
+| +-----------------+ | |
+| | |
++---------------------------------------------------+
+HW |IRQ
+ +-------------------+
+ | PCI Device |
+ +-------------------+
+
+
+vIOMMU hypercall
+================
+Introduce a new domctl hypercall "xen_domctl_viommu_op" to create/destroy
+a vIOMMU and to query the vIOMMU capabilities that the device model supports.
+
+* vIOMMU hypercall parameter structure
+struct xen_domctl_viommu_op {
+ uint32_t cmd;
+#define XEN_DOMCTL_create_viommu 0
+#define XEN_DOMCTL_destroy_viommu 1
+#define XEN_DOMCTL_query_viommu_caps 2
+ union {
+ struct {
+ /* IN - vIOMMU type */
+ uint64_t viommu_type;
+ /* IN - MMIO base address of vIOMMU. */
+ uint64_t base_address;
+ /* IN - Length of MMIO region */
+ uint64_t length;
+ /* IN - Capabilities with which we want to create */
+ uint64_t capabilities;
+ /* OUT - vIOMMU identity */
+ uint32_t viommu_id;
+ } create_viommu;
+
+ struct {
+ /* IN - vIOMMU identity */
+ uint32_t viommu_id;
+ } destroy_viommu;
+
+ struct {
+ /* IN - vIOMMU type */
+ uint64_t viommu_type;
+ /* OUT - vIOMMU Capabilities */
+ uint64_t caps;
+ } query_caps;
+ } u;
+};
+
+- XEN_DOMCTL_query_viommu_caps
+ Query capabilities of the vIOMMU device model. viommu_type specifies
+which vendor vIOMMU device model (e.g. Intel VT-d) is targeted, and the
+hypervisor returns capability bits (e.g. the interrupt remapping bit).
+
+- XEN_DOMCTL_create_viommu
+ Create a vIOMMU device with viommu_type, capabilities, MMIO base
+address and length. The hypervisor returns viommu_id. The capabilities
+should be a subset of those returned by XEN_DOMCTL_query_viommu_caps.
+
+- XEN_DOMCTL_destroy_viommu
+ Destroy the vIOMMU identified by viommu_id in the Xen hypervisor.
+
+xl vIOMMU configuration
+=======================
+viommu="type=vtd,intremap=1,x2apic=1"
+
+"type" - Specify the vIOMMU device model type. Currently only the Intel
+VT-d device model ("vtd") is supported.
+"intremap" - Enable the vIOMMU interrupt remapping function.
+"x2apic" - Support x2apic mode with the interrupt remapping function.
This patch is to add Xen virtual IOMMU doc to introduce motivation, framework, vIOMMU hypercall and xl configuration. Signed-off-by: Lan Tianyu <tianyu.lan@intel.com> --- docs/misc/viommu.txt | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 docs/misc/viommu.txt