@@ -402,14 +402,6 @@ F: */configure
F: */*.ac
F: tools/
 
-TRANSCENDENT MEMORY (TMEM)
-M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-W: https://oss.oracle.com/projects/tmem
-S: Supported
-F: xen/common/tmem*
-F: xen/include/xen/tmem*
-F: docs/misc/tmem*
-
UNMODIFIED LINUX PV DRIVERS
M: Jan Beulich <jbeulich@suse.com>
S: Obsolete
@@ -10,8 +10,8 @@ allow dom0_t xen_t:xen {
settime tbufcontrol readconsole clearconsole perfcontrol mtrr_add
mtrr_del mtrr_read microcode physinfo quirk writeconsole readapic
writeapic privprofile nonprivprofile kexec firmware sleep frequency
- getidle debug getcpuinfo heap pm_op mca_op lockprof cpupool_op tmem_op
- tmem_control getscheduler setscheduler
+ getidle debug getcpuinfo heap pm_op mca_op lockprof cpupool_op
+ getscheduler setscheduler
};
allow dom0_t xen_t:xen2 {
resource_op psr_cmt_op psr_alloc pmu_ctrl get_symbol
@@ -1,6 +1,3 @@
-# Allow all domains to use (unprivileged parts of) the tmem hypercall
-allow domain_type xen_t:xen tmem_op;
-
# Allow all domains to use PMU (but not to change its settings --- that's what
# pmu_ctrl is for)
allow domain_type xen_t:xen2 pmu_use;
@@ -11,7 +11,6 @@ CONFIG_ARM=y
#
# Common Features
#
-# CONFIG_TMEM is not set
CONFIG_SCHED_CREDIT=y
# CONFIG_SCHED_CREDIT2 is not set
# CONFIG_SCHED_RTDS is not set
@@ -11,7 +11,6 @@ CONFIG_NR_CPUS=32
# CONFIG_HVM_FEP is not set
# CONFIG_TBOOT is not set
# CONFIG_KEXEC is not set
-# CONFIG_TMEM is not set
# CONFIG_XENOPROF is not set
# CONFIG_XSM is not set
# CONFIG_SCHED_CREDIT2 is not set
@@ -58,8 +58,8 @@ DECLARE_HYPERCALL(hvm_op)
DECLARE_HYPERCALL(sysctl)
DECLARE_HYPERCALL(domctl)
DECLARE_HYPERCALL(kexec_op)
-DECLARE_HYPERCALL(tmem_op)
DECLARE_HYPERCALL(argo_op)
+DECLARE_HYPERCALL(xc_reserved_op)
DECLARE_HYPERCALL(xenpmu_op)
DECLARE_HYPERCALL(arch_0)
@@ -135,9 +135,6 @@ static const hypercall_table_t hvm_hypercall_table[] = {
HYPERCALL(hvm_op),
HYPERCALL(sysctl),
HYPERCALL(domctl),
-#ifdef CONFIG_TMEM
- HYPERCALL(tmem_op),
-#endif
#ifdef CONFIG_ARGO
COMPAT_CALL(argo_op),
#endif
@@ -65,7 +65,6 @@ const hypercall_args_t hypercall_args_table[NR_hypercalls] =
ARGS(sysctl, 1),
ARGS(domctl, 1),
ARGS(kexec_op, 2),
- ARGS(tmem_op, 1),
#ifdef CONFIG_ARGO
ARGS(argo_op, 5),
#endif
@@ -76,9 +76,6 @@ const hypercall_table_t pv_hypercall_table[] = {
#ifdef CONFIG_KEXEC
COMPAT_CALL(kexec_op),
#endif
-#ifdef CONFIG_TMEM
- HYPERCALL(tmem_op),
-#endif
#ifdef CONFIG_ARGO
COMPAT_CALL(argo_op),
#endif
@@ -25,7 +25,6 @@
#include <xen/dmi.h>
#include <xen/pfn.h>
#include <xen/nodemask.h>
-#include <xen/tmem_xen.h>
#include <xen/virtual_region.h>
#include <xen/watchdog.h>
#include <public/version.h>
@@ -1485,13 +1484,6 @@ void __init noreturn __start_xen(unsigned long mbi_p)
s = pfn_to_paddr(limit + 1);
init_domheap_pages(s, e);
}
-
- if ( tmem_enabled() )
- {
- printk(XENLOG_WARNING
- "TMEM physical RAM limit exceeded, disabling TMEM\n");
- tmem_disable();
- }
}
else
end_boot_allocator();
@@ -88,21 +88,6 @@ config KEXEC
 
If unsure, say Y.
 
-config TMEM
- bool "Transcendent Memory Support (deprecated)" if EXPERT = "y"
- ---help---
- Transcendent memory allows PV-aware guests to collaborate on memory
- usage. Guests can 'swap' their memory to the hypervisor or have an
- collective pool of memory shared across guests. The end result is
- less memory usage by guests allowing higher guest density.
-
- You also have to enable it on the Xen commandline by using tmem=1.
-
- WARNING: This option (and its underlying code) is going to go away
- in a future Xen version.
-
- If unsure, say N.
-
config XENOPROF
def_bool y
prompt "Xen Oprofile Support" if EXPERT = "y"
@@ -73,10 +73,6 @@ obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma $(lzo-y) unl
 
obj-$(CONFIG_COMPAT) += $(addprefix compat/,domain.o kernel.o memory.o multicall.o xlat.o)
 
-tmem-y := tmem.o tmem_xen.o tmem_control.o
-tmem-$(CONFIG_COMPAT) += compat/tmem_xen.o
-obj-$(CONFIG_TMEM) += $(tmem-y)
-
extra-y := symbols-dummy.o
 
subdir-$(CONFIG_COVERAGE) += coverage
deleted file mode 100644
@@ -1,23 +0,0 @@
-/******************************************************************************
- * tmem_xen.c
- *
- */
-
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/domain.h>
-#include <xen/guest_access.h>
-#include <xen/hypercall.h>
-#include <compat/tmem.h>
-
-CHECK_tmem_oid;
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
@@ -41,7 +41,6 @@
#include <public/vcpu.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
-#include <xen/tmem.h>
#include <asm/setup.h>
 
#ifdef CONFIG_X86
@@ -725,10 +724,8 @@ int domain_kill(struct domain *d)
argo_destroy(d);
evtchn_destroy(d);
gnttab_release_mappings(d);
- tmem_destroy(d->tmem_client);
vnuma_destroy(d->vnuma);
domain_set_outstanding_pages(d, 0);
- d->tmem_client = NULL;
/* fallthrough */
case DOMDYING_dying:
rc = domain_relinquish_resources(d);
@@ -7,6 +7,7 @@
* Copyright (c) 2003-2005, K A Fraser
*/
 
+#include <xen/domain_page.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
@@ -18,8 +19,6 @@
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
-#include <xen/tmem.h>
-#include <xen/tmem_xen.h>
#include <xen/numa.h>
#include <xen/mem_access.h>
#include <xen/trace.h>
@@ -250,11 +249,10 @@ static void populate_physmap(struct memop_args *a)
 
if ( unlikely(!page) )
{
- if ( !tmem_enabled() || a->extent_order )
- gdprintk(XENLOG_INFO,
- "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n",
- a->extent_order, d->domain_id, a->memflags,
- i, a->nr_extents);
+ gdprintk(XENLOG_INFO,
+ "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n",
+ a->extent_order, d->domain_id, a->memflags,
+ i, a->nr_extents);
goto out;
}
 
@@ -135,8 +135,6 @@
#include <xen/numa.h>
#include <xen/nodemask.h>
#include <xen/event.h>
-#include <xen/tmem.h>
-#include <xen/tmem_xen.h>
#include <public/sysctl.h>
#include <public/sched.h>
#include <asm/page.h>
@@ -451,10 +449,6 @@ static unsigned long node_need_scrub[MAX_NUMNODES];
static unsigned long *avail[MAX_NUMNODES];
static long total_avail_pages;
 
-/* TMEM: Reserve a fraction of memory for mid-size (0<order<9) allocations.*/
-static long midsize_alloc_zone_pages;
-#define MIDSIZE_ALLOC_FRAC 128
-
static DEFINE_SPINLOCK(heap_lock);
static long outstanding_claims; /* total outstanding claims by all domains */
 
@@ -530,16 +524,6 @@ int domain_set_outstanding_pages(struct domain *d, unsigned long pages)
/* how much memory is available? */
avail_pages = total_avail_pages;
 
- /* Note: The usage of claim means that allocation from a guest *might*
- * have to come from freeable memory. Using free memory is always better, if
- * it is available, than using freeable memory.
- *
- * But that is OK as once the claim has been made, it still can take minutes
- * before the claim is fully satisfied. Tmem can make use of the unclaimed
- * pages during this time (to store ephemeral/freeable pages only,
- * not persistent pages).
- */
- avail_pages += tmem_freeable_pages();
avail_pages -= outstanding_claims;
 
/*
@@ -711,8 +695,7 @@ static void __init setup_low_mem_virq(void)
 
static void check_low_mem_virq(void)
{
- unsigned long avail_pages = total_avail_pages +
- tmem_freeable_pages() - outstanding_claims;
+ unsigned long avail_pages = total_avail_pages - outstanding_claims;
 
if ( unlikely(avail_pages <= low_mem_virq_th) )
{
@@ -939,8 +922,7 @@ static struct page_info *alloc_heap_pages(
* Claimed memory is considered unavailable unless the request
* is made by a domain with sufficient unclaimed pages.
*/
- if ( (outstanding_claims + request >
- total_avail_pages + tmem_freeable_pages()) &&
+ if ( (outstanding_claims + request > total_avail_pages) &&
((memflags & MEMF_no_refcount) ||
!d || d->outstanding_pages < request) )
{
@@ -948,22 +930,6 @@ static struct page_info *alloc_heap_pages(
return NULL;
}
 
- /*
- * TMEM: When available memory is scarce due to tmem absorbing it, allow
- * only mid-size allocations to avoid worst of fragmentation issues.
- * Others try tmem pools then fail. This is a workaround until all
- * post-dom0-creation-multi-page allocations can be eliminated.
- */
- if ( ((order == 0) || (order >= 9)) &&
- (total_avail_pages <= midsize_alloc_zone_pages) &&
- tmem_freeable_pages() )
- {
- /* Try to free memory from tmem. */
- pg = tmem_relinquish_pages(order, memflags);
- spin_unlock(&heap_lock);
- return pg;
- }
-
pg = get_free_buddy(zone_lo, zone_hi, order, memflags, d);
/* Try getting a dirty buddy if we couldn't get a clean one. */
if ( !pg && !(memflags & MEMF_no_scrub) )
@@ -1443,10 +1409,6 @@ static void free_heap_pages(
else
pg->u.free.first_dirty = INVALID_DIRTY_IDX;
 
- if ( tmem_enabled() )
- midsize_alloc_zone_pages = max(
- midsize_alloc_zone_pages, total_avail_pages / MIDSIZE_ALLOC_FRAC);
-
/* Merge chunks as far as possible. */
while ( order < MAX_ORDER )
{
@@ -1833,11 +1795,6 @@ static unsigned long avail_heap_pages(
return free_pages;
}
 
-unsigned long total_free_pages(void)
-{
- return total_avail_pages - midsize_alloc_zone_pages;
-}
-
void __init end_boot_allocator(void)
{
unsigned int i;
@@ -2264,10 +2221,9 @@ int assign_pages(
{
if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
{
- if ( !tmem_enabled() || order != 0 || d->tot_pages != d->max_pages )
- gprintk(XENLOG_INFO, "Over-allocation for domain %u: "
- "%u > %u\n", d->domain_id,
- d->tot_pages + (1 << order), d->max_pages);
+ gprintk(XENLOG_INFO, "Over-allocation for domain %u: "
+ "%u > %u\n", d->domain_id,
+ d->tot_pages + (1 << order), d->max_pages);
rc = -E2BIG;
goto out;
}
@@ -13,7 +13,6 @@
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
-#include <xen/tmem.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
@@ -456,10 +455,6 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
}
#endif
 
- case XEN_SYSCTL_tmem_op:
- ret = tmem_control(&op->u.tmem_op);
- break;
-
case XEN_SYSCTL_livepatch_op:
ret = livepatch_op(&op->u.livepatch);
if ( ret != -ENOSYS && ret != -EOPNOTSUPP )
deleted file mode 100644
@@ -1,2095 +0,0 @@
-/******************************************************************************
- * tmem.c
- *
- * Transcendent memory
- *
- * Copyright (c) 2009, Dan Magenheimer, Oracle Corp.
- */
-
-/* TODO list: 090129 (updated 100318)
- - any better reclamation policy?
- - use different tlsf pools for each client (maybe each pool)
- - test shared access more completely (ocfs2)
- - add feedback-driven compression (not for persistent pools though!)
- - add data-structure total bytes overhead stats
- */
-
-#ifdef __XEN__
-#include <xen/tmem_xen.h> /* host-specific (eg Xen) code goes here. */
-#endif
-
-#include <public/sysctl.h>
-#include <xen/tmem.h>
-#include <xen/rbtree.h>
-#include <xen/radix-tree.h>
-#include <xen/list.h>
-#include <xen/init.h>
-
-#define TMEM_SPEC_VERSION 1
-
-struct tmem_statistics tmem_stats = {
- .global_obj_count = ATOMIC_INIT(0),
- .global_pgp_count = ATOMIC_INIT(0),
- .global_pcd_count = ATOMIC_INIT(0),
- .global_page_count = ATOMIC_INIT(0),
- .global_rtree_node_count = ATOMIC_INIT(0),
-};
-
-/************ CORE DATA STRUCTURES ************************************/
-
-struct tmem_object_root {
- struct xen_tmem_oid oid;
- struct rb_node rb_tree_node; /* Protected by pool->pool_rwlock. */
- unsigned long objnode_count; /* Atomicity depends on obj_spinlock. */
- long pgp_count; /* Atomicity depends on obj_spinlock. */
- struct radix_tree_root tree_root; /* Tree of pages within object. */
- struct tmem_pool *pool;
- domid_t last_client;
- spinlock_t obj_spinlock;
-};
-
-struct tmem_object_node {
- struct tmem_object_root *obj;
- struct radix_tree_node rtn;
-};
-
-struct tmem_page_descriptor {
- union {
- struct list_head global_eph_pages;
- struct list_head client_inv_pages;
- };
- union {
- struct {
- union {
- struct list_head client_eph_pages;
- struct list_head pool_pers_pages;
- };
- struct tmem_object_root *obj;
- } us;
- struct xen_tmem_oid inv_oid; /* Used for invalid list only. */
- };
- pagesize_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid,
- else compressed data (cdata). */
- uint32_t index;
- bool eviction_attempted; /* CHANGE TO lifetimes? (settable). */
- union {
- struct page_info *pfp; /* Page frame pointer. */
- char *cdata; /* Compressed data. */
- struct tmem_page_content_descriptor *pcd; /* Page dedup. */
- };
- union {
- uint64_t timestamp;
- uint32_t pool_id; /* Used for invalid list only. */
- };
-};
-
-#define PCD_TZE_MAX_SIZE (PAGE_SIZE - (PAGE_SIZE/64))
-
-struct tmem_page_content_descriptor {
- union {
- struct page_info *pfp; /* Page frame pointer. */
- char *cdata; /* If compression_enabled. */
- };
- pagesize_t size; /* If compression_enabled -> 0<size<PAGE_SIZE (*cdata)
- * else if tze, 0<=size<PAGE_SIZE, rounded up to mult of 8
- * else PAGE_SIZE -> *pfp. */
-};
-
-static int tmem_initialized = 0;
-
-struct xmem_pool *tmem_mempool = 0;
-unsigned int tmem_mempool_maxalloc = 0;
-
-DEFINE_SPINLOCK(tmem_page_list_lock);
-PAGE_LIST_HEAD(tmem_page_list);
-unsigned long tmem_page_list_pages = 0;
-
-DEFINE_RWLOCK(tmem_rwlock);
-static DEFINE_SPINLOCK(eph_lists_spinlock); /* Protects global AND clients. */
-static DEFINE_SPINLOCK(pers_lists_spinlock);
-
-#define ASSERT_SPINLOCK(_l) ASSERT(spin_is_locked(_l))
-#define ASSERT_WRITELOCK(_l) ASSERT(rw_is_write_locked(_l))
-
- atomic_t client_weight_total;
-
-struct tmem_global tmem_global = {
- .ephemeral_page_list = LIST_HEAD_INIT(tmem_global.ephemeral_page_list),
- .client_list = LIST_HEAD_INIT(tmem_global.client_list),
- .client_weight_total = ATOMIC_INIT(0),
-};
-
-/*
- * There two types of memory allocation interfaces in tmem.
- * One is based on xmem_pool and the other is used for allocate a whole page.
- * Both of them are based on the lowlevel function __tmem_alloc_page/_thispool().
- * The call trace of alloc path is like below.
- * Persistant pool:
- * 1.tmem_malloc()
- * > xmem_pool_alloc()
- * > tmem_persistent_pool_page_get()
- * > __tmem_alloc_page_thispool()
- * 2.tmem_alloc_page()
- * > __tmem_alloc_page_thispool()
- *
- * Ephemeral pool:
- * 1.tmem_malloc()
- * > xmem_pool_alloc()
- * > tmem_mempool_page_get()
- * > __tmem_alloc_page()
- * 2.tmem_alloc_page()
- * > __tmem_alloc_page()
- *
- * The free path is done in the same manner.
- */
-static void *tmem_malloc(size_t size, struct tmem_pool *pool)
-{
- void *v = NULL;
-
- if ( (pool != NULL) && is_persistent(pool) ) {
- if ( pool->client->persistent_pool )
- v = xmem_pool_alloc(size, pool->client->persistent_pool);
- }
- else
- {
- ASSERT( size < tmem_mempool_maxalloc );
- ASSERT( tmem_mempool != NULL );
- v = xmem_pool_alloc(size, tmem_mempool);
- }
- if ( v == NULL )
- tmem_stats.alloc_failed++;
- return v;
-}
-
-static void tmem_free(void *p, struct tmem_pool *pool)
-{
- if ( pool == NULL || !is_persistent(pool) )
- {
- ASSERT( tmem_mempool != NULL );
- xmem_pool_free(p, tmem_mempool);
- }
- else
- {
- ASSERT( pool->client->persistent_pool != NULL );
- xmem_pool_free(p, pool->client->persistent_pool);
- }
-}
-
-static struct page_info *tmem_alloc_page(struct tmem_pool *pool)
-{
- struct page_info *pfp = NULL;
-
- if ( pool != NULL && is_persistent(pool) )
- pfp = __tmem_alloc_page_thispool(pool->client->domain);
- else
- pfp = __tmem_alloc_page();
- if ( pfp == NULL )
- tmem_stats.alloc_page_failed++;
- else
- atomic_inc_and_max(global_page_count);
- return pfp;
-}
-
-static void tmem_free_page(struct tmem_pool *pool, struct page_info *pfp)
-{
- ASSERT(pfp);
- if ( pool == NULL || !is_persistent(pool) )
- __tmem_free_page(pfp);
- else
- __tmem_free_page_thispool(pfp);
- atomic_dec_and_assert(global_page_count);
-}
-
-static void *tmem_mempool_page_get(unsigned long size)
-{
- struct page_info *pi;
-
- ASSERT(size == PAGE_SIZE);
- if ( (pi = __tmem_alloc_page()) == NULL )
- return NULL;
- return page_to_virt(pi);
-}
-
-static void tmem_mempool_page_put(void *page_va)
-{
- ASSERT(IS_PAGE_ALIGNED(page_va));
- __tmem_free_page(virt_to_page(page_va));
-}
-
-static int __init tmem_mempool_init(void)
-{
- tmem_mempool = xmem_pool_create("tmem", tmem_mempool_page_get,
- tmem_mempool_page_put, PAGE_SIZE, 0, PAGE_SIZE);
- if ( tmem_mempool )
- tmem_mempool_maxalloc = xmem_pool_maxalloc(tmem_mempool);
- return tmem_mempool != NULL;
-}
-
-/* Persistent pools are per-domain. */
-static void *tmem_persistent_pool_page_get(unsigned long size)
-{
- struct page_info *pi;
- struct domain *d = current->domain;
-
- ASSERT(size == PAGE_SIZE);
- if ( (pi = __tmem_alloc_page_thispool(d)) == NULL )
- return NULL;
- ASSERT(IS_VALID_PAGE(pi));
- return page_to_virt(pi);
-}
-
-static void tmem_persistent_pool_page_put(void *page_va)
-{
- struct page_info *pi;
-
- ASSERT(IS_PAGE_ALIGNED(page_va));
- pi = mfn_to_page(_mfn(virt_to_mfn(page_va)));
- ASSERT(IS_VALID_PAGE(pi));
- __tmem_free_page_thispool(pi);
-}
-
-/*
- * Page content descriptor manipulation routines.
- */
-#define NOT_SHAREABLE ((uint16_t)-1UL)
-
-/************ PAGE DESCRIPTOR MANIPULATION ROUTINES *******************/
-
-/* Allocate a struct tmem_page_descriptor and associate it with an object. */
-static struct tmem_page_descriptor *pgp_alloc(struct tmem_object_root *obj)
-{
- struct tmem_page_descriptor *pgp;
- struct tmem_pool *pool;
-
- ASSERT(obj != NULL);
- ASSERT(obj->pool != NULL);
- pool = obj->pool;
- if ( (pgp = tmem_malloc(sizeof(struct tmem_page_descriptor), pool)) == NULL )
- return NULL;
- pgp->us.obj = obj;
- INIT_LIST_HEAD(&pgp->global_eph_pages);
- INIT_LIST_HEAD(&pgp->us.client_eph_pages);
- pgp->pfp = NULL;
- pgp->size = -1;
- pgp->index = -1;
- pgp->timestamp = get_cycles();
- atomic_inc_and_max(global_pgp_count);
- atomic_inc(&pool->pgp_count);
- if ( _atomic_read(pool->pgp_count) > pool->pgp_count_max )
- pool->pgp_count_max = _atomic_read(pool->pgp_count);
- return pgp;
-}
-
-static struct tmem_page_descriptor *pgp_lookup_in_obj(struct tmem_object_root *obj, uint32_t index)
-{
- ASSERT(obj != NULL);
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- ASSERT(obj->pool != NULL);
- return radix_tree_lookup(&obj->tree_root, index);
-}
-
-static void pgp_free_data(struct tmem_page_descriptor *pgp, struct tmem_pool *pool)
-{
- pagesize_t pgp_size = pgp->size;
-
- if ( pgp->pfp == NULL )
- return;
- if ( pgp_size )
- tmem_free(pgp->cdata, pool);
- else
- tmem_free_page(pgp->us.obj->pool,pgp->pfp);
- if ( pool != NULL && pgp_size )
- {
- pool->client->compressed_pages--;
- pool->client->compressed_sum_size -= pgp_size;
- }
- pgp->pfp = NULL;
- pgp->size = -1;
-}
-
-static void __pgp_free(struct tmem_page_descriptor *pgp, struct tmem_pool *pool)
-{
- pgp->us.obj = NULL;
- pgp->index = -1;
- tmem_free(pgp, pool);
-}
-
-static void pgp_free(struct tmem_page_descriptor *pgp)
-{
- struct tmem_pool *pool = NULL;
-
- ASSERT(pgp->us.obj != NULL);
- ASSERT(pgp->us.obj->pool != NULL);
- ASSERT(pgp->us.obj->pool->client != NULL);
-
- pool = pgp->us.obj->pool;
- if ( !is_persistent(pool) )
- {
- ASSERT(list_empty(&pgp->global_eph_pages));
- ASSERT(list_empty(&pgp->us.client_eph_pages));
- }
- pgp_free_data(pgp, pool);
- atomic_dec_and_assert(global_pgp_count);
- atomic_dec(&pool->pgp_count);
- ASSERT(_atomic_read(pool->pgp_count) >= 0);
- pgp->size = -1;
- if ( is_persistent(pool) && pool->client->info.flags.u.migrating )
- {
- pgp->inv_oid = pgp->us.obj->oid;
- pgp->pool_id = pool->pool_id;
- return;
- }
- __pgp_free(pgp, pool);
-}
-
-/* Remove pgp from global/pool/client lists and free it. */
-static void pgp_delist_free(struct tmem_page_descriptor *pgp)
-{
- struct client *client;
- uint64_t life;
-
- ASSERT(pgp != NULL);
- ASSERT(pgp->us.obj != NULL);
- ASSERT(pgp->us.obj->pool != NULL);
- client = pgp->us.obj->pool->client;
- ASSERT(client != NULL);
-
- /* Delist pgp. */
- if ( !is_persistent(pgp->us.obj->pool) )
- {
- spin_lock(&eph_lists_spinlock);
- if ( !list_empty(&pgp->us.client_eph_pages) )
- client->eph_count--;
- ASSERT(client->eph_count >= 0);
- list_del_init(&pgp->us.client_eph_pages);
- if ( !list_empty(&pgp->global_eph_pages) )
- tmem_global.eph_count--;
- ASSERT(tmem_global.eph_count >= 0);
- list_del_init(&pgp->global_eph_pages);
- spin_unlock(&eph_lists_spinlock);
- }
- else
- {
- if ( client->info.flags.u.migrating )
- {
- spin_lock(&pers_lists_spinlock);
- list_add_tail(&pgp->client_inv_pages,
- &client->persistent_invalidated_list);
- if ( pgp != pgp->us.obj->pool->cur_pgp )
- list_del_init(&pgp->us.pool_pers_pages);
- spin_unlock(&pers_lists_spinlock);
- }
- else
- {
- spin_lock(&pers_lists_spinlock);
- list_del_init(&pgp->us.pool_pers_pages);
- spin_unlock(&pers_lists_spinlock);
- }
- }
- life = get_cycles() - pgp->timestamp;
- pgp->us.obj->pool->sum_life_cycles += life;
-
- /* Free pgp. */
- pgp_free(pgp);
-}
-
-/* Called only indirectly by radix_tree_destroy. */
-static void pgp_destroy(void *v)
-{
- struct tmem_page_descriptor *pgp = (struct tmem_page_descriptor *)v;
-
- pgp->us.obj->pgp_count--;
- pgp_delist_free(pgp);
-}
-
-static int pgp_add_to_obj(struct tmem_object_root *obj, uint32_t index, struct tmem_page_descriptor *pgp)
-{
- int ret;
-
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- ret = radix_tree_insert(&obj->tree_root, index, pgp);
- if ( !ret )
- obj->pgp_count++;
- return ret;
-}
-
-static struct tmem_page_descriptor *pgp_delete_from_obj(struct tmem_object_root *obj, uint32_t index)
-{
- struct tmem_page_descriptor *pgp;
-
- ASSERT(obj != NULL);
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- ASSERT(obj->pool != NULL);
- pgp = radix_tree_delete(&obj->tree_root, index);
- if ( pgp != NULL )
- obj->pgp_count--;
- ASSERT(obj->pgp_count >= 0);
-
- return pgp;
-}
-
-/************ RADIX TREE NODE MANIPULATION ROUTINES *******************/
-
-/* Called only indirectly from radix_tree_insert. */
-static struct radix_tree_node *rtn_alloc(void *arg)
-{
- struct tmem_object_node *objnode;
- struct tmem_object_root *obj = (struct tmem_object_root *)arg;
-
- ASSERT(obj->pool != NULL);
- objnode = tmem_malloc(sizeof(struct tmem_object_node),obj->pool);
- if (objnode == NULL)
- return NULL;
- objnode->obj = obj;
- memset(&objnode->rtn, 0, sizeof(struct radix_tree_node));
- if (++obj->pool->objnode_count > obj->pool->objnode_count_max)
- obj->pool->objnode_count_max = obj->pool->objnode_count;
- atomic_inc_and_max(global_rtree_node_count);
- obj->objnode_count++;
- return &objnode->rtn;
-}
-
-/* Called only indirectly from radix_tree_delete/destroy. */
-static void rtn_free(struct radix_tree_node *rtn, void *arg)
-{
- struct tmem_pool *pool;
- struct tmem_object_node *objnode;
-
- ASSERT(rtn != NULL);
- objnode = container_of(rtn,struct tmem_object_node,rtn);
- ASSERT(objnode->obj != NULL);
- ASSERT_SPINLOCK(&objnode->obj->obj_spinlock);
- pool = objnode->obj->pool;
- ASSERT(pool != NULL);
- pool->objnode_count--;
- objnode->obj->objnode_count--;
- objnode->obj = NULL;
- tmem_free(objnode, pool);
- atomic_dec_and_assert(global_rtree_node_count);
-}
-
-/************ POOL OBJECT COLLECTION MANIPULATION ROUTINES *******************/
-
-static int oid_compare(struct xen_tmem_oid *left,
- struct xen_tmem_oid *right)
-{
- if ( left->oid[2] == right->oid[2] )
- {
- if ( left->oid[1] == right->oid[1] )
- {
- if ( left->oid[0] == right->oid[0] )
- return 0;
- else if ( left->oid[0] < right->oid[0] )
- return -1;
- else
- return 1;
- }
- else if ( left->oid[1] < right->oid[1] )
- return -1;
- else
- return 1;
- }
- else if ( left->oid[2] < right->oid[2] )
- return -1;
- else
- return 1;
-}
-
-static void oid_set_invalid(struct xen_tmem_oid *oidp)
-{
- oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
-}
-
-static unsigned oid_hash(struct xen_tmem_oid *oidp)
-{
- return (tmem_hash(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
- BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK);
-}
-
-/* Searches for object==oid in pool, returns locked object if found. */
-static struct tmem_object_root * obj_find(struct tmem_pool *pool,
- struct xen_tmem_oid *oidp)
-{
- struct rb_node *node;
- struct tmem_object_root *obj;
-
-restart_find:
- read_lock(&pool->pool_rwlock);
- node = pool->obj_rb_root[oid_hash(oidp)].rb_node;
- while ( node )
- {
- obj = container_of(node, struct tmem_object_root, rb_tree_node);
- switch ( oid_compare(&obj->oid, oidp) )
- {
- case 0: /* Equal. */
- if ( !spin_trylock(&obj->obj_spinlock) )
- {
- read_unlock(&pool->pool_rwlock);
- goto restart_find;
- }
- read_unlock(&pool->pool_rwlock);
- return obj;
- case -1:
- node = node->rb_left;
- break;
- case 1:
- node = node->rb_right;
- }
- }
- read_unlock(&pool->pool_rwlock);
- return NULL;
-}
-
-/* Free an object that has no more pgps in it. */
-static void obj_free(struct tmem_object_root *obj)
-{
- struct tmem_pool *pool;
- struct xen_tmem_oid old_oid;
-
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- ASSERT(obj != NULL);
- ASSERT(obj->pgp_count == 0);
- pool = obj->pool;
- ASSERT(pool != NULL);
- ASSERT(pool->client != NULL);
- ASSERT_WRITELOCK(&pool->pool_rwlock);
- if ( obj->tree_root.rnode != NULL ) /* May be a "stump" with no leaves. */
- radix_tree_destroy(&obj->tree_root, pgp_destroy);
- ASSERT((long)obj->objnode_count == 0);
- ASSERT(obj->tree_root.rnode == NULL);
- pool->obj_count--;
- ASSERT(pool->obj_count >= 0);
- obj->pool = NULL;
- old_oid = obj->oid;
- oid_set_invalid(&obj->oid);
- obj->last_client = TMEM_CLI_ID_NULL;
- atomic_dec_and_assert(global_obj_count);
- rb_erase(&obj->rb_tree_node, &pool->obj_rb_root[oid_hash(&old_oid)]);
- spin_unlock(&obj->obj_spinlock);
- tmem_free(obj, pool);
-}
-
-static int obj_rb_insert(struct rb_root *root, struct tmem_object_root *obj)
-{
- struct rb_node **new, *parent = NULL;
- struct tmem_object_root *this;
-
- ASSERT(obj->pool);
- ASSERT_WRITELOCK(&obj->pool->pool_rwlock);
-
- new = &(root->rb_node);
- while ( *new )
- {
- this = container_of(*new, struct tmem_object_root, rb_tree_node);
- parent = *new;
- switch ( oid_compare(&this->oid, &obj->oid) )
- {
- case 0:
- return 0;
- case -1:
- new = &((*new)->rb_left);
- break;
- case 1:
- new = &((*new)->rb_right);
- break;
- }
- }
- rb_link_node(&obj->rb_tree_node, parent, new);
- rb_insert_color(&obj->rb_tree_node, root);
- return 1;
-}
-
-/*
- * Allocate, initialize, and insert an tmem_object_root
- * (should be called only if find failed).
- */
-static struct tmem_object_root * obj_alloc(struct tmem_pool *pool,
- struct xen_tmem_oid *oidp)
-{
- struct tmem_object_root *obj;
-
- ASSERT(pool != NULL);
- if ( (obj = tmem_malloc(sizeof(struct tmem_object_root), pool)) == NULL )
- return NULL;
- pool->obj_count++;
- if (pool->obj_count > pool->obj_count_max)
- pool->obj_count_max = pool->obj_count;
- atomic_inc_and_max(global_obj_count);
- radix_tree_init(&obj->tree_root);
- radix_tree_set_alloc_callbacks(&obj->tree_root, rtn_alloc, rtn_free, obj);
- spin_lock_init(&obj->obj_spinlock);
- obj->pool = pool;
- obj->oid = *oidp;
- obj->objnode_count = 0;
- obj->pgp_count = 0;
- obj->last_client = TMEM_CLI_ID_NULL;
- return obj;
-}
-
-/* Free an object after destroying any pgps in it. */
-static void obj_destroy(struct tmem_object_root *obj)
-{
- ASSERT_WRITELOCK(&obj->pool->pool_rwlock);
- radix_tree_destroy(&obj->tree_root, pgp_destroy);
- obj_free(obj);
-}
-
-/* Destroys all objs in a pool, or only if obj->last_client matches cli_id. */
-static void pool_destroy_objs(struct tmem_pool *pool, domid_t cli_id)
-{
- struct rb_node *node;
- struct tmem_object_root *obj;
- int i;
-
- write_lock(&pool->pool_rwlock);
- pool->is_dying = 1;
- for (i = 0; i < OBJ_HASH_BUCKETS; i++)
- {
- node = rb_first(&pool->obj_rb_root[i]);
- while ( node != NULL )
- {
- obj = container_of(node, struct tmem_object_root, rb_tree_node);
- spin_lock(&obj->obj_spinlock);
- node = rb_next(node);
- if ( obj->last_client == cli_id )
- obj_destroy(obj);
- else
- spin_unlock(&obj->obj_spinlock);
- }
- }
- write_unlock(&pool->pool_rwlock);
-}
-
-
-/************ POOL MANIPULATION ROUTINES ******************************/
-
-static struct tmem_pool * pool_alloc(void)
-{
- struct tmem_pool *pool;
- int i;
-
- if ( (pool = xzalloc(struct tmem_pool)) == NULL )
- return NULL;
- for (i = 0; i < OBJ_HASH_BUCKETS; i++)
- pool->obj_rb_root[i] = RB_ROOT;
- INIT_LIST_HEAD(&pool->persistent_page_list);
- rwlock_init(&pool->pool_rwlock);
- return pool;
-}
-
-static void pool_free(struct tmem_pool *pool)
-{
- pool->client = NULL;
- xfree(pool);
-}
-
-/*
- * Register new_client as a user of this shared pool and return 0 on succ.
- */
-static int shared_pool_join(struct tmem_pool *pool, struct client *new_client)
-{
- struct share_list *sl;
- ASSERT(is_shared(pool));
-
- if ( (sl = tmem_malloc(sizeof(struct share_list), NULL)) == NULL )
- return -1;
- sl->client = new_client;
- list_add_tail(&sl->share_list, &pool->share_list);
- if ( new_client->cli_id != pool->client->cli_id )
- tmem_client_info("adding new %s %d to shared pool owned by %s %d\n",
- tmem_client_str, new_client->cli_id, tmem_client_str,
- pool->client->cli_id);
- else if ( pool->shared_count )
- tmem_client_info("inter-guest sharing of shared pool %s by client %d\n",
- tmem_client_str, pool->client->cli_id);
- ++pool->shared_count;
- return 0;
-}
-
-/* Reassign "ownership" of the pool to another client that shares this pool. */
-static void shared_pool_reassign(struct tmem_pool *pool)
-{
- struct share_list *sl;
- int poolid;
- struct client *old_client = pool->client, *new_client;
-
- ASSERT(is_shared(pool));
- if ( list_empty(&pool->share_list) )
- {
- ASSERT(pool->shared_count == 0);
- return;
- }
- old_client->pools[pool->pool_id] = NULL;
- sl = list_entry(pool->share_list.next, struct share_list, share_list);
- /*
- * The sl->client can be old_client if there are multiple shared pools
- * within an guest.
- */
- pool->client = new_client = sl->client;
- for (poolid = 0; poolid < MAX_POOLS_PER_DOMAIN; poolid++)
- if (new_client->pools[poolid] == pool)
- break;
- ASSERT(poolid != MAX_POOLS_PER_DOMAIN);
- new_client->eph_count += _atomic_read(pool->pgp_count);
- old_client->eph_count -= _atomic_read(pool->pgp_count);
- list_splice_init(&old_client->ephemeral_page_list,
- &new_client->ephemeral_page_list);
- tmem_client_info("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
- tmem_cli_id_str, old_client->cli_id, tmem_cli_id_str, new_client->cli_id, poolid);
- pool->pool_id = poolid;
-}
-
-/*
- * Destroy all objects with last_client same as passed cli_id,
- * remove pool's cli_id from list of sharers of this pool.
- */
-static int shared_pool_quit(struct tmem_pool *pool, domid_t cli_id)
-{
- struct share_list *sl;
- int s_poolid;
-
- ASSERT(is_shared(pool));
- ASSERT(pool->client != NULL);
-
- ASSERT_WRITELOCK(&tmem_rwlock);
- pool_destroy_objs(pool, cli_id);
- list_for_each_entry(sl,&pool->share_list, share_list)
- {
- if (sl->client->cli_id != cli_id)
- continue;
- list_del(&sl->share_list);
- tmem_free(sl, pool);
- --pool->shared_count;
- if (pool->client->cli_id == cli_id)
- shared_pool_reassign(pool);
- if (pool->shared_count)
- return pool->shared_count;
- for (s_poolid = 0; s_poolid < MAX_GLOBAL_SHARED_POOLS; s_poolid++)
- if ( (tmem_global.shared_pools[s_poolid]) == pool )
- {
- tmem_global.shared_pools[s_poolid] = NULL;
- break;
- }
- return 0;
- }
- tmem_client_warn("tmem: no match unsharing pool, %s=%d\n",
- tmem_cli_id_str,pool->client->cli_id);
- return -1;
-}
-
-/* Flush all data (owned by cli_id) from a pool and, optionally, free it. */
-static void pool_flush(struct tmem_pool *pool, domid_t cli_id)
-{
- ASSERT(pool != NULL);
- if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) )
- {
- tmem_client_warn("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
- tmem_cli_id_str, cli_id, pool->pool_id, tmem_cli_id_str,pool->client->cli_id);
- return;
- }
- tmem_client_info("Destroying %s-%s tmem pool %s=%d pool_id=%d\n",
- is_persistent(pool) ? "persistent" : "ephemeral" ,
- is_shared(pool) ? "shared" : "private",
- tmem_cli_id_str, pool->client->cli_id, pool->pool_id);
- if ( pool->client->info.flags.u.migrating )
- {
- tmem_client_warn("can't destroy pool while %s is live-migrating\n",
- tmem_client_str);
- return;
- }
- pool_destroy_objs(pool, TMEM_CLI_ID_NULL);
- pool->client->pools[pool->pool_id] = NULL;
- pool_free(pool);
-}
-
-/************ CLIENT MANIPULATION OPERATIONS **************************/
-
-struct client *client_create(domid_t cli_id)
-{
- struct client *client = xzalloc(struct client);
- int i, shift;
- char name[5];
- struct domain *d;
-
- tmem_client_info("tmem: initializing tmem capability for %s=%d...",
- tmem_cli_id_str, cli_id);
- if ( client == NULL )
- {
- tmem_client_err("failed... out of memory\n");
- goto fail;
- }
-
- for (i = 0, shift = 12; i < 4; shift -=4, i++)
- name[i] = (((unsigned short)cli_id >> shift) & 0xf) + '0';
- name[4] = '\0';
- client->persistent_pool = xmem_pool_create(name, tmem_persistent_pool_page_get,
- tmem_persistent_pool_page_put, PAGE_SIZE, 0, PAGE_SIZE);
- if ( client->persistent_pool == NULL )
- {
- tmem_client_err("failed... can't alloc persistent pool\n");
- goto fail;
- }
-
- d = rcu_lock_domain_by_id(cli_id);
- if ( d == NULL ) {
- tmem_client_err("failed... can't set client\n");
- xmem_pool_destroy(client->persistent_pool);
- goto fail;
- }
- if ( !d->is_dying ) {
- d->tmem_client = client;
- client->domain = d;
- }
- rcu_unlock_domain(d);
-
- client->cli_id = cli_id;
- client->info.version = TMEM_SPEC_VERSION;
- client->info.maxpools = MAX_POOLS_PER_DOMAIN;
- client->info.flags.u.compress = tmem_compression_enabled();
- for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
- client->shared_auth_uuid[i][0] =
- client->shared_auth_uuid[i][1] = -1L;
- list_add_tail(&client->client_list, &tmem_global.client_list);
- INIT_LIST_HEAD(&client->ephemeral_page_list);
- INIT_LIST_HEAD(&client->persistent_invalidated_list);
- tmem_client_info("ok\n");
- return client;
-
- fail:
- xfree(client);
- return NULL;
-}
-
-static void client_free(struct client *client)
-{
- list_del(&client->client_list);
- xmem_pool_destroy(client->persistent_pool);
- xfree(client);
-}
-
-/* Flush all data from a client and, optionally, free it. */
-static void client_flush(struct client *client)
-{
- int i;
- struct tmem_pool *pool;
-
- for (i = 0; i < MAX_POOLS_PER_DOMAIN; i++)
- {
- if ( (pool = client->pools[i]) == NULL )
- continue;
- pool_flush(pool, client->cli_id);
- client->pools[i] = NULL;
- client->info.nr_pools--;
- }
- client_free(client);
-}
-
-static bool client_over_quota(const struct client *client)
-{
- int total = _atomic_read(tmem_global.client_weight_total);
-
- ASSERT(client != NULL);
- if ( (total == 0) || (client->info.weight == 0) ||
- (client->eph_count == 0) )
- return false;
-
- return (((tmem_global.eph_count * 100L) / client->eph_count) >
- ((total * 100L) / client->info.weight));
-}
-
-/************ MEMORY REVOCATION ROUTINES *******************************/
-
-static bool tmem_try_to_evict_pgp(struct tmem_page_descriptor *pgp,
- bool *hold_pool_rwlock)
-{
- struct tmem_object_root *obj = pgp->us.obj;
- struct tmem_pool *pool = obj->pool;
-
- if ( pool->is_dying )
- return false;
- if ( spin_trylock(&obj->obj_spinlock) )
- {
- if ( obj->pgp_count > 1 )
- return true;
- if ( write_trylock(&pool->pool_rwlock) )
- {
- *hold_pool_rwlock = 1;
- return true;
- }
- spin_unlock(&obj->obj_spinlock);
- }
- return false;
-}
-
-int tmem_evict(void)
-{
- struct client *client = current->domain->tmem_client;
- struct tmem_page_descriptor *pgp = NULL, *pgp_del;
- struct tmem_object_root *obj;
- struct tmem_pool *pool;
- int ret = 0;
- bool hold_pool_rwlock = false;
-
- tmem_stats.evict_attempts++;
- spin_lock(&eph_lists_spinlock);
- if ( (client != NULL) && client_over_quota(client) &&
- !list_empty(&client->ephemeral_page_list) )
- {
- list_for_each_entry(pgp, &client->ephemeral_page_list, us.client_eph_pages)
- if ( tmem_try_to_evict_pgp(pgp, &hold_pool_rwlock) )
- goto found;
- }
- else if ( !list_empty(&tmem_global.ephemeral_page_list) )
- {
- list_for_each_entry(pgp, &tmem_global.ephemeral_page_list, global_eph_pages)
- if ( tmem_try_to_evict_pgp(pgp, &hold_pool_rwlock) )
- {
- client = pgp->us.obj->pool->client;
- goto found;
- }
- }
- /* Global_ephemeral_page_list is empty, so we bail out. */
- spin_unlock(&eph_lists_spinlock);
- goto out;
-
-found:
- /* Delist. */
- list_del_init(&pgp->us.client_eph_pages);
- client->eph_count--;
- list_del_init(&pgp->global_eph_pages);
- tmem_global.eph_count--;
- ASSERT(tmem_global.eph_count >= 0);
- ASSERT(client->eph_count >= 0);
- spin_unlock(&eph_lists_spinlock);
-
- ASSERT(pgp != NULL);
- obj = pgp->us.obj;
- ASSERT(obj != NULL);
- ASSERT(obj->pool != NULL);
- pool = obj->pool;
-
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- pgp_del = pgp_delete_from_obj(obj, pgp->index);
- ASSERT(pgp_del == pgp);
-
- /* pgp already delist, so call pgp_free directly. */
- pgp_free(pgp);
- if ( obj->pgp_count == 0 )
- {
- ASSERT_WRITELOCK(&pool->pool_rwlock);
- obj_free(obj);
- }
- else
- spin_unlock(&obj->obj_spinlock);
- if ( hold_pool_rwlock )
- write_unlock(&pool->pool_rwlock);
- tmem_stats.evicted_pgs++;
- ret = 1;
-out:
- return ret;
-}
-
-
-/*
- * Under certain conditions (e.g. if each client is putting pages for exactly
- * one object), once locks are held, freeing up memory may
- * result in livelocks and very long "put" times, so we try to ensure there
- * is a minimum amount of memory (1MB) available BEFORE any data structure
- * locks are held.
- */
-static inline bool tmem_ensure_avail_pages(void)
-{
- int failed_evict = 10;
- unsigned long free_mem;
-
- do {
- free_mem = (tmem_page_list_pages + total_free_pages())
- >> (20 - PAGE_SHIFT);
- if ( free_mem )
- return true;
- if ( !tmem_evict() )
- failed_evict--;
- } while ( failed_evict > 0 );
-
- return false;
-}
-
-/************ TMEM CORE OPERATIONS ************************************/
-
-static int do_tmem_put_compress(struct tmem_page_descriptor *pgp, xen_pfn_t cmfn,
- tmem_cli_va_param_t clibuf)
-{
- void *dst, *p;
- size_t size;
- int ret = 0;
-
- ASSERT(pgp != NULL);
- ASSERT(pgp->us.obj != NULL);
- ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock);
- ASSERT(pgp->us.obj->pool != NULL);
- ASSERT(pgp->us.obj->pool->client != NULL);
-
- if ( pgp->pfp != NULL )
- pgp_free_data(pgp, pgp->us.obj->pool);
- ret = tmem_compress_from_client(cmfn, &dst, &size, clibuf);
- if ( ret <= 0 )
- goto out;
- else if ( (size == 0) || (size >= tmem_mempool_maxalloc) ) {
- ret = 0;
- goto out;
- } else if ( (p = tmem_malloc(size,pgp->us.obj->pool)) == NULL ) {
- ret = -ENOMEM;
- goto out;
- } else {
- memcpy(p,dst,size);
- pgp->cdata = p;
- }
- pgp->size = size;
- pgp->us.obj->pool->client->compressed_pages++;
- pgp->us.obj->pool->client->compressed_sum_size += size;
- ret = 1;
-
-out:
- return ret;
-}
-
-static int do_tmem_dup_put(struct tmem_page_descriptor *pgp, xen_pfn_t cmfn,
- tmem_cli_va_param_t clibuf)
-{
- struct tmem_pool *pool;
- struct tmem_object_root *obj;
- struct client *client;
- struct tmem_page_descriptor *pgpfound = NULL;
- int ret;
-
- ASSERT(pgp != NULL);
- ASSERT(pgp->pfp != NULL);
- ASSERT(pgp->size != -1);
- obj = pgp->us.obj;
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- ASSERT(obj != NULL);
- pool = obj->pool;
- ASSERT(pool != NULL);
- client = pool->client;
- if ( client->info.flags.u.migrating )
- goto failed_dup; /* No dups allowed when migrating. */
- /* Can we successfully manipulate pgp to change out the data? */
- if ( client->info.flags.u.compress && pgp->size != 0 )
- {
- ret = do_tmem_put_compress(pgp, cmfn, clibuf);
- if ( ret == 1 )
- goto done;
- else if ( ret == 0 )
- goto copy_uncompressed;
- else if ( ret == -ENOMEM )
- goto failed_dup;
- else if ( ret == -EFAULT )
- goto bad_copy;
- }
-
-copy_uncompressed:
- if ( pgp->pfp )
- pgp_free_data(pgp, pool);
- if ( ( pgp->pfp = tmem_alloc_page(pool) ) == NULL )
- goto failed_dup;
- pgp->size = 0;
- ret = tmem_copy_from_client(pgp->pfp, cmfn, tmem_cli_buf_null);
- if ( ret < 0 )
- goto bad_copy;
-
-done:
- /* Successfully replaced data, clean up and return success. */
- if ( is_shared(pool) )
- obj->last_client = client->cli_id;
- spin_unlock(&obj->obj_spinlock);
- pool->dup_puts_replaced++;
- pool->good_puts++;
- if ( is_persistent(pool) )
- client->succ_pers_puts++;
- return 1;
-
-bad_copy:
- tmem_stats.failed_copies++;
- goto cleanup;
-
-failed_dup:
- /*
- * Couldn't change out the data, flush the old data and return
- * -ENOSPC instead of -ENOMEM to differentiate failed _dup_ put.
- */
- ret = -ENOSPC;
-cleanup:
- pgpfound = pgp_delete_from_obj(obj, pgp->index);
- ASSERT(pgpfound == pgp);
- pgp_delist_free(pgpfound);
- if ( obj->pgp_count == 0 )
- {
- write_lock(&pool->pool_rwlock);
- obj_free(obj);
- write_unlock(&pool->pool_rwlock);
- } else {
- spin_unlock(&obj->obj_spinlock);
- }
- pool->dup_puts_flushed++;
- return ret;
-}
-
-static int do_tmem_put(struct tmem_pool *pool,
- struct xen_tmem_oid *oidp, uint32_t index,
- xen_pfn_t cmfn, tmem_cli_va_param_t clibuf)
-{
- struct tmem_object_root *obj = NULL;
- struct tmem_page_descriptor *pgp = NULL;
- struct client *client;
- int ret, newobj = 0;
-
- ASSERT(pool != NULL);
- client = pool->client;
- ASSERT(client != NULL);
- ret = client->info.flags.u.frozen ? -EFROZEN : -ENOMEM;
- pool->puts++;
-
-refind:
- /* Does page already exist (dup)? if so, handle specially. */
- if ( (obj = obj_find(pool, oidp)) != NULL )
- {
- if ((pgp = pgp_lookup_in_obj(obj, index)) != NULL)
- {
- return do_tmem_dup_put(pgp, cmfn, clibuf);
- }
- else
- {
- /* No puts allowed into a frozen pool (except dup puts). */
- if ( client->info.flags.u.frozen )
- goto unlock_obj;
- }
- }
- else
- {
- /* No puts allowed into a frozen pool (except dup puts). */
- if ( client->info.flags.u.frozen )
- return ret;
- if ( (obj = obj_alloc(pool, oidp)) == NULL )
- return -ENOMEM;
-
- write_lock(&pool->pool_rwlock);
- /*
- * Parallel callers may already allocated obj and inserted to obj_rb_root
- * before us.
- */
- if ( !obj_rb_insert(&pool->obj_rb_root[oid_hash(oidp)], obj) )
- {
- tmem_free(obj, pool);
- write_unlock(&pool->pool_rwlock);
- goto refind;
- }
-
- spin_lock(&obj->obj_spinlock);
- newobj = 1;
- write_unlock(&pool->pool_rwlock);
- }
-
- /* When arrive here, we have a spinlocked obj for use. */
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- if ( (pgp = pgp_alloc(obj)) == NULL )
- goto unlock_obj;
-
- ret = pgp_add_to_obj(obj, index, pgp);
- if ( ret == -ENOMEM )
- /* Warning: may result in partially built radix tree ("stump"). */
- goto free_pgp;
-
- pgp->index = index;
- pgp->size = 0;
-
- if ( client->info.flags.u.compress )
- {
- ASSERT(pgp->pfp == NULL);
- ret = do_tmem_put_compress(pgp, cmfn, clibuf);
- if ( ret == 1 )
- goto insert_page;
- if ( ret == -ENOMEM )
- {
- client->compress_nomem++;
- goto del_pgp_from_obj;
- }
- if ( ret == 0 )
- {
- client->compress_poor++;
- goto copy_uncompressed;
- }
- if ( ret == -EFAULT )
- goto bad_copy;
- }
-
-copy_uncompressed:
- if ( ( pgp->pfp = tmem_alloc_page(pool) ) == NULL )
- {
- ret = -ENOMEM;
- goto del_pgp_from_obj;
- }
- ret = tmem_copy_from_client(pgp->pfp, cmfn, clibuf);
- if ( ret < 0 )
- goto bad_copy;
-
-insert_page:
- if ( !is_persistent(pool) )
- {
- spin_lock(&eph_lists_spinlock);
- list_add_tail(&pgp->global_eph_pages, &tmem_global.ephemeral_page_list);
- if (++tmem_global.eph_count > tmem_stats.global_eph_count_max)
- tmem_stats.global_eph_count_max = tmem_global.eph_count;
- list_add_tail(&pgp->us.client_eph_pages,
- &client->ephemeral_page_list);
- if (++client->eph_count > client->eph_count_max)
- client->eph_count_max = client->eph_count;
- spin_unlock(&eph_lists_spinlock);
- }
- else
- { /* is_persistent. */
- spin_lock(&pers_lists_spinlock);
- list_add_tail(&pgp->us.pool_pers_pages,
- &pool->persistent_page_list);
- spin_unlock(&pers_lists_spinlock);
- }
-
- if ( is_shared(pool) )
- obj->last_client = client->cli_id;
-
- /* Free the obj spinlock. */
- spin_unlock(&obj->obj_spinlock);
- pool->good_puts++;
-
- if ( is_persistent(pool) )
- client->succ_pers_puts++;
- else
- tmem_stats.tot_good_eph_puts++;
- return 1;
-
-bad_copy:
- tmem_stats.failed_copies++;
-
-del_pgp_from_obj:
- ASSERT((obj != NULL) && (pgp != NULL) && (pgp->index != -1));
- pgp_delete_from_obj(obj, pgp->index);
-
-free_pgp:
- pgp_free(pgp);
-unlock_obj:
- if ( newobj )
- {
- write_lock(&pool->pool_rwlock);
- obj_free(obj);
- write_unlock(&pool->pool_rwlock);
- }
- else
- {
- spin_unlock(&obj->obj_spinlock);
- }
- pool->no_mem_puts++;
- return ret;
-}
-
-static int do_tmem_get(struct tmem_pool *pool,
- struct xen_tmem_oid *oidp, uint32_t index,
- xen_pfn_t cmfn, tmem_cli_va_param_t clibuf)
-{
- struct tmem_object_root *obj;
- struct tmem_page_descriptor *pgp;
- struct client *client = pool->client;
- int rc;
-
- if ( !_atomic_read(pool->pgp_count) )
- return -EEMPTY;
-
- pool->gets++;
- obj = obj_find(pool,oidp);
- if ( obj == NULL )
- return 0;
-
- ASSERT_SPINLOCK(&obj->obj_spinlock);
- if (is_shared(pool) || is_persistent(pool) )
- pgp = pgp_lookup_in_obj(obj, index);
- else
- pgp = pgp_delete_from_obj(obj, index);
- if ( pgp == NULL )
- {
- spin_unlock(&obj->obj_spinlock);
- return 0;
- }
- ASSERT(pgp->size != -1);
- if ( pgp->size != 0 )
- {
- rc = tmem_decompress_to_client(cmfn, pgp->cdata, pgp->size, clibuf);
- }
- else
- rc = tmem_copy_to_client(cmfn, pgp->pfp, clibuf);
- if ( rc <= 0 )
- goto bad_copy;
-
- if ( !is_persistent(pool) )
- {
- if ( !is_shared(pool) )
- {
- pgp_delist_free(pgp);
- if ( obj->pgp_count == 0 )
- {
- write_lock(&pool->pool_rwlock);
- obj_free(obj);
- obj = NULL;
- write_unlock(&pool->pool_rwlock);
- }
- } else {
- spin_lock(&eph_lists_spinlock);
- list_del(&pgp->global_eph_pages);
- list_add_tail(&pgp->global_eph_pages,&tmem_global.ephemeral_page_list);
- list_del(&pgp->us.client_eph_pages);
- list_add_tail(&pgp->us.client_eph_pages,&client->ephemeral_page_list);
- spin_unlock(&eph_lists_spinlock);
- obj->last_client = current->domain->domain_id;
- }
- }
- if ( obj != NULL )
- {
- spin_unlock(&obj->obj_spinlock);
- }
- pool->found_gets++;
- if ( is_persistent(pool) )
- client->succ_pers_gets++;
- else
- client->succ_eph_gets++;
- return 1;
-
-bad_copy:
- spin_unlock(&obj->obj_spinlock);
- tmem_stats.failed_copies++;
- return rc;
-}
-
-static int do_tmem_flush_page(struct tmem_pool *pool,
- struct xen_tmem_oid *oidp, uint32_t index)
-{
- struct tmem_object_root *obj;
- struct tmem_page_descriptor *pgp;
-
- pool->flushs++;
- obj = obj_find(pool,oidp);
- if ( obj == NULL )
- goto out;
- pgp = pgp_delete_from_obj(obj, index);
- if ( pgp == NULL )
- {
- spin_unlock(&obj->obj_spinlock);
- goto out;
- }
- pgp_delist_free(pgp);
- if ( obj->pgp_count == 0 )
- {
- write_lock(&pool->pool_rwlock);
- obj_free(obj);
- write_unlock(&pool->pool_rwlock);
- } else {
- spin_unlock(&obj->obj_spinlock);
- }
- pool->flushs_found++;
-
-out:
- if ( pool->client->info.flags.u.frozen )
- return -EFROZEN;
- else
- return 1;
-}
-
-static int do_tmem_flush_object(struct tmem_pool *pool,
- struct xen_tmem_oid *oidp)
-{
- struct tmem_object_root *obj;
-
- pool->flush_objs++;
- obj = obj_find(pool,oidp);
- if ( obj == NULL )
- goto out;
- write_lock(&pool->pool_rwlock);
- obj_destroy(obj);
- pool->flush_objs_found++;
- write_unlock(&pool->pool_rwlock);
-
-out:
- if ( pool->client->info.flags.u.frozen )
- return -EFROZEN;
- else
- return 1;
-}
-
-static int do_tmem_destroy_pool(uint32_t pool_id)
-{
- struct client *client = current->domain->tmem_client;
- struct tmem_pool *pool;
-
- if ( pool_id >= MAX_POOLS_PER_DOMAIN )
- return 0;
- if ( (pool = client->pools[pool_id]) == NULL )
- return 0;
- client->pools[pool_id] = NULL;
- pool_flush(pool, client->cli_id);
- client->info.nr_pools--;
- return 1;
-}
-
-int do_tmem_new_pool(domid_t this_cli_id,
- uint32_t d_poolid, uint32_t flags,
- uint64_t uuid_lo, uint64_t uuid_hi)
-{
- struct client *client;
- domid_t cli_id;
- int persistent = flags & TMEM_POOL_PERSIST;
- int shared = flags & TMEM_POOL_SHARED;
- int pagebits = (flags >> TMEM_POOL_PAGESIZE_SHIFT)
- & TMEM_POOL_PAGESIZE_MASK;
- int specversion = (flags >> TMEM_POOL_VERSION_SHIFT)
- & TMEM_POOL_VERSION_MASK;
- struct tmem_pool *pool, *shpool;
- int i, first_unused_s_poolid;
-
- if ( this_cli_id == TMEM_CLI_ID_NULL )
- cli_id = current->domain->domain_id;
- else
- cli_id = this_cli_id;
- tmem_client_info("tmem: allocating %s-%s tmem pool for %s=%d...",
- persistent ? "persistent" : "ephemeral" ,
- shared ? "shared" : "private", tmem_cli_id_str, cli_id);
- if ( specversion != TMEM_SPEC_VERSION )
- {
- tmem_client_err("failed... unsupported spec version\n");
- return -EPERM;
- }
- if ( shared && persistent )
- {
- tmem_client_err("failed... unable to create a shared-persistant pool\n");
- return -EPERM;
- }
- if ( pagebits != (PAGE_SHIFT - 12) )
- {
- tmem_client_err("failed... unsupported pagesize %d\n",
- 1 << (pagebits + 12));
- return -EPERM;
- }
- if ( flags & TMEM_POOL_PRECOMPRESSED )
- {
- tmem_client_err("failed... precompression flag set but unsupported\n");
- return -EPERM;
- }
- if ( flags & TMEM_POOL_RESERVED_BITS )
- {
- tmem_client_err("failed... reserved bits must be zero\n");
- return -EPERM;
- }
- if ( this_cli_id != TMEM_CLI_ID_NULL )
- {
- if ( (client = tmem_client_from_cli_id(this_cli_id)) == NULL
- || d_poolid >= MAX_POOLS_PER_DOMAIN
- || client->pools[d_poolid] != NULL )
- return -EPERM;
- }
- else
- {
- client = current->domain->tmem_client;
- ASSERT(client != NULL);
- for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ )
- if ( client->pools[d_poolid] == NULL )
- break;
- if ( d_poolid >= MAX_POOLS_PER_DOMAIN )
- {
- tmem_client_err("failed... no more pool slots available for this %s\n",
- tmem_client_str);
- return -EPERM;
- }
- }
-
- if ( (pool = pool_alloc()) == NULL )
- {
- tmem_client_err("failed... out of memory\n");
- return -ENOMEM;
- }
- client->pools[d_poolid] = pool;
- pool->client = client;
- pool->pool_id = d_poolid;
- pool->shared = shared;
- pool->persistent = persistent;
- pool->uuid[0] = uuid_lo;
- pool->uuid[1] = uuid_hi;
-
- /*
- * Already created a pool when arrived here, but need some special process
- * for shared pool.
- */
- if ( shared )
- {
- if ( uuid_lo == -1L && uuid_hi == -1L )
- {
- tmem_client_info("Invalid uuid, create non shared pool instead!\n");
- pool->shared = 0;
- goto out;
- }
- if ( !tmem_global.shared_auth )
- {
- for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
- if ( (client->shared_auth_uuid[i][0] == uuid_lo) &&
- (client->shared_auth_uuid[i][1] == uuid_hi) )
- break;
- if ( i == MAX_GLOBAL_SHARED_POOLS )
- {
- tmem_client_info("Shared auth failed, create non shared pool instead!\n");
- pool->shared = 0;
- goto out;
- }
- }
-
- /*
- * Authorize okay, match a global shared pool or use the newly allocated
- * one.
- */
- first_unused_s_poolid = MAX_GLOBAL_SHARED_POOLS;
- for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ )
- {
- if ( (shpool = tmem_global.shared_pools[i]) != NULL )
- {
- if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi )
- {
- /* Succ to match a global shared pool. */
- tmem_client_info("(matches shared pool uuid=%"PRIx64".%"PRIx64") pool_id=%d\n",
- uuid_hi, uuid_lo, d_poolid);
- client->pools[d_poolid] = shpool;
- if ( !shared_pool_join(shpool, client) )
- {
- pool_free(pool);
- goto out;
- }
- else
- goto fail;
- }
- }
- else
- {
- if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS )
- first_unused_s_poolid = i;
- }
- }
-
- /* Failed to find a global shared pool slot. */
- if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS )
- {
- tmem_client_warn("tmem: failed... no global shared pool slots available\n");
- goto fail;
- }
- /* Add pool to global shared pool. */
- else
- {
- INIT_LIST_HEAD(&pool->share_list);
- pool->shared_count = 0;
- if ( shared_pool_join(pool, client) )
- goto fail;
- tmem_global.shared_pools[first_unused_s_poolid] = pool;
- }
- }
-
-out:
- tmem_client_info("pool_id=%d\n", d_poolid);
- client->info.nr_pools++;
- return d_poolid;
-
-fail:
- pool_free(pool);
- return -EPERM;
-}
-
-/************ TMEM CONTROL OPERATIONS ************************************/
-
-int tmemc_shared_pool_auth(domid_t cli_id, uint64_t uuid_lo,
- uint64_t uuid_hi, bool auth)
-{
- struct client *client;
- int i, free = -1;
-
- if ( cli_id == TMEM_CLI_ID_NULL )
- {
- tmem_global.shared_auth = auth;
- return 1;
- }
- client = tmem_client_from_cli_id(cli_id);
- if ( client == NULL )
- return -EINVAL;
-
- for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++)
- {
- if ( auth == 0 )
- {
- if ( (client->shared_auth_uuid[i][0] == uuid_lo) &&
- (client->shared_auth_uuid[i][1] == uuid_hi) )
- {
- client->shared_auth_uuid[i][0] = -1L;
- client->shared_auth_uuid[i][1] = -1L;
- return 1;
- }
- }
- else
- {
- if ( (client->shared_auth_uuid[i][0] == -1L) &&
- (client->shared_auth_uuid[i][1] == -1L) )
- {
- free = i;
- break;
- }
- }
- }
- if ( auth == 0 )
- return 0;
- else if ( free == -1)
- return -ENOMEM;
- else
- {
- client->shared_auth_uuid[free][0] = uuid_lo;
- client->shared_auth_uuid[free][1] = uuid_hi;
- return 1;
- }
-}
-
-static int tmemc_save_subop(int cli_id, uint32_t pool_id,
- uint32_t subop, tmem_cli_va_param_t buf, uint32_t arg)
-{
- struct client *client = tmem_client_from_cli_id(cli_id);
- uint32_t p;
- struct tmem_page_descriptor *pgp, *pgp2;
- int rc = -ENOENT;
-
- switch(subop)
- {
- case XEN_SYSCTL_TMEM_OP_SAVE_BEGIN:
- if ( client == NULL )
- break;
- for (p = 0; p < MAX_POOLS_PER_DOMAIN; p++)
- if ( client->pools[p] != NULL )
- break;
-
- if ( p == MAX_POOLS_PER_DOMAIN )
- break;
-
- client->was_frozen = client->info.flags.u.frozen;
- client->info.flags.u.frozen = 1;
- if ( arg != 0 )
- client->info.flags.u.migrating = 1;
- rc = 0;
- break;
- case XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN:
- if ( client == NULL )
- rc = client_create(cli_id) ? 0 : -ENOMEM;
- else
- rc = -EEXIST;
- break;
- case XEN_SYSCTL_TMEM_OP_SAVE_END:
- if ( client == NULL )
- break;
- client->info.flags.u.migrating = 0;
- if ( !list_empty(&client->persistent_invalidated_list) )
- list_for_each_entry_safe(pgp,pgp2,
- &client->persistent_invalidated_list, client_inv_pages)
- __pgp_free(pgp, client->pools[pgp->pool_id]);
- client->info.flags.u.frozen = client->was_frozen;
- rc = 0;
- break;
- }
- return rc;
-}
-
-static int tmemc_save_get_next_page(int cli_id, uint32_t pool_id,
- tmem_cli_va_param_t buf, uint32_t bufsize)
-{
- struct client *client = tmem_client_from_cli_id(cli_id);
- struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN)
- ? NULL : client->pools[pool_id];
- struct tmem_page_descriptor *pgp;
- struct xen_tmem_oid *oid;
- int ret = 0;
- struct tmem_handle h;
-
- if ( pool == NULL || !is_persistent(pool) )
- return -1;
-
- if ( bufsize < PAGE_SIZE + sizeof(struct tmem_handle) )
- return -ENOMEM;
-
- spin_lock(&pers_lists_spinlock);
- if ( list_empty(&pool->persistent_page_list) )
- {
- ret = -1;
- goto out;
- }
- /* Note: pool->cur_pgp is the pgp last returned by get_next_page. */
- if ( pool->cur_pgp == NULL )
- {
- /* Process the first one. */
- pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next,
- struct tmem_page_descriptor,us.pool_pers_pages);
- } else if ( list_is_last(&pool->cur_pgp->us.pool_pers_pages,
- &pool->persistent_page_list) )
- {
- /* Already processed the last one in the list. */
- ret = -1;
- goto out;
- }
- pgp = list_entry((&pool->cur_pgp->us.pool_pers_pages)->next,
- struct tmem_page_descriptor,us.pool_pers_pages);
- pool->cur_pgp = pgp;
- oid = &pgp->us.obj->oid;
- h.pool_id = pool_id;
- BUILD_BUG_ON(sizeof(h.oid) != sizeof(*oid));
- memcpy(&(h.oid), oid, sizeof(h.oid));
- h.index = pgp->index;
- if ( copy_to_guest(guest_handle_cast(buf, void), &h, 1) )
- {
- ret = -EFAULT;
- goto out;
- }
- guest_handle_add_offset(buf, sizeof(h));
- ret = do_tmem_get(pool, oid, pgp->index, 0, buf);
-
-out:
- spin_unlock(&pers_lists_spinlock);
- return ret;
-}
-
-static int tmemc_save_get_next_inv(int cli_id, tmem_cli_va_param_t buf,
- uint32_t bufsize)
-{
- struct client *client = tmem_client_from_cli_id(cli_id);
- struct tmem_page_descriptor *pgp;
- struct tmem_handle h;
- int ret = 0;
-
- if ( client == NULL )
- return 0;
- if ( bufsize < sizeof(struct tmem_handle) )
- return 0;
- spin_lock(&pers_lists_spinlock);
- if ( list_empty(&client->persistent_invalidated_list) )
- goto out;
- if ( client->cur_pgp == NULL )
- {
- pgp = list_entry((&client->persistent_invalidated_list)->next,
- struct tmem_page_descriptor,client_inv_pages);
- client->cur_pgp = pgp;
- } else if ( list_is_last(&client->cur_pgp->client_inv_pages,
- &client->persistent_invalidated_list) )
- {
- client->cur_pgp = NULL;
- ret = 0;
- goto out;
- } else {
- pgp = list_entry((&client->cur_pgp->client_inv_pages)->next,
- struct tmem_page_descriptor,client_inv_pages);
- client->cur_pgp = pgp;
- }
- h.pool_id = pgp->pool_id;
- BUILD_BUG_ON(sizeof(h.oid) != sizeof(pgp->inv_oid));
- memcpy(&(h.oid), &(pgp->inv_oid), sizeof(h.oid));
- h.index = pgp->index;
- ret = 1;
- if ( copy_to_guest(guest_handle_cast(buf, void), &h, 1) )
- ret = -EFAULT;
-out:
- spin_unlock(&pers_lists_spinlock);
- return ret;
-}
-
-static int tmemc_restore_put_page(int cli_id, uint32_t pool_id,
- struct xen_tmem_oid *oidp,
- uint32_t index, tmem_cli_va_param_t buf,
- uint32_t bufsize)
-{
- struct client *client = tmem_client_from_cli_id(cli_id);
- struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN)
- ? NULL : client->pools[pool_id];
-
- if ( pool == NULL )
- return -1;
-    if ( bufsize != PAGE_SIZE )
-    {
-        tmem_client_err("tmem: %s: invalid parameter bufsize(%d) != (%ld)\n",
-                        __func__, bufsize, PAGE_SIZE);
-        return -EINVAL;
-    }
- return do_tmem_put(pool, oidp, index, 0, buf);
-}
-
-static int tmemc_restore_flush_page(int cli_id, uint32_t pool_id,
- struct xen_tmem_oid *oidp,
- uint32_t index)
-{
- struct client *client = tmem_client_from_cli_id(cli_id);
- struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN)
- ? NULL : client->pools[pool_id];
-
- if ( pool == NULL )
- return -1;
- return do_tmem_flush_page(pool,oidp,index);
-}
-
-int do_tmem_control(struct xen_sysctl_tmem_op *op)
-{
- int ret;
- uint32_t pool_id = op->pool_id;
- uint32_t cmd = op->cmd;
- struct xen_tmem_oid *oidp = &op->oid;
-
- ASSERT(rw_is_write_locked(&tmem_rwlock));
-
- switch (cmd)
- {
- case XEN_SYSCTL_TMEM_OP_SAVE_BEGIN:
- case XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN:
- case XEN_SYSCTL_TMEM_OP_SAVE_END:
- ret = tmemc_save_subop(op->cli_id, pool_id, cmd,
- guest_handle_cast(op->u.buf, char), op->arg);
- break;
- case XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE:
- ret = tmemc_save_get_next_page(op->cli_id, pool_id,
- guest_handle_cast(op->u.buf, char), op->len);
- break;
- case XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV:
- ret = tmemc_save_get_next_inv(op->cli_id,
- guest_handle_cast(op->u.buf, char), op->len);
- break;
- case XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE:
- ret = tmemc_restore_put_page(op->cli_id, pool_id, oidp, op->arg,
- guest_handle_cast(op->u.buf, char), op->len);
- break;
- case XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE:
- ret = tmemc_restore_flush_page(op->cli_id, pool_id, oidp, op->arg);
- break;
- default:
- ret = -1;
- }
-
- return ret;
-}
-
-/************ EXPORTed FUNCTIONS **************************************/
-
-long do_tmem_op(tmem_cli_op_t uops)
-{
- struct tmem_op op;
- struct client *client = current->domain->tmem_client;
- struct tmem_pool *pool = NULL;
- struct xen_tmem_oid *oidp;
- int rc = 0;
-
- if ( !tmem_initialized )
- return -ENODEV;
-
- if ( xsm_tmem_op(XSM_HOOK) )
- return -EPERM;
-
- tmem_stats.total_tmem_ops++;
-
- if ( client != NULL && client->domain->is_dying )
- {
- tmem_stats.errored_tmem_ops++;
- return -ENODEV;
- }
-
- if ( unlikely(tmem_get_tmemop_from_client(&op, uops) != 0) )
- {
- tmem_client_err("tmem: can't get tmem struct from %s\n", tmem_client_str);
- tmem_stats.errored_tmem_ops++;
- return -EFAULT;
- }
-
- /* Acquire write lock for all commands at first. */
- write_lock(&tmem_rwlock);
-
- switch ( op.cmd )
- {
- case TMEM_CONTROL:
- case TMEM_RESTORE_NEW:
- case TMEM_AUTH:
- rc = -EOPNOTSUPP;
- break;
-
- default:
- /*
- * For other commands, create per-client tmem structure dynamically on
- * first use by client.
- */
- if ( client == NULL )
- {
- if ( (client = client_create(current->domain->domain_id)) == NULL )
- {
- tmem_client_err("tmem: can't create tmem structure for %s\n",
- tmem_client_str);
- rc = -ENOMEM;
- goto out;
- }
- }
-
- if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL )
- {
- if ( op.cmd == TMEM_NEW_POOL )
- rc = do_tmem_new_pool(TMEM_CLI_ID_NULL, 0, op.u.creat.flags,
- op.u.creat.uuid[0], op.u.creat.uuid[1]);
- else
- rc = do_tmem_destroy_pool(op.pool_id);
- }
- else
- {
- if ( ((uint32_t)op.pool_id >= MAX_POOLS_PER_DOMAIN) ||
- ((pool = client->pools[op.pool_id]) == NULL) )
- {
- tmem_client_err("tmem: operation requested on uncreated pool\n");
- rc = -ENODEV;
- goto out;
- }
- /* Commands that only need read lock. */
- write_unlock(&tmem_rwlock);
- read_lock(&tmem_rwlock);
-
- oidp = &op.u.gen.oid;
- switch ( op.cmd )
- {
- case TMEM_NEW_POOL:
- case TMEM_DESTROY_POOL:
- BUG(); /* Done earlier. */
- break;
- case TMEM_PUT_PAGE:
- if (tmem_ensure_avail_pages())
- rc = do_tmem_put(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
- tmem_cli_buf_null);
- else
- rc = -ENOMEM;
- break;
- case TMEM_GET_PAGE:
- rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
- tmem_cli_buf_null);
- break;
- case TMEM_FLUSH_PAGE:
- rc = do_tmem_flush_page(pool, oidp, op.u.gen.index);
- break;
- case TMEM_FLUSH_OBJECT:
- rc = do_tmem_flush_object(pool, oidp);
- break;
- default:
- tmem_client_warn("tmem: op %d not implemented\n", op.cmd);
- rc = -ENOSYS;
- break;
- }
- read_unlock(&tmem_rwlock);
- if ( rc < 0 )
- tmem_stats.errored_tmem_ops++;
- return rc;
- }
- break;
-
- }
-out:
- write_unlock(&tmem_rwlock);
- if ( rc < 0 )
- tmem_stats.errored_tmem_ops++;
- return rc;
-}
-
-/* This should be called when the host is destroying a client (domain). */
-void tmem_destroy(void *v)
-{
- struct client *client = (struct client *)v;
-
- if ( client == NULL )
- return;
-
- if ( !client->domain->is_dying )
- {
- printk("tmem: tmem_destroy can only destroy dying client\n");
- return;
- }
-
- write_lock(&tmem_rwlock);
-
- printk("tmem: flushing tmem pools for %s=%d\n",
- tmem_cli_id_str, client->cli_id);
- client_flush(client);
-
- write_unlock(&tmem_rwlock);
-}
-
-#define MAX_EVICTS 10 /* Could be a tunable, or settable via XEN_SYSCTL_TMEM_OP_*. */
-void *tmem_relinquish_pages(unsigned int order, unsigned int memflags)
-{
- struct page_info *pfp;
- unsigned long evicts_per_relinq = 0;
-    int max_evictions = MAX_EVICTS;
-
-    if ( !tmem_enabled() || !tmem_freeable_pages() )
- return NULL;
-
- tmem_stats.relinq_attempts++;
- if ( order > 0 )
- {
-#ifndef NDEBUG
- printk("tmem_relinquish_page: failing order=%d\n", order);
-#endif
- return NULL;
- }
-
- while ( (pfp = tmem_page_list_get()) == NULL )
- {
-        if ( (max_evictions-- <= 0) || !tmem_evict() )
- break;
- evicts_per_relinq++;
- }
- if ( evicts_per_relinq > tmem_stats.max_evicts_per_relinq )
- tmem_stats.max_evicts_per_relinq = evicts_per_relinq;
- if ( pfp != NULL )
- {
- if ( !(memflags & MEMF_tmem) )
- scrub_one_page(pfp);
- tmem_stats.relinq_pgs++;
- }
-
- return pfp;
-}
-
-unsigned long tmem_freeable_pages(void)
-{
- if ( !tmem_enabled() )
- return 0;
-
- return tmem_page_list_pages + _atomic_read(freeable_page_count);
-}
-
-/* Called at hypervisor startup. */
-static int __init init_tmem(void)
-{
- if ( !tmem_enabled() )
- return 0;
-
- if ( !tmem_mempool_init() )
- return 0;
-
- if ( tmem_init() )
- {
- printk("tmem: initialized comp=%d\n", tmem_compression_enabled());
- tmem_initialized = 1;
- }
- else
- printk("tmem: initialization FAILED\n");
-
- return 0;
-}
-__initcall(init_tmem);
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
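The do_tmem_op() path above takes the write lock for every command and only
downgrades to the read lock for the data-path operations (put/get/flush) once
the pool pointer has been resolved. A minimal sketch of that pattern follows,
with hypothetical my_* names; note the unlock/relock pair is not an atomic
downgrade, so whatever was resolved under the write lock must remain valid
under the read lock (as the pool pointer does here):

    static DEFINE_RWLOCK(my_lock);

    static long my_control(void)   { return 0; }  /* stand-in control op */
    static long my_data_path(void) { return 0; }  /* stand-in data-path op */

    static long my_op(bool is_control)
    {
        long rc;

        write_lock(&my_lock);
        if ( is_control )
        {
            rc = my_control();           /* needs exclusive access */
            write_unlock(&my_lock);
            return rc;
        }
        /* Resolve shared state under the write lock, then downgrade. */
        write_unlock(&my_lock);
        read_lock(&my_lock);
        rc = my_data_path();             /* concurrent readers are fine */
        read_unlock(&my_lock);
        return rc;
    }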
deleted file mode 100644
@@ -1,560 +0,0 @@
-/*
- * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
- *
- */
-
-#include <xen/init.h>
-#include <xen/list.h>
-#include <xen/radix-tree.h>
-#include <xen/rbtree.h>
-#include <xen/rwlock.h>
-#include <xen/tmem_control.h>
-#include <xen/tmem.h>
-#include <xen/tmem_xen.h>
-#include <public/sysctl.h>
-
-/************ TMEM CONTROL OPERATIONS ************************************/
-
-/* Freeze/thaw all pools belonging to client cli_id (all domains if -1). */
-static int tmemc_freeze_pools(domid_t cli_id, int arg)
-{
- struct client *client;
- bool freeze = arg == XEN_SYSCTL_TMEM_OP_FREEZE;
- bool destroy = arg == XEN_SYSCTL_TMEM_OP_DESTROY;
- char *s;
-
- s = destroy ? "destroyed" : ( freeze ? "frozen" : "thawed" );
- if ( cli_id == TMEM_CLI_ID_NULL )
- {
- list_for_each_entry(client,&tmem_global.client_list,client_list)
- client->info.flags.u.frozen = freeze;
- tmem_client_info("tmem: all pools %s for all %ss\n", s, tmem_client_str);
- }
- else
- {
- if ( (client = tmem_client_from_cli_id(cli_id)) == NULL)
- return -1;
- client->info.flags.u.frozen = freeze;
- tmem_client_info("tmem: all pools %s for %s=%d\n",
- s, tmem_cli_id_str, cli_id);
- }
- return 0;
-}
-
-static unsigned long tmem_flush_npages(unsigned long n)
-{
- unsigned long avail_pages = 0;
-
- while ( (avail_pages = tmem_page_list_pages) < n )
- {
- if ( !tmem_evict() )
- break;
- }
- if ( avail_pages )
- {
- spin_lock(&tmem_page_list_lock);
- while ( !page_list_empty(&tmem_page_list) )
- {
- struct page_info *pg = page_list_remove_head(&tmem_page_list);
- scrub_one_page(pg);
- tmem_page_list_pages--;
- free_domheap_page(pg);
- }
- ASSERT(tmem_page_list_pages == 0);
- INIT_PAGE_LIST_HEAD(&tmem_page_list);
- spin_unlock(&tmem_page_list_lock);
- }
- return avail_pages;
-}
-
-static int tmemc_flush_mem(domid_t cli_id, uint32_t kb)
-{
- uint32_t npages, flushed_pages, flushed_kb;
-
- if ( cli_id != TMEM_CLI_ID_NULL )
- {
- tmem_client_warn("tmem: %s-specific flush not supported yet, use --all\n",
- tmem_client_str);
- return -1;
- }
- /* Convert kb to pages, rounding up if necessary. */
- npages = (kb + ((1 << (PAGE_SHIFT-10))-1)) >> (PAGE_SHIFT-10);
- flushed_pages = tmem_flush_npages(npages);
- flushed_kb = flushed_pages << (PAGE_SHIFT-10);
- return flushed_kb;
-}
-
-/*
- * These tmemc_list* routines output lots of stats in a format that is
- * intended to be program-parseable, not human-readable. Further, by
- * tying each group of stats to a line format indicator (e.g. G= for
- * global stats) and each individual stat to a two-letter specifier
- * (e.g. Ec:nnnnn in the G= line says there are nnnnn pages in the
- * global ephemeral pool), it should allow the stats reported to be
- * forward and backwards compatible as tmem evolves.
- */
-#define BSIZE 1024
-
-static int tmemc_list_client(struct client *c, tmem_cli_va_param_t buf,
- int off, uint32_t len, bool use_long)
-{
- char info[BSIZE];
- int i, n = 0, sum = 0;
- struct tmem_pool *p;
- bool s;
-
- n = scnprintf(info,BSIZE,"C=CI:%d,ww:%d,co:%d,fr:%d,"
- "Tc:%"PRIu64",Ge:%ld,Pp:%ld,Gp:%ld%c",
- c->cli_id, c->info.weight, c->info.flags.u.compress, c->info.flags.u.frozen,
- c->total_cycles, c->succ_eph_gets, c->succ_pers_puts, c->succ_pers_gets,
- use_long ? ',' : '\n');
- if (use_long)
- n += scnprintf(info+n,BSIZE-n,
- "Ec:%ld,Em:%ld,cp:%ld,cb:%"PRId64",cn:%ld,cm:%ld\n",
- c->eph_count, c->eph_count_max,
- c->compressed_pages, c->compressed_sum_size,
- c->compress_poor, c->compress_nomem);
- if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
- sum += n;
- for ( i = 0; i < MAX_POOLS_PER_DOMAIN; i++ )
- {
- if ( (p = c->pools[i]) == NULL )
- continue;
- s = is_shared(p);
- n = scnprintf(info,BSIZE,"P=CI:%d,PI:%d,"
- "PT:%c%c,U0:%"PRIx64",U1:%"PRIx64"%c",
- c->cli_id, p->pool_id,
- is_persistent(p) ? 'P' : 'E', s ? 'S' : 'P',
- (uint64_t)(s ? p->uuid[0] : 0),
- (uint64_t)(s ? p->uuid[1] : 0LL),
- use_long ? ',' : '\n');
- if (use_long)
- n += scnprintf(info+n,BSIZE-n,
- "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu,"
- "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu,"
- "fs:%lu,ft:%lu,os:%lu,ot:%lu\n",
- _atomic_read(p->pgp_count), p->pgp_count_max,
- p->obj_count, p->obj_count_max,
- p->objnode_count, p->objnode_count_max,
- p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced,
- p->no_mem_puts,
- p->found_gets, p->gets,
- p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs);
- if ( sum + n >= len )
- return sum;
- if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
- sum += n;
- }
- return sum;
-}
-
-static int tmemc_list_shared(tmem_cli_va_param_t buf, int off, uint32_t len,
- bool use_long)
-{
- char info[BSIZE];
- int i, n = 0, sum = 0;
- struct tmem_pool *p;
- struct share_list *sl;
-
- for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ )
- {
- if ( (p = tmem_global.shared_pools[i]) == NULL )
- continue;
- n = scnprintf(info+n,BSIZE-n,"S=SI:%d,PT:%c%c,U0:%"PRIx64",U1:%"PRIx64,
- i, is_persistent(p) ? 'P' : 'E',
- is_shared(p) ? 'S' : 'P',
- p->uuid[0], p->uuid[1]);
- list_for_each_entry(sl,&p->share_list, share_list)
- n += scnprintf(info+n,BSIZE-n,",SC:%d",sl->client->cli_id);
- n += scnprintf(info+n,BSIZE-n,"%c", use_long ? ',' : '\n');
- if (use_long)
- n += scnprintf(info+n,BSIZE-n,
- "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu,"
- "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu,"
- "fs:%lu,ft:%lu,os:%lu,ot:%lu\n",
- _atomic_read(p->pgp_count), p->pgp_count_max,
- p->obj_count, p->obj_count_max,
- p->objnode_count, p->objnode_count_max,
- p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced,
- p->no_mem_puts,
- p->found_gets, p->gets,
- p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs);
- if ( sum + n >= len )
- return sum;
- if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
- sum += n;
- }
- return sum;
-}
-
-static int tmemc_list_global_perf(tmem_cli_va_param_t buf, int off,
- uint32_t len, bool use_long)
-{
- char info[BSIZE];
- int n = 0, sum = 0;
-
- n = scnprintf(info+n,BSIZE-n,"T=");
-    n--; /* Overwrite the '='; no perf stats are emitted here any more. */
- n += scnprintf(info+n,BSIZE-n,"\n");
- if ( sum + n >= len )
- return sum;
- if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
- sum += n;
- return sum;
-}
-
-static int tmemc_list_global(tmem_cli_va_param_t buf, int off, uint32_t len,
- bool use_long)
-{
- char info[BSIZE];
- int n = 0, sum = off;
-
- n += scnprintf(info,BSIZE,"G="
- "Tt:%lu,Te:%lu,Cf:%lu,Af:%lu,Pf:%lu,Ta:%lu,"
- "Lm:%lu,Et:%lu,Ea:%lu,Rt:%lu,Ra:%lu,Rx:%lu,Fp:%lu%c",
- tmem_stats.total_tmem_ops, tmem_stats.errored_tmem_ops, tmem_stats.failed_copies,
- tmem_stats.alloc_failed, tmem_stats.alloc_page_failed, tmem_page_list_pages,
- tmem_stats.low_on_memory, tmem_stats.evicted_pgs,
- tmem_stats.evict_attempts, tmem_stats.relinq_pgs, tmem_stats.relinq_attempts,
- tmem_stats.max_evicts_per_relinq,
- tmem_stats.total_flush_pool, use_long ? ',' : '\n');
- if (use_long)
- n += scnprintf(info+n,BSIZE-n,
- "Ec:%ld,Em:%ld,Oc:%d,Om:%d,Nc:%d,Nm:%d,Pc:%d,Pm:%d,"
- "Fc:%d,Fm:%d,Sc:%d,Sm:%d,Ep:%lu,Gd:%lu,Zt:%lu,Gz:%lu\n",
- tmem_global.eph_count, tmem_stats.global_eph_count_max,
- _atomic_read(tmem_stats.global_obj_count), tmem_stats.global_obj_count_max,
- _atomic_read(tmem_stats.global_rtree_node_count), tmem_stats.global_rtree_node_count_max,
- _atomic_read(tmem_stats.global_pgp_count), tmem_stats.global_pgp_count_max,
- _atomic_read(tmem_stats.global_page_count), tmem_stats.global_page_count_max,
- _atomic_read(tmem_stats.global_pcd_count), tmem_stats.global_pcd_count_max,
- tmem_stats.tot_good_eph_puts,tmem_stats.deduped_puts,tmem_stats.pcd_tot_tze_size,
- tmem_stats.pcd_tot_csize);
- if ( sum + n >= len )
- return sum;
- if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
- sum += n;
- return sum;
-}
-
-static int tmemc_list(domid_t cli_id, tmem_cli_va_param_t buf, uint32_t len,
- bool use_long)
-{
- struct client *client;
- int off = 0;
-
- if ( cli_id == TMEM_CLI_ID_NULL ) {
- off = tmemc_list_global(buf,0,len,use_long);
- off += tmemc_list_shared(buf,off,len-off,use_long);
- list_for_each_entry(client,&tmem_global.client_list,client_list)
- off += tmemc_list_client(client, buf, off, len-off, use_long);
- off += tmemc_list_global_perf(buf,off,len-off,use_long);
- }
- else if ( (client = tmem_client_from_cli_id(cli_id)) == NULL)
- return -1;
- else
- off = tmemc_list_client(client, buf, 0, len, use_long);
-
- return 0;
-}
-
-static int __tmemc_set_client_info(struct client *client,
- XEN_GUEST_HANDLE(xen_tmem_client_t) buf)
-{
- domid_t cli_id;
- uint32_t old_weight;
- xen_tmem_client_t info = { };
-
- ASSERT(client);
-
- if ( copy_from_guest(&info, buf, 1) )
- return -EFAULT;
-
- if ( info.version != TMEM_SPEC_VERSION )
- return -EOPNOTSUPP;
-
- if ( info.maxpools > MAX_POOLS_PER_DOMAIN )
- return -ERANGE;
-
- /* Ignore info.nr_pools. */
- cli_id = client->cli_id;
-
- if ( info.weight != client->info.weight )
- {
- old_weight = client->info.weight;
- client->info.weight = info.weight;
- tmem_client_info("tmem: weight set to %d for %s=%d\n",
- info.weight, tmem_cli_id_str, cli_id);
- atomic_sub(old_weight,&tmem_global.client_weight_total);
- atomic_add(client->info.weight,&tmem_global.client_weight_total);
- }
-
- if ( info.flags.u.compress != client->info.flags.u.compress )
- {
- client->info.flags.u.compress = info.flags.u.compress;
- tmem_client_info("tmem: compression %s for %s=%d\n",
- info.flags.u.compress ? "enabled" : "disabled",
- tmem_cli_id_str,cli_id);
- }
- return 0;
-}
-
-static int tmemc_set_client_info(domid_t cli_id,
- XEN_GUEST_HANDLE(xen_tmem_client_t) info)
-{
- struct client *client;
- int ret = -ENOENT;
-
- if ( cli_id == TMEM_CLI_ID_NULL )
- {
- list_for_each_entry(client,&tmem_global.client_list,client_list)
- {
- ret = __tmemc_set_client_info(client, info);
- if (ret)
- break;
- }
- }
- else
- {
- client = tmem_client_from_cli_id(cli_id);
- if ( client )
- ret = __tmemc_set_client_info(client, info);
- }
- return ret;
-}
-
-static int tmemc_get_client_info(int cli_id,
- XEN_GUEST_HANDLE(xen_tmem_client_t) info)
-{
- struct client *client = tmem_client_from_cli_id(cli_id);
-
- if ( client )
- {
- if ( copy_to_guest(info, &client->info, 1) )
- return -EFAULT;
- }
- else
- {
- static const xen_tmem_client_t generic = {
- .version = TMEM_SPEC_VERSION,
- .maxpools = MAX_POOLS_PER_DOMAIN
- };
-
- if ( copy_to_guest(info, &generic, 1) )
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int tmemc_get_pool(int cli_id,
- XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
- uint32_t len)
-{
- struct client *client = tmem_client_from_cli_id(cli_id);
- unsigned int i, idx;
- int rc = 0;
- unsigned int nr = len / sizeof(xen_tmem_pool_info_t);
-
- if ( len % sizeof(xen_tmem_pool_info_t) )
- return -EINVAL;
-
- if ( nr > MAX_POOLS_PER_DOMAIN )
- return -E2BIG;
-
- if ( !guest_handle_okay(pools, nr) )
- return -EINVAL;
-
- if ( !client )
- return -EINVAL;
-
- for ( idx = 0, i = 0; i < MAX_POOLS_PER_DOMAIN; i++ )
- {
- struct tmem_pool *pool = client->pools[i];
- xen_tmem_pool_info_t out;
-
- if ( pool == NULL )
- continue;
-
- out.flags.raw = (pool->persistent ? TMEM_POOL_PERSIST : 0) |
- (pool->shared ? TMEM_POOL_SHARED : 0) |
- (POOL_PAGESHIFT << TMEM_POOL_PAGESIZE_SHIFT) |
- (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT);
- out.n_pages = _atomic_read(pool->pgp_count);
- out.uuid[0] = pool->uuid[0];
- out.uuid[1] = pool->uuid[1];
- out.id = i;
-
- /* N.B. 'idx' != 'i'. */
- if ( __copy_to_guest_offset(pools, idx, &out, 1) )
- {
- rc = -EFAULT;
- break;
- }
- idx++;
- /* Don't try to put more than what was requested. */
- if ( idx >= nr )
- break;
- }
-
-    /* Return how many entries we processed, or the error. */
- return rc ? : idx;
-}
-
-static int tmemc_set_pools(int cli_id,
- XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
- uint32_t len)
-{
- unsigned int i;
- int rc = 0;
- unsigned int nr = len / sizeof(xen_tmem_pool_info_t);
- struct client *client = tmem_client_from_cli_id(cli_id);
-
- if ( len % sizeof(xen_tmem_pool_info_t) )
- return -EINVAL;
-
- if ( nr > MAX_POOLS_PER_DOMAIN )
- return -E2BIG;
-
- if ( !guest_handle_okay(pools, nr) )
- return -EINVAL;
-
- if ( !client )
- {
- client = client_create(cli_id);
- if ( !client )
- return -ENOMEM;
- }
- for ( i = 0; i < nr; i++ )
- {
- xen_tmem_pool_info_t pool;
-
- if ( __copy_from_guest_offset(&pool, pools, i, 1 ) )
- return -EFAULT;
-
- if ( pool.n_pages )
- return -EINVAL;
-
- rc = do_tmem_new_pool(cli_id, pool.id, pool.flags.raw,
- pool.uuid[0], pool.uuid[1]);
- if ( rc < 0 )
- break;
-
- pool.id = rc;
- if ( __copy_to_guest_offset(pools, i, &pool, 1) )
- return -EFAULT;
- }
-
-    /* Return how many entries we processed, or the error. */
- return rc ? : i;
-}
-
-static int tmemc_auth_pools(int cli_id,
- XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
- uint32_t len)
-{
- unsigned int i;
- int rc = 0;
- unsigned int nr = len / sizeof(xen_tmem_pool_info_t);
- struct client *client = tmem_client_from_cli_id(cli_id);
-
- if ( len % sizeof(xen_tmem_pool_info_t) )
- return -EINVAL;
-
- if ( nr > MAX_POOLS_PER_DOMAIN )
- return -E2BIG;
-
- if ( !guest_handle_okay(pools, nr) )
- return -EINVAL;
-
- if ( !client )
- {
- client = client_create(cli_id);
- if ( !client )
- return -ENOMEM;
- }
-
- for ( i = 0; i < nr; i++ )
- {
- xen_tmem_pool_info_t pool;
-
- if ( __copy_from_guest_offset(&pool, pools, i, 1 ) )
- return -EFAULT;
-
- if ( pool.n_pages )
- return -EINVAL;
-
- rc = tmemc_shared_pool_auth(cli_id, pool.uuid[0], pool.uuid[1],
- pool.flags.u.auth);
-
- if ( rc < 0 )
- break;
-
- }
-
-    /* Return how many entries we processed, or the error. */
- return rc ? : i;
-}
-
-int tmem_control(struct xen_sysctl_tmem_op *op)
-{
- int ret;
- uint32_t cmd = op->cmd;
-
- if ( op->pad != 0 )
- return -EINVAL;
-
- write_lock(&tmem_rwlock);
-
- switch (cmd)
- {
- case XEN_SYSCTL_TMEM_OP_THAW:
- case XEN_SYSCTL_TMEM_OP_FREEZE:
- case XEN_SYSCTL_TMEM_OP_DESTROY:
- ret = tmemc_freeze_pools(op->cli_id, cmd);
- break;
- case XEN_SYSCTL_TMEM_OP_FLUSH:
- ret = tmemc_flush_mem(op->cli_id, op->arg);
- break;
- case XEN_SYSCTL_TMEM_OP_LIST:
- ret = tmemc_list(op->cli_id,
- guest_handle_cast(op->u.buf, char), op->len, op->arg);
- break;
- case XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO:
- ret = tmemc_set_client_info(op->cli_id, op->u.client);
- break;
- case XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB:
- ret = tmem_freeable_pages() >> (20 - PAGE_SHIFT);
- break;
- case XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO:
- ret = tmemc_get_client_info(op->cli_id, op->u.client);
- break;
- case XEN_SYSCTL_TMEM_OP_GET_POOLS:
- ret = tmemc_get_pool(op->cli_id, op->u.pool, op->len);
- break;
- case XEN_SYSCTL_TMEM_OP_SET_POOLS: /* TMEM_RESTORE_NEW */
- ret = tmemc_set_pools(op->cli_id, op->u.pool, op->len);
- break;
- case XEN_SYSCTL_TMEM_OP_SET_AUTH: /* TMEM_AUTH */
- ret = tmemc_auth_pools(op->cli_id, op->u.pool, op->len);
- break;
- default:
- ret = do_tmem_control(op);
- break;
- }
-
- write_unlock(&tmem_rwlock);
-
- return ret;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
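The kilobyte/page arithmetic in tmemc_flush_mem() above rounds the requested
size up to whole pages, then reports the flushed amount back in KiB. A
self-contained restatement of that math, assuming 4 KiB pages (PAGE_SHIFT of
12, so the KiB-to-pages shift is PAGE_SHIFT - 10 == 2):

    #define MY_PAGE_SHIFT 12              /* assumption: 4 KiB pages */
    #define MY_KB_SHIFT   (MY_PAGE_SHIFT - 10)

    static inline unsigned int kb_to_pages_roundup(unsigned int kb)
    {
        return (kb + (1u << MY_KB_SHIFT) - 1) >> MY_KB_SHIFT;
    }

    static inline unsigned int pages_to_kb(unsigned int pages)
    {
        return pages << MY_KB_SHIFT;
    }

    /* Asking to flush 5 KiB: kb_to_pages_roundup(5) == 2 pages, and
     * pages_to_kb(2) == 8, so the caller is told 8 KiB were flushed. */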
deleted file mode 100644
@@ -1,277 +0,0 @@
-/******************************************************************************
- * tmem_xen.c
- *
- * Xen-specific Transcendent memory
- *
- * Copyright (c) 2009, Dan Magenheimer, Oracle Corp.
- */
-
-#include <xen/tmem.h>
-#include <xen/tmem_xen.h>
-#include <xen/lzo.h> /* compression code */
-#include <xen/paging.h>
-#include <xen/domain_page.h>
-#include <xen/cpu.h>
-#include <xen/init.h>
-
-bool __read_mostly opt_tmem;
-boolean_param("tmem", opt_tmem);
-
-bool __read_mostly opt_tmem_compress;
-boolean_param("tmem_compress", opt_tmem_compress);
-
-atomic_t freeable_page_count = ATOMIC_INIT(0);
-
-/* Per-CPU compression buffers; these could be allocated dynamically,
- * and only when opt_tmem_compress is set. */
-#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS
-#define LZO_DSTMEM_PAGES 2
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, workmem);
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, dstmem);
-static DEFINE_PER_CPU_READ_MOSTLY(void *, scratch_page);
-
-#if defined(CONFIG_ARM)
-static inline void *cli_get_page(xen_pfn_t cmfn, mfn_t *pcli_mfn,
- struct page_info **pcli_pfp, bool cli_write)
-{
- ASSERT_UNREACHABLE();
- return NULL;
-}
-
-static inline void cli_put_page(void *cli_va, struct page_info *cli_pfp,
- mfn_t cli_mfn, bool mark_dirty)
-{
- ASSERT_UNREACHABLE();
-}
-#else
-#include <asm/p2m.h>
-
-static inline void *cli_get_page(xen_pfn_t cmfn, mfn_t *pcli_mfn,
- struct page_info **pcli_pfp, bool cli_write)
-{
- p2m_type_t t;
- struct page_info *page;
-
- page = get_page_from_gfn(current->domain, cmfn, &t, P2M_ALLOC);
- if ( !page || t != p2m_ram_rw )
- {
- if ( page )
- put_page(page);
- return NULL;
- }
-
- if ( cli_write && !get_page_type(page, PGT_writable_page) )
- {
- put_page(page);
- return NULL;
- }
-
- *pcli_mfn = page_to_mfn(page);
- *pcli_pfp = page;
-
- return map_domain_page(*pcli_mfn);
-}
-
-static inline void cli_put_page(void *cli_va, struct page_info *cli_pfp,
- mfn_t cli_mfn, bool mark_dirty)
-{
- if ( mark_dirty )
- {
- put_page_and_type(cli_pfp);
- paging_mark_dirty(current->domain, cli_mfn);
- }
- else
- put_page(cli_pfp);
- unmap_domain_page(cli_va);
-}
-#endif
-
-int tmem_copy_from_client(struct page_info *pfp,
- xen_pfn_t cmfn, tmem_cli_va_param_t clibuf)
-{
- mfn_t tmem_mfn, cli_mfn = INVALID_MFN;
- char *tmem_va, *cli_va = NULL;
- struct page_info *cli_pfp = NULL;
- int rc = 1;
-
- ASSERT(pfp != NULL);
- tmem_mfn = page_to_mfn(pfp);
- tmem_va = map_domain_page(tmem_mfn);
- if ( guest_handle_is_null(clibuf) )
- {
- cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
- if ( cli_va == NULL )
- {
- unmap_domain_page(tmem_va);
- return -EFAULT;
- }
- }
- smp_mb();
- if ( cli_va )
- {
- memcpy(tmem_va, cli_va, PAGE_SIZE);
- cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
- }
- else
- rc = -EINVAL;
- unmap_domain_page(tmem_va);
- return rc;
-}
-
-int tmem_compress_from_client(xen_pfn_t cmfn,
- void **out_va, size_t *out_len, tmem_cli_va_param_t clibuf)
-{
- int ret = 0;
- unsigned char *dmem = this_cpu(dstmem);
- unsigned char *wmem = this_cpu(workmem);
- char *scratch = this_cpu(scratch_page);
- struct page_info *cli_pfp = NULL;
- mfn_t cli_mfn = INVALID_MFN;
- void *cli_va = NULL;
-
- if ( dmem == NULL || wmem == NULL )
- return 0; /* no buffer, so can't compress */
- if ( guest_handle_is_null(clibuf) )
- {
- cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
- if ( cli_va == NULL )
- return -EFAULT;
- }
- else if ( !scratch )
- return 0;
- else if ( copy_from_guest(scratch, clibuf, PAGE_SIZE) )
- return -EFAULT;
- smp_mb();
- ret = lzo1x_1_compress(cli_va ?: scratch, PAGE_SIZE, dmem, out_len, wmem);
- ASSERT(ret == LZO_E_OK);
- *out_va = dmem;
- if ( cli_va )
- cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
- return 1;
-}
-
-int tmem_copy_to_client(xen_pfn_t cmfn, struct page_info *pfp,
- tmem_cli_va_param_t clibuf)
-{
- mfn_t tmem_mfn, cli_mfn = INVALID_MFN;
- char *tmem_va, *cli_va = NULL;
- struct page_info *cli_pfp = NULL;
- int rc = 1;
-
- ASSERT(pfp != NULL);
- if ( guest_handle_is_null(clibuf) )
- {
- cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
- if ( cli_va == NULL )
- return -EFAULT;
- }
- tmem_mfn = page_to_mfn(pfp);
- tmem_va = map_domain_page(tmem_mfn);
-
- if ( cli_va )
- {
- memcpy(cli_va, tmem_va, PAGE_SIZE);
- cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
- }
- else
- rc = -EINVAL;
- unmap_domain_page(tmem_va);
- smp_mb();
- return rc;
-}
-
-int tmem_decompress_to_client(xen_pfn_t cmfn, void *tmem_va,
- size_t size, tmem_cli_va_param_t clibuf)
-{
- mfn_t cli_mfn = INVALID_MFN;
- struct page_info *cli_pfp = NULL;
- void *cli_va = NULL;
- char *scratch = this_cpu(scratch_page);
- size_t out_len = PAGE_SIZE;
- int ret;
-
- if ( guest_handle_is_null(clibuf) )
- {
- cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
- if ( cli_va == NULL )
- return -EFAULT;
- }
- else if ( !scratch )
- return 0;
- ret = lzo1x_decompress_safe(tmem_va, size, cli_va ?: scratch, &out_len);
- ASSERT(ret == LZO_E_OK);
- ASSERT(out_len == PAGE_SIZE);
- if ( cli_va )
- cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
- else if ( copy_to_guest(clibuf, scratch, PAGE_SIZE) )
- return -EFAULT;
- smp_mb();
- return 1;
-}
-
-/****************** XEN-SPECIFIC HOST INITIALIZATION ********************/
-static int dstmem_order, workmem_order;
-
-static int cpu_callback(
- struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch ( action )
- {
- case CPU_UP_PREPARE: {
- if ( per_cpu(dstmem, cpu) == NULL )
- per_cpu(dstmem, cpu) = alloc_xenheap_pages(dstmem_order, 0);
- if ( per_cpu(workmem, cpu) == NULL )
- per_cpu(workmem, cpu) = alloc_xenheap_pages(workmem_order, 0);
- if ( per_cpu(scratch_page, cpu) == NULL )
- per_cpu(scratch_page, cpu) = alloc_xenheap_page();
- break;
- }
- case CPU_DEAD:
- case CPU_UP_CANCELED: {
- if ( per_cpu(dstmem, cpu) != NULL )
- {
- free_xenheap_pages(per_cpu(dstmem, cpu), dstmem_order);
- per_cpu(dstmem, cpu) = NULL;
- }
- if ( per_cpu(workmem, cpu) != NULL )
- {
- free_xenheap_pages(per_cpu(workmem, cpu), workmem_order);
- per_cpu(workmem, cpu) = NULL;
- }
- if ( per_cpu(scratch_page, cpu) != NULL )
- {
- free_xenheap_page(per_cpu(scratch_page, cpu));
- per_cpu(scratch_page, cpu) = NULL;
- }
- break;
- }
- default:
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block cpu_nfb = {
- .notifier_call = cpu_callback
-};
-
-int __init tmem_init(void)
-{
- unsigned int cpu;
-
- dstmem_order = get_order_from_pages(LZO_DSTMEM_PAGES);
-    workmem_order = get_order_from_bytes(LZO_WORKMEM_BYTES);
-
- for_each_online_cpu ( cpu )
- {
- void *hcpu = (void *)(long)cpu;
- cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
- }
-
- register_cpu_notifier(&cpu_nfb);
-
- return 1;
-}
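The cpu_callback() notifier above implements the usual Xen lifecycle for
per-CPU buffers: allocate on CPU_UP_PREPARE, free on CPU_DEAD or
CPU_UP_CANCELED, with tmem_init() replaying CPU_UP_PREPARE for CPUs that
were already online when the notifier was registered. A reduced sketch with
a single hypothetical per-CPU page:

    static DEFINE_PER_CPU(void *, my_page);    /* hypothetical buffer */

    static int my_cpu_callback(struct notifier_block *nfb,
                               unsigned long action, void *hcpu)
    {
        unsigned int cpu = (unsigned long)hcpu;

        switch ( action )
        {
        case CPU_UP_PREPARE:
            if ( per_cpu(my_page, cpu) == NULL )
                per_cpu(my_page, cpu) = alloc_xenheap_page();
            break;
        case CPU_DEAD:
        case CPU_UP_CANCELED:
            if ( per_cpu(my_page, cpu) != NULL )
            {
                free_xenheap_page(per_cpu(my_page, cpu));
                per_cpu(my_page, cpu) = NULL;
            }
            break;
        }

        return NOTIFY_DONE;
    }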
@@ -17,7 +17,6 @@ headers-y := \
compat/physdev.h \
compat/platform.h \
compat/sched.h \
- compat/tmem.h \
compat/trace.h \
compat/vcpu.h \
compat/version.h \
@@ -34,7 +34,6 @@
#include "xen.h"
#include "domctl.h"
#include "physdev.h"
-#include "tmem.h"
#define XEN_SYSCTL_INTERFACE_VERSION 0x00000012
@@ -732,110 +731,6 @@ struct xen_sysctl_psr_alloc {
} u;
};
-#define XEN_SYSCTL_TMEM_OP_ALL_CLIENTS 0xFFFFU
-
-#define XEN_SYSCTL_TMEM_OP_THAW 0
-#define XEN_SYSCTL_TMEM_OP_FREEZE 1
-#define XEN_SYSCTL_TMEM_OP_FLUSH 2
-#define XEN_SYSCTL_TMEM_OP_DESTROY 3
-#define XEN_SYSCTL_TMEM_OP_LIST 4
-#define XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO 5
-#define XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO 6
-#define XEN_SYSCTL_TMEM_OP_GET_POOLS 7
-#define XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB 8
-#define XEN_SYSCTL_TMEM_OP_SET_POOLS 9
-#define XEN_SYSCTL_TMEM_OP_SAVE_BEGIN 10
-#define XEN_SYSCTL_TMEM_OP_SET_AUTH 11
-#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE 19
-#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV 20
-#define XEN_SYSCTL_TMEM_OP_SAVE_END 21
-#define XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN 30
-#define XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE 32
-#define XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE 33
-
-/*
- * XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_[PAGE|INV] override the 'buf' in
- * xen_sysctl_tmem_op with this structure - sometimes with an extra
- * page tacked on.
- */
-struct tmem_handle {
- uint32_t pool_id;
- uint32_t index;
- xen_tmem_oid_t oid;
-};
-
-/*
- * XEN_SYSCTL_TMEM_OP_[GET,SET]_CLIENT_INFO uses the 'client' field in
- * xen_sysctl_tmem_op with this structure, mostly during migration.
- */
-struct xen_tmem_client {
- uint32_t version; /* If mismatched we will get XEN_EOPNOTSUPP. */
- uint32_t maxpools; /* If greater than what hypervisor supports, will get
- XEN_ERANGE. */
-    uint32_t nr_pools;  /* Current number of pools. Ignored on SET. */
- union { /* See TMEM_CLIENT_[COMPRESS,FROZEN] */
- uint32_t raw;
- struct {
- uint8_t frozen:1,
- compress:1,
- migrating:1;
- } u;
- } flags;
- uint32_t weight;
-};
-typedef struct xen_tmem_client xen_tmem_client_t;
-DEFINE_XEN_GUEST_HANDLE(xen_tmem_client_t);
-
-/*
- * XEN_SYSCTL_TMEM_OP_[GET|SET]_POOLS or XEN_SYSCTL_TMEM_OP_SET_AUTH
- * uses the 'pool' array in xen_sysctl_tmem_op with this structure.
- * The XEN_SYSCTL_TMEM_OP_GET_POOLS hypercall will
- * return the number of entries in 'pool' or a negative value
- * if an error was encountered.
- * The XEN_SYSCTL_TMEM_OP_SET_[AUTH|POOLS] will return the number of
- * entries in 'pool' processed or a negative value if an error
- * was encountered.
- */
-struct xen_tmem_pool_info {
- union {
- uint32_t raw;
- struct {
- uint32_t persist:1, /* See TMEM_POOL_PERSIST. */
- shared:1, /* See TMEM_POOL_SHARED. */
- auth:1, /* See TMEM_POOL_AUTH. */
- rsv1:1,
- pagebits:8, /* TMEM_POOL_PAGESIZE_[SHIFT,MASK]. */
- rsv2:12,
- version:8; /* TMEM_POOL_VERSION_[SHIFT,MASK]. */
- } u;
- } flags;
- uint32_t id; /* Less than tmem_client.maxpools. */
- uint64_t n_pages; /* Zero on XEN_SYSCTL_TMEM_OP_SET_[AUTH|POOLS]. */
- uint64_aligned_t uuid[2];
-};
-typedef struct xen_tmem_pool_info xen_tmem_pool_info_t;
-DEFINE_XEN_GUEST_HANDLE(xen_tmem_pool_info_t);
-
-struct xen_sysctl_tmem_op {
- uint32_t cmd; /* IN: XEN_SYSCTL_TMEM_OP_* . */
-    int32_t pool_id;    /* IN: 0 by default, unless _SAVE_* or _RESTORE_*. */
- uint32_t cli_id; /* IN: client id, 0 for XEN_SYSCTL_TMEM_QUERY_FREEABLE_MB
- for all others can be the domain id or
- XEN_SYSCTL_TMEM_OP_ALL_CLIENTS for all. */
-    uint32_t len;       /* IN: length of 'buf'. If not applicable, use 0. */
-    uint32_t arg;       /* IN: If not applicable to the command, use 0. */
- uint32_t pad; /* Padding so structure is the same under 32 and 64. */
- xen_tmem_oid_t oid; /* IN: If not applicable to command use 0s. */
- union {
- XEN_GUEST_HANDLE_64(char) buf; /* IN/OUT: Buffer to save/restore */
- XEN_GUEST_HANDLE_64(xen_tmem_client_t) client; /* IN/OUT for */
- /* XEN_SYSCTL_TMEM_OP_[GET,SAVE]_CLIENT. */
- XEN_GUEST_HANDLE_64(xen_tmem_pool_info_t) pool; /* OUT for */
- /* XEN_SYSCTL_TMEM_OP_GET_POOLS. Must have 'len' */
- /* of them. */
- } u;
-};
-
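The framing implied by the definitions removed above: the save side handed
userspace a struct tmem_handle immediately followed by the page contents in
one guest buffer, which is why tmemc_save_get_next_page() rejected buffers
smaller than sizeof(struct tmem_handle) + PAGE_SIZE. A hedged sketch of the
equivalent flat record, assuming 4 KiB pages:

    /* One record of the (removed) migration stream, as produced by
     * XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE; layout sketch only. */
    struct tmem_save_record {
        uint32_t pool_id;          /* pool the page belongs to */
        uint32_t index;            /* page index within the object */
        xen_tmem_oid_t oid;        /* 192-bit object id */
        uint8_t data[4096];        /* PAGE_SIZE bytes of page contents */
    };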
/*
* XEN_SYSCTL_get_cpu_levelling_caps (x86 specific)
*
@@ -1124,7 +1019,7 @@ struct xen_sysctl {
#define XEN_SYSCTL_psr_cmt_op 21
#define XEN_SYSCTL_pcitopoinfo 22
#define XEN_SYSCTL_psr_alloc 23
-#define XEN_SYSCTL_tmem_op 24
+/* #define XEN_SYSCTL_tmem_op 24 */
#define XEN_SYSCTL_get_cpu_levelling_caps 25
#define XEN_SYSCTL_get_cpu_featureset 26
#define XEN_SYSCTL_livepatch_op 27
@@ -1154,7 +1049,6 @@ struct xen_sysctl {
struct xen_sysctl_coverage_op coverage_op;
struct xen_sysctl_psr_cmt_op psr_cmt_op;
struct xen_sysctl_psr_alloc psr_alloc;
- struct xen_sysctl_tmem_op tmem_op;
struct xen_sysctl_cpu_levelling_caps cpu_levelling_caps;
struct xen_sysctl_cpu_featureset cpu_featureset;
struct xen_sysctl_livepatch_op livepatch;
@@ -1,8 +1,8 @@
/******************************************************************************
* tmem.h
- *
+ *
* Guest OS interface to Xen Transcendent Memory.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
@@ -29,15 +29,11 @@
#include "xen.h"
+#if __XEN_INTERFACE_VERSION__ < 0x00041200
+
/* version of ABI */
#define TMEM_SPEC_VERSION 1
-/* Commands to HYPERVISOR_tmem_op() */
-#ifdef __XEN__
-#define TMEM_CONTROL 0 /* Now called XEN_SYSCTL_tmem_op */
-#else
-#undef TMEM_CONTROL
-#endif
#define TMEM_NEW_POOL 1
#define TMEM_DESTROY_POOL 2
#define TMEM_PUT_PAGE 4
@@ -111,6 +107,8 @@ typedef struct tmem_op tmem_op_t;
DEFINE_XEN_GUEST_HANDLE(tmem_op_t);
#endif
+#endif /* __XEN_INTERFACE_VERSION__ < 0x00041200 */
+
#endif /* __XEN_PUBLIC_TMEM_H__ */
/*
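The new __XEN_INTERFACE_VERSION__ guard above hides the tmem ABI from
consumers built against interface version 4.12 (0x00041200) or newer while
leaving it visible to older ones. A hypothetical out-of-tree consumer that
still needs the legacy definitions could pin an earlier version before
including the header:

    /* Hypothetical consumer, not part of this change: request the 4.11
     * interface so the legacy tmem definitions remain visible. */
    #define __XEN_INTERFACE_VERSION__ 0x00041100
    #include <xen/tmem.h>        /* TMEM_NEW_POOL, struct tmem_op, ... */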
@@ -12,7 +12,6 @@
#include <public/sysctl.h>
#include <public/platform.h>
#include <public/event_channel.h>
-#include <public/tmem.h>
#include <public/version.h>
#include <public/pmu.h>
#include <public/hvm/dm_op.h>
@@ -130,12 +129,6 @@ extern long
do_xsm_op(
XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_xsm_op);
-#ifdef CONFIG_TMEM
-extern long
-do_tmem_op(
- XEN_GUEST_HANDLE_PARAM(tmem_op_t) uops);
-#endif
-
#ifdef CONFIG_ARGO
extern long do_argo_op(
unsigned int cmd,
@@ -209,7 +209,6 @@ unsigned long avail_node_heap_pages(unsigned int);
unsigned int online_page(unsigned long mfn, uint32_t *status);
int offline_page(unsigned long mfn, int broken, uint32_t *status);
int query_page_offline(unsigned long mfn, uint32_t *status);
-unsigned long total_free_pages(void);
void heap_init_late(void);
@@ -249,8 +248,6 @@ struct npfec {
#define MEMF_no_refcount (1U<<_MEMF_no_refcount)
#define _MEMF_populate_on_demand 1
#define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
-#define _MEMF_tmem 2
-#define MEMF_tmem (1U<<_MEMF_tmem)
#define _MEMF_no_dma 3
#define MEMF_no_dma (1U<<_MEMF_no_dma)
#define _MEMF_exact_node 4
@@ -454,9 +454,6 @@ struct domain
*/
spinlock_t hypercall_deadlock_mutex;
- /* transcendent memory, auto-allocated on first tmem op by each domain */
- struct client *tmem_client;
-
struct lock_profile_qhead profile_head;
/* Various vm_events */
deleted file mode 100644
@@ -1,45 +0,0 @@
-/******************************************************************************
- * tmem.h
- *
- * Transcendent memory
- *
- * Copyright (c) 2008, Dan Magenheimer, Oracle Corp.
- */
-
-#ifndef __XEN_TMEM_H__
-#define __XEN_TMEM_H__
-
-struct xen_sysctl_tmem_op;
-
-#ifdef CONFIG_TMEM
-extern int tmem_control(struct xen_sysctl_tmem_op *op);
-extern void tmem_destroy(void *);
-extern void *tmem_relinquish_pages(unsigned int, unsigned int);
-extern unsigned long tmem_freeable_pages(void);
-#else
-static inline int
-tmem_control(struct xen_sysctl_tmem_op *op)
-{
- return -ENOSYS;
-}
-
-static inline void
-tmem_destroy(void *p)
-{
- return;
-}
-
-static inline void *
-tmem_relinquish_pages(unsigned int x, unsigned int y)
-{
- return NULL;
-}
-
-static inline unsigned long
-tmem_freeable_pages(void)
-{
- return 0;
-}
-#endif /* CONFIG_TMEM */
-
-#endif /* __XEN_TMEM_H__ */
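The deleted xen/tmem.h followed the standard Xen pattern for optional
subsystems: real prototypes under CONFIG_TMEM, static inline no-op stubs
otherwise, so call sites such as the page allocator's use of
tmem_relinquish_pages() compiled unchanged either way. A generic sketch of
the same pattern with a hypothetical CONFIG_FOO:

    #ifdef CONFIG_FOO
    extern void *foo_relinquish_pages(unsigned int order,
                                      unsigned int memflags);
    #else
    static inline void *foo_relinquish_pages(unsigned int order,
                                             unsigned int memflags)
    {
        return NULL;    /* feature compiled out: nothing to relinquish */
    }
    #endif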
deleted file mode 100644
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
- *
- */
-
-#ifndef __XEN_TMEM_CONTROL_H__
-#define __XEN_TMEM_CONTROL_H__
-
-#ifdef CONFIG_TMEM
-#include <public/sysctl.h>
-/* Variables and functions that tmem_control.c needs from tmem.c */
-
-extern struct tmem_statistics tmem_stats;
-extern struct tmem_global tmem_global;
-
-extern rwlock_t tmem_rwlock;
-
-int tmem_evict(void);
-int do_tmem_control(struct xen_sysctl_tmem_op *op);
-
-struct client *client_create(domid_t cli_id);
-int do_tmem_new_pool(domid_t this_cli_id, uint32_t d_poolid, uint32_t flags,
- uint64_t uuid_lo, uint64_t uuid_hi);
-
-int tmemc_shared_pool_auth(domid_t cli_id, uint64_t uuid_lo,
- uint64_t uuid_hi, bool auth);
-#endif /* CONFIG_TMEM */
-
-#endif /* __XEN_TMEM_CONTROL_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
deleted file mode 100644
@@ -1,343 +0,0 @@
-/******************************************************************************
- * tmem_xen.h
- *
- * Xen-specific Transcendent memory
- *
- * Copyright (c) 2009, Dan Magenheimer, Oracle Corp.
- */
-
-#ifndef __XEN_TMEM_XEN_H__
-#define __XEN_TMEM_XEN_H__
-
-#include <xen/mm.h> /* heap alloc/free */
-#include <xen/pfn.h>
-#include <xen/xmalloc.h> /* xmalloc/xfree */
-#include <xen/sched.h> /* struct domain */
-#include <xen/guest_access.h> /* copy_from_guest */
-#include <xen/hash.h> /* hash_long */
-#include <xen/domain_page.h> /* __map_domain_page */
-#include <xen/rbtree.h> /* struct rb_root */
-#include <xsm/xsm.h> /* xsm_tmem_control */
-#include <public/tmem.h>
-#ifdef CONFIG_COMPAT
-#include <compat/tmem.h>
-#endif
-typedef uint32_t pagesize_t; /* like size_t, must handle largest PAGE_SIZE */
-
-#define IS_PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
-#define IS_VALID_PAGE(_pi) mfn_valid(page_to_mfn(_pi))
-
-extern struct page_list_head tmem_page_list;
-extern spinlock_t tmem_page_list_lock;
-extern unsigned long tmem_page_list_pages;
-extern atomic_t freeable_page_count;
-
-extern int tmem_init(void);
-#define tmem_hash hash_long
-
-extern bool opt_tmem_compress;
-static inline bool tmem_compression_enabled(void)
-{
- return opt_tmem_compress;
-}
-
-#ifdef CONFIG_TMEM
-extern bool opt_tmem;
-static inline bool tmem_enabled(void)
-{
- return opt_tmem;
-}
-
-static inline void tmem_disable(void)
-{
- opt_tmem = false;
-}
-#else
-static inline bool tmem_enabled(void)
-{
- return false;
-}
-
-static inline void tmem_disable(void)
-{
-}
-#endif /* CONFIG_TMEM */
-
-/*
- * Memory free page list management
- */
-
-static inline struct page_info *tmem_page_list_get(void)
-{
- struct page_info *pi;
-
- spin_lock(&tmem_page_list_lock);
- if ( (pi = page_list_remove_head(&tmem_page_list)) != NULL )
- tmem_page_list_pages--;
- spin_unlock(&tmem_page_list_lock);
- ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
- return pi;
-}
-
-static inline void tmem_page_list_put(struct page_info *pi)
-{
- ASSERT(IS_VALID_PAGE(pi));
- spin_lock(&tmem_page_list_lock);
- page_list_add(pi, &tmem_page_list);
- tmem_page_list_pages++;
- spin_unlock(&tmem_page_list_lock);
-}
-
-/*
- * Memory allocation for persistent data
- */
-static inline struct page_info *__tmem_alloc_page_thispool(struct domain *d)
-{
- struct page_info *pi;
-
-    /* Note that this tot_pages check is not protected by d->page_alloc_lock,
-     * so it may race and periodically fail in donate_page() or
-     * alloc_domheap_pages().  That's OK: neither is a problem, though both
-     * can be chatty if log_lvl is set. */
- if ( d->tot_pages >= d->max_pages )
- return NULL;
-
- if ( tmem_page_list_pages )
- {
- if ( (pi = tmem_page_list_get()) != NULL )
- {
- if ( donate_page(d,pi,0) == 0 )
- goto out;
- else
- tmem_page_list_put(pi);
- }
- }
-
- pi = alloc_domheap_pages(d,0,MEMF_tmem);
-
-out:
- ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
- return pi;
-}
-
-static inline void __tmem_free_page_thispool(struct page_info *pi)
-{
- struct domain *d = page_get_owner(pi);
-
- ASSERT(IS_VALID_PAGE(pi));
- if ( (d == NULL) || steal_page(d,pi,0) == 0 )
- tmem_page_list_put(pi);
- else
- {
- scrub_one_page(pi);
- ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
- free_domheap_pages(pi,0);
- }
-}
-
-/*
- * Memory allocation for ephemeral (non-persistent) data
- */
-static inline struct page_info *__tmem_alloc_page(void)
-{
- struct page_info *pi = tmem_page_list_get();
-
- if ( pi == NULL)
- pi = alloc_domheap_pages(0,0,MEMF_tmem);
-
- if ( pi )
- atomic_inc(&freeable_page_count);
- ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
- return pi;
-}
-
-static inline void __tmem_free_page(struct page_info *pi)
-{
- ASSERT(IS_VALID_PAGE(pi));
- tmem_page_list_put(pi);
- atomic_dec(&freeable_page_count);
-}
-
-/* "Client" (==domain) abstraction */
-static inline struct client *tmem_client_from_cli_id(domid_t cli_id)
-{
- struct client *c;
- struct domain *d = rcu_lock_domain_by_id(cli_id);
- if (d == NULL)
- return NULL;
- c = d->tmem_client;
- rcu_unlock_domain(d);
- return c;
-}
-
-/* these typedefs are in the public/tmem.h interface
-typedef XEN_GUEST_HANDLE(void) cli_mfn_t;
-typedef XEN_GUEST_HANDLE(char) cli_va_t;
-*/
-typedef XEN_GUEST_HANDLE_PARAM(tmem_op_t) tmem_cli_op_t;
-typedef XEN_GUEST_HANDLE_PARAM(char) tmem_cli_va_param_t;
-
-static inline int tmem_get_tmemop_from_client(tmem_op_t *op, tmem_cli_op_t uops)
-{
-#ifdef CONFIG_COMPAT
- if ( is_hvm_vcpu(current) ? hvm_guest_x86_mode(current) != 8
- : is_pv_32bit_vcpu(current) )
- {
- int rc;
- enum XLAT_tmem_op_u u;
- tmem_op_compat_t cop;
-
- rc = copy_from_guest(&cop, guest_handle_cast(uops, void), 1);
- if ( rc )
- return rc;
- switch ( cop.cmd )
- {
- case TMEM_NEW_POOL: u = XLAT_tmem_op_u_creat; break;
- default: u = XLAT_tmem_op_u_gen ; break;
- }
- XLAT_tmem_op(op, &cop);
- return 0;
- }
-#endif
- return copy_from_guest(op, uops, 1);
-}
-
-#define tmem_cli_buf_null guest_handle_from_ptr(NULL, char)
-#define TMEM_CLI_ID_NULL ((domid_t)((domid_t)-1L))
-#define tmem_cli_id_str "domid"
-#define tmem_client_str "domain"
-
-int tmem_decompress_to_client(xen_pfn_t, void *, size_t,
- tmem_cli_va_param_t);
-int tmem_compress_from_client(xen_pfn_t, void **, size_t *,
- tmem_cli_va_param_t);
-
-int tmem_copy_from_client(struct page_info *, xen_pfn_t, tmem_cli_va_param_t);
-int tmem_copy_to_client(xen_pfn_t, struct page_info *, tmem_cli_va_param_t);
-
-#define tmem_client_err(fmt, args...) printk(XENLOG_G_ERR fmt, ##args)
-#define tmem_client_warn(fmt, args...) printk(XENLOG_G_WARNING fmt, ##args)
-#define tmem_client_info(fmt, args...) printk(XENLOG_G_INFO fmt, ##args)
-
-/* Global statistics (none need to be locked). */
-struct tmem_statistics {
- unsigned long total_tmem_ops;
- unsigned long errored_tmem_ops;
- unsigned long total_flush_pool;
- unsigned long alloc_failed;
- unsigned long alloc_page_failed;
- unsigned long evicted_pgs;
- unsigned long evict_attempts;
- unsigned long relinq_pgs;
- unsigned long relinq_attempts;
- unsigned long max_evicts_per_relinq;
- unsigned long low_on_memory;
- unsigned long deduped_puts;
- unsigned long tot_good_eph_puts;
- int global_obj_count_max;
- int global_pgp_count_max;
- int global_pcd_count_max;
- int global_page_count_max;
- int global_rtree_node_count_max;
- long global_eph_count_max;
- unsigned long failed_copies;
- unsigned long pcd_tot_tze_size;
- unsigned long pcd_tot_csize;
- /* Global counters (should use long_atomic_t access). */
- atomic_t global_obj_count;
- atomic_t global_pgp_count;
- atomic_t global_pcd_count;
- atomic_t global_page_count;
- atomic_t global_rtree_node_count;
-};
-
-#define atomic_inc_and_max(_c) do { \
- atomic_inc(&tmem_stats._c); \
- if ( _atomic_read(tmem_stats._c) > tmem_stats._c##_max ) \
- tmem_stats._c##_max = _atomic_read(tmem_stats._c); \
-} while (0)
-
-#define atomic_dec_and_assert(_c) do { \
- atomic_dec(&tmem_stats._c); \
- ASSERT(_atomic_read(tmem_stats._c) >= 0); \
-} while (0)
-
-#define MAX_GLOBAL_SHARED_POOLS 16
-struct tmem_global {
- struct list_head ephemeral_page_list; /* All pages in ephemeral pools. */
- struct list_head client_list;
- struct tmem_pool *shared_pools[MAX_GLOBAL_SHARED_POOLS];
- bool shared_auth;
- long eph_count; /* Atomicity depends on eph_lists_spinlock. */
- atomic_t client_weight_total;
-};
-
-#define MAX_POOLS_PER_DOMAIN 16
-
-struct tmem_pool;
-struct tmem_page_descriptor;
-struct tmem_page_content_descriptor;
-struct client {
- struct list_head client_list;
- struct tmem_pool *pools[MAX_POOLS_PER_DOMAIN];
- struct domain *domain;
- struct xmem_pool *persistent_pool;
- struct list_head ephemeral_page_list;
- long eph_count, eph_count_max;
- domid_t cli_id;
- xen_tmem_client_t info;
- /* For save/restore/migration. */
- bool was_frozen;
- struct list_head persistent_invalidated_list;
- struct tmem_page_descriptor *cur_pgp;
- /* Statistics collection. */
- unsigned long compress_poor, compress_nomem;
- unsigned long compressed_pages;
- uint64_t compressed_sum_size;
- uint64_t total_cycles;
- unsigned long succ_pers_puts, succ_eph_gets, succ_pers_gets;
- /* Shared pool authentication. */
- uint64_t shared_auth_uuid[MAX_GLOBAL_SHARED_POOLS][2];
-};
-
-#define POOL_PAGESHIFT (PAGE_SHIFT - 12)
-#define OBJ_HASH_BUCKETS 256 /* Must be power of two. */
-#define OBJ_HASH_BUCKETS_MASK (OBJ_HASH_BUCKETS-1)
-
-#define is_persistent(_p) (_p->persistent)
-#define is_shared(_p) (_p->shared)
-
-struct tmem_pool {
- bool shared;
- bool persistent;
- bool is_dying;
- struct client *client;
- uint64_t uuid[2]; /* 0 for private, non-zero for shared. */
- uint32_t pool_id;
- rwlock_t pool_rwlock;
- struct rb_root obj_rb_root[OBJ_HASH_BUCKETS]; /* Protected by pool_rwlock. */
- struct list_head share_list; /* Valid if shared. */
- int shared_count; /* Valid if shared. */
- /* For save/restore/migration. */
- struct list_head persistent_page_list;
- struct tmem_page_descriptor *cur_pgp;
- /* Statistics collection. */
- atomic_t pgp_count;
- int pgp_count_max;
- long obj_count; /* Atomicity depends on pool_rwlock held for write. */
- long obj_count_max;
- unsigned long objnode_count, objnode_count_max;
- uint64_t sum_life_cycles;
- uint64_t sum_evicted_cycles;
- unsigned long puts, good_puts, no_mem_puts;
- unsigned long dup_puts_flushed, dup_puts_replaced;
- unsigned long gets, found_gets;
- unsigned long flushs, flushs_found;
- unsigned long flush_objs, flush_objs_found;
-};
-
-struct share_list {
- struct list_head share_list;
- struct client *client;
-};
-
-#endif /* __XEN_TMEM_XEN_H__ */
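tmem_page_list_get()/tmem_page_list_put() above maintain a global free list
of pages behind a spinlock, with a separate length counter updated under the
same lock. A self-contained sketch of that structure, using my_* placeholder
names:

    static PAGE_LIST_HEAD(my_free_list);
    static DEFINE_SPINLOCK(my_free_lock);
    static unsigned long my_free_count;

    static struct page_info *my_page_get(void)
    {
        struct page_info *pg;

        spin_lock(&my_free_lock);
        pg = page_list_remove_head(&my_free_list);
        if ( pg != NULL )
            my_free_count--;
        spin_unlock(&my_free_lock);

        return pg;
    }

    static void my_page_put(struct page_info *pg)
    {
        spin_lock(&my_free_lock);
        page_list_add(pg, &my_free_list);
        my_free_count++;
        spin_unlock(&my_free_lock);
    }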
@@ -135,8 +135,6 @@
? sched_pin_override sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
-? tmem_oid tmem.h
-! tmem_op tmem.h
? t_buf trace.h
? vcpu_get_physid vcpu.h
? vcpu_register_vcpu_info vcpu.h
@@ -433,12 +433,6 @@ static XSM_INLINE int xsm_page_offline(XSM_DEFAULT_ARG uint32_t cmd)
return xsm_default_action(action, current->domain, NULL);
}
-static XSM_INLINE int xsm_tmem_op(XSM_DEFAULT_VOID)
-{
- XSM_ASSERT_ACTION(XSM_HOOK);
- return xsm_default_action(action, current->domain, NULL);
-}
-
static XSM_INLINE long xsm_do_xsm_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) op)
{
return -ENOSYS;
@@ -127,7 +127,6 @@ struct xsm_operations {
int (*resource_setup_misc) (void);
int (*page_offline)(uint32_t cmd);
- int (*tmem_op)(void);
long (*do_xsm_op) (XEN_GUEST_HANDLE_PARAM(xsm_op_t) op);
#ifdef CONFIG_COMPAT
@@ -537,11 +536,6 @@ static inline int xsm_page_offline(xsm_default_t def, uint32_t cmd)
return xsm_ops->page_offline(cmd);
}
-static inline int xsm_tmem_op(xsm_default_t def)
-{
- return xsm_ops->tmem_op();
-}
-
static inline long xsm_do_xsm_op (XEN_GUEST_HANDLE_PARAM(xsm_op_t) op)
{
return xsm_ops->do_xsm_op(op);
@@ -103,7 +103,6 @@ void __init xsm_fixup_ops (struct xsm_operations *ops)
set_to_dummy_if_null(ops, resource_setup_misc);
set_to_dummy_if_null(ops, page_offline);
- set_to_dummy_if_null(ops, tmem_op);
set_to_dummy_if_null(ops, hvm_param);
set_to_dummy_if_null(ops, hvm_control);
set_to_dummy_if_null(ops, hvm_param_nested);
@@ -810,9 +810,6 @@ static int flask_sysctl(int cmd)
return avc_current_has_perm(SECINITSID_XEN, SECCLASS_XEN2,
XEN2__PSR_ALLOC, NULL);
- case XEN_SYSCTL_tmem_op:
- return domain_has_xen(current->domain, XEN__TMEM_CONTROL);
-
case XEN_SYSCTL_get_cpu_levelling_caps:
return avc_current_has_perm(SECINITSID_XEN, SECCLASS_XEN2,
XEN2__GET_CPU_LEVELLING_CAPS, NULL);
@@ -1178,11 +1175,6 @@ static inline int flask_page_offline(uint32_t cmd)
}
}
-static inline int flask_tmem_op(void)
-{
- return domain_has_xen(current->domain, XEN__TMEM_OP);
-}
-
static int flask_add_to_physmap(struct domain *d1, struct domain *d2)
{
return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
@@ -1818,7 +1810,6 @@ static struct xsm_operations flask_ops = {
.resource_setup_misc = flask_resource_setup_misc,
.page_offline = flask_page_offline,
- .tmem_op = flask_tmem_op,
.hvm_param = flask_hvm_param,
.hvm_control = flask_hvm_param,
.hvm_param_nested = flask_hvm_param_nested,
@@ -67,10 +67,6 @@ class xen
lockprof
# XEN_SYSCTL_cpupool_op
cpupool_op
-# tmem hypercall (any access)
- tmem_op
-# XEN_SYSCTL_tmem_op command of tmem (part of sysctl)
- tmem_control
# XEN_SYSCTL_scheduler_op with XEN_DOMCTL_SCHEDOP_getinfo, XEN_SYSCTL_sched_id, XEN_DOMCTL_SCHEDOP_getvcpuinfo
getscheduler
# XEN_SYSCTL_scheduler_op with XEN_DOMCTL_SCHEDOP_putinfo, XEN_DOMCTL_SCHEDOP_putvcpuinfo