From patchwork Wed Mar 9 12:18:52 2011
X-Patchwork-Submitter: johan.xx.mossberg@stericsson.com
X-Patchwork-Id: 621191
Subject: [PATCHv2 2/3] hwmem: Add hwmem (part 2)
Date: Wed, 9 Mar 2011 13:18:52 +0100
Message-ID: <1299673133-26464-3-git-send-email-johan.xx.mossberg@stericsson.com>
X-Mailer: git-send-email 1.7.4.1
In-Reply-To: <1299673133-26464-1-git-send-email-johan.xx.mossberg@stericsson.com>
References: <1299673133-26464-1-git-send-email-johan.xx.mossberg@stericsson.com>
X-Mailing-List: linux-media@vger.kernel.org

diff --git a/drivers/misc/hwmem/cache_handler.c b/drivers/misc/hwmem/cache_handler.c
new file mode 100644
index 0000000..e0ab4ee
--- /dev/null
+++ b/drivers/misc/hwmem/cache_handler.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Cache handler
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com>
+ * for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+#include
+
+#include
+
+#include
+
+#include "cache_handler.h"
+
+#define U32_MAX (~(u32)0)
+
+enum hwmem_alloc_flags cachi_get_cache_settings(
+	enum hwmem_alloc_flags requested_cache_settings);
+void cachi_set_pgprot_cache_options(enum hwmem_alloc_flags cache_settings,
+	pgprot_t *pgprot);
+
+static void sync_buf_pre_cpu(struct cach_buf *buf, enum hwmem_access access,
+	struct hwmem_region *region);
+static void sync_buf_post_cpu(struct cach_buf *buf,
+	enum hwmem_access next_access, struct hwmem_region *next_region);
+
+static void invalidate_cpu_cache(struct cach_buf *buf,
+	struct cach_range *range_2b_used);
+static void clean_cpu_cache(struct cach_buf *buf,
+	struct cach_range *range_2b_used);
+static void flush_cpu_cache(struct cach_buf *buf,
+	struct cach_range *range_2b_used);
+
+static void null_range(struct cach_range *range);
+static void expand_range(struct cach_range *range,
+	struct cach_range *range_2_add);
+/*
+ * Expands range to one of enclosing_range's two edges. The function will
+ * choose which of enclosing_range's edges to expand range to in such a
+ * way that the size of range is minimized. range must be located inside
+ * enclosing_range.
+ */
+static void expand_range_2_edge(struct cach_range *range,
+	struct cach_range *enclosing_range);
+static void shrink_range(struct cach_range *range,
+	struct cach_range *range_2_remove);
+static bool is_non_empty_range(struct cach_range *range);
+static void intersect_range(struct cach_range *range_1,
+	struct cach_range *range_2, struct cach_range *intersection);
+/* Align_up restrictions apply here too */
+static void align_range_up(struct cach_range *range, u32 alignment);
+static u32 range_length(struct cach_range *range);
+static void region_2_range(struct hwmem_region *region, u32 buffer_size,
+	struct cach_range *range);
+
+static void *offset_2_vaddr(struct cach_buf *buf, u32 offset);
+static u32 offset_2_paddr(struct cach_buf *buf, u32 offset);
+
+/* Saturates, might return unaligned values when that happens */
+static u32 align_up(u32 value, u32 alignment);
+static u32 align_down(u32 value, u32 alignment);
+
+/*
+ * Exported functions
+ */
+
+void cach_init_buf(struct cach_buf *buf, enum hwmem_alloc_flags cache_settings,
+	u32 size)
+{
+	buf->vstart = NULL;
+	buf->pstart = 0;
+	buf->size = size;
+
+	buf->cache_settings = cachi_get_cache_settings(cache_settings);
+}
+
+void cach_set_buf_addrs(struct cach_buf *buf, void *vaddr, u32 paddr)
+{
+	bool tmp;
+
+	buf->vstart = vaddr;
+	buf->pstart = paddr;
+
+	if (buf->cache_settings & HWMEM_ALLOC_HINT_CACHED) {
+		/*
+		 * Keep whatever is in the cache. This way we avoid an
+		 * unnecessary sync if the CPU is the first user.
+		 */
+		buf->range_in_cpu_cache.start = 0;
+		buf->range_in_cpu_cache.end = buf->size;
+		align_range_up(&buf->range_in_cpu_cache,
+				get_dcache_granularity());
+		buf->range_dirty_in_cpu_cache.start = 0;
+		buf->range_dirty_in_cpu_cache.end = buf->size;
+		align_range_up(&buf->range_dirty_in_cpu_cache,
+				get_dcache_granularity());
+	} else {
+		flush_cpu_dcache(buf->vstart, buf->pstart, buf->size, false,
+				&tmp);
+		drain_cpu_write_buf();
+
+		null_range(&buf->range_in_cpu_cache);
+		null_range(&buf->range_dirty_in_cpu_cache);
+	}
+	null_range(&buf->range_invalid_in_cpu_cache);
+}
+
+void cach_set_pgprot_cache_options(struct cach_buf *buf, pgprot_t *pgprot)
+{
+	cachi_set_pgprot_cache_options(buf->cache_settings, pgprot);
+}
+
+void cach_set_domain(struct cach_buf *buf, enum hwmem_access access,
+	enum hwmem_domain domain, struct hwmem_region *region)
+{
+	struct hwmem_region *__region;
+	struct hwmem_region full_region;
+
+	if (region != NULL) {
+		__region = region;
+	} else {
+		full_region.offset = 0;
+		full_region.count = 1;
+		full_region.start = 0;
+		full_region.end = buf->size;
+		full_region.size = buf->size;
+
+		__region = &full_region;
+	}
+
+	switch (domain) {
+	case HWMEM_DOMAIN_SYNC:
+		sync_buf_post_cpu(buf, access, __region);
+
+		break;
+
+	case HWMEM_DOMAIN_CPU:
+		sync_buf_pre_cpu(buf, access, __region);
+
+		break;
+	}
+}
+
+/*
+ * Local functions
+ */
+
+enum hwmem_alloc_flags __attribute__((weak)) cachi_get_cache_settings(
+	enum hwmem_alloc_flags requested_cache_settings)
+{
+	static const u32 CACHE_ON_FLAGS_MASK = HWMEM_ALLOC_HINT_CACHED |
+		HWMEM_ALLOC_HINT_CACHE_WB | HWMEM_ALLOC_HINT_CACHE_WT |
+		HWMEM_ALLOC_HINT_CACHE_NAOW | HWMEM_ALLOC_HINT_CACHE_AOW |
+		HWMEM_ALLOC_HINT_INNER_AND_OUTER_CACHE |
+		HWMEM_ALLOC_HINT_INNER_CACHE_ONLY;
+	/* We don't know the cache setting so we assume worst case. */
+	static const u32 CACHE_SETTING = HWMEM_ALLOC_HINT_WRITE_COMBINE |
+		HWMEM_ALLOC_HINT_CACHED | HWMEM_ALLOC_HINT_CACHE_WB |
+		HWMEM_ALLOC_HINT_CACHE_AOW |
+		HWMEM_ALLOC_HINT_INNER_AND_OUTER_CACHE;
+
+	if (requested_cache_settings & CACHE_ON_FLAGS_MASK)
+		return CACHE_SETTING;
+	else if (requested_cache_settings & HWMEM_ALLOC_HINT_WRITE_COMBINE ||
+		(requested_cache_settings & HWMEM_ALLOC_HINT_UNCACHED &&
+		!(requested_cache_settings &
+		HWMEM_ALLOC_HINT_NO_WRITE_COMBINE)))
+		return HWMEM_ALLOC_HINT_WRITE_COMBINE;
+	else if (requested_cache_settings &
+		(HWMEM_ALLOC_HINT_NO_WRITE_COMBINE |
+		HWMEM_ALLOC_HINT_UNCACHED))
+		return 0;
+	else
+		/* Nothing specified, use cached */
+		return CACHE_SETTING;
+}
+
+void __attribute__((weak)) cachi_set_pgprot_cache_options(
+	enum hwmem_alloc_flags cache_settings, pgprot_t *pgprot)
+{
+	if (cache_settings & HWMEM_ALLOC_HINT_CACHED)
+		*pgprot = *pgprot; /* To silence compiler and checkpatch */
+	else if (cache_settings & HWMEM_ALLOC_HINT_WRITE_COMBINE)
+		*pgprot = pgprot_writecombine(*pgprot);
+	else
+		*pgprot = pgprot_noncached(*pgprot);
+}
+
+bool __attribute__((weak)) speculative_data_prefetch(void)
+{
+	/* We don't know so we go with the safe alternative */
+	return true;
+}
+
+static void sync_buf_pre_cpu(struct cach_buf *buf, enum hwmem_access access,
+	struct hwmem_region *region)
+{
+	bool write = access & HWMEM_ACCESS_WRITE;
+	bool read = access & HWMEM_ACCESS_READ;
+
+	if (!write && !read)
+		return;
+
+	if (buf->cache_settings & HWMEM_ALLOC_HINT_CACHED) {
+		struct cach_range region_range;
+
+		region_2_range(region, buf->size, &region_range);
+
+		if (read || (write && buf->cache_settings &
+				HWMEM_ALLOC_HINT_CACHE_WB))
+			/* Perform deferred invalidates */
+			invalidate_cpu_cache(buf, &region_range);
+		if (read || (write && buf->cache_settings &
+				HWMEM_ALLOC_HINT_CACHE_AOW))
+			expand_range(&buf->range_in_cpu_cache, &region_range);
+		if (write && buf->cache_settings & HWMEM_ALLOC_HINT_CACHE_WB) {
+			struct cach_range dirty_range_addition;
+
+			if (buf->cache_settings & HWMEM_ALLOC_HINT_CACHE_AOW)
+				dirty_range_addition = region_range;
+			else
+				intersect_range(&buf->range_in_cpu_cache,
+					&region_range, &dirty_range_addition);
+
+			expand_range(&buf->range_dirty_in_cpu_cache,
+				&dirty_range_addition);
+		}
+	}
+	if (buf->cache_settings & HWMEM_ALLOC_HINT_WRITE_COMBINE) {
+		if (write)
+			buf->in_cpu_write_buf = true;
+	}
+}
+
+static void sync_buf_post_cpu(struct cach_buf *buf,
+	enum hwmem_access next_access, struct hwmem_region *next_region)
+{
+	bool write = next_access & HWMEM_ACCESS_WRITE;
+	bool read = next_access & HWMEM_ACCESS_READ;
+	struct cach_range region_range;
+
+	if (!write && !read)
+		return;
+
+	region_2_range(next_region, buf->size, &region_range);
+
+	if (write) {
+		if (speculative_data_prefetch()) {
+			/* Defer invalidate */
+			struct cach_range intersection;
+
+			intersect_range(&buf->range_in_cpu_cache,
+				&region_range, &intersection);
+
+			expand_range(&buf->range_invalid_in_cpu_cache,
+				&intersection);
+
+			clean_cpu_cache(buf, &region_range);
+		} else {
+			flush_cpu_cache(buf, &region_range);
+		}
+	}
+	if (read)
+		clean_cpu_cache(buf, &region_range);
+
+	if (buf->in_cpu_write_buf) {
+		drain_cpu_write_buf();
+
+		buf->in_cpu_write_buf = false;
+	}
+}
+
+static void invalidate_cpu_cache(struct cach_buf *buf, struct cach_range *range)
+{
+	struct cach_range intersection;
+
+	intersect_range(&buf->range_invalid_in_cpu_cache, range,
+		&intersection);
+	if (is_non_empty_range(&intersection)) {
+		bool flushed_everything;
+
+		expand_range_2_edge(&intersection,
+				&buf->range_invalid_in_cpu_cache);
+
+		/*
+		 * Cache handler never uses invalidate to discard data in the
+		 * cache so we can use flush instead which is considerably
+		 * faster for large buffers.
+		 */
+		flush_cpu_dcache(
+			offset_2_vaddr(buf, intersection.start),
+			offset_2_paddr(buf, intersection.start),
+			range_length(&intersection),
+			buf->cache_settings &
+				HWMEM_ALLOC_HINT_INNER_CACHE_ONLY,
+			&flushed_everything);
+
+		if (flushed_everything) {
+			null_range(&buf->range_invalid_in_cpu_cache);
+			null_range(&buf->range_dirty_in_cpu_cache);
+		} else {
+			/*
+			 * No need to shrink range_in_cpu_cache as invalidate
+			 * is only used when we can't keep track of what's in
+			 * the CPU cache.
+			 */
+			shrink_range(&buf->range_invalid_in_cpu_cache,
+				&intersection);
+		}
+	}
+}
+
+static void clean_cpu_cache(struct cach_buf *buf, struct cach_range *range)
+{
+	struct cach_range intersection;
+
+	intersect_range(&buf->range_dirty_in_cpu_cache, range, &intersection);
+	if (is_non_empty_range(&intersection)) {
+		bool cleaned_everything;
+
+		expand_range_2_edge(&intersection,
+				&buf->range_dirty_in_cpu_cache);
+
+		clean_cpu_dcache(
+			offset_2_vaddr(buf, intersection.start),
+			offset_2_paddr(buf, intersection.start),
+			range_length(&intersection),
+			buf->cache_settings &
+				HWMEM_ALLOC_HINT_INNER_CACHE_ONLY,
+			&cleaned_everything);
+
+		if (cleaned_everything)
+			null_range(&buf->range_dirty_in_cpu_cache);
+		else
+			shrink_range(&buf->range_dirty_in_cpu_cache,
+				&intersection);
+	}
+}
+
+static void flush_cpu_cache(struct cach_buf *buf, struct cach_range *range)
+{
+	struct cach_range intersection;
+
+	intersect_range(&buf->range_in_cpu_cache, range, &intersection);
+	if (is_non_empty_range(&intersection)) {
+		bool flushed_everything;
+
+		expand_range_2_edge(&intersection, &buf->range_in_cpu_cache);
+
+		flush_cpu_dcache(
+			offset_2_vaddr(buf, intersection.start),
+			offset_2_paddr(buf, intersection.start),
+			range_length(&intersection),
+			buf->cache_settings &
+				HWMEM_ALLOC_HINT_INNER_CACHE_ONLY,
+			&flushed_everything);
+
+		if (flushed_everything) {
+			if (!speculative_data_prefetch())
+				null_range(&buf->range_in_cpu_cache);
+			null_range(&buf->range_dirty_in_cpu_cache);
+			null_range(&buf->range_invalid_in_cpu_cache);
+		} else {
+			if (!speculative_data_prefetch())
+				shrink_range(&buf->range_in_cpu_cache,
+					&intersection);
+			shrink_range(&buf->range_dirty_in_cpu_cache,
+				&intersection);
+			shrink_range(&buf->range_invalid_in_cpu_cache,
+				&intersection);
+		}
+	}
+}
+
+static void null_range(struct cach_range *range)
+{
+	range->start = U32_MAX;
+	range->end = 0;
+}
+
+static void expand_range(struct cach_range *range,
+	struct cach_range *range_2_add)
+{
+	range->start = min(range->start, range_2_add->start);
+	range->end = max(range->end, range_2_add->end);
+}
+
+/*
+ * Expands range to one of enclosing_range's two edges. The function will
+ * choose which of enclosing_range's edges to expand range to in such a
+ * way that the size of range is minimized. range must be located inside
+ * enclosing_range.
+ */
+static void expand_range_2_edge(struct cach_range *range,
+	struct cach_range *enclosing_range)
+{
+	u32 space_on_low_side = range->start - enclosing_range->start;
+	u32 space_on_high_side = enclosing_range->end - range->end;
+
+	if (space_on_low_side < space_on_high_side)
+		range->start = enclosing_range->start;
+	else
+		range->end = enclosing_range->end;
+}
+
+static void shrink_range(struct cach_range *range,
+	struct cach_range *range_2_remove)
+{
+	if (range_2_remove->start > range->start)
+		range->end = min(range->end, range_2_remove->start);
+	else
+		range->start = max(range->start, range_2_remove->end);
+
+	if (range->start >= range->end)
+		null_range(range);
+}
+
+static bool is_non_empty_range(struct cach_range *range)
+{
+	return range->end > range->start;
+}
+
+static void intersect_range(struct cach_range *range_1,
+	struct cach_range *range_2, struct cach_range *intersection)
+{
+	intersection->start = max(range_1->start, range_2->start);
+	intersection->end = min(range_1->end, range_2->end);
+
+	if (intersection->start >= intersection->end)
+		null_range(intersection);
+}
+
+/* Align_up restrictions apply here too */
+static void align_range_up(struct cach_range *range, u32 alignment)
+{
+	if (!is_non_empty_range(range))
+		return;
+
+	range->start = align_down(range->start, alignment);
+	range->end = align_up(range->end, alignment);
+}
+
+static u32 range_length(struct cach_range *range)
+{
+	if (is_non_empty_range(range))
+		return range->end - range->start;
+	else
+		return 0;
+}
+
+static void region_2_range(struct hwmem_region *region, u32 buffer_size,
+	struct cach_range *range)
+{
+	/*
+	 * We don't care about invalid regions, instead we limit the region's
+	 * range to the buffer's range. This should work well enough; worst
+	 * case we sync the entire buffer when we get an invalid region, which
+	 * is acceptable.
+	 */
+	range->start = region->offset + region->start;
+	range->end = min(region->offset + (region->count * region->size) -
+		(region->size - region->end), buffer_size);
+	if (range->start >= range->end) {
+		null_range(range);
+		return;
+	}
+
+	align_range_up(range, get_dcache_granularity());
+}
+
+static void *offset_2_vaddr(struct cach_buf *buf, u32 offset)
+{
+	return (void *)((u32)buf->vstart + offset);
+}
+
+static u32 offset_2_paddr(struct cach_buf *buf, u32 offset)
+{
+	return buf->pstart + offset;
+}
+
+/* Saturates, might return unaligned values when that happens */
+static u32 align_up(u32 value, u32 alignment)
+{
+	u32 remainder = value % alignment;
+	u32 value_2_add;
+
+	if (remainder == 0)
+		return value;
+
+	value_2_add = alignment - remainder;
+
+	if (value_2_add > U32_MAX - value) /* Will overflow */
+		return U32_MAX;
+
+	return value + value_2_add;
+}
+
+static u32 align_down(u32 value, u32 alignment)
+{
+	u32 remainder = value % alignment;
+	if (remainder == 0)
+		return value;
+
+	return value - remainder;
+}
diff --git a/drivers/misc/hwmem/cache_handler.h b/drivers/misc/hwmem/cache_handler.h
new file mode 100644
index 0000000..7921051
--- /dev/null
+++ b/drivers/misc/hwmem/cache_handler.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Cache handler
+ *
+ * Author: Johan Mossberg <johan.xx.mossberg@stericsson.com>
+ * for ST-Ericsson.
+ *
+ * License terms: GNU General Public License (GPL), version 2.
+ */
+
+/*
+ * The cache handler cannot handle simultaneous execution! The caller has to
+ * ensure such a situation does not occur.
+ */
+
+#ifndef _CACHE_HANDLER_H_
+#define _CACHE_HANDLER_H_
+
+#include
+#include
+
+/*
+ * To avoid having to duplicate all datatypes we reuse the hwmem datatypes. If
+ * someone wants to use the cache handler but not hwmem then we'll have to
+ * define our own datatypes.
+ */
+
+struct cach_range {
+	u32 start; /* Inclusive */
+	u32 end; /* Exclusive */
+};
+
+/*
+ * Internal, do not touch!
+ */
+struct cach_buf {
+	void *vstart;
+	u32 pstart;
+	u32 size;
+
+	/* Remaining hints are active */
+	enum hwmem_alloc_flags cache_settings;
+
+	bool in_cpu_write_buf;
+	struct cach_range range_in_cpu_cache;
+	struct cach_range range_dirty_in_cpu_cache;
+	struct cach_range range_invalid_in_cpu_cache;
+};
+
+void cach_init_buf(struct cach_buf *buf,
+	enum hwmem_alloc_flags cache_settings, u32 size);
+
+void cach_set_buf_addrs(struct cach_buf *buf, void *vaddr, u32 paddr);
+
+void cach_set_pgprot_cache_options(struct cach_buf *buf, pgprot_t *pgprot);
+
+void cach_set_domain(struct cach_buf *buf, enum hwmem_access access,
+	enum hwmem_domain domain, struct hwmem_region *region);
+
+#endif /* _CACHE_HANDLER_H_ */
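
For readers without the rest of the series at hand, the sketch below illustrates how an allocator such as hwmem is expected to drive the cache handler around a CPU write followed by a device read. It is illustrative only and not part of the patch: the mapping helper my_map_buffer() and the surrounding function are hypothetical, while cach_init_buf(), cach_set_buf_addrs(), cach_set_domain() and the HWMEM_* flags and domains are the ones used in the code above.

/* Illustrative caller of the cache handler API; not part of the patch. */
static int cach_usage_sketch(void)
{
	struct cach_buf cbuf;
	void *vaddr;
	u32 paddr;
	const u32 size = 64 * 1024;

	/* Resolve the effective cache settings before the buffer is mapped. */
	cach_init_buf(&cbuf, HWMEM_ALLOC_HINT_CACHED, size);

	/* Allocate and map the backing memory (allocator specific). */
	vaddr = my_map_buffer(size, &paddr); /* hypothetical helper */
	if (vaddr == NULL)
		return -ENOMEM;
	cach_set_buf_addrs(&cbuf, vaddr, paddr);

	/*
	 * The CPU is about to write: any deferred invalidates are performed
	 * and the written range is tracked as dirty in the CPU cache.
	 */
	cach_set_domain(&cbuf, HWMEM_ACCESS_WRITE, HWMEM_DOMAIN_CPU, NULL);
	memset(vaddr, 0, size);

	/*
	 * A device will read next: dirty lines are cleaned so the device
	 * sees the CPU's writes (a NULL region means the whole buffer).
	 */
	cach_set_domain(&cbuf, HWMEM_ACCESS_READ, HWMEM_DOMAIN_SYNC, NULL);

	/* ...start the DMA transfer here... */

	return 0;
}

Passing NULL as the region is the simple case; callers that only touch part of a buffer can pass a struct hwmem_region instead so that only the affected cache lines are cleaned or invalidated.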