@@ -118,6 +118,7 @@ extern int radeon_bapm;
#define RADEON_DEBUGFS_MAX_COMPONENTS 32
#define RADEONFB_CONN_LIMIT 4
#define RADEON_BIOS_NUM_SCRATCH 8
+#define RADEON_SEQ_WRAP_VALUE (1 << 30)
/* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL
@@ -355,6 +356,8 @@ struct radeon_fence_driver {
/* sync_seq is protected by ring emission lock */
uint64_t sync_seq[RADEON_NUM_RINGS];
atomic64_t last_seq;
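+ /* bumped each time sync_seq crosses a RADEON_SEQ_WRAP_VALUE boundary */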
+ int32_t wrap_seq;
bool initialized;
};
@@ -404,6 +404,17 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
parser->ib.fence);
+ if (parser->chunk_flags && parser->chunk_flags->length_dw > 5) {
+ uint32_t __user *to = parser->chunk_flags->user_ptr;
+ uint32_t tmp[3];
+
+ /* Write back fence seq, ring id and wrap id for RADEON_CS_DONE queries. */
+ tmp[0] = lower_32_bits(parser->ib.fence->seq);
+ tmp[1] = parser->ib.fence->ring;
+ tmp[2] = parser->rdev->fence_drv[tmp[1]].wrap_seq;
+ if (copy_to_user(&to[3], tmp, sizeof(tmp)))
+ DRM_ERROR("Failed to copy fence info to user space\n");
+ }
} else if (backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
@@ -823,3 +834,55 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
*cs_reloc = p->relocs_ptr[(idx / 4)];
return 0;
}
+
+int radeon_cs_done_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct radeon_device *rdev = dev->dev_private;
+ struct drm_radeon_cs_done *args = data;
+ unsigned i = args->ring;
+ int32_t last_seq, sync_seq, wrap_seq;
+
+ /* Reject invalid ring indices coming from user space. */
+ if (i >= RADEON_NUM_RINGS)
+ return -EINVAL;
+
+ /*
+ * This read barrier pairs with the write barrier in
+ * radeon_fence_emit() and ensures we read matching wrap_seq and
+ * sync_seq values.
+ *
+ * No need to take the ring emission lock for fence_drv.sync_seq here;
+ * the barrier gives us the coherency we need.
+ */
+ wrap_seq = ACCESS_ONCE(rdev->fence_drv[i].wrap_seq);
+ smp_rmb();
+ sync_seq = lower_32_bits(ACCESS_ONCE(rdev->fence_drv[i].sync_seq[i]));
+
+ /*
+ * If the current wrap_seq and the one we are being queried with differ
+ * by more than one, then we are being asked about a very old fence seq
+ * value and we can assume it has long been signaled.
+ *
+ * Strictly speaking this is only guaranteed if we stalled the wrap
+ * counter increment until every CS from the previous wrap had completed.
+ * We do not, but the odds of that mattering are so small that we live
+ * with the thrill of it going wrong.
+ */
+ if (abs(wrap_seq - args->wrap) > 1)
+ return 1;
+ /* Refresh last_seq, then check whether the queried seq is done. */
+ radeon_fence_process(rdev, i);
+ last_seq = lower_32_bits(atomic64_read(&rdev->fence_drv[i].last_seq));
+ if ((last_seq - args->seq) >= 0)
+ return 1;
+ /*
+ * Last failsafe for the case where userspace held on to a wrap and seq
+ * value for so long without querying that we wrapped all the way around.
+ * The seq it holds is then ahead of the current sync_seq[ring], so the
+ * check above would keep reporting a fence that was emitted ages ago as
+ * not yet done.
+ */
+ if (((sync_seq - args->seq) < 0) && args->wrap == wrap_seq)
+ return 1;
+ return 0;
+}
@@ -119,6 +119,10 @@ int radeon_fence_emit(struct radeon_device *rdev,
kref_init(&((*fence)->kref));
(*fence)->rdev = rdev;
(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
+ /* Pairs with the read barrier in radeon_cs_done_ioctl(). */
+ smp_wmb();
+ if (!((*fence)->seq & (RADEON_SEQ_WRAP_VALUE - 1)))
+ rdev->fence_drv[ring].wrap_seq++;
(*fence)->ring = ring;
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
@@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(RADEON_CS_DONE, radeon_cs_done_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
};
int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
@@ -554,6 +554,7 @@ typedef struct {
#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
#define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
#define DRM_IOCTL_RADEON_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
+#define DRM_IOCTL_RADEON_CS_DONE DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS_DONE, struct drm_radeon_cs_done)
typedef struct drm_radeon_init {
enum {
@@ -936,6 +937,7 @@ struct drm_radeon_gem_va {
#define RADEON_CS_RING_VCE 4
/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
/* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth, fifth and sixth dwords are written back with the 32bit fence seq, ring id and wrap id of this CS */
struct drm_radeon_cs_chunk {
uint32_t chunk_id;
@@ -1038,4 +1040,11 @@ struct drm_radeon_info {
#define CIK_TILE_MODE_DEPTH_STENCIL_1D 5
+struct drm_radeon_cs_done {
+ int32_t seq;
+ int32_t ring;
+ int32_t wrap;
+ int32_t pad;
+};
+
#endif
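
For reference, a minimal userspace sketch of how the new interface could be exercised; this is an illustration only, not part of the patch. It assumes libdrm's drmIoctl(), a local copy of the patched radeon_drm.h, and that the caller kept a pointer to the RADEON_CHUNK_ID_FLAGS dwords of the submitted CS (the kernel writes the fence seq, ring id and wrap id into dwords 3..5). The helper name radeon_cs_is_done() is made up for the example.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "radeon_drm.h"

/* Returns 1 if the CS described by the written-back flags dwords has
 * completed, 0 if it is still pending, or -1 (errno set) on ioctl error.
 */
static int radeon_cs_is_done(int fd, const uint32_t *flags)
{
	struct drm_radeon_cs_done args;

	memset(&args, 0, sizeof(args));
	args.seq = (int32_t)flags[3];	/* fence seq written back by the kernel */
	args.ring = (int32_t)flags[4];	/* ring id */
	args.wrap = (int32_t)flags[5];	/* wrap id */

	return drmIoctl(fd, DRM_IOCTL_RADEON_CS_DONE, &args);
}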