Message ID | 1511883178-3128-1-git-send-email-andrey.grodzovsky@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Am 28.11.2017 um 16:32 schrieb Andrey Grodzovsky: > The test is as following: > > 1) Create context A & B > 2) Send a command submission using context A which fires up a compute shader. > 3) The shader wait a bit and then write a value to a memory location. > 4) Send a command submission using context B which writes another value to the same memory location, but having an explicit dependency on the first command submission. > 5) Wait with the CPU for both submissions to finish and inspect the written value. > > Test passes if the value seen in the memory location after both submissions is from command B. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Would probably be nice to add this to the deadlock.c tests instead of the basic_tests.c, but I think that won't work because of the PM4 defines needed. Anyway patch is Acked-by: Christian König <christian.koenig@amd.com> Regards, Christian. > --- > tests/amdgpu/amdgpu_test.c | 18 ++++ > tests/amdgpu/basic_tests.c | 264 +++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 282 insertions(+) > > diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c > index 50da17c..8fa3399 100644 > --- a/tests/amdgpu/amdgpu_test.c > +++ b/tests/amdgpu/amdgpu_test.c > @@ -49,6 +49,7 @@ > #include "CUnit/Basic.h" > > #include "amdgpu_test.h" > +#include "amdgpu_internal.h" > > /* Test suit names */ > #define BASIC_TESTS_STR "Basic Tests" > @@ -401,9 +402,20 @@ static int amdgpu_find_device(uint8_t bus, uint16_t dev) > > static void amdgpu_disable_suits() > { > + amdgpu_device_handle device_handle; > + uint32_t major_version, minor_version, family_id; > int i; > int size = sizeof(suites_active_stat) / sizeof(suites_active_stat[0]); > > + if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, > + &minor_version, &device_handle)) > + return; > + > + family_id = device_handle->info.family_id; > + > + if (amdgpu_device_deinitialize(device_handle)) > + return; > + > /* Set active status for suits 
based on their policies */ > for (i = 0; i < size; ++i) > if (amdgpu_set_suite_active(suites_active_stat[i].pName, > @@ -420,6 +432,12 @@ static void amdgpu_disable_suits() > > if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE)) > fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); > + > + > + /* This test was ran on GFX8 and GFX9 only */ > + if (family_id < AMDGPU_FAMILY_VI || family_id > AMDGPU_FAMILY_RV) > + if (amdgpu_set_test_active(BASIC_TESTS_STR, "Sync dependency Test", CU_FALSE)) > + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); > } > > /* The main() function for setting up and running the tests. > diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c > index e7f48e3..a78cf52 100644 > --- a/tests/amdgpu/basic_tests.c > +++ b/tests/amdgpu/basic_tests.c > @@ -50,6 +50,7 @@ static void amdgpu_command_submission_multi_fence(void); > static void amdgpu_command_submission_sdma(void); > static void amdgpu_userptr_test(void); > static void amdgpu_semaphore_test(void); > +static void amdgpu_sync_dependency_test(void); > > static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); > static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); > @@ -63,6 +64,7 @@ CU_TestInfo basic_tests[] = { > { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, > { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, > { "SW semaphore Test", amdgpu_semaphore_test }, > + { "Sync dependency Test", amdgpu_sync_dependency_test }, > CU_TEST_INFO_NULL, > }; > #define BUFFER_SIZE (8 * 1024) > @@ -226,6 +228,60 @@ CU_TestInfo basic_tests[] = { > */ > # define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) > > + > +#define PKT3_CONTEXT_CONTROL 0x28 > +#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) > +#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) > +#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 
0x1) << 31) > + > +#define PKT3_CLEAR_STATE 0x12 > + > +#define PKT3_SET_SH_REG 0x76 > +#define PACKET3_SET_SH_REG_START 0x00002c00 > + > +#define PACKET3_DISPATCH_DIRECT 0x15 > + > + > +/* gfx 8 */ > +#define mmCOMPUTE_PGM_LO 0x2e0c > +#define mmCOMPUTE_PGM_RSRC1 0x2e12 > +#define mmCOMPUTE_TMPRING_SIZE 0x2e18 > +#define mmCOMPUTE_USER_DATA_0 0x2e40 > +#define mmCOMPUTE_USER_DATA_1 0x2e41 > +#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 > +#define mmCOMPUTE_NUM_THREAD_X 0x2e07 > + > + > + > +#define SWAP_32(num) ((num>>24)&0xff) | \ > + ((num<<8)&0xff0000) | \ > + ((num>>8)&0xff00) | \ > + ((num<<24)&0xff000000) > + > + > +/* Shader code > + * void main() > +{ > + > + float x = some_input; > + for (unsigned i = 0; i < 1000000; i++) > + x = sin(x); > + > + u[0] = 42u; > +} > +*/ > + > +static uint32_t shader_bin[] = { > + SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), > + SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), > + SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), > + SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) > +}; > + > +#define CODE_OFFSET 512 > +#define DATA_OFFSET 1024 > + > + > int suite_basic_tests_init(void) > { > struct amdgpu_gpu_info gpu_info = {0}; > @@ -1386,3 +1442,211 @@ static void amdgpu_userptr_test(void) > > wait(NULL); > } > + > +static void amdgpu_sync_dependency_test(void) > +{ > + amdgpu_context_handle context_handle[2]; > + amdgpu_bo_handle ib_result_handle; > + void *ib_result_cpu; > + uint64_t ib_result_mc_address; > + struct amdgpu_cs_request ibs_request; > + struct amdgpu_cs_ib_info ib_info; > + struct amdgpu_cs_fence fence_status; > + uint32_t expired; > + int i, j, r, instance; > + amdgpu_bo_list_handle bo_list; > + amdgpu_va_handle va_handle; > + static uint32_t *ptr; > + uint64_t seq_no; > + > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); > + 
CU_ASSERT_EQUAL(r, 0); > + r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, > + AMDGPU_GEM_DOMAIN_GTT, 0, > + &ib_result_handle, &ib_result_cpu, > + &ib_result_mc_address, &va_handle); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, > + &bo_list); > + CU_ASSERT_EQUAL(r, 0); > + > + ptr = ib_result_cpu; > + i = 0; > + > + memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); > + > + /* Dispatch minimal init config and verify it's executed */ > + ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); > + ptr[i++] = 0x80000000; > + ptr[i++] = 0x80000000; > + > + ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); > + ptr[i++] = 0x80000000; > + > + > + /* Program compute regs */ > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); > + ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; > + ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; > + ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; > + > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); > + ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; > + /* > + * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 > + SGPRS = 1 > + PRIORITY = 0 > + FLOAT_MODE = 192 (0xc0) > + PRIV = 0 > + DX10_CLAMP = 1 > + DEBUG_MODE = 0 > + IEEE_MODE = 0 > + BULKY = 0 > + CDBG_USER = 0 > + * > + */ > + ptr[i++] = 0x002c0040; > + > + > + /* > + * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 > + USER_SGPR = 8 > + TRAP_PRESENT = 0 > + TGID_X_EN = 0 > + TGID_Y_EN = 0 > + TGID_Z_EN = 0 > + TG_SIZE_EN = 0 > + TIDIG_COMP_CNT = 0 > + EXCP_EN_MSB = 0 > + LDS_SIZE = 0 > + EXCP_EN = 0 > + * > + */ > + ptr[i++] = 0x00000010; > + > + > +/* > + * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) > + WAVESIZE = 0 > + * > + */ > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); > + ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; > + ptr[i++] = 0x00000100; > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 
> + ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; > + ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); > + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); > + ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; > + ptr[i++] = 0; > + > + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); > + ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; > + ptr[i++] = 1; > + ptr[i++] = 1; > + ptr[i++] = 1; > + > + > + /* Dispatch */ > + ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); > + ptr[i++] = 1; > + ptr[i++] = 1; > + ptr[i++] = 1; > + ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ > + > + > + while (i & 7) > + ptr[i++] = 0xffff1000; /* type3 nop packet */ > + > + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); > + ib_info.ib_mc_address = ib_result_mc_address; > + ib_info.size = i; > + > + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); > + ibs_request.ip_type = AMDGPU_HW_IP_GFX; > + ibs_request.ring = 0; > + ibs_request.number_of_ibs = 1; > + ibs_request.ibs = &ib_info; > + ibs_request.resources = bo_list; > + ibs_request.fence_info.handle = NULL; > + > + r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); > + CU_ASSERT_EQUAL(r, 0); > + seq_no = ibs_request.seq_no; > + > + > + > + /* Prepare second command with dependency on the first */ > + j = i; > + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); > + ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; > + ptr[i++] = 0xfffffffc & ib_result_mc_address + DATA_OFFSET * 4; > + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; > + ptr[i++] = 99; > + > + while (i & 7) > + ptr[i++] = 0xffff1000; /* type3 nop packet */ > + > + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); > + ib_info.ib_mc_address = ib_result_mc_address + j * 4; > + ib_info.size = i - j; > + > + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); > + ibs_request.ip_type 
= AMDGPU_HW_IP_GFX; > + ibs_request.ring = 0; > + ibs_request.number_of_ibs = 1; > + ibs_request.ibs = &ib_info; > + ibs_request.resources = bo_list; > + ibs_request.fence_info.handle = NULL; > + > + ibs_request.number_of_dependencies = 1; > + > + ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); > + ibs_request.dependencies[0].context = context_handle[1]; > + ibs_request.dependencies[0].ip_instance = 0; > + ibs_request.dependencies[0].ring = 0; > + ibs_request.dependencies[0].fence = seq_no; > + > + > + r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); > + CU_ASSERT_EQUAL(r, 0); > + > + > + memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); > + fence_status.context = context_handle[0]; > + fence_status.ip_type = AMDGPU_HW_IP_GFX; > + fence_status.ip_instance = 0; > + fence_status.ring = 0; > + fence_status.fence = ibs_request.seq_no; > + > + r = amdgpu_cs_query_fence_status(&fence_status, > + AMDGPU_TIMEOUT_INFINITE,0, &expired); > + CU_ASSERT_EQUAL(r, 0); > + > + /* Expect the second command to wait for shader to complete */ > + CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); > + > + r = amdgpu_bo_list_destroy(bo_list); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, > + ib_result_mc_address, 4096); > + CU_ASSERT_EQUAL(r, 0); > + > + r = amdgpu_cs_ctx_free(context_handle[0]); > + CU_ASSERT_EQUAL(r, 0); > + r = amdgpu_cs_ctx_free(context_handle[1]); > + CU_ASSERT_EQUAL(r, 0); > + > + free(ibs_request.dependencies); > +}
diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c index 50da17c..8fa3399 100644 --- a/tests/amdgpu/amdgpu_test.c +++ b/tests/amdgpu/amdgpu_test.c @@ -49,6 +49,7 @@ #include "CUnit/Basic.h" #include "amdgpu_test.h" +#include "amdgpu_internal.h" /* Test suit names */ #define BASIC_TESTS_STR "Basic Tests" @@ -401,9 +402,20 @@ static int amdgpu_find_device(uint8_t bus, uint16_t dev) static void amdgpu_disable_suits() { + amdgpu_device_handle device_handle; + uint32_t major_version, minor_version, family_id; int i; int size = sizeof(suites_active_stat) / sizeof(suites_active_stat[0]); + if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, + &minor_version, &device_handle)) + return; + + family_id = device_handle->info.family_id; + + if (amdgpu_device_deinitialize(device_handle)) + return; + /* Set active status for suits based on their policies */ for (i = 0; i < size; ++i) if (amdgpu_set_suite_active(suites_active_stat[i].pName, @@ -420,6 +432,12 @@ static void amdgpu_disable_suits() if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE)) fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); + + + /* This test was ran on GFX8 and GFX9 only */ + if (family_id < AMDGPU_FAMILY_VI || family_id > AMDGPU_FAMILY_RV) + if (amdgpu_set_test_active(BASIC_TESTS_STR, "Sync dependency Test", CU_FALSE)) + fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg()); } /* The main() function for setting up and running the tests. 
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index e7f48e3..a78cf52 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -50,6 +50,7 @@ static void amdgpu_command_submission_multi_fence(void); static void amdgpu_command_submission_sdma(void); static void amdgpu_userptr_test(void); static void amdgpu_semaphore_test(void); +static void amdgpu_sync_dependency_test(void); static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); @@ -63,6 +64,7 @@ CU_TestInfo basic_tests[] = { { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, { "SW semaphore Test", amdgpu_semaphore_test }, + { "Sync dependency Test", amdgpu_sync_dependency_test }, CU_TEST_INFO_NULL, }; #define BUFFER_SIZE (8 * 1024) @@ -226,6 +228,60 @@ CU_TestInfo basic_tests[] = { */ # define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) + +#define PKT3_CONTEXT_CONTROL 0x28 +#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) +#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) +#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) + +#define PKT3_CLEAR_STATE 0x12 + +#define PKT3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x00002c00 + +#define PACKET3_DISPATCH_DIRECT 0x15 + + +/* gfx 8 */ +#define mmCOMPUTE_PGM_LO 0x2e0c +#define mmCOMPUTE_PGM_RSRC1 0x2e12 +#define mmCOMPUTE_TMPRING_SIZE 0x2e18 +#define mmCOMPUTE_USER_DATA_0 0x2e40 +#define mmCOMPUTE_USER_DATA_1 0x2e41 +#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 +#define mmCOMPUTE_NUM_THREAD_X 0x2e07 + + + +#define SWAP_32(num) ((num>>24)&0xff) | \ + ((num<<8)&0xff0000) | \ + ((num>>8)&0xff00) | \ + ((num<<24)&0xff000000) + + +/* Shader code + * void main() +{ + + float x = some_input; + for (unsigned i = 0; i < 1000000; i++) + x = sin(x); + + u[0] = 42u; +} +*/ + +static uint32_t 
shader_bin[] = { + SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), + SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), + SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), + SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) +}; + +#define CODE_OFFSET 512 +#define DATA_OFFSET 1024 + + int suite_basic_tests_init(void) { struct amdgpu_gpu_info gpu_info = {0}; @@ -1386,3 +1442,211 @@ static void amdgpu_userptr_test(void) wait(NULL); } + +static void amdgpu_sync_dependency_test(void) +{ + amdgpu_context_handle context_handle[2]; + amdgpu_bo_handle ib_result_handle; + void *ib_result_cpu; + uint64_t ib_result_mc_address; + struct amdgpu_cs_request ibs_request; + struct amdgpu_cs_ib_info ib_info; + struct amdgpu_cs_fence fence_status; + uint32_t expired; + int i, j, r, instance; + amdgpu_bo_list_handle bo_list; + amdgpu_va_handle va_handle; + static uint32_t *ptr; + uint64_t seq_no; + + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address, &va_handle); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, + &bo_list); + CU_ASSERT_EQUAL(r, 0); + + ptr = ib_result_cpu; + i = 0; + + memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); + + /* Dispatch minimal init config and verify it's executed */ + ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); + ptr[i++] = 0x80000000; + ptr[i++] = 0x80000000; + + ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); + ptr[i++] = 0x80000000; + + + /* Program compute regs */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); + ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; + ptr[i++] = (ib_result_mc_address + 
CODE_OFFSET * 4) >> 8; + ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; + + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); + ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; + /* + * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 + SGPRS = 1 + PRIORITY = 0 + FLOAT_MODE = 192 (0xc0) + PRIV = 0 + DX10_CLAMP = 1 + DEBUG_MODE = 0 + IEEE_MODE = 0 + BULKY = 0 + CDBG_USER = 0 + * + */ + ptr[i++] = 0x002c0040; + + + /* + * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 + USER_SGPR = 8 + TRAP_PRESENT = 0 + TGID_X_EN = 0 + TGID_Y_EN = 0 + TGID_Z_EN = 0 + TG_SIZE_EN = 0 + TIDIG_COMP_CNT = 0 + EXCP_EN_MSB = 0 + LDS_SIZE = 0 + EXCP_EN = 0 + * + */ + ptr[i++] = 0x00000010; + + +/* + * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) + WAVESIZE = 0 + * + */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); + ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; + ptr[i++] = 0x00000100; + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); + ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; + ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); + ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; + ptr[i++] = 0; + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); + ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; + ptr[i++] = 1; + ptr[i++] = 1; + ptr[i++] = 1; + + + /* Dispatch */ + ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ptr[i++] = 1; + ptr[i++] = 1; + ptr[i++] = 1; + ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ + + + while (i & 7) + ptr[i++] = 0xffff1000; /* type3 nop packet */ + + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); + ib_info.ib_mc_address = ib_result_mc_address; + ib_info.size = i; + + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); + ibs_request.ip_type = AMDGPU_HW_IP_GFX; + ibs_request.ring = 0; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + 
ibs_request.resources = bo_list; + ibs_request.fence_info.handle = NULL; + + r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + seq_no = ibs_request.seq_no; + + + + /* Prepare second command with dependency on the first */ + j = i; + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); + ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ptr[i++] = 0xfffffffc & ib_result_mc_address + DATA_OFFSET * 4; + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; + ptr[i++] = 99; + + while (i & 7) + ptr[i++] = 0xffff1000; /* type3 nop packet */ + + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); + ib_info.ib_mc_address = ib_result_mc_address + j * 4; + ib_info.size = i - j; + + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); + ibs_request.ip_type = AMDGPU_HW_IP_GFX; + ibs_request.ring = 0; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + ibs_request.resources = bo_list; + ibs_request.fence_info.handle = NULL; + + ibs_request.number_of_dependencies = 1; + + ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); + ibs_request.dependencies[0].context = context_handle[1]; + ibs_request.dependencies[0].ip_instance = 0; + ibs_request.dependencies[0].ring = 0; + ibs_request.dependencies[0].fence = seq_no; + + + r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + + + memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); + fence_status.context = context_handle[0]; + fence_status.ip_type = AMDGPU_HW_IP_GFX; + fence_status.ip_instance = 0; + fence_status.ring = 0; + fence_status.fence = ibs_request.seq_no; + + r = amdgpu_cs_query_fence_status(&fence_status, + AMDGPU_TIMEOUT_INFINITE,0, &expired); + CU_ASSERT_EQUAL(r, 0); + + /* Expect the second command to wait for shader to complete */ + CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); + + r = amdgpu_bo_list_destroy(bo_list); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_unmap_and_free(ib_result_handle, 
va_handle, + ib_result_mc_address, 4096); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_cs_ctx_free(context_handle[0]); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_cs_ctx_free(context_handle[1]); + CU_ASSERT_EQUAL(r, 0); + + free(ibs_request.dependencies); +}
The test is as follows: 1) Create contexts A and B. 2) Send a command submission using context A which fires up a compute shader. 3) The shader waits a bit and then writes a value to a memory location. 4) Send a command submission using context B which writes another value to the same memory location, but has an explicit dependency on the first command submission. 5) Wait with the CPU for both submissions to finish and inspect the written value. The test passes if the value seen in the memory location after both submissions is the one written by the submission from context B. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> --- tests/amdgpu/amdgpu_test.c | 18 ++++ tests/amdgpu/basic_tests.c | 264 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+)