This patch-set continues to prepare amdkfd so that it can support the VI APU. It prepares the DQM and KQ modules to support more than one ASIC.
Note: there is no change in the IOCTLs.
Oded
Ben Goz (2): drm/amdkfd: Add initial VI support for DQM drm/amdkfd: Add initial VI support for KQ
Oded Gabbay (2): drm/amdkfd: Encapsulate DQM functions in ops structure drm/amdkfd: Encapsulate KQ functions in ops structure
drivers/gpu/drm/amd/amdkfd/Makefile | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 181 +++++++-------------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 36 +++- .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 135 +++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 64 ++++++++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 45 +++-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 38 ++++- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 44 +++++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 56 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 +-- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 16 +- 13 files changed, 483 insertions(+), 170 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
This patch does some re-org on the device_queue_manager structure. It takes out all the function pointers from the structure and puts them in a new structure, called device_queue_manager_ops. Then, it puts an instance of that structure inside device_queue_manager.
This re-org is done to prepare the DQM module to support more than one AMD APU (currently only Kaveri is supported).
Signed-off-by: Oded Gabbay oded.gabbay@amd.com --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 68 +++++++++++----------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 25 +++++--- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 16 ++--- 6 files changed, 65 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b008fd6..38b6150 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -439,7 +439,7 @@ static long kfd_ioctl_set_memory_policy(struct file *filep, (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent;
- if (!dev->dqm->set_cache_memory_policy(dev->dqm, + if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm, &pdd->qpd, default_policy, alternate_policy, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a23ed24..a770ec6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -253,7 +253,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; }
- if (kfd->dqm->start(kfd->dqm) != 0) { + if (kfd->dqm->ops.start(kfd->dqm) != 0) { dev_err(kfd_device, "Error starting queuen manager for device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); @@ -307,7 +307,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) BUG_ON(kfd == NULL);
if (kfd->init_complete) { - kfd->dqm->stop(kfd->dqm); + kfd->dqm->ops.stop(kfd->dqm); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); amd_iommu_free_device(kfd->pdev); } @@ -328,7 +328,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) return -ENXIO; amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); - kfd->dqm->start(kfd->dqm); + kfd->dqm->ops.start(kfd->dqm); }
return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c83f011..12c8448 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -271,7 +271,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
BUG_ON(!dqm || !q || !qpd);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (mqd == NULL) return -ENOMEM;
@@ -305,14 +305,14 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, mutex_lock(&dqm->lock);
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (mqd == NULL) { retval = -ENOMEM; goto out; } deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); if (mqd == NULL) { retval = -ENOMEM; goto out; @@ -348,7 +348,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) BUG_ON(!dqm || !q || !q->mqd);
mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, q->properties.type); + mqd = dqm->ops.get_mqd_manager(dqm, q->properties.type); if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -515,7 +515,7 @@ static int init_pipelines(struct device_queue_manager *dqm,
memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (mqd == NULL) { kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); return -ENOMEM; @@ -646,7 +646,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd; int retval;
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); if (!mqd) return -ENOMEM;
@@ -849,7 +849,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) select_sdma_engine_id(q);
- mqd = dqm->get_mqd_manager(dqm, + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type));
if (mqd == NULL) { @@ -994,7 +994,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
/* remove queue from list to prevent rescheduling after preemption */ mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; @@ -1116,40 +1116,40 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case KFD_SCHED_POLICY_HWS: case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: /* initialize dqm for cp scheduling */ - dqm->create_queue = create_queue_cpsch; - dqm->initialize = initialize_cpsch; - dqm->start = start_cpsch; - dqm->stop = stop_cpsch; - dqm->destroy_queue = destroy_queue_cpsch; - dqm->update_queue = update_queue; - dqm->get_mqd_manager = get_mqd_manager_nocpsch; - dqm->register_process = register_process_nocpsch; - dqm->unregister_process = unregister_process_nocpsch; - dqm->uninitialize = uninitialize_nocpsch; - dqm->create_kernel_queue = create_kernel_queue_cpsch; - dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch; - dqm->set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.create_queue = create_queue_cpsch; + dqm->ops.initialize = initialize_cpsch; + dqm->ops.start = start_cpsch; + dqm->ops.stop = stop_cpsch; + dqm->ops.destroy_queue = destroy_queue_cpsch; + dqm->ops.update_queue = update_queue; + dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; + dqm->ops.register_process = register_process_nocpsch; + dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; + dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; + dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ - dqm->start = start_nocpsch; - dqm->stop = stop_nocpsch; - dqm->create_queue = create_queue_nocpsch; - dqm->destroy_queue = destroy_queue_nocpsch; - dqm->update_queue = update_queue; - dqm->get_mqd_manager = 
get_mqd_manager_nocpsch; - dqm->register_process = register_process_nocpsch; - dqm->unregister_process = unregister_process_nocpsch; - dqm->initialize = initialize_nocpsch; - dqm->uninitialize = uninitialize_nocpsch; - dqm->set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.start = start_nocpsch; + dqm->ops.stop = stop_nocpsch; + dqm->ops.create_queue = create_queue_nocpsch; + dqm->ops.destroy_queue = destroy_queue_nocpsch; + dqm->ops.update_queue = update_queue; + dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; + dqm->ops.register_process = register_process_nocpsch; + dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.initialize = initialize_nocpsch; + dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; default: BUG(); break; }
- if (dqm->initialize(dqm) != 0) { + if (dqm->ops.initialize(dqm) != 0) { kfree(dqm); return NULL; } @@ -1161,7 +1161,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) { BUG_ON(!dqm);
- dqm->uninitialize(dqm); + dqm->ops.uninitialize(dqm); kfree(dqm); }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 554c06e..72d2ca0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -46,7 +46,7 @@ struct device_process_node { };
/** - * struct device_queue_manager + * struct device_queue_manager_ops * * @create_queue: Queue creation routine. * @@ -81,15 +81,9 @@ struct device_process_node { * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the * memory apertures. * - * This struct is a base class for the kfd queues scheduler in the - * device level. The device base class should expose the basic operations - * for queue creation and queue destruction. This base class hides the - * scheduling mode of the driver and the specific implementation of the - * concrete device. This class is the only class in the queues scheduler - * that configures the H/W. */
-struct device_queue_manager { +struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, @@ -124,7 +118,22 @@ struct device_queue_manager { enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); +}; + +/** + * struct device_queue_manager + * + * This struct is a base class for the kfd queues scheduler in the + * device level. The device base class should expose the basic operations + * for queue creation and queue destruction. This base class hides the + * scheduling mode of the driver and the specific implementation of the + * concrete device. This class is the only class in the queues scheduler + * that configures the H/W. + * + */
+struct device_queue_manager { + struct device_queue_manager_ops ops;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct packet_manager packets; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 773c213..add0fb4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -56,7 +56,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, switch (type) { case KFD_QUEUE_TYPE_DIQ: case KFD_QUEUE_TYPE_HIQ: - kq->mqd = dev->dqm->get_mqd_manager(dev->dqm, + kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm, KFD_MQD_TYPE_HIQ); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 948b1ca..513eeb6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -178,7 +178,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (list_empty(&pqm->queues)) { pdd->qpd.pqm = pqm; - dev->dqm->register_process(dev->dqm, &pdd->qpd); + dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); }
pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL); @@ -204,7 +204,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd, + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, &q->properties.vmid); print_queue(q); break; @@ -217,7 +217,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, kq->queue->properties.queue_id = *qid; pqn->kq = kq; pqn->q = NULL; - retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd); + retval = dev->dqm->ops.create_kernel_queue(dev->dqm, + kq, &pdd->qpd); break; default: BUG(); @@ -285,13 +286,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->kq) { /* destroy kernel queue (DIQ) */ dqm = pqn->kq->dev->dqm; - dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); + dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); kernel_queue_uninit(pqn->kq); }
if (pqn->q) { dqm = pqn->q->device->dqm; - retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q); + retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval != 0) return retval;
@@ -303,7 +304,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) clear_bit(qid, pqm->queue_slot_bitmap);
if (list_empty(&pqm->queues)) - dqm->unregister_process(dqm, &pdd->qpd); + dqm->ops.unregister_process(dqm, &pdd->qpd);
return retval; } @@ -324,7 +325,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, pqn->q->properties.queue_percent = p->queue_percent; pqn->q->properties.priority = p->priority;
- retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q); + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); if (retval != 0) return retval;
From: Ben Goz ben.goz@amd.com
This patch starts to add support for the VI APU in the DQM module.
Because most (more than 90%) of the DQM code is shared among AMD's APUs, we chose a design that performs most/all of the work in the shared DQM file (kfd_device_queue_manager.c). If there is H/W-specific code to be executed, then it is written in an ASIC-specific extension function for that H/W.
That asic-specific extension function is called from the shared function at the appropriate time. This requires that for every asic-specific extension function that is implemented in a specific ASIC, there will be an equivalent implementation in ALL ASICs, even if those implementations are just stubs.
That way we achieve:
- Maintainability: by having one copy of most of the code, we only need to fix bugs in one location
- Readability: it is very clear which code is shared and which is done per ASIC
- Extensibility: very easy to add new H/W specific files/functions
Signed-off-by: Ben Goz ben.goz@amd.com Signed-off-by: Oded Gabbay oded.gabbay@amd.com --- drivers/gpu/drm/amd/amdkfd/Makefile | 1 + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 113 +++++------------ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +- .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 135 +++++++++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 64 ++++++++++ 5 files changed, 238 insertions(+), 86 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index bc6053f..7558683 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -10,6 +10,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ kfd_kernel_queue.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ + kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ kfd_interrupt.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 12c8448..b201624 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -61,7 +61,7 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) return KFD_MQD_TYPE_CP; }
-static inline unsigned int get_pipes_num(struct device_queue_manager *dqm) +inline unsigned int get_pipes_num(struct device_queue_manager *dqm) { BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.compute_pipe_count; @@ -78,7 +78,7 @@ static inline unsigned int get_pipes_num_cpsch(void) return PIPE_PER_ME_CP_SCHEDULING; }
-static inline unsigned int +inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) { uint32_t nybble; @@ -88,7 +88,7 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) return nybble; }
-static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) +inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { unsigned int shared_base;
@@ -97,41 +97,7 @@ static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) return shared_base; }
-static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble); -static void init_process_memory(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct kfd_process_device *pdd; - unsigned int temp; - - BUG_ON(!dqm || !qpd); - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | - DEFAULT_MTYPE(MTYPE_NONCACHED) | - APE1_MTYPE(MTYPE_NONCACHED); - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } - - if (qpd->pqm->process->is_32bit_user_mode) { - temp = get_sh_mem_bases_32(pdd); - qpd->sh_mem_bases = SHARED_BASE(temp); - qpd->sh_mem_config |= PTR32; - } else { - temp = get_sh_mem_bases_nybble_64(pdd); - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); - } - - pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", - qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); -} - -static void program_sh_mem_settings(struct device_queue_manager *dqm, +void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, @@ -391,6 +357,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; + int retval;
BUG_ON(!dqm || !qpd);
@@ -405,12 +372,13 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, mutex_lock(&dqm->lock); list_add(&n->list, &dqm->queues);
- init_process_memory(dqm, qpd); + retval = dqm->ops_asic_specific.register_process(dqm, qpd); + dqm->processes_count++;
mutex_unlock(&dqm->lock);
- return 0; + return retval; }
static int unregister_process_nocpsch(struct device_queue_manager *dqm, @@ -455,34 +423,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, vmid); }
-static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) -{ - /* In 64-bit mode, we can only control the top 3 bits of the LDS, - * scratch and GPUVM apertures. - * The hardware fills in the remaining 59 bits according to the - * following pattern: - * LDS: X0000000'00000000 - X0000001'00000000 (4GB) - * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) - * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) - * - * (where X/Y is the configurable nybble with the low-bit 0) - * - * LDS and scratch will have the same top nybble programmed in the - * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. - * GPUVM can have a different top nybble programmed in the - * top 3 bits of SH_MEM_BASES.SHARED_BASE. - * We don't bother to support different top nybbles - * for LDS/Scratch and GPUVM. - */ - - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || - top_address_nybble == 0); - - return PRIVATE_BASE(top_address_nybble << 12) | - SHARED_BASE(top_address_nybble << 12); -} - -static int init_pipelines(struct device_queue_manager *dqm, +int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_num, unsigned int first_pipe) { void *hpdptr; @@ -715,7 +656,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; - retval = init_pipelines(dqm, get_pipes_num(dqm), 0); + retval = dqm->ops_asic_specific.initialize(dqm); if (retval != 0) goto fail_init_pipelines;
@@ -1035,8 +976,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - uint32_t default_mtype; - uint32_t ape1_mtype; + bool retval;
pr_debug("kfd: In func %s\n", __func__);
@@ -1073,18 +1013,13 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit = limit >> 16; }
- default_mtype = (default_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - ape1_mtype = (alternate_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) - | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) - | DEFAULT_MTYPE(default_mtype) - | APE1_MTYPE(ape1_mtype); + retval = dqm->ops_asic_specific.set_cache_memory_policy( + dqm, + qpd, + default_policy, + alternate_policy, + alternate_aperture_base, + alternate_aperture_size);
if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); @@ -1094,7 +1029,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit);
mutex_unlock(&dqm->lock); - return true; + return retval;
out: mutex_unlock(&dqm->lock); @@ -1107,6 +1042,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
BUG_ON(!dev);
+ pr_debug("kfd: loading device queue manager\n"); + dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); if (!dqm) return NULL; @@ -1149,6 +1086,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) break; }
+ switch (dev->device_info->asic_family) { + case CHIP_CARRIZO: + device_queue_manager_init_vi(&dqm->ops_asic_specific); + case CHIP_KAVERI: + device_queue_manager_init_cik(&dqm->ops_asic_specific); + } + if (dqm->ops.initialize(dqm) != 0) { kfree(dqm); return NULL; @@ -1164,4 +1108,3 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) dqm->ops.uninitialize(dqm); kfree(dqm); } - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 72d2ca0..1934795 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -134,6 +134,7 @@ struct device_queue_manager_ops {
struct device_queue_manager { struct device_queue_manager_ops ops; + struct device_queue_manager_ops ops_asic_specific;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct packet_manager packets; @@ -155,6 +156,14 @@ struct device_queue_manager { bool active_runlist; };
- +void device_queue_manager_init_cik(struct device_queue_manager_ops *ops); +void device_queue_manager_init_vi(struct device_queue_manager_ops *ops); +void program_sh_mem_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *qpd); +inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd); +int init_pipelines(struct device_queue_manager *dqm, + unsigned int pipes_num, unsigned int first_pipe); +inline unsigned int get_pipes_num(struct device_queue_manager *dqm);
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c new file mode 100644 index 0000000..6b07246 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -0,0 +1,135 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "kfd_device_queue_manager.h" +#include "cik_regs.h" + +static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +static int register_process_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static int initialize_cpsch_cik(struct device_queue_manager *dqm); + +void device_queue_manager_init_cik(struct device_queue_manager_ops *ops) +{ + ops->set_cache_memory_policy = set_cache_memory_policy_cik; + ops->register_process = register_process_cik; + ops->initialize = initialize_cpsch_cik; +} + +static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) +{ + /* In 64-bit mode, we can only control the top 3 bits of the LDS, + * scratch and GPUVM apertures. + * The hardware fills in the remaining 59 bits according to the + * following pattern: + * LDS: X0000000'00000000 - X0000001'00000000 (4GB) + * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) + * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) + * + * (where X/Y is the configurable nybble with the low-bit 0) + * + * LDS and scratch will have the same top nybble programmed in the + * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. + * GPUVM can have a different top nybble programmed in the + * top 3 bits of SH_MEM_BASES.SHARED_BASE. + * We don't bother to support different top nybbles + * for LDS/Scratch and GPUVM. 
+ */ + + BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + top_address_nybble == 0); + + return PRIVATE_BASE(top_address_nybble << 12) | + SHARED_BASE(top_address_nybble << 12); +} + +static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + uint32_t default_mtype; + uint32_t ape1_mtype; + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) + | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) + | DEFAULT_MTYPE(default_mtype) + | APE1_MTYPE(ape1_mtype); + + return true; +} + +static int register_process_cik(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + BUG_ON(!dqm || !qpd); + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | + DEFAULT_MTYPE(MTYPE_NONCACHED) | + APE1_MTYPE(MTYPE_NONCACHED); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + if (qpd->pqm->process->is_32bit_user_mode) { + temp = get_sh_mem_bases_32(pdd); + qpd->sh_mem_bases = SHARED_BASE(temp); + qpd->sh_mem_config |= PTR32; + } else { + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + } + + pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +} + +static int initialize_cpsch_cik(struct device_queue_manager *dqm) +{ + return init_pipelines(dqm, get_pipes_num(dqm), 0); +} diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c new file mode 100644 index 0000000..20553dc --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -0,0 +1,64 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "kfd_device_queue_manager.h" + +static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +static int register_process_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static int initialize_cpsch_vi(struct device_queue_manager *dqm); + +void device_queue_manager_init_vi(struct device_queue_manager_ops *ops) +{ + pr_warn("amdkfd: VI DQM is not currently supported\n"); + + ops->set_cache_memory_policy = set_cache_memory_policy_vi; + ops->register_process = register_process_vi; + ops->initialize = initialize_cpsch_vi; +} + +static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + return false; +} + +static int register_process_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + return -1; +} + +static int initialize_cpsch_vi(struct device_queue_manager *dqm) +{ + return 0; +}
This patch does some re-org on the kernel_queue structure. It takes out all the function pointers from the structure and puts them in a new structure, called kernel_queue_ops. Then, it puts an instance of that structure inside kernel_queue.
This re-org is done to prepare the KQ module to support more than one AMD APU (currently only Kaveri is supported).
Signed-off-by: Oded Gabbay oded.gabbay@amd.com --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 24 +++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 31 +++++++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 ++++++++++----------- 3 files changed, 54 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index add0fb4..731635d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -293,14 +293,14 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, if (!kq) return NULL;
- kq->initialize = initialize; - kq->uninitialize = uninitialize; - kq->acquire_packet_buffer = acquire_packet_buffer; - kq->submit_packet = submit_packet; - kq->sync_with_hw = sync_with_hw; - kq->rollback_packet = rollback_packet; - - if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { + kq->ops.initialize = initialize; + kq->ops.uninitialize = uninitialize; + kq->ops.acquire_packet_buffer = acquire_packet_buffer; + kq->ops.submit_packet = submit_packet; + kq->ops.sync_with_hw = sync_with_hw; + kq->ops.rollback_packet = rollback_packet; + + if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { pr_err("kfd: failed to init kernel queue\n"); kfree(kq); return NULL; @@ -312,7 +312,7 @@ void kernel_queue_uninit(struct kernel_queue *kq) { BUG_ON(!kq);
- kq->uninitialize(kq); + kq->ops.uninitialize(kq); kfree(kq); }
@@ -329,12 +329,12 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); BUG_ON(!kq);
- retval = kq->acquire_packet_buffer(kq, 5, &buffer); + retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); BUG_ON(retval != 0); for (i = 0; i < 5; i++) buffer[i] = kq->nop_packet; - kq->submit_packet(kq); - kq->sync_with_hw(kq, 1000); + kq->ops.submit_packet(kq); + kq->ops.sync_with_hw(kq, 1000);
pr_debug("kfd: ending kernel queue test\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index dcd2bdb..e01b77b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -28,8 +28,31 @@ #include <linux/types.h> #include "kfd_priv.h"
-struct kernel_queue { - /* interface */ +/** + * struct kernel_queue_ops + * + * @initialize: Initialize a kernel queue, including allocations of GART memory + * needed for the queue. + * + * @uninitialize: Uninitialize a kernel queue and free all its memory usages. + * + * @acquire_packet_buffer: Returns a pointer to the location in the kernel + * queue ring buffer where the calling function can write its packet. It is + * Guaranteed that there is enough space for that packet. It also updates the + * pending write pointer to that location so subsequent calls to + * acquire_packet_buffer will get a correct write pointer + * + * @submit_packet: Update the write pointer and doorbell of a kernel queue. + * + * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel + * queue are equal, which means the CP has read all the submitted packets. + * + * @rollback_packet: This routine is called if we failed to build an acquired + * packet for some reason. It just overwrites the pending wptr with the current + * one + * + */ +struct kernel_queue_ops { bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); void (*uninitialize)(struct kernel_queue *kq); @@ -41,6 +64,10 @@ struct kernel_queue { int (*sync_with_hw)(struct kernel_queue *kq, unsigned long timeout_ms); void (*rollback_packet)(struct kernel_queue *kq); +}; + +struct kernel_queue { + struct kernel_queue_ops ops;
/* data */ struct kfd_dev *dev; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 3cda952..5fb5c03 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -348,7 +348,7 @@ int pm_send_set_resources(struct packet_manager *pm, pr_debug("kfd: In func %s\n", __func__);
mutex_lock(&pm->lock); - pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, sizeof(*packet) / sizeof(uint32_t), (unsigned int **)&packet); if (packet == NULL) { @@ -375,8 +375,8 @@ int pm_send_set_resources(struct packet_manager *pm, packet->queue_mask_lo = lower_32_bits(res->queue_mask); packet->queue_mask_hi = upper_32_bits(res->queue_mask);
- pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); + pm->priv_queue->ops.sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
mutex_unlock(&pm->lock);
@@ -402,7 +402,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t); mutex_lock(&pm->lock);
- retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, packet_size_dwords, &rl_buffer); if (retval != 0) goto fail_acquire_packet_buffer; @@ -412,15 +412,15 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval != 0) goto fail_create_runlist;
- pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); + pm->priv_queue->ops.sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
mutex_unlock(&pm->lock);
return retval;
fail_create_runlist: - pm->priv_queue->rollback_packet(pm->priv_queue); + pm->priv_queue->ops.rollback_packet(pm->priv_queue); fail_acquire_packet_buffer: mutex_unlock(&pm->lock); fail_create_runlist_ib: @@ -438,7 +438,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, BUG_ON(!pm || !fence_address);
mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer( + retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, sizeof(struct pm4_query_status) / sizeof(uint32_t), (unsigned int **)&packet); @@ -459,8 +459,8 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, packet->data_hi = upper_32_bits((uint64_t)fence_value); packet->data_lo = lower_32_bits((uint64_t)fence_value);
- pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); + pm->priv_queue->ops.sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); mutex_unlock(&pm->lock);
return 0; @@ -482,7 +482,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, BUG_ON(!pm);
mutex_lock(&pm->lock); - retval = pm->priv_queue->acquire_packet_buffer( + retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), &buffer); @@ -537,8 +537,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, break; };
- pm->priv_queue->submit_packet(pm->priv_queue); - pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + pm->priv_queue->ops.submit_packet(pm->priv_queue); + pm->priv_queue->ops.sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
mutex_unlock(&pm->lock); return 0;
From: Ben Goz ben.goz@amd.com
This patch starts to add support for the VI APU in the KQ (kernel queue) module.
Because most (more than 90%) of the KQ code is shared among AMD's APUs, we chose a design that keeps most or all of the code in the shared KQ file (kfd_kernel_queue.c). If there is H/W-specific code to be executed, then it is written in an ASIC-specific extension function for that H/W.
That asic-specific extension function is called from the shared function at the appropriate time. This requires that for every asic-specific extension function that is implemented in a specific ASIC, there will be an equivalent implementation in ALL ASICs, even if those implementations are just stubs.
That way we achieve:
- Maintainability: by having one copy of most of the code, we only need to fix bugs in one location
- Readability: it is very clear which code is shared and which is done per ASIC
- Extensibility: very easy to add new H/W specific files/functions
Signed-off-by: Ben Goz ben.goz@amd.com Signed-off-by: Oded Gabbay oded.gabbay@amd.com --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 21 +++++++-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7 +++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 44 ++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 56 +++++++++++++++++++++++ 5 files changed, 127 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 7558683..cd09c05 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -8,7 +8,8 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ - kfd_kernel_queue.o kfd_packet_manager.o \ + kfd_kernel_queue.o kfd_kernel_queue_cik.o \ + kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ kfd_interrupt.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 731635d..75950ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -73,13 +73,16 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, goto err_get_kernel_doorbell;
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); - if (retval != 0) goto err_pq_allocate_vidmem;
kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr;
+ retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); + if (retval == false) + goto err_eop_allocate_vidmem; + retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), &kq->rptr_mem);
@@ -111,6 +114,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.queue_address = kq->pq_gpu_addr; prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr; prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; + prop.eop_ring_buffer_address = kq->eop_gpu_addr; + prop.eop_ring_buffer_size = PAGE_SIZE;
if (init_queue(&kq->queue, prop) != 0) goto err_init_queue; @@ -156,6 +161,8 @@ err_init_queue: err_wptr_allocate_vidmem: kfd_gtt_sa_free(dev, kq->rptr_mem); err_rptr_allocate_vidmem: + kfd_gtt_sa_free(dev, kq->eop_mem); +err_eop_allocate_vidmem: kfd_gtt_sa_free(dev, kq->pq); err_pq_allocate_vidmem: pr_err("kfd: error init pq\n"); @@ -182,6 +189,7 @@ static void uninitialize(struct kernel_queue *kq)
kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem); + kq->ops_asic_specific.uninitialize(kq); kfd_gtt_sa_free(kq->dev, kq->pq); kfd_release_kernel_doorbell(kq->dev, kq->queue->properties.doorbell_ptr); @@ -300,6 +308,13 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, kq->ops.sync_with_hw = sync_with_hw; kq->ops.rollback_packet = rollback_packet;
+ switch (dev->device_info->asic_family) { + case CHIP_CARRIZO: + kernel_queue_init_vi(&kq->ops_asic_specific); + case CHIP_KAVERI: + kernel_queue_init_cik(&kq->ops_asic_specific); + } + if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { pr_err("kfd: failed to init kernel queue\n"); kfree(kq); @@ -324,7 +339,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
BUG_ON(!dev);
- pr_debug("kfd: starting kernel queue test\n"); + pr_err("kfd: starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); BUG_ON(!kq); @@ -336,7 +351,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) kq->ops.submit_packet(kq); kq->ops.sync_with_hw(kq, 1000);
- pr_debug("kfd: ending kernel queue test\n"); + pr_err("kfd: ending kernel queue test\n"); }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index e01b77b..2659d93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -68,6 +68,7 @@ struct kernel_queue_ops {
struct kernel_queue { struct kernel_queue_ops ops; + struct kernel_queue_ops ops_asic_specific;
/* data */ struct kfd_dev *dev; @@ -85,6 +86,9 @@ struct kernel_queue { struct kfd_mem_obj *pq; uint64_t pq_gpu_addr; uint32_t *pq_kernel_addr; + struct kfd_mem_obj *eop_mem; + uint64_t eop_gpu_addr; + uint32_t *eop_kernel_addr;
struct kfd_mem_obj *fence_mem_obj; uint64_t fence_gpu_addr; @@ -93,4 +97,7 @@ struct kernel_queue { struct list_head list; };
+void kernel_queue_init_cik(struct kernel_queue_ops *ops); +void kernel_queue_init_vi(struct kernel_queue_ops *ops); + #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c new file mode 100644 index 0000000..a90eb44 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -0,0 +1,44 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "kfd_kernel_queue.h" + +static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_cik(struct kernel_queue *kq); + +void kernel_queue_init_cik(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_cik; + ops->uninitialize = uninitialize_cik; +} + +static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + return true; +} + +static void uninitialize_cik(struct kernel_queue *kq) +{ +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c new file mode 100644 index 0000000..f1d4828 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -0,0 +1,56 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "kfd_kernel_queue.h" + +static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_vi(struct kernel_queue *kq); + +void kernel_queue_init_vi(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_vi; + ops->uninitialize = uninitialize_vi; +} + +static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval != 0) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + + return true; +} + +static void uninitialize_vi(struct kernel_queue *kq) +{ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); +}
On Tue, Jan 13, 2015 at 6:19 AM, Oded Gabbay oded.gabbay@amd.com wrote:
This patch-set continues to prepare amdkfd so it could support the VI APU. It prepares the DQM and KQ modules to support more than one ASIC.
Note: there is no change in the IOCTLs.
Series is: Reviewed-by: Alex Deucher alexander.deucher@amd.com
Oded
Ben Goz (2): drm/amdkfd: Add initial VI support for DQM drm/amdkfd: Add initial VI support for KQ
Oded Gabbay (2): drm/amdkfd: Encapsulate DQM functions in ops structure drm/amdkfd: Encapsulate KQ functions in ops structure
drivers/gpu/drm/amd/amdkfd/Makefile | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 181 +++++++-------------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 36 +++- .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 135 +++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 64 ++++++++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 45 +++-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 38 ++++- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 44 +++++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 56 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 +-- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 16 +- 13 files changed, 483 insertions(+), 170 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
-- 1.9.1
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org