From: Michel Dänzer michel.daenzer@amd.com
This was only the case if the GPU reset was triggered from the CS ioctl, otherwise other processes could happily enter the CS ioctl and wreak havoc during the GPU reset.
This is a little complicated because the GPU reset can be triggered from the CS ioctl, in which case we're already holding the mutex, or from other call paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't allow nested locking or finding out the mutex owner, so we need to handle this with some helper functions.
Signed-off-by: Michel Dänzer michel.daenzer@amd.com --- drivers/gpu/drm/radeon/radeon.h | 60 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_cs.c | 14 ++++---- drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++ 3 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef0e0e0..89304d9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1203,6 +1203,8 @@ struct radeon_device { struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct mutex cs_mutex; + struct task_struct *cs_mutex_owner; + struct mutex cs_mutex_owner_mutex; struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool gpu_lockup; @@ -1241,6 +1243,64 @@ struct radeon_device { struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS]; };
+ +/* + * CS mutex helpers + */ + +static inline void cs_mutex_lock(struct radeon_device *rdev) +{ + mutex_lock(&rdev->cs_mutex); + + mutex_lock(&rdev->cs_mutex_owner_mutex); + rdev->cs_mutex_owner = current; + mutex_unlock(&rdev->cs_mutex_owner_mutex); +} + +static inline void cs_mutex_unlock(struct radeon_device *rdev) +{ + mutex_lock(&rdev->cs_mutex_owner_mutex); + rdev->cs_mutex_owner = NULL; + mutex_unlock(&rdev->cs_mutex_owner_mutex); + + mutex_unlock(&rdev->cs_mutex); +} + +/* + * Check if this process locked the CS mutex already; if it didn't, lock it. + * + * Returns: + * true: This function locked the mutex. + * false: This function didn't lock the mutex (this process already locked it + * before), so the caller probably shouldn't unlock it. + */ +static inline int cs_mutex_ensure_locked(struct radeon_device *rdev) +{ + int took_mutex; + int have_mutex; + + mutex_lock(&rdev->cs_mutex_owner_mutex); + took_mutex = mutex_trylock(&rdev->cs_mutex); + if (took_mutex) { + /* The mutex was unlocked before, so it's ours now */ + rdev->cs_mutex_owner = current; + have_mutex = true; + } else { + /* Somebody already locked the mutex. Was it this process? */ + have_mutex = (rdev->cs_mutex_owner == current); + } + mutex_unlock(&rdev->cs_mutex_owner_mutex); + + if (!have_mutex) { + /* Another process locked the mutex, take it */ + cs_mutex_lock(rdev); + took_mutex = true; + } + + return took_mutex; +} + + int radeon_device_init(struct radeon_device *rdev, struct drm_device *ddev, struct pci_dev *pdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index fae00c0..61e3063 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_chunk *ib_chunk; int r;
- mutex_lock(&rdev->cs_mutex); + cs_mutex_lock(rdev); /* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp; @@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r); - mutex_unlock(&rdev->cs_mutex); + cs_mutex_unlock(rdev); return r; } r = radeon_ib_get(rdev, &parser.ib); if (r) { DRM_ERROR("Failed to get ib !\n"); radeon_cs_parser_fini(&parser, r); - mutex_unlock(&rdev->cs_mutex); + cs_mutex_unlock(rdev); return r; } r = radeon_cs_parser_relocs(&parser); @@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r != -ERESTARTSYS) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r); - mutex_unlock(&rdev->cs_mutex); + cs_mutex_unlock(rdev); return r; } /* Copy the packet into the IB, the parser will read from the @@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r || parser.parser_error) { DRM_ERROR("Invalid command stream !\n"); radeon_cs_parser_fini(&parser, r); - mutex_unlock(&rdev->cs_mutex); + cs_mutex_unlock(rdev); return r; } r = radeon_cs_finish_pages(&parser); if (r) { DRM_ERROR("Invalid command stream !\n"); radeon_cs_parser_fini(&parser, r); - mutex_unlock(&rdev->cs_mutex); + cs_mutex_unlock(rdev); return r; } r = radeon_ib_schedule(rdev, parser.ib); @@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to schedule IB !\n"); } radeon_cs_parser_fini(&parser, r); - mutex_unlock(&rdev->cs_mutex); + cs_mutex_unlock(rdev); return r; }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 5f0fd85..5f3d02d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev,
/* mutex initialization are all done here so we * can recall function without having locking issues */ + mutex_init(&rdev->cs_mutex_owner_mutex); + rdev->cs_mutex_owner = NULL; mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ib_pool.mutex); mutex_init(&rdev->cp.mutex); @@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev) { int r; int resched; + int took_cs_mutex; + + /* Prevent CS ioctl from interfering */ + took_cs_mutex = cs_mutex_ensure_locked(rdev); + if (!rdev->gpu_lockup) { + DRM_DEBUG("GPU already reset by previous CS mutex holder\n"); + if (took_cs_mutex) + cs_mutex_unlock(rdev); + return 0; + }
radeon_save_bios_scratch_regs(rdev); /* block TTM */ @@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_restore_bios_scratch_regs(rdev); drm_helper_resume_force_mode(rdev->ddev); ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); + if (took_cs_mutex) + cs_mutex_unlock(rdev); return 0; } + if (took_cs_mutex) + cs_mutex_unlock(rdev); /* bad news, how to tell it to userspace ? */ dev_info(rdev->dev, "GPU reset failed\n"); return r;
From: Michel Dänzer michel.daenzer@amd.com
Signed-off-by: Michel Dänzer michel.daenzer@amd.com ---
Not sure the CS ioctl can actually run concurrently with suspend/resume, but might be better safe than sorry?
drivers/gpu/drm/radeon/radeon_device.c | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 5f3d02d..80b4799 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -881,6 +881,10 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) } } } + + /* Prevent CS ioctl from interfering */ + cs_mutex_lock(rdev); + /* evict vram memory */ radeon_bo_evict_vram(rdev); /* wait for gpu to finish processing current batch */ @@ -944,6 +948,9 @@ int radeon_resume_kms(struct drm_device *dev) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } + + cs_mutex_unlock(rdev); + return 0; }
2011/9/15 Michel Dänzer michel@daenzer.net:
From: Michel Dänzer michel.daenzer@amd.com
This was only the case if the GPU reset was triggered from the CS ioctl, otherwise other processes could happily enter the CS ioctl and wreak havoc during the GPU reset.
This is a little complicated because the GPU reset can be triggered from the CS ioctl, in which case we're already holding the mutex, or from other call paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't allow nested locking or finding out the mutex owner, so we need to handle this with some helper functions.
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
For both patches:
Reviewed-by: Alex Deucher alexander.deucher@amd.com
drivers/gpu/drm/radeon/radeon.h | 60 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_cs.c | 14 ++++---- drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++ 3 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef0e0e0..89304d9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1203,6 +1203,8 @@ struct radeon_device { struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct mutex cs_mutex;
- struct task_struct *cs_mutex_owner;
- struct mutex cs_mutex_owner_mutex;
struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool gpu_lockup; @@ -1241,6 +1243,64 @@ struct radeon_device { struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS]; };
+/*
- CS mutex helpers
- */
+static inline void cs_mutex_lock(struct radeon_device *rdev) +{
- mutex_lock(&rdev->cs_mutex);
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- rdev->cs_mutex_owner = current;
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
+}
+static inline void cs_mutex_unlock(struct radeon_device *rdev) +{
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- rdev->cs_mutex_owner = NULL;
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
- mutex_unlock(&rdev->cs_mutex);
+}
+/*
- Check if this process locked the CS mutex already; if it didn't, lock it.
- Returns:
- true: This function locked the mutex.
- false: This function didn't lock the mutex (this process already locked it
- before), so the caller probably shouldn't unlock it.
- */
+static inline int cs_mutex_ensure_locked(struct radeon_device *rdev) +{
- int took_mutex;
- int have_mutex;
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- took_mutex = mutex_trylock(&rdev->cs_mutex);
- if (took_mutex) {
- /* The mutex was unlocked before, so it's ours now */
- rdev->cs_mutex_owner = current;
- have_mutex = true;
- } else {
- /* Somebody already locked the mutex. Was it this process? */
- have_mutex = (rdev->cs_mutex_owner == current);
- }
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
- if (!have_mutex) {
- /* Another process locked the mutex, take it */
- cs_mutex_lock(rdev);
- took_mutex = true;
- }
- return took_mutex;
+}
int radeon_device_init(struct radeon_device *rdev, struct drm_device *ddev, struct pci_dev *pdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index fae00c0..61e3063 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_chunk *ib_chunk; int r;
- mutex_lock(&rdev->cs_mutex);
- cs_mutex_lock(rdev);
/* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp; @@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r);
- mutex_unlock(&rdev->cs_mutex);
- cs_mutex_unlock(rdev);
return r; } r = radeon_ib_get(rdev, &parser.ib); if (r) { DRM_ERROR("Failed to get ib !\n"); radeon_cs_parser_fini(&parser, r);
- mutex_unlock(&rdev->cs_mutex);
- cs_mutex_unlock(rdev);
return r; } r = radeon_cs_parser_relocs(&parser); @@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r != -ERESTARTSYS) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r);
- mutex_unlock(&rdev->cs_mutex);
- cs_mutex_unlock(rdev);
return r; } /* Copy the packet into the IB, the parser will read from the @@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r || parser.parser_error) { DRM_ERROR("Invalid command stream !\n"); radeon_cs_parser_fini(&parser, r);
- mutex_unlock(&rdev->cs_mutex);
- cs_mutex_unlock(rdev);
return r; } r = radeon_cs_finish_pages(&parser); if (r) { DRM_ERROR("Invalid command stream !\n"); radeon_cs_parser_fini(&parser, r);
- mutex_unlock(&rdev->cs_mutex);
- cs_mutex_unlock(rdev);
return r; } r = radeon_ib_schedule(rdev, parser.ib); @@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to schedule IB !\n"); } radeon_cs_parser_fini(&parser, r);
- mutex_unlock(&rdev->cs_mutex);
- cs_mutex_unlock(rdev);
return r; }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 5f0fd85..5f3d02d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev,
/* mutex initialization are all done here so we * can recall function without having locking issues */
- mutex_init(&rdev->cs_mutex_owner_mutex);
- rdev->cs_mutex_owner = NULL;
mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ib_pool.mutex); mutex_init(&rdev->cp.mutex); @@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev) { int r; int resched;
- int took_cs_mutex;
- /* Prevent CS ioctl from interfering */
- took_cs_mutex = cs_mutex_ensure_locked(rdev);
- if (!rdev->gpu_lockup) {
- DRM_DEBUG("GPU already reset by previous CS mutex holder\n");
- if (took_cs_mutex)
- cs_mutex_unlock(rdev);
- return 0;
- }
radeon_save_bios_scratch_regs(rdev); /* block TTM */ @@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_restore_bios_scratch_regs(rdev); drm_helper_resume_force_mode(rdev->ddev); ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
- if (took_cs_mutex)
- cs_mutex_unlock(rdev);
return 0; }
- if (took_cs_mutex)
- cs_mutex_unlock(rdev);
/* bad news, how to tell it to userspace ? */ dev_info(rdev->dev, "GPU reset failed\n"); return r; -- 1.7.6.3
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
On Thu, Sep 15, 2011 at 08:21:00PM +0200, Michel Dänzer wrote:
From: Michel Dänzer michel.daenzer@amd.com
This was only the case if the GPU reset was triggered from the CS ioctl, otherwise other processes could happily enter the CS ioctl and wreak havoc during the GPU reset.
This is a little complicated because the GPU reset can be triggered from the CS ioctl, in which case we're already holding the mutex, or from other call paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't allow nested locking or finding out the mutex owner, so we need to handle this with some helper functions.
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
drivers/gpu/drm/radeon/radeon.h | 60 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_cs.c | 14 ++++---- drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++ 3 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef0e0e0..89304d9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1203,6 +1203,8 @@ struct radeon_device { struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct mutex cs_mutex;
- struct task_struct *cs_mutex_owner;
- struct mutex cs_mutex_owner_mutex;
That is a bit of 'mutex.. mutex'. What about just having the same name as before?
struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool gpu_lockup; @@ -1241,6 +1243,64 @@ struct radeon_device { struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS]; };
+/*
- CS mutex helpers
- */
+static inline void cs_mutex_lock(struct radeon_device *rdev) +{
- mutex_lock(&rdev->cs_mutex);
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- rdev->cs_mutex_owner = current;
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
+}
+static inline void cs_mutex_unlock(struct radeon_device *rdev) +{
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- rdev->cs_mutex_owner = NULL;
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
- mutex_unlock(&rdev->cs_mutex);
+}
+/*
- Check if this process locked the CS mutex already; if it didn't, lock it.
- Returns:
- true: This function locked the mutex.
- false: This function didn't lock the mutex (this process already locked it
- before), so the caller probably shouldn't unlock it.
- */
+static inline int cs_mutex_ensure_locked(struct radeon_device *rdev) +{
- int took_mutex;
- int have_mutex;
I think you meant 'bool'?
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- took_mutex = mutex_trylock(&rdev->cs_mutex);
- if (took_mutex) {
/* The mutex was unlocked before, so it's ours now */
rdev->cs_mutex_owner = current;
have_mutex = true;
consider you set that here..
- } else {
/* Somebody already locked the mutex. Was it this process? */
have_mutex = (rdev->cs_mutex_owner == current);
- }
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
- if (!have_mutex) {
/* Another process locked the mutex, take it */
cs_mutex_lock(rdev);
took_mutex = true;
- }
- return took_mutex;
and if it is really going to be bool, you might as well change the return signature and make it the function decleration return bool instead of int.
+}
int radeon_device_init(struct radeon_device *rdev, struct drm_device *ddev, struct pci_dev *pdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index fae00c0..61e3063 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_chunk *ib_chunk; int r;
- mutex_lock(&rdev->cs_mutex);
- cs_mutex_lock(rdev); /* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp;
@@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r);
mutex_unlock(&rdev->cs_mutex);
return r; } r = radeon_ib_get(rdev, &parser.ib); if (r) { DRM_ERROR("Failed to get ib !\n"); radeon_cs_parser_fini(&parser, r);cs_mutex_unlock(rdev);
mutex_unlock(&rdev->cs_mutex);
return r; } r = radeon_cs_parser_relocs(&parser);cs_mutex_unlock(rdev);
@@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r != -ERESTARTSYS) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r);
mutex_unlock(&rdev->cs_mutex);
return r; } /* Copy the packet into the IB, the parser will read from thecs_mutex_unlock(rdev);
@@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r || parser.parser_error) { DRM_ERROR("Invalid command stream !\n"); radeon_cs_parser_fini(&parser, r);
mutex_unlock(&rdev->cs_mutex);
return r; } r = radeon_cs_finish_pages(&parser); if (r) { DRM_ERROR("Invalid command stream !\n"); radeon_cs_parser_fini(&parser, r);cs_mutex_unlock(rdev);
mutex_unlock(&rdev->cs_mutex);
return r; } r = radeon_ib_schedule(rdev, parser.ib);cs_mutex_unlock(rdev);
@@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to schedule IB !\n"); } radeon_cs_parser_fini(&parser, r);
- mutex_unlock(&rdev->cs_mutex);
- cs_mutex_unlock(rdev); return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 5f0fd85..5f3d02d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev,
/* mutex initialization are all done here so we * can recall function without having locking issues */
- mutex_init(&rdev->cs_mutex_owner_mutex);
- rdev->cs_mutex_owner = NULL; mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ib_pool.mutex); mutex_init(&rdev->cp.mutex);
@@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev) { int r; int resched;
int took_cs_mutex;
/* Prevent CS ioctl from interfering */
took_cs_mutex = cs_mutex_ensure_locked(rdev);
if (!rdev->gpu_lockup) {
DRM_DEBUG("GPU already reset by previous CS mutex holder\n");
if (took_cs_mutex)
cs_mutex_unlock(rdev);
return 0;
}
radeon_save_bios_scratch_regs(rdev); /* block TTM */
@@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_restore_bios_scratch_regs(rdev); drm_helper_resume_force_mode(rdev->ddev); ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
if (took_cs_mutex)
return 0; }cs_mutex_unlock(rdev);
- if (took_cs_mutex)
/* bad news, how to tell it to userspace ? */ dev_info(rdev->dev, "GPU reset failed\n"); return r;cs_mutex_unlock(rdev);
-- 1.7.6.3
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
On Fre, 2011-09-16 at 04:31 -0400, Konrad Rzeszutek Wilk wrote:
On Thu, Sep 15, 2011 at 08:21:00PM +0200, Michel Dänzer wrote:
From: Michel Dänzer michel.daenzer@amd.com
This was only the case if the GPU reset was triggered from the CS ioctl, otherwise other processes could happily enter the CS ioctl and wreak havoc during the GPU reset.
This is a little complicated because the GPU reset can be triggered from the CS ioctl, in which case we're already holding the mutex, or from other call paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't allow nested locking or finding out the mutex owner, so we need to handle this with some helper functions.
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
drivers/gpu/drm/radeon/radeon.h | 60 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_cs.c | 14 ++++---- drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++ 3 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef0e0e0..89304d9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1203,6 +1203,8 @@ struct radeon_device { struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct mutex cs_mutex;
- struct task_struct *cs_mutex_owner;
- struct mutex cs_mutex_owner_mutex;
That is a bit of 'mutex.. mutex'. What about just having the same name as before?
What do you mean? This adds a second mutex protecting the owner field.
Though now you got me thinking... Maybe the second mutex isn't necessary at all. Let me try that.
+/*
- Check if this process locked the CS mutex already; if it didn't, lock it.
- Returns:
- true: This function locked the mutex.
- false: This function didn't lock the mutex (this process already locked it
- before), so the caller probably shouldn't unlock it.
- */
+static inline int cs_mutex_ensure_locked(struct radeon_device *rdev) +{
- int took_mutex;
- int have_mutex;
I think you meant 'bool'?
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- took_mutex = mutex_trylock(&rdev->cs_mutex);
- if (took_mutex) {
/* The mutex was unlocked before, so it's ours now */
rdev->cs_mutex_owner = current;
have_mutex = true;
consider you set that here..
- } else {
/* Somebody already locked the mutex. Was it this process? */
have_mutex = (rdev->cs_mutex_owner == current);
- }
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
- if (!have_mutex) {
/* Another process locked the mutex, take it */
cs_mutex_lock(rdev);
took_mutex = true;
- }
- return took_mutex;
and if it is really going to be bool, you might as well change the return signature and make it the function decleration return bool instead of int.
Yeah, I can change that. I'll send a v2 patch.
On Fri, Sep 16, 2011 at 10:40:23AM +0200, Michel Dänzer wrote:
On Fre, 2011-09-16 at 04:31 -0400, Konrad Rzeszutek Wilk wrote:
On Thu, Sep 15, 2011 at 08:21:00PM +0200, Michel Dänzer wrote:
From: Michel Dänzer michel.daenzer@amd.com
This was only the case if the GPU reset was triggered from the CS ioctl, otherwise other processes could happily enter the CS ioctl and wreak havoc during the GPU reset.
This is a little complicated because the GPU reset can be triggered from the CS ioctl, in which case we're already holding the mutex, or from other call paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't allow nested locking or finding out the mutex owner, so we need to handle this with some helper functions.
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
drivers/gpu/drm/radeon/radeon.h | 60 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_cs.c | 14 ++++---- drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++ 3 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef0e0e0..89304d9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1203,6 +1203,8 @@ struct radeon_device { struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct mutex cs_mutex;
- struct task_struct *cs_mutex_owner;
- struct mutex cs_mutex_owner_mutex;
That is a bit of 'mutex.. mutex'. What about just having the same name as before?
What do you mean? This adds a second mutex protecting the owner field.
Oh, somehow I thought it was replacing the 'cs_mutex'.. but now that I look again that is not the case.
Though now you got me thinking... Maybe the second mutex isn't necessary at all. Let me try that.
Ok.
+/*
- Check if this process locked the CS mutex already; if it didn't, lock it.
- Returns:
- true: This function locked the mutex.
- false: This function didn't lock the mutex (this process already locked it
- before), so the caller probably shouldn't unlock it.
- */
+static inline int cs_mutex_ensure_locked(struct radeon_device *rdev) +{
- int took_mutex;
- int have_mutex;
I think you meant 'bool'?
- mutex_lock(&rdev->cs_mutex_owner_mutex);
- took_mutex = mutex_trylock(&rdev->cs_mutex);
- if (took_mutex) {
/* The mutex was unlocked before, so it's ours now */
rdev->cs_mutex_owner = current;
have_mutex = true;
consider you set that here..
- } else {
/* Somebody already locked the mutex. Was it this process? */
have_mutex = (rdev->cs_mutex_owner == current);
- }
- mutex_unlock(&rdev->cs_mutex_owner_mutex);
- if (!have_mutex) {
/* Another process locked the mutex, take it */
cs_mutex_lock(rdev);
took_mutex = true;
- }
- return took_mutex;
and if it is really going to be bool, you might as well change the return signature and make it the function decleration return bool instead of int.
Yeah, I can change that. I'll send a v2 patch.
Excellent.
dri-devel@lists.freedesktop.org