For a deferred-free cache of unreferenced bound objects, a simple reference count is required without the baggage of kref.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk --- drivers/gpu/drm/drm_gem.c | 12 +++--------- drivers/gpu/drm/drm_info.c | 2 +- include/drm/drmP.h | 16 +++++++++------- 3 files changed, 13 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index ea1c4b0..874e776 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -141,7 +141,7 @@ int drm_gem_object_init(struct drm_device *dev, if (IS_ERR(obj->filp)) return -ENOMEM;
- kref_init(&obj->refcount); + atomic_set(&obj->refcount, 1); atomic_set(&obj->handle_count, 0); obj->size = size;
@@ -436,9 +436,8 @@ EXPORT_SYMBOL(drm_gem_object_release); * Frees the object */ void -drm_gem_object_free(struct kref *kref) +drm_gem_object_free(struct drm_gem_object *obj) { - struct drm_gem_object *obj = (struct drm_gem_object *) kref; struct drm_device *dev = obj->dev;
BUG_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -448,11 +447,6 @@ drm_gem_object_free(struct kref *kref) } EXPORT_SYMBOL(drm_gem_object_free);
-static void drm_gem_object_ref_bug(struct kref *list_kref) -{ - BUG(); -} - /** * Called after the last handle to the object has been closed * @@ -476,7 +470,7 @@ void drm_gem_object_handle_free(struct drm_gem_object *obj) * * This cannot be the last reference, since the handle holds one too. */ - kref_put(&obj->refcount, drm_gem_object_ref_bug); + atomic_dec(&obj->refcount); } else spin_unlock(&dev->object_name_lock);
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index 3cdbaf3..4a80bd8 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -256,7 +256,7 @@ int drm_gem_one_name_info(int id, void *ptr, void *data) seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, atomic_read(&obj->handle_count), - atomic_read(&obj->refcount.refcount)); + atomic_read(&obj->refcount)); return 0; }
diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 2b33980..f332c56 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -609,7 +609,7 @@ struct drm_gem_mm { */ struct drm_gem_object { /** Reference count of this object */ - struct kref refcount; + atomic_t refcount;
/** Handle count of this object. Each handle also holds a reference */ atomic_t handle_count; /* number of handles on this object */ @@ -1504,7 +1504,7 @@ extern void drm_sysfs_connector_remove(struct drm_connector *connector); int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); void drm_gem_object_release(struct drm_gem_object *obj); -void drm_gem_object_free(struct kref *kref); +void drm_gem_object_free(struct drm_gem_object *obj); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, @@ -1519,23 +1519,25 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); static inline void drm_gem_object_reference(struct drm_gem_object *obj) { - kref_get(&obj->refcount); + atomic_inc(&obj->refcount); + smp_mb__after_atomic_inc(); }
static inline void drm_gem_object_unreference(struct drm_gem_object *obj) { - if (obj != NULL) - kref_put(&obj->refcount, drm_gem_object_free); + if (obj != NULL && atomic_dec_and_test(&obj->refcount)) + drm_gem_object_free(obj); }
static inline void drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) { - if (obj != NULL) { + if (obj != NULL && atomic_dec_and_test(&obj->refcount)) { struct drm_device *dev = obj->dev; mutex_lock(&dev->struct_mutex); - kref_put(&obj->refcount, drm_gem_object_free); + if (atomic_read(&obj->refcount) == 0) + drm_gem_object_free(obj); mutex_unlock(&dev->struct_mutex); } }
On Thu, 2010-11-25 at 21:40 +0000, Chris Wilson wrote:
For a deferred-free cache of unreferenced bound objects, a simple reference count is required without the baggage of kref.
eh?
you've just out-of-lined kref for no real gain.
the whole point of kref is that its standard and doesn't require auditing for the people not inlining it.
The only place I can see the advantage is not taking the struct mutex in the free path and really we should just fix the free function to take the struct mutex if required and wrap the real free.
Dave.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk
drivers/gpu/drm/drm_gem.c | 12 +++--------- drivers/gpu/drm/drm_info.c | 2 +- include/drm/drmP.h | 16 +++++++++------- 3 files changed, 13 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index ea1c4b0..874e776 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -141,7 +141,7 @@ int drm_gem_object_init(struct drm_device *dev, if (IS_ERR(obj->filp)) return -ENOMEM;
- kref_init(&obj->refcount);
+ atomic_set(&obj->refcount, 1); atomic_set(&obj->handle_count, 0); obj->size = size;
@@ -436,9 +436,8 @@ EXPORT_SYMBOL(drm_gem_object_release);
* Frees the object
*/ void -drm_gem_object_free(struct kref *kref) +drm_gem_object_free(struct drm_gem_object *obj) {
- struct drm_gem_object *obj = (struct drm_gem_object *) kref; struct drm_device *dev = obj->dev;
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
@@ -448,11 +447,6 @@ drm_gem_object_free(struct kref *kref) } EXPORT_SYMBOL(drm_gem_object_free);
-static void drm_gem_object_ref_bug(struct kref *list_kref) -{
- BUG();
-}
/**
* Called after the last handle to the object has been closed
@@ -476,7 +470,7 @@ void drm_gem_object_handle_free(struct drm_gem_object *obj) * * This cannot be the last reference, since the handle holds one too. */
- kref_put(&obj->refcount, drm_gem_object_ref_bug);
+ atomic_dec(&obj->refcount); } else spin_unlock(&dev->object_name_lock);
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index 3cdbaf3..4a80bd8 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -256,7 +256,7 @@ int drm_gem_one_name_info(int id, void *ptr, void *data) seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, atomic_read(&obj->handle_count),
- atomic_read(&obj->refcount.refcount));
+ atomic_read(&obj->refcount)); return 0;
}
diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 2b33980..f332c56 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -609,7 +609,7 @@ struct drm_gem_mm { */ struct drm_gem_object { /** Reference count of this object */
- struct kref refcount;
+ atomic_t refcount;
/** Handle count of this object. Each handle also holds a reference */ atomic_t handle_count; /* number of handles on this object */
@@ -1504,7 +1504,7 @@ extern void drm_sysfs_connector_remove(struct drm_connector *connector); int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); void drm_gem_object_release(struct drm_gem_object *obj); -void drm_gem_object_free(struct kref *kref); +void drm_gem_object_free(struct drm_gem_object *obj); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, @@ -1519,23 +1519,25 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); static inline void drm_gem_object_reference(struct drm_gem_object *obj) {
- kref_get(&obj->refcount);
+ atomic_inc(&obj->refcount);
+ smp_mb__after_atomic_inc();
}
static inline void drm_gem_object_unreference(struct drm_gem_object *obj) {
- if (obj != NULL)
- kref_put(&obj->refcount, drm_gem_object_free);
+ if (obj != NULL && atomic_dec_and_test(&obj->refcount))
+ drm_gem_object_free(obj);
}
static inline void drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) {
- if (obj != NULL) {
+ if (obj != NULL && atomic_dec_and_test(&obj->refcount)) { struct drm_device *dev = obj->dev; mutex_lock(&dev->struct_mutex);
- kref_put(&obj->refcount, drm_gem_object_free);
+ if (atomic_read(&obj->refcount) == 0)
+ drm_gem_object_free(obj);
mutex_unlock(&dev->struct_mutex); }
}
On Fri, 26 Nov 2010 08:38:29 +1000, Dave Airlie airlied@redhat.com wrote:
On Thu, 2010-11-25 at 21:40 +0000, Chris Wilson wrote:
For a deferred-free cache of unreferenced bound objects, a simple reference count is required without the baggage of kref.
eh?
The issue with kref is that it does:
void kref_get(struct kref *kref) { WARN_ON(!atomic_read(&kref->refcount)); atomic_inc(&kref->refcount); smp_mb__after_atomic_inc(); }
which causes havoc when you are trying to keep a list of unreferenced objects. That's all I'm trying to avoid. -Chris
void kref_get(struct kref *kref) { WARN_ON(!atomic_read(&kref->refcount)); atomic_inc(&kref->refcount); smp_mb__after_atomic_inc(); }
which causes havoc when you are trying to keep a list of unreferenced objects. That's all I'm trying to avoid.
You cannot have a list of unreferenced objects, the list itself is a set of references.
You might want something to scavenge objects whose refcount is the list only but thats not quite the same thing, and kref's are quite happy if used that way.
Alan
On 11/25/2010 11:38 PM, Dave Airlie wrote:
On Thu, 2010-11-25 at 21:40 +0000, Chris Wilson wrote:
For a deferred-free cache of unreferenced bound objects, a simple reference count is required without the baggage of kref.
eh?
you've just out-of-lined kref for no real gain.
the whole point of kref is that its standard and doesn't require auditing for the people not inlining it.
The only place I can see the advantage is not taking the struct mutex in the free path and really we should just fix the free function to take the struct mutex if required and wrap the real free.
This last thing is really a generic problem with objects looked up from user-space. Consider the lookup path:
read_lock() lookup_object_name(); kref_get(); read_unlock();
And the destroy path:
vs the desired put path if (reference_put()) { write_lock(): remove_name(); write_unlock(); destroy(); }
This is racy, in that the kref_get() can hit a zero refcount. I think an ideal thing here would be to add a kref_get_unless_zero() for this situation, that returns an error if the refcount was indeed zero. I don't think that would violate the kref idea, since we still never increase the refcount of a zeroed object, and the user needs to provide the necessary synchronization to make sure the object isn't gone while still the refcount is zero.
/Thomas
Dave.
On Sun, Nov 28, 2010 at 01:32:27PM +0100, Thomas Hellstrom wrote:
This is racy, in that the kref_get() can hit a zero refcount. I think an ideal thing here would be to add a kref_get_unless_zero() for this situation, that returns an error if the refcount was indeed zero. I don't think that would violate the kref idea, since we still never increase the refcount of a zeroed object, and the user needs to provide the necessary synchronization to make sure the object isn't gone while still the refcount is zero.
I've got curious rechecked the code and it all looks sane. The trick is that all the userspace-visible names hold a ref to the underlying gem object (including the flink name). If all userspace handles are destroyed (save the flink name) the flink name gets destroyed, too (see obj->handle_count). All this is independently protected with spinlocks and it happens _before_ dropping the ref to the underlying bo (and taking any necessary locks to do so). I don't see any races nor locking problems, there. -Daniel
On 11/28/2010 02:35 PM, Daniel Vetter wrote:
On Sun, Nov 28, 2010 at 01:32:27PM +0100, Thomas Hellstrom wrote:
This is racy, in that the kref_get() can hit a zero refcount. I think an ideal thing here would be to add a kref_get_unless_zero() for this situation, that returns an error if the refcount was indeed zero. I don't think that would violate the kref idea, since we still never increase the refcount of a zeroed object, and the user needs to provide the necessary synchronization to make sure the object isn't gone while still the refcount is zero.
I've got curious rechecked the code and it all looks sane. The trick is that all the userspace-visible names hold a ref to the underlying gem object (including the flink name). If all userspace handles are destroyed (save the flink name) the flink name gets destroyed, too (see obj->handle_count). All this is independently protected with spinlocks and it happens _before_ dropping the ref to the underlying bo (and taking any necessary locks to do so). I don't see any races nor locking problems, there. -Daniel
Hi, Daniel!
I'm not saying that the current gem code is incorrect. In particular, the gem name system allows upping the handle_count even if it's zero and the name is just about to be destroyed. I see that more as a curiosity, and gem would work just as well if it used kref_get_unless_zero on the handle_count.
However this is a generic problem. It appears in the general user-visible object case. For example, currently gem needs to take the struct_mutex *before* unreferencing to avoid hitting this problem with a racing mmap lookup (At least I think that's why the struct_mutex is taken). If mmap were using kref_get_unless_zero, and returning error on failure, the struct_mutex would be needed only inside the kref release function, and that is what Dave was referring to in his mail.
An identical problem appears in TTM where we need to take the bdev::vm_lock around unreffing a ttm bo, and in the ttm_object functionality where we also have a general lookup mechanism for stuff like contexts, surfaces, user-visible fences or whatever.
In particular this also applies to the rcu case, where we cannot block readers. If we were to move over all object lookup to use RCU locks, we'd want to be completely 100% sure that once a refcount reaches zero, it won't be upped again, and we can safely go ahead and use call_rcu for destruction. Let's say this wasn't true, call_rcu could in principle be called multiple times for the same object, and that isn't allowed until the callback has been run...
And the simple solution to this problem is kref_get_unless_zero().
/Thomas
Hi Thomas,
On Sun, Nov 28, 2010 at 03:19:50PM +0100, Thomas Hellstrom wrote:
I'm not saying that the current gem code is incorrect. In particular, the gem name system allows upping the handle_count even if it's zero and the name is just about to be destroyed. I see that more as a curiosity, and gem would work just as well if it used kref_get_unless_zero on the handle_count.
Yeah, noticed that, too. But that's just userspace racing against itself, so no problem.
However this is a generic problem. It appears in the general user-visible object case. For example, currently gem needs to take the struct_mutex *before* unreferencing to avoid hitting this problem with a racing mmap lookup (At least I think that's why the struct_mutex is taken). If mmap were using kref_get_unless_zero, and returning error on failure, the struct_mutex would be needed only inside the kref release function, and that is what Dave was referring to in his mail.
Indeed, I haven't noticed that. drm/i915 doesn't hold a ref to the underlying bo for the offset_hash, which requires the dev->struct_mutex around unref. An actual mmap gets itself a ref (protected by dev->struct_mutex) so from there on its all fine. I think the offset_hash should hold a reference too, which gets destroyed when handle_count reaches 0 (like the flink name). Haven't checked what ttm does for this case.
I think having a pointer without an accompanying reference is a bug, i.e. I've jotted this down on my list of things to fix. Then we could move the mutex taking inside the actual free function.
In particular this also applies to the rcu case, where we cannot block readers. If we were to move over all object lookup to use RCU locks, we'd want to be completely 100% sure that once a refcount reaches zero, it won't be upped again, and we can safely go ahead and use call_rcu for destruction. Let's say this wasn't true, call_rcu could in principle be called multiple times for the same object, and that isn't allowed until the callback has been run...
Well, if the lookup structure has a reference to the underlying object we only need to defer the unref till all readers are gone with call_rcu.
And the simple solution to this problem is kref_get_unless_zero().
IMHO kref is for kernel references. And shouldn't be abused for userspace lookup with strange semantics. So kref_get_unless_zero sounds like a very bad idea hiding brittle locking semantics at best, bugs at worst.
Cheers, Daniel
On 11/28/2010 04:13 PM, Daniel Vetter wrote:
Hi Thomas,
On Sun, Nov 28, 2010 at 03:19:50PM +0100, Thomas Hellstrom wrote:
I'm not saying that the current gem code is incorrect. In particular, the gem name system allows upping the handle_count even if it's zero and the name is just about to be destroyed. I see that more as a curiosity, and gem would work just as well if it used kref_get_unless_zero on the handle_count.
Yeah, noticed that, too. But that's just userspace racing against itself, so no problem.
However this is a generic problem. It appears in the general user-visible object case. For example, currently gem needs to take the struct_mutex *before* unreferencing to avoid hitting this problem with a racing mmap lookup (At least I think that's why the struct_mutex is taken). If mmap were using kref_get_unless_zero, and returning error on failure, the struct_mutex would be needed only inside the kref release function, and that is what Dave was referring to in his mail.
Indeed, I haven't noticed that. drm/i915 doesn't hold a ref to the underlying bo for the offset_hash, which requires the dev->struct_mutex around unref. An actual mmap gets itself a ref (protected by dev->struct_mutex) so from there on its all fine. I think the offset_hash should hold a reference too, which gets destroyed when handle_count reaches 0 (like the flink name). Haven't checked what ttm does for this case.
For TTM the "name" does not hold a reference just like the gem vm handle, but we take a lock around unref'ing so that destruction of the "name" and unreffing is atomic. That doesn't work with RCU since we can't block readers.
I think having a pointer without an accompanying reference is a bug, i.e. I've jotted this down on my list of things to fix. Then we could move the mutex taking inside the actual free function.
In particular this also applies to the rcu case, where we cannot block readers. If we were to move over all object lookup to use RCU locks, we'd want to be completely 100% sure that once a refcount reaches zero, it won't be upped again, and we can safely go ahead and use call_rcu for destruction. Let's say this wasn't true, call_rcu could in principle be called multiple times for the same object, and that isn't allowed until the callback has been run...
Well, if the lookup structure has a reference to the underlying object we only need to defer the unref till all readers are gone with call_rcu.
So how do you determine when to delete the lookup structure (or in the gem case, the name)? Currently gem does this when all handles are gone, but then again, how do you block a handle from registering a new reference just after you've removed the name and called rcu, but before the rcu callback is called? You can't block readers while you remove first the name and then the reference.
What you *can* do is to check again rcu in the callback for the refcount to be nonzero and abort, but then again you need a protection to avoid the rcu callback to be called again when the refcount reaches zero the second time, while the rcu head sits on the run list.... This quickly gets quite ugly, but if you have a good solution to this problem, I'm all ears.
And the simple solution to this problem is kref_get_unless_zero().
IMHO kref is for kernel references. And shouldn't be abused for userspace lookup with strange semantics.
These are perfectly valid kernel references. Typically the pointer in the handle's referencing idr entry. If kref_get_unless_zero() returns an error, you're not allowed to create a new pointer, and lookup returns an error. It's as easy as that.
So kref_get_unless_zero sounds like a very bad idea hiding brittle locking semantics at best, bugs at worst.
I don't agree. IMHO it's a good thing to do in situations where you might be racing with lookup object removal, but the underlying object is guaranteed to not have been destroyed. Perfectly suitable for RCU-like situations.
/Thomas
Cheers, Daniel
dri-devel@lists.freedesktop.org