Daniel pointed me towards this function and there are multiple obvious problems in the implementation.
First of all the retry loop is not working as intended. In general the retry only makes sense if you grab the reference first and then check the sequence values.
Then we should always also wait for the exclusive fence.
It's also good practice to keep the reference around when installing callbacks to fences you don't own.
And last, the whole implementation was unnecessarily complex and rather hard to understand, which could probably lead to unexpected behavior of the IOCTL.
Fix all this by reworking the implementation from scratch. Dropping the whole RCU approach and taking the lock instead.
Only mildly tested and needs a thoughtful review of the code.
v2: fix the reference counting as well v3: keep the excl fence handling as is for stable v4: back to testing all fences, drop RCU v5: handle in and out separately
Signed-off-by: Christian König christian.koenig@amd.com CC: stable@vger.kernel.org --- drivers/dma-buf/dma-buf.c | 152 +++++++++++++++++--------------------- include/linux/dma-buf.h | 2 +- 2 files changed, 68 insertions(+), 86 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index eadd1eaa2fb5..439e2379e1cb 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry) * If you hit this BUG() it means someone dropped their ref to the * dma-buf while still having pending operation to the buffer. */ - BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active); + BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
dmabuf->ops->release(dmabuf);
@@ -202,16 +202,57 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) wake_up_locked_poll(dcb->poll, dcb->active); dcb->active = 0; spin_unlock_irqrestore(&dcb->poll->lock, flags); + dma_fence_put(fence); +} + +static bool dma_buf_poll_shared(struct dma_resv *resv, + struct dma_buf_poll_cb_t *dcb) +{ + struct dma_resv_list *fobj = dma_resv_get_list(resv); + struct dma_fence *fence; + int i, r; + + if (!fobj) + return false; + + for (i = 0; i < fobj->shared_count; ++i) { + fence = rcu_dereference_protected(fobj->shared[i], + dma_resv_held(resv)); + dma_fence_get(fence); + r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); + if (!r) + return true; + dma_fence_put(fence); + } + + return false; +} + +static bool dma_buf_poll_excl(struct dma_resv *resv, + struct dma_buf_poll_cb_t *dcb) +{ + struct dma_fence *fence = dma_resv_get_excl(resv); + int r; + + if (!fence) + return false; + + dma_fence_get(fence); + r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); + if (!r) + return true; + dma_fence_put(fence); + + return false; }
static __poll_t dma_buf_poll(struct file *file, poll_table *poll) { struct dma_buf *dmabuf; struct dma_resv *resv; - struct dma_resv_list *fobj; - struct dma_fence *fence_excl; + unsigned shared_count; __poll_t events; - unsigned shared_count, seq; + int r, i;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -225,101 +266,42 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
-retry: - seq = read_seqcount_begin(&resv->seq); - rcu_read_lock(); - - fobj = rcu_dereference(resv->fence); - if (fobj) - shared_count = fobj->shared_count; - else - shared_count = 0; - fence_excl = rcu_dereference(resv->fence_excl); - if (read_seqcount_retry(&resv->seq, seq)) { - rcu_read_unlock(); - goto retry; - } - - if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) { - struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; - __poll_t pevents = EPOLLIN; + dma_resv_lock(resv, NULL);
- if (shared_count == 0) - pevents |= EPOLLOUT; + if (events & EPOLLOUT) { + struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out;
+ /* Check that callback isn't busy */ spin_lock_irq(&dmabuf->poll.lock); - if (dcb->active) { - dcb->active |= pevents; - events &= ~pevents; - } else - dcb->active = pevents; + if (dcb->active) + events &= ~EPOLLOUT; + else + dcb->active = EPOLLOUT; spin_unlock_irq(&dmabuf->poll.lock);
- if (events & pevents) { - if (!dma_fence_get_rcu(fence_excl)) { - /* force a recheck */ - events &= ~pevents; - dma_buf_poll_cb(NULL, &dcb->cb); - } else if (!dma_fence_add_callback(fence_excl, &dcb->cb, - dma_buf_poll_cb)) { - events &= ~pevents; - dma_fence_put(fence_excl); - } else { - /* - * No callback queued, wake up any additional - * waiters. - */ - dma_fence_put(fence_excl); - dma_buf_poll_cb(NULL, &dcb->cb); - } - } + if (events & EPOLLOUT && !dma_buf_poll_shared(resv, dcb) && + !dma_buf_poll_excl(resv, dcb)) + /* No callback queued, wake up any other waiters */ + dma_buf_poll_cb(NULL, &dcb->cb); }
- if ((events & EPOLLOUT) && shared_count > 0) { - struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared; - int i; + if (events & EPOLLIN) { + struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_in;
- /* Only queue a new callback if no event has fired yet */ + /* Check that callback isn't busy */ spin_lock_irq(&dmabuf->poll.lock); if (dcb->active) - events &= ~EPOLLOUT; + events &= ~EPOLLIN; else - dcb->active = EPOLLOUT; + dcb->active = EPOLLIN; spin_unlock_irq(&dmabuf->poll.lock);
- if (!(events & EPOLLOUT)) - goto out; - - for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = rcu_dereference(fobj->shared[i]); - - if (!dma_fence_get_rcu(fence)) { - /* - * fence refcount dropped to zero, this means - * that fobj has been freed - * - * call dma_buf_poll_cb and force a recheck! - */ - events &= ~EPOLLOUT; - dma_buf_poll_cb(NULL, &dcb->cb); - break; - } - if (!dma_fence_add_callback(fence, &dcb->cb, - dma_buf_poll_cb)) { - dma_fence_put(fence); - events &= ~EPOLLOUT; - break; - } - dma_fence_put(fence); - } - - /* No callback queued, wake up any additional waiters. */ - if (i == shared_count) + if (events & EPOLLIN && !dma_buf_poll_excl(resv, dcb)) + /* No callback queued, wake up any other waiters */ dma_buf_poll_cb(NULL, &dcb->cb); }
-out: - rcu_read_unlock(); + dma_resv_unlock(resv); return events; }
@@ -562,8 +544,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) dmabuf->owner = exp_info->owner; spin_lock_init(&dmabuf->name_lock); init_waitqueue_head(&dmabuf->poll); - dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll; - dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0; + dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll; + dmabuf->cb_in.active = dmabuf->cb_out.active = 0;
if (!resv) { resv = (struct dma_resv *)&dmabuf[1]; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index efdc56b9d95f..7e747ad54c81 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -329,7 +329,7 @@ struct dma_buf { wait_queue_head_t *poll;
__poll_t active; - } cb_excl, cb_shared; + } cb_in, cb_out; };
/**
Sorry that was the wrong patch.
Still not feeling that well :(
Christian.
Am 08.07.21 um 13:19 schrieb Christian König:
Daniel pointed me towards this function and there are multiple obvious problems in the implementation.
First of all the retry loop is not working as intended. In general the retry only makes sense if you grab the reference first and then check the sequence values.
Then we should always also wait for the exclusive fence.
It's also good practice to keep the reference around when installing callbacks to fences you don't own.
And last, the whole implementation was unnecessarily complex and rather hard to understand, which could probably lead to unexpected behavior of the IOCTL.
Fix all this by reworking the implementation from scratch. Dropping the whole RCU approach and taking the lock instead.
Only mildly tested and needs a thoughtful review of the code.
v2: fix the reference counting as well v3: keep the excl fence handling as is for stable v4: back to testing all fences, drop RCU v5: handle in and out separately
Signed-off-by: Christian König christian.koenig@amd.com CC: stable@vger.kernel.org
drivers/dma-buf/dma-buf.c | 152 +++++++++++++++++--------------------- include/linux/dma-buf.h | 2 +- 2 files changed, 68 insertions(+), 86 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index eadd1eaa2fb5..439e2379e1cb 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry) * If you hit this BUG() it means someone dropped their ref to the * dma-buf while still having pending operation to the buffer. */
- BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
dmabuf->ops->release(dmabuf);
@@ -202,16 +202,57 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) wake_up_locked_poll(dcb->poll, dcb->active); dcb->active = 0; spin_unlock_irqrestore(&dcb->poll->lock, flags);
- dma_fence_put(fence);
+}
+static bool dma_buf_poll_shared(struct dma_resv *resv,
struct dma_buf_poll_cb_t *dcb)
+{
- struct dma_resv_list *fobj = dma_resv_get_list(resv);
- struct dma_fence *fence;
- int i, r;
- if (!fobj)
return false;
- for (i = 0; i < fobj->shared_count; ++i) {
fence = rcu_dereference_protected(fobj->shared[i],
dma_resv_held(resv));
dma_fence_get(fence);
r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
if (!r)
return true;
dma_fence_put(fence);
- }
- return false;
+}
+static bool dma_buf_poll_excl(struct dma_resv *resv,
struct dma_buf_poll_cb_t *dcb)
+{
struct dma_fence *fence = dma_resv_get_excl(resv);
int r;
if (!fence)
return false;
dma_fence_get(fence);
r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
if (!r)
return true;
dma_fence_put(fence);
return false; }
static __poll_t dma_buf_poll(struct file *file, poll_table *poll) { struct dma_buf *dmabuf; struct dma_resv *resv;
- struct dma_resv_list *fobj;
- struct dma_fence *fence_excl;
- unsigned shared_count; __poll_t events;
- unsigned shared_count, seq;
int r, i;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv)
@@ -225,101 +266,42 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
-retry:
- seq = read_seqcount_begin(&resv->seq);
- rcu_read_lock();
- fobj = rcu_dereference(resv->fence);
- if (fobj)
shared_count = fobj->shared_count;
- else
shared_count = 0;
- fence_excl = rcu_dereference(resv->fence_excl);
- if (read_seqcount_retry(&resv->seq, seq)) {
rcu_read_unlock();
goto retry;
- }
- if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
__poll_t pevents = EPOLLIN;
- dma_resv_lock(resv, NULL);
if (shared_count == 0)
pevents |= EPOLLOUT;
if (events & EPOLLOUT) {
struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out;
/* Check that callback isn't busy */
spin_lock_irq(&dmabuf->poll.lock);
if (dcb->active) {
dcb->active |= pevents;
events &= ~pevents;
} else
dcb->active = pevents;
if (dcb->active)
events &= ~EPOLLOUT;
else
dcb->active = EPOLLOUT;
spin_unlock_irq(&dmabuf->poll.lock);
if (events & pevents) {
if (!dma_fence_get_rcu(fence_excl)) {
/* force a recheck */
events &= ~pevents;
dma_buf_poll_cb(NULL, &dcb->cb);
} else if (!dma_fence_add_callback(fence_excl, &dcb->cb,
dma_buf_poll_cb)) {
events &= ~pevents;
dma_fence_put(fence_excl);
} else {
/*
* No callback queued, wake up any additional
* waiters.
*/
dma_fence_put(fence_excl);
dma_buf_poll_cb(NULL, &dcb->cb);
}
}
if (events & EPOLLOUT && !dma_buf_poll_shared(resv, dcb) &&
!dma_buf_poll_excl(resv, dcb))
/* No callback queued, wake up any other waiters */
dma_buf_poll_cb(NULL, &dcb->cb);
}
- if ((events & EPOLLOUT) && shared_count > 0) {
struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared;
int i;
- if (events & EPOLLIN) {
struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_in;
/* Only queue a new callback if no event has fired yet */
/* Check that callback isn't busy */
spin_lock_irq(&dmabuf->poll.lock);
if (dcb->active)
events &= ~EPOLLOUT;
events &= ~EPOLLIN;
else
dcb->active = EPOLLOUT;
dcb->active = EPOLLIN;
spin_unlock_irq(&dmabuf->poll.lock);
if (!(events & EPOLLOUT))
goto out;
for (i = 0; i < shared_count; ++i) {
struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
if (!dma_fence_get_rcu(fence)) {
/*
* fence refcount dropped to zero, this means
* that fobj has been freed
*
* call dma_buf_poll_cb and force a recheck!
*/
events &= ~EPOLLOUT;
dma_buf_poll_cb(NULL, &dcb->cb);
break;
}
if (!dma_fence_add_callback(fence, &dcb->cb,
dma_buf_poll_cb)) {
dma_fence_put(fence);
events &= ~EPOLLOUT;
break;
}
dma_fence_put(fence);
}
/* No callback queued, wake up any additional waiters. */
if (i == shared_count)
if (events & EPOLLIN && !dma_buf_poll_excl(resv, dcb))
/* No callback queued, wake up any other waiters */
dma_buf_poll_cb(NULL, &dcb->cb);
}
-out:
- rcu_read_unlock();
- dma_resv_unlock(resv); return events; }
@@ -562,8 +544,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) dmabuf->owner = exp_info->owner; spin_lock_init(&dmabuf->name_lock); init_waitqueue_head(&dmabuf->poll);
- dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
- dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll;
dmabuf->cb_in.active = dmabuf->cb_out.active = 0;
if (!resv) { resv = (struct dma_resv *)&dmabuf[1];
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index efdc56b9d95f..7e747ad54c81 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -329,7 +329,7 @@ struct dma_buf { wait_queue_head_t *poll;
__poll_t active;
- } cb_excl, cb_shared;
} cb_in, cb_out; };
/**
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on tegra-drm/drm/tegra/for-next] [also build test WARNING on v5.13] [cannot apply to linus/master next-20210709] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/dma-buf-fix-and-rew... base: git://anongit.freedesktop.org/tegra/linux.git drm/tegra/for-next config: mips-randconfig-r032-20210709 (attached as .config) compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 8d69635ed9ecf36fd0ca85906bfde17949671cbe) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install mips cross compiling tool for clang build # apt-get install binutils-mips-linux-gnu # https://github.com/0day-ci/linux/commit/07538f7542063f6043e1a5dab5f4aa572a09... git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/dma-buf-fix-and-rework-dma_buf_poll-v5/20210708-192030 git checkout 07538f7542063f6043e1a5dab5f4aa572a091c96 # save the attached .config to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross O=build_dir ARCH=mips SHELL=/bin/bash drivers/dma-buf/
If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot lkp@intel.com
All warnings (new ones prefixed by >>):
drivers/dma-buf/dma-buf.c:253:11: warning: unused variable 'shared_count'
unsigned shared_count; ^
drivers/dma-buf/dma-buf.c:255:9: warning: unused variable 'i'
int r, i; ^
drivers/dma-buf/dma-buf.c:255:6: warning: unused variable 'r'
int r, i; ^ fatal error: error in backend: Nested variants found in inline asm string: ' .set push .set mips64r2 .if ( 0x00 ) != -1)) 0x00 ) != -1)) : ($( static struct ftrace_branch_data __attribute__((__aligned__(4))) __attribute__((__section__("_ftrace_branch"))) __if_trace = $( .func = __func__, .file = "arch/mips/include/asm/atomic.h", .line = 153, $); 0x00 ) != -1)) : $))) ) && ( 0 ); .set push; .set mips64r2; .rept 1; sync 0x00; .endr; .set pop; .else; ; .endif 1: ll $1, $2 # atomic_fetch_add addu $0, $1, $3 sc $0, $2 beqz $0, 1b .set pop move $0, $1 ' clang-13: error: clang frontend command failed with exit code 70 (use -v to see invocation) clang version 13.0.0 (git://gitmirror/llvm_project 8d69635ed9ecf36fd0ca85906bfde17949671cbe) Target: mipsel-unknown-linux-gnu Thread model: posix InstalledDir: /opt/cross/clang-8d69635ed9/bin clang-13: note: diagnostic msg: Makefile arch drivers include kernel mm scripts source usr
vim +/shared_count +253 drivers/dma-buf/dma-buf.c
248 249 static __poll_t dma_buf_poll(struct file *file, poll_table *poll) 250 { 251 struct dma_buf *dmabuf; 252 struct dma_resv *resv;
253 unsigned shared_count;
254 __poll_t events;
255 int r, i;
256 257 dmabuf = file->private_data; 258 if (!dmabuf || !dmabuf->resv) 259 return EPOLLERR; 260 261 resv = dmabuf->resv; 262 263 poll_wait(file, &dmabuf->poll, poll); 264 265 events = poll_requested_events(poll) & (EPOLLIN | EPOLLOUT); 266 if (!events) 267 return 0; 268 269 dma_resv_lock(resv, NULL); 270 271 if (events & EPOLLOUT) { 272 struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_out; 273 274 /* Check that callback isn't busy */ 275 spin_lock_irq(&dmabuf->poll.lock); 276 if (dcb->active) 277 events &= ~EPOLLOUT; 278 else 279 dcb->active = EPOLLOUT; 280 spin_unlock_irq(&dmabuf->poll.lock); 281 282 if (events & EPOLLOUT && !dma_buf_poll_shared(resv, dcb) && 283 !dma_buf_poll_excl(resv, dcb)) 284 /* No callback queued, wake up any other waiters */ 285 dma_buf_poll_cb(NULL, &dcb->cb); 286 } 287 288 if (events & EPOLLIN) { 289 struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_in; 290 291 /* Check that callback isn't busy */ 292 spin_lock_irq(&dmabuf->poll.lock); 293 if (dcb->active) 294 events &= ~EPOLLIN; 295 else 296 dcb->active = EPOLLIN; 297 spin_unlock_irq(&dmabuf->poll.lock); 298 299 if (events & EPOLLIN && !dma_buf_poll_excl(resv, dcb)) 300 /* No callback queued, wake up any other waiters */ 301 dma_buf_poll_cb(NULL, &dcb->cb); 302 } 303 304 dma_resv_unlock(resv); 305 return events; 306 } 307
--- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
dri-devel@lists.freedesktop.org