When removing the BKL, the locking around lastclose() was rearranged and resulted in holding the open_count spinlock over the call into drm_lastclose(). The drivers were not ready for this path to be atomic - it may indeed involve long waits to release old objects and clean up the GPU - and so we ended up scheduling whilst atomic.
[ 54.625598] BUG: scheduling while atomic: X/3546/0x00000002
[ 54.625600] Modules linked in: sco bridge stp llc input_polldev rfcomm bnep l2cap crc16 sch_sfq ipv6 md_mod acpi_cpufreq mperf cryptd aes_x86_64 aes_generic xts gf128mul dm_crypt dm_mod btusb bluetooth usbhid hid zaurus cdc_ether usbnet mii cdc_wdm cdc_acm uvcvideo videodev v4l1_compat v4l2_compat_ioctl32 snd_hda_codec_conexant arc4 pcmcia ecb snd_hda_intel joydev sdhci_pci sdhci snd_hda_codec tpm_tis firewire_ohci mmc_core e1000e uhci_hcd thinkpad_acpi nvram yenta_socket pcmcia_rsrc pcmcia_core tpm wmi sr_mod firewire_core iwlagn ehci_hcd snd_hwdep snd_pcm usbcore tpm_bios thermal led_class snd_timer iwlcore snd soundcore ac snd_page_alloc pcspkr psmouse serio_raw battery sg mac80211 evdev cfg80211 i2c_i801 iTCO_wdt iTCO_vendor_support cdrom processor crc_itu_t rfkill xfs exportfs sd_mod crc_t10dif ahci libahci libata scsi_mod [last unloaded: scsi_wait_scan]
[ 54.625663] Pid: 3546, comm: X Not tainted 2.6.35-04771-g1787985 #301
[ 54.625665] Call Trace:
[ 54.625671] [<ffffffff8102d599>] __schedule_bug+0x57/0x5c
[ 54.625675] [<ffffffff81384141>] schedule+0xe5/0x832
[ 54.625679] [<ffffffff81163e77>] ? put_dec+0x20/0x3c
[ 54.625682] [<ffffffff81384dd4>] schedule_timeout+0x275/0x29f
[ 54.625686] [<ffffffff810455e1>] ? process_timeout+0x0/0xb
[ 54.625688] [<ffffffff81384e17>] schedule_timeout_uninterruptible+0x19/0x1b
[ 54.625691] [<ffffffff81045893>] msleep+0x16/0x1d
[ 54.625695] [<ffffffff812a2e53>] i9xx_crtc_dpms+0x273/0x2ae
[ 54.625698] [<ffffffff812a18be>] intel_crtc_dpms+0x28/0xe7
[ 54.625702] [<ffffffff811ec0fa>] drm_helper_disable_unused_functions+0xf0/0x118
[ 54.625705] [<ffffffff811ecde3>] drm_crtc_helper_set_config+0x644/0x7c8
[ 54.625708] [<ffffffff811f12dd>] ? drm_copy_field+0x40/0x50
[ 54.625711] [<ffffffff811ebca2>] drm_fb_helper_force_kernel_mode+0x3e/0x85
[ 54.625713] [<ffffffff811ebcf2>] drm_fb_helper_restore+0x9/0x24
[ 54.625717] [<ffffffff81290a41>] i915_driver_lastclose+0x2b/0x5c
[ 54.625720] [<ffffffff811f14a7>] drm_lastclose+0x44/0x2ad
[ 54.625722] [<ffffffff811f1ed2>] drm_release+0x5c6/0x609
[ 54.625726] [<ffffffff810d1275>] fput+0x109/0x1c7
[ 54.625728] [<ffffffff810ce5e4>] filp_close+0x61/0x6b
[ 54.625731] [<ffffffff810ce680>] sys_close+0x92/0xd4
[ 54.625734] [<ffffffff81002a2b>] system_call_fastpath+0x16/0x1b
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_fops.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 2ca8df8..6367961 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -572,16 +572,15 @@ int drm_release(struct inode *inode, struct file *filp)
         atomic_inc(&dev->counts[_DRM_STAT_CLOSES]);
         spin_lock(&dev->count_lock);
         if (!--dev->open_count) {
+                spin_unlock(&dev->count_lock);
+
                 if (atomic_read(&dev->ioctl_count)) {
                         DRM_ERROR("Device busy: %d\n",
                                   atomic_read(&dev->ioctl_count));
                         retcode = -EBUSY;
-                        goto out;
-                }
-                retcode = drm_lastclose(dev);
+                } else
+                        retcode = drm_lastclose(dev);
         }
-out:
-        spin_unlock(&dev->count_lock);
         mutex_unlock(&drm_global_mutex);
 
         return retcode;
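[Editor's note] To make the failure mode concrete, here is a minimal userspace model of the reworked close path. It is purely illustrative: fake_release() and fake_lastclose() are invented names, pthread primitives stand in for the kernel ones, and none of this is the actual drm_fops.c code. In the kernel, calling something that sleeps (msleep(), drm_lastclose()) while holding a spinlock triggers exactly the "scheduling while atomic" BUG above; the shape of the fix is to drop the count lock before the potentially long teardown runs.

/* Minimal model (not kernel code): keep the bookkeeping under the spinlock
 * short, and only call the potentially sleeping teardown after the lock has
 * been dropped.  Note a userspace spinlock does not forbid sleeping; only
 * the control flow is being modelled here. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_spinlock_t count_lock;
static int open_count = 1;

static int fake_lastclose(void)
{
        usleep(10 * 1000);      /* stands in for the slow drm_lastclose() */
        return 0;
}

static int fake_release(void)
{
        int retcode = 0;

        pthread_spin_lock(&count_lock);
        if (!--open_count) {
                /* Drop the spinlock before doing anything that may sleep. */
                pthread_spin_unlock(&count_lock);
                retcode = fake_lastclose();
        } else {
                pthread_spin_unlock(&count_lock);
        }
        return retcode;
}

int main(void)
{
        pthread_spin_init(&count_lock, PTHREAD_PROCESS_PRIVATE);
        printf("release -> %d\n", fake_release());
        pthread_spin_destroy(&count_lock);
        return 0;
}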
On Sat, Aug 7, 2010 at 10:49 PM, Chris Wilson chris@chris-wilson.co.uk wrote:
When removing the BKL, the locking around lastclose() was rearranged and resulted in holding the open_count spinlock over the call into drm_lastclose(). The drivers were not ready for this path to be atomic - it may indeed involve long waits to release old objects and clean up the GPU - and so we ended up scheduling whilst atomic.
I might be missing something, but what stops the race with something reopening while we are in lastclose now?
Dave.
Dave Airlie: I might be missing something, but what stops the race with something reopening while we are in lastclose now?
The drm_global_mutex, which appears to fill this role, is only taken inside drm_stub_open() and not in drm_open() itself. As that mutex appears to serve no other role, retask it to serialise open/lastclose.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_fops.c |   35 ++++++++++++++++++-----------------
 1 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 6367961..71bcd1b 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -123,27 +123,30 @@ int drm_open(struct inode *inode, struct file *filp)
         struct drm_device *dev = NULL;
         int minor_id = iminor(inode);
         struct drm_minor *minor;
-        int retcode = 0;
+        int retcode = -ENODEV;
 
+        mutex_lock(&drm_global_mutex);
         minor = idr_find(&drm_minors_idr, minor_id);
         if (!minor)
-                return -ENODEV;
+                goto err;
 
         if (!(dev = minor->dev))
-                return -ENODEV;
+                goto err;
 
         retcode = drm_open_helper(inode, filp, dev);
-        if (!retcode) {
-                atomic_inc(&dev->counts[_DRM_STAT_OPENS]);
-                spin_lock(&dev->count_lock);
-                if (!dev->open_count++) {
-                        spin_unlock(&dev->count_lock);
-                        retcode = drm_setup(dev);
-                        goto out;
-                }
+        if (retcode)
+                goto err;
+
+        atomic_inc(&dev->counts[_DRM_STAT_OPENS]);
+        spin_lock(&dev->count_lock);
+        if (!dev->open_count++) {
                 spin_unlock(&dev->count_lock);
-        }
-out:
+                retcode = drm_setup(dev);
+        } else
+                spin_unlock(&dev->count_lock);
+err:
+        mutex_unlock(&drm_global_mutex);
+
         if (!retcode) {
                 mutex_lock(&dev->struct_mutex);
                 if (minor->type == DRM_MINOR_LEGACY) {
@@ -178,7 +181,6 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 
         DRM_DEBUG("\n");
 
-        mutex_lock(&drm_global_mutex);
         minor = idr_find(&drm_minors_idr, minor_id);
         if (!minor)
                 goto out;
@@ -198,8 +200,8 @@ int drm_stub_open(struct inode *inode, struct file *filp)
         }
         fops_put(old_fops);
 
+        ret = 0;
 out:
-        mutex_unlock(&drm_global_mutex);
         return err;
 }
 
@@ -474,8 +476,6 @@ int drm_release(struct inode *inode, struct file *filp)
         struct drm_device *dev = file_priv->minor->dev;
         int retcode = 0;
 
-        mutex_lock(&drm_global_mutex);
-
         DRM_DEBUG("open_count = %d\n", dev->open_count);
 
         if (dev->driver->preclose)
@@ -569,6 +569,7 @@ int drm_release(struct inode *inode, struct file *filp)
          * End inline drm_release
          */
 
+        mutex_lock(&drm_global_mutex);
         atomic_inc(&dev->counts[_DRM_STAT_CLOSES]);
         spin_lock(&dev->count_lock);
         if (!--dev->open_count) {
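[Editor's note] The intent of the patch above can be modelled in userspace roughly as follows (an illustrative sketch with invented names, not the DRM code itself): with open and last-close both taking one global mutex, a racing re-open either completes before the teardown starts or only begins once it has finished.

/* Illustrative userspace model only: open and last-close exclude each other
 * via one mutex, so the opener never observes a device mid-teardown. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static int open_count = 1;      /* one client still has the device open */

static void *opener(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&global_lock);
        if (!open_count++)
                printf("re-open: first open again, device set up\n");
        else
                printf("re-open: device already open\n");
        pthread_mutex_unlock(&global_lock);
        return NULL;
}

static void *closer(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&global_lock);
        if (!--open_count) {
                printf("last close: teardown begins\n");
                usleep(10 * 1000);      /* stands in for a slow drm_lastclose() */
                printf("last close: teardown done\n");
        }
        pthread_mutex_unlock(&global_lock);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, closer, NULL);
        pthread_create(&b, NULL, opener, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}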
On Sun, 8 Aug 2010 09:37:24 +0100, Chris Wilson chris@chris-wilson.co.uk wrote:
Dave Airlie: I might be missing something, but what stops the race with something reopening while we are in lastclose now?
The drm_global_mutex, which appears to fill this role, is only taken inside drm_stub_open() and not in drm_open() itself. As that mutex appears to serve no other role, retask it to serialise open/lastclose.
This patch was just wrong. The ordering is drm_stub_open() -> drm_open(), so open/lastclose are currently serialised using drm_global_mutex.
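[Editor's note] A minimal sketch of that ordering, with made-up names rather than the real fops plumbing: the stub open handler takes the global mutex and only then calls into the real open, so the real open (and hence the first-open setup) already runs with that mutex held.

/* Illustrative model only: stub_open() plays the role of drm_stub_open(),
 * real_open() plays the role of drm_open(). */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

static int real_open(void)
{
        printf("real open runs with the global mutex held\n");
        return 0;
}

static int stub_open(void)
{
        int err;

        pthread_mutex_lock(&global_lock);
        err = real_open();      /* called via the swapped-in fops */
        pthread_mutex_unlock(&global_lock);
        return err;
}

int main(void)
{
        return stub_open();
}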
On 08/08/2010 10:37 AM, Chris Wilson wrote:
Dave Airlie: I might be missing something, but what stops the race with something reopening while we are in lastclose now?
The drm_global_mutex, which appears to fill this role, is only taken inside drm_stub_open() and not in drm_open() itself. As that mutex appears to serve no other role, retask it to serialise open/lastclose.
Hmm.
But if used this way, drm_global_mutex needs to protect *all* accesses to dev::open_count. What is count_lock used for then? Can't it be removed?
/Thomas
When removing the BKL, the locking around lastclose() was rearranged and resulted in holding the open_count spinlock over the call into drm_lastclose(). The drivers were not ready for this path to be atomic - it may indeed involve long waits to release old objects and clean up the GPU - and so we ended up scheduling whilst atomic.
[ 54.625598] BUG: scheduling while atomic: X/3546/0x00000002
[ 54.625600] Modules linked in: sco bridge stp llc input_polldev rfcomm bnep l2cap crc16 sch_sfq ipv6 md_mod acpi_cpufreq mperf cryptd aes_x86_64 aes_generic xts gf128mul dm_crypt dm_mod btusb bluetooth usbhid hid zaurus cdc_ether usbnet mii cdc_wdm cdc_acm uvcvideo videodev v4l1_compat v4l2_compat_ioctl32 snd_hda_codec_conexant arc4 pcmcia ecb snd_hda_intel joydev sdhci_pci sdhci snd_hda_codec tpm_tis firewire_ohci mmc_core e1000e uhci_hcd thinkpad_acpi nvram yenta_socket pcmcia_rsrc pcmcia_core tpm wmi sr_mod firewire_core iwlagn ehci_hcd snd_hwdep snd_pcm usbcore tpm_bios thermal led_class snd_timer iwlcore snd soundcore ac snd_page_alloc pcspkr psmouse serio_raw battery sg mac80211 evdev cfg80211 i2c_i801 iTCO_wdt iTCO_vendor_support cdrom processor crc_itu_t rfkill xfs exportfs sd_mod crc_t10dif ahci libahci libata scsi_mod [last unloaded: scsi_wait_scan]
[ 54.625663] Pid: 3546, comm: X Not tainted 2.6.35-04771-g1787985 #301
[ 54.625665] Call Trace:
[ 54.625671] [<ffffffff8102d599>] __schedule_bug+0x57/0x5c
[ 54.625675] [<ffffffff81384141>] schedule+0xe5/0x832
[ 54.625679] [<ffffffff81163e77>] ? put_dec+0x20/0x3c
[ 54.625682] [<ffffffff81384dd4>] schedule_timeout+0x275/0x29f
[ 54.625686] [<ffffffff810455e1>] ? process_timeout+0x0/0xb
[ 54.625688] [<ffffffff81384e17>] schedule_timeout_uninterruptible+0x19/0x1b
[ 54.625691] [<ffffffff81045893>] msleep+0x16/0x1d
[ 54.625695] [<ffffffff812a2e53>] i9xx_crtc_dpms+0x273/0x2ae
[ 54.625698] [<ffffffff812a18be>] intel_crtc_dpms+0x28/0xe7
[ 54.625702] [<ffffffff811ec0fa>] drm_helper_disable_unused_functions+0xf0/0x118
[ 54.625705] [<ffffffff811ecde3>] drm_crtc_helper_set_config+0x644/0x7c8
[ 54.625708] [<ffffffff811f12dd>] ? drm_copy_field+0x40/0x50
[ 54.625711] [<ffffffff811ebca2>] drm_fb_helper_force_kernel_mode+0x3e/0x85
[ 54.625713] [<ffffffff811ebcf2>] drm_fb_helper_restore+0x9/0x24
[ 54.625717] [<ffffffff81290a41>] i915_driver_lastclose+0x2b/0x5c
[ 54.625720] [<ffffffff811f14a7>] drm_lastclose+0x44/0x2ad
[ 54.625722] [<ffffffff811f1ed2>] drm_release+0x5c6/0x609
[ 54.625726] [<ffffffff810d1275>] fput+0x109/0x1c7
[ 54.625728] [<ffffffff810ce5e4>] filp_close+0x61/0x6b
[ 54.625731] [<ffffffff810ce680>] sys_close+0x92/0xd4
[ 54.625734] [<ffffffff81002a2b>] system_call_fastpath+0x16/0x1b
v2: The spinlock is actually superfluous here: access to open_count is entirely serialised by drm_global_mutex, so the spinlock can be dropped. The count_lock spinlock instead appears to be used to protect access to dev->buf_alloc and dev->buf_use.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_fops.c |   16 +++-------------
 1 files changed, 3 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 2ca8df8..3a652a6 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -135,15 +135,9 @@ int drm_open(struct inode *inode, struct file *filp)
         retcode = drm_open_helper(inode, filp, dev);
         if (!retcode) {
                 atomic_inc(&dev->counts[_DRM_STAT_OPENS]);
-                spin_lock(&dev->count_lock);
-                if (!dev->open_count++) {
-                        spin_unlock(&dev->count_lock);
+                if (!dev->open_count++)
                         retcode = drm_setup(dev);
-                        goto out;
-                }
-                spin_unlock(&dev->count_lock);
         }
-out:
         if (!retcode) {
                 mutex_lock(&dev->struct_mutex);
                 if (minor->type == DRM_MINOR_LEGACY) {
@@ -570,18 +564,14 @@ int drm_release(struct inode *inode, struct file *filp)
          */
 
         atomic_inc(&dev->counts[_DRM_STAT_CLOSES]);
-        spin_lock(&dev->count_lock);
         if (!--dev->open_count) {
                 if (atomic_read(&dev->ioctl_count)) {
                         DRM_ERROR("Device busy: %d\n",
                                   atomic_read(&dev->ioctl_count));
                         retcode = -EBUSY;
-                        goto out;
-                }
-                retcode = drm_lastclose(dev);
+                } else
+                        retcode = drm_lastclose(dev);
         }
-out:
-        spin_unlock(&dev->count_lock);
         mutex_unlock(&drm_global_mutex);
 
         return retcode;
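[Editor's note] Read together with the v2 note above, the resulting division of labour between the two locks is roughly the following. This is an editor's summary sketch of the patch, not text from drm_fops.c; the struct and its field names are illustrative only.

/*
 * Rough summary of the locking after v2 (a reading of the patch, not a
 * verbatim excerpt):
 *
 *   drm_global_mutex - held across drm_open()/drm_release(), so it alone
 *                      serialises dev->open_count and the first-open
 *                      (drm_setup) / last-close (drm_lastclose) transitions.
 *   dev->count_lock  - no longer needed for open_count; it still guards the
 *                      buffer-allocation state (dev->buf_alloc, dev->buf_use).
 */
struct drm_locking_summary {
        int open_count; /* serialised by drm_global_mutex */
        int buf_alloc;  /* protected by dev->count_lock */
        int buf_use;    /* protected by dev->count_lock */
};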