Hi,
My laptop oopses early on with nothing on the screen; after some debugging I managed to obtain a backtrace:
BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page PGD 0 P4D 0 Oops: 0010 [#1] PREEMPT SMP PTI RIP: 0010:0x0 Code: Bad RIP value. [..] Call Trace: i915_gemfs_init+0x6e/0xa0 [i915] i915_gem_init_early+0x76/0x90 [i915] i915_driver_probe+0x30a/0x1640 [i915] ? kernfs_activate+0x5a/0x80 ? kernfs_add_one+0xdd/0x130 pci_device_probe+0x9e/0x110 really_probe+0xce/0x230 driver_probe_device+0x4b/0xc0 device_driver_attach+0x4e/0x60 __driver_attach+0x47/0xb0 ? device_driver_attach+0x60/0x60 bus_for_each_dev+0x61/0x90 bus_add_driver+0x167/0x1b0 driver_register+0x67/0xaa ? 0xffffffffc0522000 do_one_initcall+0x37/0x13f ? kmem_cache_alloc+0x11a/0x150 do_init_module+0x51/0x200 __se_sys_init_module+0xef/0x100 do_syscall_64+0x49/0x250 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP is at 0x00, which is never good
It sort of boils down to commit 144df3b288c4 (vfs: Convert ramfs, shmem, tmpfs, devtmpfs, rootfs to use the new mount API), which removed ->remount_fs from tmpfs' ops:
=== @@ -3736,7 +3849,6 @@ static const struct super_operations shmem_ops = { .destroy_inode = shmem_destroy_inode, #ifdef CONFIG_TMPFS .statfs = shmem_statfs, - .remount_fs = shmem_remount_fs, .show_options = shmem_show_options, #endif .evict_inode = shmem_evict_inode, ===
So i915 init executes NULL
get_fs_type("tmpfs"); sb->s_op->remount_fs(sb, &flags, options);
For the time being the following (obvious and wrong) patch at least boots -next:
---
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 099f3397aada..1f95d9ea319a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -39,6 +39,9 @@ int i915_gemfs_init(struct drm_i915_private *i915) int flags = 0; int err;
+ if (!sb->s_op->remount_fs) + return -ENODEV; + err = sb->s_op->remount_fs(sb, &flags, options); if (err) { kern_unmount(gemfs); ---
-ss
On (07/21/19 23:29), Sergey Senozhatsky wrote:
BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page PGD 0 P4D 0 Oops: 0010 [#1] PREEMPT SMP PTI RIP: 0010:0x0 Code: Bad RIP value. [..] Call Trace: i915_gemfs_init+0x6e/0xa0 [i915] i915_gem_init_early+0x76/0x90 [i915] i915_driver_probe+0x30a/0x1640 [i915] ? kernfs_activate+0x5a/0x80 ? kernfs_add_one+0xdd/0x130 pci_device_probe+0x9e/0x110 really_probe+0xce/0x230 driver_probe_device+0x4b/0xc0 device_driver_attach+0x4e/0x60 __driver_attach+0x47/0xb0 ? device_driver_attach+0x60/0x60 bus_for_each_dev+0x61/0x90 bus_add_driver+0x167/0x1b0 driver_register+0x67/0xaa ? 0xffffffffc0522000 do_one_initcall+0x37/0x13f ? kmem_cache_alloc+0x11a/0x150 do_init_module+0x51/0x200 __se_sys_init_module+0xef/0x100 do_syscall_64+0x49/0x250 entry_SYSCALL_64_after_hwframe+0x44/0xa9
So "the new mount API" conversion probably looks like something below. But I'm not 100% sure.
--- drivers/gpu/drm/i915/gem/i915_gemfs.c | 32 +++++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 099f3397aada..2e365b26f8ee 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -7,12 +7,14 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/pagemap.h> +#include <linux/fs_context.h>
#include "i915_drv.h" #include "i915_gemfs.h"
int i915_gemfs_init(struct drm_i915_private *i915) { + struct fs_context *fc; struct file_system_type *type; struct vfsmount *gemfs;
@@ -36,19 +38,35 @@ int i915_gemfs_init(struct drm_i915_private *i915) struct super_block *sb = gemfs->mnt_sb; /* FIXME: Disabled until we get W/A for read BW issue. */ char options[] = "huge=never"; - int flags = 0; int err;
- err = sb->s_op->remount_fs(sb, &flags, options); - if (err) { - kern_unmount(gemfs); - return err; - } + fc = fs_context_for_reconfigure(sb->s_root, 0, 0); + if (IS_ERR(fc)) + goto err; + + if (!fc->ops->parse_monolithic) + goto err; + + err = fc->ops->parse_monolithic(fc, options); + if (err) + goto err; + + if (!fc->ops->reconfigure) + goto err; + + err = fc->ops->reconfigure(fc); + if (err) + goto err; }
i915->mm.gemfs = gemfs; - return 0; + +err: + pr_err("i915 gemfs init() failed\n"); + put_fs_context(fc); + kern_unmount(gemfs); + return -EINVAL; }
void i915_gemfs_fini(struct drm_i915_private *i915)
Quoting Sergey Senozhatsky (2019-07-31 17:48:29)
@@ -36,19 +38,35 @@ int i915_gemfs_init(struct drm_i915_private *i915) struct super_block *sb = gemfs->mnt_sb; /* FIXME: Disabled until we get W/A for read BW issue. */ char options[] = "huge=never";
int flags = 0; int err;
err = sb->s_op->remount_fs(sb, &flags, options);
if (err) {
kern_unmount(gemfs);
return err;
}
fc = fs_context_for_reconfigure(sb->s_root, 0, 0);
if (IS_ERR(fc))
goto err;
if (!fc->ops->parse_monolithic)
goto err;
err = fc->ops->parse_monolithic(fc, options);
if (err)
goto err;
if (!fc->ops->reconfigure)
It would be odd for fs_context_for_reconfigure() to allow creation of a context if that context couldn't perform a reconfigre, nevertheless that seems to be the case.
goto err;
err = fc->ops->reconfigure(fc);
if (err)
goto err;
Only thing that stands out is that we should put_fs_context() here as well. I guess it's better than poking at the SB_INFO directly ourselves.
I think though we shouldn't bail if we can't change the thp setting, and just accept whatever with a warning.
Looks like the API is already available in dinq, so we can apply this ahead of the next merge window. -Chris
On (08/01/19 18:30), Chris Wilson wrote:
Quoting Sergey Senozhatsky (2019-07-31 17:48:29)
@@ -36,19 +38,35 @@ int i915_gemfs_init(struct drm_i915_private *i915)
[..]
if (!fc->ops->parse_monolithic)
goto err;
err = fc->ops->parse_monolithic(fc, options);
if (err)
goto err;
if (!fc->ops->reconfigure)
It would be odd for fs_context_for_reconfigure() to allow creation of a context if that context couldn't perform a reconfigre, nevertheless that seems to be the case.
Well, I kept those checks just because fs/ code does the same.
E.g. fs/super.c
reconfigure_super() { if (fc->ops->reconfigure) fc->ops->reconfigure(fc) }
do_emergency_remount_callback() { fc = fs_context_for_reconfigure(); reconfigure_super(fc); }
goto err;
err = fc->ops->reconfigure(fc);
if (err)
goto err;
Only thing that stands out is that we should put_fs_context() here as well.
Oh... Indeed, somehow I forgot to put_fs_context().
I guess it's better than poking at the SB_INFO directly ourselves. I think though we shouldn't bail if we can't change the thp setting, and just accept whatever with a warning.
OK.
Looks like the API is already available in dinq, so we can apply this ahead of the next merge window.
OK, will cook a formal patch then. Thanks!
-ss
dri-devel@lists.freedesktop.org