On Wed, 2017-04-26 at 00:49 +0200, Karol Herbst wrote:
Hi Lyude,
thanks for the great work. Just a view comments inline.
2017-04-25 20:38 GMT+02:00 Lyude lyude@redhat.com:
This adds support for enabling automatic clockgating on nvidia GPUs for Fermi and later generations. This saves a little bit of power, bringing my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my kepler's idle power consumption from ~23.6W to ~21.65W.
Similar to how the nvidia driver seems to handle this, we enable clockgating for each engine that supports it after it's initialization.
Signed-off-by: Lyude lyude@redhat.com
.../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 4 ++ drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +++++- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 49 ++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 ++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ 11 files changed, 175 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h index b268b96..904aa56 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h @@ -84,6 +84,9 @@ struct nvkm_therm {
int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type); int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
+ int (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx); + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable);
remove those and have a simple "nvkm_therm_clkgate_engine" function
This way you know that every user calls this function and don't have to check for silly function pointers like you currently do in engine.c
};
int nvkm_therm_temp_get(struct nvkm_therm *); @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index b6c9169..473ad3e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -26,6 +26,7 @@ #include <core/option.h>
#include <subdev/fb.h> +#include <subdev/therm.h>
bool nvkm_engine_chsw_load(struct nvkm_engine *engine) @@ -86,6 +87,13 @@ static int nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_engine *engine = nvkm_engine(subdev); + struct nvkm_therm *therm = subdev->device->therm; + int gate_idx;
+ gate_idx = therm->clkgate_engine(therm, subdev->index); + if (gate_idx != -1) + therm->clkgate_set(therm, gate_idx, false);
move this code inside "nvkm_therm_clkgate_engine". Nobody outside nvkm_therm should even care about the index.
if (engine->func->fini) return engine->func->fini(engine, suspend); return 0; @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) { struct nvkm_engine *engine = nvkm_engine(subdev); struct nvkm_fb *fb = subdev->device->fb; + struct nvkm_therm *therm = subdev->device->therm; int ret = 0, i; s64 time;
if (!engine->usecount) { nvkm_trace(subdev, "init skipped, engine has no users\n"); - return ret; + goto finish; }
if (engine->func->oneinit && !engine->subdev.oneinit) { @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
for (i = 0; fb && i < fb->tile.regions; i++) nvkm_engine_tile(engine, i);
+finish: + if (!ret) { + int gate_idx = therm->clkgate_engine(therm, subdev-
index);
+ if (gate_idx != -1) + therm->clkgate_set(therm, gate_idx, true); + }
same code as above. More code sharing!
return ret; }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b690bc1..d133016 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1355,7 +1355,7 @@ nvc0_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1392,7 +1392,7 @@ nvc1_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1428,7 +1428,7 @@ nvc3_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1464,7 +1464,7 @@ nvc4_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1501,7 +1501,7 @@ nvc8_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1538,7 +1538,7 @@ nvce_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1575,7 +1575,7 @@ nvcf_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild index 135758b..cbb9465 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild @@ -1,4 +1,5 @@ nvkm-y += nvkm/subdev/therm/base.o +nvkm-y += nvkm/subdev/therm/clkgate.o nvkm-y += nvkm/subdev/therm/fan.o nvkm-y += nvkm/subdev/therm/fannil.o nvkm-y += nvkm/subdev/therm/fanpwm.o @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o nvkm-y += nvkm/subdev/therm/nv50.o nvkm-y += nvkm/subdev/therm/g84.o nvkm-y += nvkm/subdev/therm/gt215.o +nvkm-y += nvkm/subdev/therm/gf100.o nvkm-y += nvkm/subdev/therm/gf119.o nvkm-y += nvkm/subdev/therm/gm107.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c index df949fa..723c0c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c @@ -393,6 +393,8 @@ nvkm_therm_new_(const struct nvkm_therm_func *func, struct nvkm_device *device, therm->fan_set = nvkm_therm_fan_user_set; therm->attr_get = nvkm_therm_attr_get; therm->attr_set = nvkm_therm_attr_set; + therm->clkgate_engine = nvkm_therm_clkgate_engine; + therm->clkgate_set = nvkm_therm_clkgate_set;
remove those, because we should only have a nvkm_therm_clkgate_engine call
therm->mode = therm->suspend = -1; /* undefined */ return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c new file mode 100644 index 0000000..c030ea9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c @@ -0,0 +1,49 @@ +/*
- Copyright 2017 Red Hat Inc.
- Permission is hereby granted, free of charge, to any person
obtaining a
- copy of this software and associated documentation files (the
"Software"),
- to deal in the Software without restriction, including without
limitation
- the rights to use, copy, modify, merge, publish, distribute,
sublicense,
- and/or sell copies of the Software, and to permit persons to
whom the
- Software is furnished to do so, subject to the following
conditions:
- The above copyright notice and this permission notice shall be
included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
EVENT SHALL
- THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
DAMAGES OR
- OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE,
- ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR
- OTHER DEALINGS IN THE SOFTWARE.
- Authors: Lyude Paul
- */
+#include "priv.h"
+int +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx subdev) +{ + if (!therm->func->clkgate_engine) + return -1;
+ return therm->func->clkgate_engine(subdev); +}
+void +nvkm_therm_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) +{ + if (!therm->func->clkgate_set) + return;
+ if (enable) + nvkm_trace(&therm->subdev, + "Enabling clockgating for gate 0x%x\n", gate_idx); + else + nvkm_trace(&therm->subdev, + "Disabling clockgating for gate 0x%x\n", gate_idx);
+ therm->func->clkgate_set(therm, gate_idx, enable); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c new file mode 100644 index 0000000..820934f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c @@ -0,0 +1,77 @@ +/*
- Copyright 2017 Red Hat Inc.
- Permission is hereby granted, free of charge, to any person
obtaining a
- copy of this software and associated documentation files (the
"Software"),
- to deal in the Software without restriction, including without
limitation
- the rights to use, copy, modify, merge, publish, distribute,
sublicense,
- and/or sell copies of the Software, and to permit persons to
whom the
- Software is furnished to do so, subject to the following
conditions:
- The above copyright notice and this permission notice shall be
included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
EVENT SHALL
- THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
DAMAGES OR
- OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE,
- ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR
- OTHER DEALINGS IN THE SOFTWARE.
- Authors: Lyude Paul
- */
+#include <core/device.h>
+#include "priv.h"
+int +gf100_clkgate_engine(enum nvkm_devidx subdev) +{ + switch (subdev) { + case NVKM_ENGINE_GR: return 0x00; + case NVKM_ENGINE_MSPDEC: return 0x04; + case NVKM_ENGINE_MSPPP: return 0x08; + case NVKM_ENGINE_MSVLD: return 0x0c; + case NVKM_ENGINE_CE0: return 0x10; + case NVKM_ENGINE_CE1: return 0x14; + case NVKM_ENGINE_MSENC: return 0x18; + case NVKM_ENGINE_CE2: return 0x1c; + default: return -1; + } +}
+void +gf100_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) +{ + u8 data;
+ if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto */ + data = 0x45; + else /* ENG_CLK=run, BLK_CLK=run, ENG_PWR=run, BLK_PWR=run */ + data = 0x0;
I would rather use 0x44 here as Nvidia does? I don't think they disable it completly, maybe they only leave it on kepler? not quite sure.
JFYI: according to the vbios repo the nvidia blob actually uses 0x45 for everything except for PTHERM.UNK254_CG_CTRL:
./nv136/<REDACTED>/gp106_mmiotrace.xz: [1] 854.334687 MMIO32 W 0x020200 0x2772ed45 PTHERM.PGRAPH_CG_CTRL <= { ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0xd | ENG_MANT = 0x7 | ENG_DLY_BEFORE = 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 }
vs
./nv136/<REDACTED>/gp106_mmiotrace.xz: [1] 854.251848 MMIO32 W 0x020254 0x27722444 PTHERM.UNK254_CG_CTRL <= { ENG_CLK = RUN | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE = 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 }
+ nvkm_mask(therm->subdev.device, 0x20200 + gate_idx, 0xff, data); +}
+static const struct nvkm_therm_func +gf100_therm = { + .init = gt215_therm_init, + .fini = g84_therm_fini, + .pwm_ctrl = nv50_fan_pwm_ctrl, + .pwm_get = nv50_fan_pwm_get, + .pwm_set = nv50_fan_pwm_set, + .pwm_clock = nv50_fan_pwm_clock, + .temp_get = g84_temp_get, + .fan_sense = gt215_therm_fan_sense, + .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, + .clkgate_set = gf100_clkgate_set, +};
+int +gf100_therm_new(struct nvkm_device *device, int index, + struct nvkm_therm **ptherm) +{ + return nvkm_therm_new_(&gf100_therm, device, index, ptherm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c index 06dcfd6..a2626fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c @@ -143,6 +143,8 @@ gf119_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, + .clkgate_set = gf100_clkgate_set, };
int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c index 86848ec..c580c39 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c @@ -65,6 +65,8 @@ gm107_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, + .clkgate_set = gf100_clkgate_set, };
int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c index c08097f..4caf401 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) return -ENODEV; }
-static void +void gt215_therm_init(struct nvkm_therm *therm) { struct nvkm_device *device = therm->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h index 235a5d8..80367a7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h @@ -81,6 +81,9 @@ void nvkm_therm_sensor_event(struct nvkm_therm *, enum nvkm_therm_thrs, enum nvkm_therm_thrs_direction); void nvkm_therm_program_alarms_polling(struct nvkm_therm *);
+int nvkm_therm_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx); +void nvkm_therm_clkgate_set(struct nvkm_therm *, int gate_idx, bool enable);
struct nvkm_therm_func { void (*init)(struct nvkm_therm *); void (*fini)(struct nvkm_therm *); @@ -96,6 +99,9 @@ struct nvkm_therm_func { int (*fan_sense)(struct nvkm_therm *);
void (*program_alarms)(struct nvkm_therm *);
+ int (*clkgate_engine)(enum nvkm_devidx); + void (*clkgate_set)(struct nvkm_therm *, int, bool); };
void nv40_therm_intr(struct nvkm_therm *); @@ -110,6 +116,10 @@ void g84_sensor_setup(struct nvkm_therm *); void g84_therm_fini(struct nvkm_therm *);
int gt215_therm_fan_sense(struct nvkm_therm *); +void gt215_therm_init(struct nvkm_therm *);
+int gf100_clkgate_engine(enum nvkm_devidx); +void gf100_clkgate_set(struct nvkm_therm *, int, bool);
void gf119_therm_init(struct nvkm_therm *);
-- 2.9.3