This series adds support for GPU DDR bandwidth scaling and is based on the bindings from Georgi [1]. This is mostly a rebase of Sharat's patches [2] on the tip of the msm-next branch.
[1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/vireshk/pm/+log/opp... [2] https://patchwork.freedesktop.org/series/75291/
Changes from v5: - Added "interconnect-names" property
Changes from v4: - Squashed a patch into another one to address Jonathan's comment - Added back the pm_runtime_get_if_in_use() check
Changes from v3: - Rebased on top of Jonathan's patch which adds support for changing gpu freq through hfi on newer targets - As suggested by Rob, left the icc_path intact for pre-a6xx GPUs
Sharat Masetty (6): dt-bindings: drm/msm/gpu: Document gpu opp table drm: msm: a6xx: send opp instead of a frequency drm: msm: a6xx: use dev_pm_opp_set_bw to scale DDR arm64: dts: qcom: SDM845: Enable GPU DDR bw scaling arm64: dts: qcom: sc7180: Add interconnects property for GPU arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp
.../devicetree/bindings/display/msm/gpu.txt | 28 ++++++ arch/arm64/boot/dts/qcom/sc7180.dtsi | 10 ++ arch/arm64/boot/dts/qcom/sdm845.dtsi | 10 ++ drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 108 ++++++++++++--------- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 2 +- drivers/gpu/drm/msm/msm_gpu.c | 3 +- drivers/gpu/drm/msm/msm_gpu.h | 3 +- 7 files changed, 114 insertions(+), 50 deletions(-)
From: Sharat Masetty smasetty@codeaurora.org
Update documentation to list the gpu opp table bindings including the newly added "opp-peak-kBps" needed for GPU-DDR bandwidth scaling.
Signed-off-by: Sharat Masetty smasetty@codeaurora.org Acked-by: Rob Herring robh@kernel.org Signed-off-by: Akhil P Oommen akhilpo@codeaurora.org --- .../devicetree/bindings/display/msm/gpu.txt | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+)
diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt b/Documentation/devicetree/bindings/display/msm/gpu.txt index fd779cd..1af0ff1 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.txt +++ b/Documentation/devicetree/bindings/display/msm/gpu.txt @@ -112,6 +112,34 @@ Example a6xx (with GMU): interconnects = <&rsc_hlos MASTER_GFX3D &rsc_hlos SLAVE_EBI1>; interconnect-names = "gfx-mem";
+ gpu_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-430000000 { + opp-hz = /bits/ 64 <430000000>; + opp-level = <RPMH_REGULATOR_LEVEL_SVS_L1>; + opp-peak-kBps = <5412000>; + }; + + opp-355000000 { + opp-hz = /bits/ 64 <355000000>; + opp-level = <RPMH_REGULATOR_LEVEL_SVS>; + opp-peak-kBps = <3072000>; + }; + + opp-267000000 { + opp-hz = /bits/ 64 <267000000>; + opp-level = <RPMH_REGULATOR_LEVEL_LOW_SVS>; + opp-peak-kBps = <3072000>; + }; + + opp-180000000 { + opp-hz = /bits/ 64 <180000000>; + opp-level = <RPMH_REGULATOR_LEVEL_MIN_SVS>; + opp-peak-kBps = <1804000>; + }; + }; + qcom,gmu = <&gmu>;
zap-shader {
From: Sharat Masetty smasetty@codeaurora.org
This patch changes the plumbing to send the devfreq recommended opp rather than the frequency. Also consolidate and rearrange the code in a6xx to set the GPU frequency and the icc vote in preparation for the upcoming changes for GPU->DDR scaling votes.
Signed-off-by: Sharat Masetty smasetty@codeaurora.org Signed-off-by: Akhil P Oommen akhilpo@codeaurora.org --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 89 +++++++++++++++++++---------------- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 2 +- drivers/gpu/drm/msm/msm_gpu.c | 3 +- drivers/gpu/drm/msm/msm_gpu.h | 3 +- 4 files changed, 52 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 21e77d6..856db46 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -103,17 +103,45 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); }
-static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) +void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) { - struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); - struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; - struct msm_gpu *gpu = &adreno_gpu->base; - int ret; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 perf_index; + unsigned long gpu_freq; + int ret = 0; + + gpu_freq = dev_pm_opp_get_freq(opp); + + if (gpu_freq == gmu->freq) + return; + + for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++) + if (gpu_freq == gmu->gpu_freqs[perf_index]) + break; + + gmu->current_perf_index = perf_index; + gmu->freq = gmu->gpu_freqs[perf_index]; + + /* + * This can get called from devfreq while the hardware is idle. Don't + * bring up the power if it isn't already active + */ + if (pm_runtime_get_if_in_use(gmu->dev) == 0) + return; + + if (!gmu->legacy) { + a6xx_hfi_set_freq(gmu, perf_index); + icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); + pm_runtime_put(gmu->dev); + return; + }
gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING, - ((3 & 0xf) << 28) | index); + ((3 & 0xf) << 28) | perf_index);
/* * Send an invalid index as a vote for the bus bandwidth and let the @@ -134,37 +162,6 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index) * for now leave it at max so that the performance is nominal. */ icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); -} - -void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq) -{ - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct a6xx_gmu *gmu = &a6xx_gpu->gmu; - u32 perf_index = 0; - - if (freq == gmu->freq) - return; - - for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++) - if (freq == gmu->gpu_freqs[perf_index]) - break; - - gmu->current_perf_index = perf_index; - gmu->freq = gmu->gpu_freqs[perf_index]; - - /* - * This can get called from devfreq while the hardware is idle. Don't - * bring up the power if it isn't already active - */ - if (pm_runtime_get_if_in_use(gmu->dev) == 0) - return; - - if (gmu->legacy) - __a6xx_gmu_set_freq(gmu, perf_index); - else - a6xx_hfi_set_freq(gmu, perf_index); - pm_runtime_put(gmu->dev); }
@@ -839,6 +836,19 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) a6xx_gmu_rpmh_off(gmu); }
+static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) +{ + struct dev_pm_opp *gpu_opp; + unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index]; + + gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true); + if (IS_ERR_OR_NULL(gpu_opp)) + return; + + a6xx_gmu_set_freq(gpu, gpu_opp); + dev_pm_opp_put(gpu_opp); +} + int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -898,10 +908,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) enable_irq(gmu->hfi_irq);
/* Set the GPU to the current freq */ - if (gmu->legacy) - __a6xx_gmu_set_freq(gmu, gmu->current_perf_index); - else - a6xx_hfi_set_freq(gmu, gmu->current_perf_index); + a6xx_gmu_set_initial_freq(gpu, gmu);
/* * "enable" the GX power domain which won't actually do anything but it diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 7239b8b..03ba60d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -63,7 +63,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
-void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq); +void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp); unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu);
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index a22d306..82eb727 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -13,7 +13,6 @@
#include <generated/utsrelease.h> #include <linux/string_helpers.h> -#include <linux/pm_opp.h> #include <linux/devfreq.h> #include <linux/devcoredump.h> #include <linux/sched/task.h> @@ -34,7 +33,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq, return PTR_ERR(opp);
if (gpu->funcs->gpu_set_freq) - gpu->funcs->gpu_set_freq(gpu, (u64)*freq); + gpu->funcs->gpu_set_freq(gpu, opp); else clk_set_rate(gpu->core_clk, *freq);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 429cb40..0db117a 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -9,6 +9,7 @@
#include <linux/clk.h> #include <linux/interconnect.h> +#include <linux/pm_opp.h> #include <linux/regulator/consumer.h>
#include "msm_drv.h" @@ -61,7 +62,7 @@ struct msm_gpu_funcs { struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu); int (*gpu_state_put)(struct msm_gpu_state *state); unsigned long (*gpu_get_freq)(struct msm_gpu *gpu); - void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq); + void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp); struct msm_gem_address_space *(*create_address_space) (struct msm_gpu *gpu, struct platform_device *pdev); };
From: Sharat Masetty smasetty@codeaurora.org
This patch replaces the previously used static DDR vote and uses dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling GPU frequency. Also, since the icc path voting is handled completely in the opp driver, remove the icc_path handle and its usage in the drm driver.
Signed-off-by: Sharat Masetty smasetty@codeaurora.org Signed-off-by: Akhil P Oommen akhilpo@codeaurora.org --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 856db46..a6f43ff 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -133,7 +133,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
if (!gmu->legacy) { a6xx_hfi_set_freq(gmu, perf_index); - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); + dev_pm_opp_set_bw(&gpu->pdev->dev, opp); pm_runtime_put(gmu->dev); return; } @@ -157,11 +157,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) if (ret) dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret);
- /* - * Eventually we will want to scale the path vote with the frequency but - * for now leave it at max so that the performance is nominal. - */ - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216)); + dev_pm_opp_set_bw(&gpu->pdev->dev, opp); pm_runtime_put(gmu->dev); }
@@ -849,6 +845,19 @@ static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) dev_pm_opp_put(gpu_opp); }
+static void a6xx_gmu_set_initial_bw(struct msm_gpu *gpu, struct a6xx_gmu *gmu) +{ + struct dev_pm_opp *gpu_opp; + unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index]; + + gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true); + if (IS_ERR_OR_NULL(gpu_opp)) + return; + + dev_pm_opp_set_bw(&gpu->pdev->dev, gpu_opp); + dev_pm_opp_put(gpu_opp); +} + int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -873,7 +882,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) }
/* Set the bus quota to a reasonable value for boot */ - icc_set_bw(gpu->icc_path, 0, MBps_to_icc(3072)); + a6xx_gmu_set_initial_bw(gpu, gmu);
/* Enable the GMU interrupt */ gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0); @@ -1049,7 +1058,7 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) a6xx_gmu_shutdown(gmu);
/* Remove the bus vote */ - icc_set_bw(gpu->icc_path, 0, 0); + dev_pm_opp_set_bw(&gpu->pdev->dev, NULL);
/* * Make sure the GX domain is off before turning off the GMU (CX)
From: Sharat Masetty smasetty@codeaurora.org
This patch adds the interconnects property for the gpu node and the opp-peak-kBps property to the opps of the gpu opp table. This should help enable DDR bandwidth scaling dynamically and proportionally to the GPU frequency.
Signed-off-by: Sharat Masetty smasetty@codeaurora.org Signed-off-by: Akhil P Oommen akhilpo@codeaurora.org --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 8eb5a31..1cd2dae 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3515,42 +3515,52 @@
qcom,gmu = <&gmu>;
+ interconnects = <&mem_noc MASTER_GFX3D &mem_noc SLAVE_EBI1>; + interconnect-names = "gfx-mem"; + gpu_opp_table: opp-table { compatible = "operating-points-v2";
opp-710000000 { opp-hz = /bits/ 64 <710000000>; opp-level = <RPMH_REGULATOR_LEVEL_TURBO_L1>; + opp-peak-kBps = <7216000>; };
opp-675000000 { opp-hz = /bits/ 64 <675000000>; opp-level = <RPMH_REGULATOR_LEVEL_TURBO>; + opp-peak-kBps = <7216000>; };
opp-596000000 { opp-hz = /bits/ 64 <596000000>; opp-level = <RPMH_REGULATOR_LEVEL_NOM_L1>; + opp-peak-kBps = <6220000>; };
opp-520000000 { opp-hz = /bits/ 64 <520000000>; opp-level = <RPMH_REGULATOR_LEVEL_NOM>; + opp-peak-kBps = <6220000>; };
opp-414000000 { opp-hz = /bits/ 64 <414000000>; opp-level = <RPMH_REGULATOR_LEVEL_SVS_L1>; + opp-peak-kBps = <4068000>; };
opp-342000000 { opp-hz = /bits/ 64 <342000000>; opp-level = <RPMH_REGULATOR_LEVEL_SVS>; + opp-peak-kBps = <2724000>; };
opp-257000000 { opp-hz = /bits/ 64 <257000000>; opp-level = <RPMH_REGULATOR_LEVEL_LOW_SVS>; + opp-peak-kBps = <1648000>; }; }; };
From: Sharat Masetty smasetty@codeaurora.org
This patch adds the interconnects property to the GPU node. This enables the GPU->DDR path bandwidth voting.
Signed-off-by: Sharat Masetty smasetty@codeaurora.org Signed-off-by: Akhil P Oommen akhilpo@codeaurora.org --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 31b9217..80fe54b 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1470,6 +1470,9 @@ operating-points-v2 = <&gpu_opp_table>; qcom,gmu = <&gmu>;
+ interconnects = <&gem_noc MASTER_GFX3D &mc_virt SLAVE_EBI1>; + interconnect-names = "gfx-mem"; + gpu_opp_table: opp-table { compatible = "operating-points-v2";
From: Sharat Masetty smasetty@codeaurora.org
Add opp-peak-kBps bindings to the GPU opp table, listing the peak GPU -> DDR bandwidth requirement for each opp level. This will be used to scale the DDR bandwidth along with the GPU frequency dynamically.
Signed-off-by: Sharat Masetty smasetty@codeaurora.org Reviewed-by: Matthias Kaehlcke mka@chromium.org Signed-off-by: Akhil P Oommen akhilpo@codeaurora.org --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 80fe54b..ff4ddf1 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1479,36 +1479,43 @@ opp-800000000 { opp-hz = /bits/ 64 <800000000>; opp-level = <RPMH_REGULATOR_LEVEL_TURBO>; + opp-peak-kBps = <8532000>; };
opp-650000000 { opp-hz = /bits/ 64 <650000000>; opp-level = <RPMH_REGULATOR_LEVEL_NOM_L1>; + opp-peak-kBps = <7216000>; };
opp-565000000 { opp-hz = /bits/ 64 <565000000>; opp-level = <RPMH_REGULATOR_LEVEL_NOM>; + opp-peak-kBps = <5412000>; };
opp-430000000 { opp-hz = /bits/ 64 <430000000>; opp-level = <RPMH_REGULATOR_LEVEL_SVS_L1>; + opp-peak-kBps = <5412000>; };
opp-355000000 { opp-hz = /bits/ 64 <355000000>; opp-level = <RPMH_REGULATOR_LEVEL_SVS>; + opp-peak-kBps = <3072000>; };
opp-267000000 { opp-hz = /bits/ 64 <267000000>; opp-level = <RPMH_REGULATOR_LEVEL_LOW_SVS>; + opp-peak-kBps = <3072000>; };
opp-180000000 { opp-hz = /bits/ 64 <180000000>; opp-level = <RPMH_REGULATOR_LEVEL_MIN_SVS>; + opp-peak-kBps = <1804000>; }; }; };
dri-devel@lists.freedesktop.org