On Thu, May 07, 2015 at 03:15:50PM +0100, Robert Bragg wrote:
- /* We bypass the default perf core perf_paranoid_cpu() ||
* CAP_SYS_ADMIN check by using the PERF_PMU_CAP_IS_DEVICE
* flag and instead authenticate based on whether the current
* pid owns the specified context, or require CAP_SYS_ADMIN
* when collecting cross-context metrics.
*/
- dev_priv->oa_pmu.specific_ctx = NULL;
- if (oa_attr.single_context) {
u32 ctx_id = oa_attr.ctx_id;
unsigned int drm_fd = oa_attr.drm_fd;
struct fd fd = fdget(drm_fd);
if (fd.file) {
Specify a ctx and not providing the right fd should be its own error, either EBADF or EINVAL.
dev_priv->oa_pmu.specific_ctx =
lookup_context(dev_priv, fd.file, ctx_id);
}
Missing fdput
- }
- if (!dev_priv->oa_pmu.specific_ctx && !capable(CAP_SYS_ADMIN))
return -EACCES;
- mutex_lock(&dev_priv->dev->struct_mutex);
i915_mutex_interruptible, probably best to couple into the GPU error handling here as well especially as init_oa_buffer() will go onto touch GPU internals.
- ret = init_oa_buffer(event);
- mutex_unlock(&dev_priv->dev->struct_mutex);
- if (ret)
return ret;
- BUG_ON(dev_priv->oa_pmu.exclusive_event);
- dev_priv->oa_pmu.exclusive_event = event;
- event->destroy = i915_oa_event_destroy;
- /* PRM - observability performance counters:
*
* OACONTROL, performance counter enable, note:
*
* "When this bit is set, in order to have coherent counts,
* RC6 power state and trunk clock gating must be disabled.
* This can be achieved by programming MMIO registers as
* 0xA094=0 and 0xA090[31]=1"
*
* In our case we are expected that taking pm + FORCEWAKE
* references will effectively disable RC6 and trunk clock
* gating.
*/
- intel_runtime_pm_get(dev_priv);
- intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
That is a nuisance. Aside: Why isn't OA inside the powerctx? Is a subset valid with forcewake? It does perturb the system greatly to disable rc6, so I wonder if it could be made optional?
- return 0;
+}
+static void update_oacontrol(struct drm_i915_private *dev_priv) +{
- BUG_ON(!spin_is_locked(&dev_priv->oa_pmu.lock));
- if (dev_priv->oa_pmu.event_active) {
unsigned long ctx_id = 0;
bool pinning_ok = false;
if (dev_priv->oa_pmu.specific_ctx) {
struct intel_context *ctx =
dev_priv->oa_pmu.specific_ctx;
struct drm_i915_gem_object *obj =
ctx->legacy_hw_ctx.rcs_state;
If only there was ctx->legacy_hw_ctx.rcs_vma...
if (i915_gem_obj_is_pinned(obj)) {
ctx_id = i915_gem_obj_ggtt_offset(obj);
pinning_ok = true;
}
}
if ((ctx_id == 0 || pinning_ok)) {
bool periodic = dev_priv->oa_pmu.periodic;
u32 period_exponent = dev_priv->oa_pmu.period_exponent;
u32 report_format = dev_priv->oa_pmu.oa_buffer.format;
I915_WRITE(GEN7_OACONTROL,
(ctx_id & GEN7_OACONTROL_CTX_MASK) |
(period_exponent <<
GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
(periodic ?
GEN7_OACONTROL_TIMER_ENABLE : 0) |
(report_format <<
GEN7_OACONTROL_FORMAT_SHIFT) |
(ctx_id ?
GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
GEN7_OACONTROL_ENABLE);
I notice you don't use any write barriers... -Chris