Now that we use memremap instead of ioremap, use WRITE_ONCE / READ_ONCE instead of iowrite / ioread.
Signed-off-by: Thomas Hellstrom thellstrom@vmware.com Reviewed-by: Sinclair Yeh syeh@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 14 ++--- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 28 ++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 24 ++++---- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 109 ++++++++++++++++------------------ drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 16 ++--- drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 9 +-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 23 +++---- 7 files changed, 118 insertions(+), 105 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index bee0a45..d1c34ab 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -752,14 +752,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); dev_priv->active_master = &dev_priv->fbdev_master;
- /* - * Force __iomem for this mapping until the implied compiler - * barriers and {READ|WRITE}_ONCE semantics from the - * io{read|write}32() accessors can be replaced with explicit - * barriers. - */ - dev_priv->mmio_virt = (void __iomem *) memremap(dev_priv->mmio_start, - dev_priv->mmio_size, MEMREMAP_WB); + dev_priv->mmio_virt = memremap(dev_priv->mmio_start, + dev_priv->mmio_size, MEMREMAP_WB);
if (unlikely(dev_priv->mmio_virt == NULL)) { ret = -ENOMEM; @@ -913,7 +907,7 @@ out_no_irq: out_no_device: ttm_object_device_release(&dev_priv->tdev); out_err4: - memunmap((void __force *) dev_priv->mmio_virt); + memunmap(dev_priv->mmio_virt); out_err3: vmw_ttm_global_release(dev_priv); out_err0: @@ -964,7 +958,7 @@ static int vmw_driver_unload(struct drm_device *dev) pci_release_regions(dev->pdev);
ttm_object_device_release(&dev_priv->tdev); - memunmap((void __force *) dev_priv->mmio_virt); + memunmap(dev_priv->mmio_virt); if (dev_priv->ctx.staged_bindings) vmw_binding_state_free(dev_priv->ctx.staged_bindings); vmw_ttm_global_release(dev_priv); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a613bd4..198c8b1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -375,7 +375,7 @@ struct vmw_private { uint32_t stdu_max_height; uint32_t initial_width; uint32_t initial_height; - u32 __iomem *mmio_virt; + u32 *mmio_virt; uint32_t capabilities; uint32_t max_gmr_ids; uint32_t max_gmr_pages; @@ -1206,4 +1206,30 @@ static inline void vmw_fifo_resource_dec(struct vmw_private *dev_priv) { atomic_dec(&dev_priv->num_fifo_resources); } + +/** + * vmw_mmio_read - Perform a MMIO read from volatile memory + * + * @addr: The address to read from + * + * This function is intended to be equivalent to ioread32() on + * memremap'd memory, but without byteswapping. + */ +static inline u32 vmw_mmio_read(u32 *addr) +{ + return READ_ONCE(*addr); +} + +/** + * vmw_mmio_write - Perform a MMIO write to volatile memory + * + * @addr: The address to write to + * + * This function is intended to be equivalent to iowrite32 on + * memremap'd memory, but without byteswapping. + */ +static inline void vmw_mmio_write(u32 value, u32 *addr) +{ + WRITE_ONCE(*addr, value); +} #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 567dded..8e689b4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -142,8 +142,8 @@ static bool vmw_fence_enable_signaling(struct fence *f) struct vmw_fence_manager *fman = fman_from_fence(fence); struct vmw_private *dev_priv = fman->dev_priv;
- u32 __iomem *fifo_mem = dev_priv->mmio_virt; - u32 seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE); + u32 *fifo_mem = dev_priv->mmio_virt; + u32 seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE); if (seqno - fence->base.seqno < VMW_FENCE_WRAP) return false;
@@ -386,14 +386,14 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, u32 passed_seqno) { u32 goal_seqno; - u32 __iomem *fifo_mem; + u32 *fifo_mem; struct vmw_fence_obj *fence;
if (likely(!fman->seqno_valid)) return false;
fifo_mem = fman->dev_priv->mmio_virt; - goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL); + goal_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE_GOAL); if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP)) return false;
@@ -401,8 +401,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, list_for_each_entry(fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; - iowrite32(fence->base.seqno, - fifo_mem + SVGA_FIFO_FENCE_GOAL); + vmw_mmio_write(fence->base.seqno, + fifo_mem + SVGA_FIFO_FENCE_GOAL); break; } } @@ -430,18 +430,18 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence) { struct vmw_fence_manager *fman = fman_from_fence(fence); u32 goal_seqno; - u32 __iomem *fifo_mem; + u32 *fifo_mem;
if (fence_is_signaled_locked(&fence->base)) return false;
fifo_mem = fman->dev_priv->mmio_virt; - goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL); + goal_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE_GOAL); if (likely(fman->seqno_valid && goal_seqno - fence->base.seqno < VMW_FENCE_WRAP)) return false;
- iowrite32(fence->base.seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL); + vmw_mmio_write(fence->base.seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL); fman->seqno_valid = true;
return true; @@ -453,9 +453,9 @@ static void __vmw_fences_update(struct vmw_fence_manager *fman) struct list_head action_list; bool needs_rerun; uint32_t seqno, new_seqno; - u32 __iomem *fifo_mem = fman->dev_priv->mmio_virt; + u32 *fifo_mem = fman->dev_priv->mmio_virt;
- seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE); + seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE); rerun: list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { if (seqno - fence->base.seqno < VMW_FENCE_WRAP) { @@ -477,7 +477,7 @@ rerun:
needs_rerun = vmw_fence_goal_new_locked(fman, seqno); if (unlikely(needs_rerun)) { - new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE); + new_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE); if (new_seqno != seqno) { seqno = new_seqno; goto rerun; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index 80c40c3..0cbaf88 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -36,7 +36,7 @@ struct vmw_temp_set_context {
bool vmw_fifo_have_3d(struct vmw_private *dev_priv) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; + u32 *fifo_mem = dev_priv->mmio_virt; uint32_t fifo_min, hwversion; const struct vmw_fifo_state *fifo = &dev_priv->fifo;
@@ -60,15 +60,15 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv) if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO)) return false;
- fifo_min = ioread32(fifo_mem + SVGA_FIFO_MIN); + fifo_min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN); if (fifo_min <= SVGA_FIFO_3D_HWVERSION * sizeof(unsigned int)) return false;
- hwversion = ioread32(fifo_mem + - ((fifo->capabilities & - SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ? - SVGA_FIFO_3D_HWVERSION_REVISED : - SVGA_FIFO_3D_HWVERSION)); + hwversion = vmw_mmio_read(fifo_mem + + ((fifo->capabilities & + SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ? + SVGA_FIFO_3D_HWVERSION_REVISED : + SVGA_FIFO_3D_HWVERSION));
if (hwversion == 0) return false; @@ -85,13 +85,13 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; + u32 *fifo_mem = dev_priv->mmio_virt; uint32_t caps;
if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO)) return false;
- caps = ioread32(fifo_mem + SVGA_FIFO_CAPABILITIES); + caps = vmw_mmio_read(fifo_mem + SVGA_FIFO_CAPABILITIES); if (caps & SVGA_FIFO_CAP_PITCHLOCK) return true;
@@ -100,7 +100,7 @@ bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv)
int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; + u32 *fifo_mem = dev_priv->mmio_virt; uint32_t max; uint32_t min;
@@ -137,19 +137,19 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) if (min < PAGE_SIZE) min = PAGE_SIZE;
- iowrite32(min, fifo_mem + SVGA_FIFO_MIN); - iowrite32(dev_priv->mmio_size, fifo_mem + SVGA_FIFO_MAX); + vmw_mmio_write(min, fifo_mem + SVGA_FIFO_MIN); + vmw_mmio_write(dev_priv->mmio_size, fifo_mem + SVGA_FIFO_MAX); wmb(); - iowrite32(min, fifo_mem + SVGA_FIFO_NEXT_CMD); - iowrite32(min, fifo_mem + SVGA_FIFO_STOP); - iowrite32(0, fifo_mem + SVGA_FIFO_BUSY); + vmw_mmio_write(min, fifo_mem + SVGA_FIFO_NEXT_CMD); + vmw_mmio_write(min, fifo_mem + SVGA_FIFO_STOP); + vmw_mmio_write(0, fifo_mem + SVGA_FIFO_BUSY); mb();
vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, 1);
- max = ioread32(fifo_mem + SVGA_FIFO_MAX); - min = ioread32(fifo_mem + SVGA_FIFO_MIN); - fifo->capabilities = ioread32(fifo_mem + SVGA_FIFO_CAPABILITIES); + max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX); + min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN); + fifo->capabilities = vmw_mmio_read(fifo_mem + SVGA_FIFO_CAPABILITIES);
DRM_INFO("Fifo max 0x%08x min 0x%08x cap 0x%08x\n", (unsigned int) max, @@ -157,7 +157,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) (unsigned int) fifo->capabilities);
atomic_set(&dev_priv->marker_seq, dev_priv->last_read_seqno); - iowrite32(dev_priv->last_read_seqno, fifo_mem + SVGA_FIFO_FENCE); + vmw_mmio_write(dev_priv->last_read_seqno, fifo_mem + SVGA_FIFO_FENCE); vmw_marker_queue_init(&fifo->marker_queue);
return 0; @@ -165,31 +165,23 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; - static DEFINE_SPINLOCK(ping_lock); - unsigned long irq_flags; + u32 *fifo_mem = dev_priv->mmio_virt;
- /* - * The ping_lock is needed because we don't have an atomic - * test-and-set of the SVGA_FIFO_BUSY register. - */ - spin_lock_irqsave(&ping_lock, irq_flags); - if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) { - iowrite32(1, fifo_mem + SVGA_FIFO_BUSY); + preempt_disable(); + if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) vmw_write(dev_priv, SVGA_REG_SYNC, reason); - } - spin_unlock_irqrestore(&ping_lock, irq_flags); + preempt_enable(); }
void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; + u32 *fifo_mem = dev_priv->mmio_virt;
vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC); while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0) ;
- dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE); + dev_priv->last_read_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE);
vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, dev_priv->config_done_state); @@ -213,11 +205,11 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
static bool vmw_fifo_is_full(struct vmw_private *dev_priv, uint32_t bytes) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; - uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX); - uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD); - uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN); - uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP); + u32 *fifo_mem = dev_priv->mmio_virt; + uint32_t max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX); + uint32_t next_cmd = vmw_mmio_read(fifo_mem + SVGA_FIFO_NEXT_CMD); + uint32_t min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN); + uint32_t stop = vmw_mmio_read(fifo_mem + SVGA_FIFO_STOP);
return ((max - next_cmd) + (stop - min) <= bytes); } @@ -321,7 +313,7 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) { struct vmw_fifo_state *fifo_state = &dev_priv->fifo; - u32 __iomem *fifo_mem = dev_priv->mmio_virt; + u32 *fifo_mem = dev_priv->mmio_virt; uint32_t max; uint32_t min; uint32_t next_cmd; @@ -329,9 +321,9 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv, int ret;
mutex_lock(&fifo_state->fifo_mutex); - max = ioread32(fifo_mem + SVGA_FIFO_MAX); - min = ioread32(fifo_mem + SVGA_FIFO_MIN); - next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD); + max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX); + min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN); + next_cmd = vmw_mmio_read(fifo_mem + SVGA_FIFO_NEXT_CMD);
if (unlikely(bytes >= (max - min))) goto out_err; @@ -342,7 +334,7 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv, fifo_state->reserved_size = bytes;
while (1) { - uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP); + uint32_t stop = vmw_mmio_read(fifo_mem + SVGA_FIFO_STOP); bool need_bounce = false; bool reserve_in_place = false;
@@ -376,8 +368,8 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv, fifo_state->using_bounce_buffer = false;
if (reserveable) - iowrite32(bytes, fifo_mem + - SVGA_FIFO_RESERVED); + vmw_mmio_write(bytes, fifo_mem + + SVGA_FIFO_RESERVED); return (void __force *) (fifo_mem + (next_cmd >> 2)); } else { @@ -427,7 +419,7 @@ void *vmw_fifo_reserve_dx(struct vmw_private *dev_priv, uint32_t bytes, }
static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state, - u32 __iomem *fifo_mem, + u32 *fifo_mem, uint32_t next_cmd, uint32_t max, uint32_t min, uint32_t bytes) { @@ -439,17 +431,16 @@ static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state, if (bytes < chunk_size) chunk_size = bytes;
- iowrite32(bytes, fifo_mem + SVGA_FIFO_RESERVED); + vmw_mmio_write(bytes, fifo_mem + SVGA_FIFO_RESERVED); mb(); - memcpy_toio(fifo_mem + (next_cmd >> 2), buffer, chunk_size); + memcpy(fifo_mem + (next_cmd >> 2), buffer, chunk_size); rest = bytes - chunk_size; if (rest) - memcpy_toio(fifo_mem + (min >> 2), buffer + (chunk_size >> 2), - rest); + memcpy(fifo_mem + (min >> 2), buffer + (chunk_size >> 2), rest); }
static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state, - u32 __iomem *fifo_mem, + u32 *fifo_mem, uint32_t next_cmd, uint32_t max, uint32_t min, uint32_t bytes) { @@ -457,12 +448,12 @@ static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state, fifo_state->dynamic_buffer : fifo_state->static_buffer;
while (bytes > 0) { - iowrite32(*buffer++, fifo_mem + (next_cmd >> 2)); + vmw_mmio_write(*buffer++, fifo_mem + (next_cmd >> 2)); next_cmd += sizeof(uint32_t); if (unlikely(next_cmd == max)) next_cmd = min; mb(); - iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD); + vmw_mmio_write(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD); mb(); bytes -= sizeof(uint32_t); } @@ -471,10 +462,10 @@ static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state, static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) { struct vmw_fifo_state *fifo_state = &dev_priv->fifo; - u32 __iomem *fifo_mem = dev_priv->mmio_virt; - uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD); - uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX); - uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN); + u32 *fifo_mem = dev_priv->mmio_virt; + uint32_t next_cmd = vmw_mmio_read(fifo_mem + SVGA_FIFO_NEXT_CMD); + uint32_t max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX); + uint32_t min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN); bool reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE;
if (fifo_state->dx) @@ -507,11 +498,11 @@ static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) if (next_cmd >= max) next_cmd -= max - min; mb(); - iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD); + vmw_mmio_write(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD); }
if (reserveable) - iowrite32(0, fifo_mem + SVGA_FIFO_RESERVED); + vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED); mb(); up_write(&fifo_state->rwsem); vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index a3e3c83..b8c6a03 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -64,7 +64,7 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, break; case DRM_VMW_PARAM_FIFO_HW_VERSION: { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; + u32 *fifo_mem = dev_priv->mmio_virt; const struct vmw_fifo_state *fifo = &dev_priv->fifo;
if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) { @@ -73,11 +73,11 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, }
param->value = - ioread32(fifo_mem + - ((fifo->capabilities & - SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ? - SVGA_FIFO_3D_HWVERSION_REVISED : - SVGA_FIFO_3D_HWVERSION)); + vmw_mmio_read(fifo_mem + + ((fifo->capabilities & + SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ? + SVGA_FIFO_3D_HWVERSION_REVISED : + SVGA_FIFO_3D_HWVERSION)); break; } case DRM_VMW_PARAM_MAX_SURF_MEMORY: @@ -179,7 +179,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, (struct drm_vmw_get_3d_cap_arg *) data; struct vmw_private *dev_priv = vmw_priv(dev); uint32_t size; - u32 __iomem *fifo_mem; + u32 *fifo_mem; void __user *buffer = (void __user *)((unsigned long)(arg->buffer)); void *bounce; int ret; @@ -229,7 +229,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, goto out_err; } else { fifo_mem = dev_priv->mmio_virt; - memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size); + memcpy(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size); }
ret = copy_to_user(buffer, bounce, size); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index 9498a5e..ac3eccd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -72,8 +72,8 @@ static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno) void vmw_update_seqno(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo_state) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; - uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE); + u32 *fifo_mem = dev_priv->mmio_virt; + uint32_t seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE);
if (dev_priv->last_read_seqno != seqno) { dev_priv->last_read_seqno = seqno; @@ -178,8 +178,9 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, } finish_wait(&dev_priv->fence_queue, &__wait); if (ret == 0 && fifo_idle) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; - iowrite32(signal_seq, fifo_mem + SVGA_FIFO_FENCE); + u32 *fifo_mem = dev_priv->mmio_virt; + + vmw_mmio_write(signal_seq, fifo_mem + SVGA_FIFO_FENCE); } wake_up_all(&dev_priv->fence_queue); out_err: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 03ffab2..a94b24d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -123,14 +123,14 @@ err_unreserve: void vmw_cursor_update_position(struct vmw_private *dev_priv, bool show, int x, int y) { - u32 __iomem *fifo_mem = dev_priv->mmio_virt; + u32 *fifo_mem = dev_priv->mmio_virt; uint32_t count;
- iowrite32(show ? 1 : 0, fifo_mem + SVGA_FIFO_CURSOR_ON); - iowrite32(x, fifo_mem + SVGA_FIFO_CURSOR_X); - iowrite32(y, fifo_mem + SVGA_FIFO_CURSOR_Y); - count = ioread32(fifo_mem + SVGA_FIFO_CURSOR_COUNT); - iowrite32(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT); + vmw_mmio_write(show ? 1 : 0, fifo_mem + SVGA_FIFO_CURSOR_ON); + vmw_mmio_write(x, fifo_mem + SVGA_FIFO_CURSOR_X); + vmw_mmio_write(y, fifo_mem + SVGA_FIFO_CURSOR_Y); + count = vmw_mmio_read(fifo_mem + SVGA_FIFO_CURSOR_COUNT); + vmw_mmio_write(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT); }
int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv, @@ -1155,7 +1155,8 @@ int vmw_kms_write_svga(struct vmw_private *vmw_priv, if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK) vmw_write(vmw_priv, SVGA_REG_PITCHLOCK, pitch); else if (vmw_fifo_have_pitchlock(vmw_priv)) - iowrite32(pitch, vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK); + vmw_mmio_write(pitch, vmw_priv->mmio_virt + + SVGA_FIFO_PITCHLOCK); vmw_write(vmw_priv, SVGA_REG_WIDTH, width); vmw_write(vmw_priv, SVGA_REG_HEIGHT, height); vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, bpp); @@ -1181,8 +1182,8 @@ int vmw_kms_save_vga(struct vmw_private *vmw_priv) vmw_priv->vga_pitchlock = vmw_read(vmw_priv, SVGA_REG_PITCHLOCK); else if (vmw_fifo_have_pitchlock(vmw_priv)) - vmw_priv->vga_pitchlock = ioread32(vmw_priv->mmio_virt + - SVGA_FIFO_PITCHLOCK); + vmw_priv->vga_pitchlock = vmw_mmio_read(vmw_priv->mmio_virt + + SVGA_FIFO_PITCHLOCK);
if (!(vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY)) return 0; @@ -1230,8 +1231,8 @@ int vmw_kms_restore_vga(struct vmw_private *vmw_priv) vmw_write(vmw_priv, SVGA_REG_PITCHLOCK, vmw_priv->vga_pitchlock); else if (vmw_fifo_have_pitchlock(vmw_priv)) - iowrite32(vmw_priv->vga_pitchlock, - vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK); + vmw_mmio_write(vmw_priv->vga_pitchlock, + vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
if (!(vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY)) return 0;
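A note on the vmw_fifo_ping_host() hunk above, which the commit message does not call out: because the FIFO is now mapped with memremap() as ordinary write-back memory rather than __iomem, an atomic cmpxchg() can perform the SVGA_FIFO_BUSY test-and-set directly, so the ping_lock spinlock becomes unnecessary and preempt_disable() merely keeps the BUSY update and the SVGA_REG_SYNC write close together. A minimal userspace analogue of that lock-free pattern follows; the names in it are illustrative, not from the driver:

/* Sketch: lock-free "ping" using C11 atomics, mirroring the cmpxchg()
 * on fifo_mem + SVGA_FIFO_BUSY in the patch above. Illustrative only. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint fifo_busy;                 /* stands in for SVGA_FIFO_BUSY */

static void ping_host(void)
{
        unsigned int expected = 0;

        /* Only the caller that flips 0 -> 1 actually pings the device. */
        if (atomic_compare_exchange_strong(&fifo_busy, &expected, 1))
                printf("ping: would write SVGA_REG_SYNC here\n");
        else
                printf("ping: device already marked busy, skipped\n");
}

int main(void)
{
        ping_host();    /* first call performs the ping */
        ping_host();    /* second call finds BUSY set and skips */
        return 0;
}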
Reduce the time spent in hardware irq context, and with it hardware irq latency.
Signed-off-by: Thomas Hellstrom thellstrom@vmware.com Reviewed-by: Sinclair Yeh syeh@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 108 ++++++++++++++++++++-------------- drivers/gpu/drm/vmwgfx/vmwgfx_fence.h | 2 + drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 6 +- 3 files changed, 68 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 8e689b4..f40c36e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -47,6 +47,7 @@ struct vmw_fence_manager { bool seqno_valid; /* Protected by @lock, and may not be set to true without the @goal_irq_mutex held. */ unsigned ctx; + struct tasklet_struct tasklet; };
struct vmw_user_fence { @@ -81,6 +82,8 @@ struct vmw_event_fence_action { uint32_t *tv_usec; };
+static void vmw_fence_tasklet(unsigned long data); + static struct vmw_fence_manager * fman_from_fence(struct vmw_fence_obj *fence) { @@ -115,12 +118,11 @@ static void vmw_fence_obj_destroy(struct fence *f) container_of(f, struct vmw_fence_obj, base);
struct vmw_fence_manager *fman = fman_from_fence(fence); - unsigned long irq_flags;
- spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); list_del_init(&fence->head); --fman->num_fence_objects; - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock); fence->destroy(fence); }
@@ -177,7 +179,6 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) struct vmw_private *dev_priv = fman->dev_priv; struct vmwgfx_wait_cb cb; long ret = timeout; - unsigned long irq_flags;
if (likely(vmw_fence_obj_signaled(fence))) return timeout; @@ -185,7 +186,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); vmw_seqno_waiter_add(dev_priv);
- spin_lock_irqsave(f->lock, irq_flags); + spin_lock_bh(f->lock);
if (intr && signal_pending(current)) { ret = -ERESTARTSYS; @@ -205,11 +206,11 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) __set_current_state(TASK_INTERRUPTIBLE); else __set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irqrestore(f->lock, irq_flags); + spin_unlock_bh(f->lock);
ret = schedule_timeout(ret);
- spin_lock_irqsave(f->lock, irq_flags); + spin_lock_bh(f->lock); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } @@ -219,7 +220,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) __set_current_state(TASK_RUNNING);
out: - spin_unlock_irqrestore(f->lock, irq_flags); + spin_unlock_bh(f->lock);
vmw_seqno_waiter_remove(dev_priv);
@@ -300,21 +301,22 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv) ttm_round_pot(sizeof(struct vmw_event_fence_action)); mutex_init(&fman->goal_irq_mutex); fman->ctx = fence_context_alloc(1); + tasklet_init(&fman->tasklet, vmw_fence_tasklet, + (unsigned long) fman);
return fman; }
void vmw_fence_manager_takedown(struct vmw_fence_manager *fman) { - unsigned long irq_flags; bool lists_empty;
(void) cancel_work_sync(&fman->work);
- spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); lists_empty = list_empty(&fman->fence_list) && list_empty(&fman->cleanup_list); - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock);
BUG_ON(!lists_empty); kfree(fman); @@ -324,7 +326,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, struct vmw_fence_obj *fence, u32 seqno, void (*destroy) (struct vmw_fence_obj *fence)) { - unsigned long irq_flags; int ret = 0;
fence_init(&fence->base, &vmw_fence_ops, &fman->lock, @@ -332,7 +333,7 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, INIT_LIST_HEAD(&fence->seq_passed_actions); fence->destroy = destroy;
- spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); if (unlikely(fman->fifo_down)) { ret = -EBUSY; goto out_unlock; @@ -341,7 +342,7 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, ++fman->num_fence_objects;
out_unlock: - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock); return ret;
} @@ -490,11 +491,9 @@ rerun:
void vmw_fences_update(struct vmw_fence_manager *fman) { - unsigned long irq_flags; - - spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); __vmw_fences_update(fman); - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock); }
bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence) @@ -694,11 +693,9 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
void vmw_fence_fifo_up(struct vmw_fence_manager *fman) { - unsigned long irq_flags; - - spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); fman->fifo_down = false; - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock); }
@@ -825,10 +822,9 @@ void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman, { struct vmw_event_fence_action *eaction; struct drm_pending_event *event; - unsigned long irq_flags;
while (1) { - spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); if (list_empty(event_list)) goto out_unlock; eaction = list_first_entry(event_list, @@ -837,11 +833,11 @@ void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman, list_del_init(&eaction->fpriv_head); event = eaction->event; eaction->event = NULL; - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock); event->destroy(event); } out_unlock: - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock); }
@@ -854,7 +850,7 @@ out_unlock: * This function is called when the seqno of the fence where @action is * attached has passed. It queues the event on the submitter's event list. * This function is always called from atomic context, and may be called - * from irq context. + * from tasklet context. */ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action) { @@ -863,13 +859,12 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action) struct drm_device *dev = eaction->dev; struct drm_pending_event *event = eaction->event; struct drm_file *file_priv; - unsigned long irq_flags;
if (unlikely(event == NULL)) return;
file_priv = event->file_priv; - spin_lock_irqsave(&dev->event_lock, irq_flags); + spin_lock_bh(&dev->event_lock);
if (likely(eaction->tv_sec != NULL)) { struct timeval tv; @@ -883,7 +878,7 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action) list_add_tail(&eaction->event->link, &file_priv->event_list); eaction->event = NULL; wake_up_all(&file_priv->event_wait); - spin_unlock_irqrestore(&dev->event_lock, irq_flags); + spin_unlock_bh(&dev->event_lock); }
/** @@ -900,11 +895,10 @@ static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action) struct vmw_event_fence_action *eaction = container_of(action, struct vmw_event_fence_action, action); struct vmw_fence_manager *fman = fman_from_fence(eaction->fence); - unsigned long irq_flags;
- spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); list_del(&eaction->fpriv_head); - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock);
vmw_fence_obj_unreference(&eaction->fence); kfree(eaction); @@ -924,11 +918,10 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence, struct vmw_fence_action *action) { struct vmw_fence_manager *fman = fman_from_fence(fence); - unsigned long irq_flags; bool run_update = false;
mutex_lock(&fman->goal_irq_mutex); - spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock);
fman->pending_actions[action->type]++; if (fence_is_signaled_locked(&fence->base)) { @@ -947,7 +940,7 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence, run_update = vmw_fence_goal_check_locked(fence); }
- spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock);
if (run_update) { if (!fman->goal_irq_on) { @@ -985,7 +978,6 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv, struct vmw_event_fence_action *eaction; struct vmw_fence_manager *fman = fman_from_fence(fence); struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); - unsigned long irq_flags;
eaction = kzalloc(sizeof(*eaction), GFP_KERNEL); if (unlikely(eaction == NULL)) @@ -1002,9 +994,9 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv, eaction->tv_sec = tv_sec; eaction->tv_usec = tv_usec;
- spin_lock_irqsave(&fman->lock, irq_flags); + spin_lock_bh(&fman->lock); list_add_tail(&eaction->fpriv_head, &vmw_fp->fence_events); - spin_unlock_irqrestore(&fman->lock, irq_flags); + spin_unlock_bh(&fman->lock);
vmw_fence_obj_add_action(fence, &eaction->action);
@@ -1025,16 +1017,15 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv, struct vmw_event_fence_pending *event; struct vmw_fence_manager *fman = fman_from_fence(fence); struct drm_device *dev = fman->dev_priv->dev; - unsigned long irq_flags; int ret;
- spin_lock_irqsave(&dev->event_lock, irq_flags); + spin_lock_bh(&dev->event_lock);
ret = (file_priv->event_space < sizeof(event->event)) ? -EBUSY : 0; if (likely(ret == 0)) file_priv->event_space -= sizeof(event->event);
- spin_unlock_irqrestore(&dev->event_lock, irq_flags); + spin_unlock_bh(&dev->event_lock);
if (unlikely(ret != 0)) { DRM_ERROR("Failed to allocate event space for this file.\n"); @@ -1078,9 +1069,9 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv, out_no_queue: event->base.destroy(&event->base); out_no_event: - spin_lock_irqsave(&dev->event_lock, irq_flags); + spin_lock_bh(&dev->event_lock); file_priv->event_space += sizeof(*event); - spin_unlock_irqrestore(&dev->event_lock, irq_flags); + spin_unlock_bh(&dev->event_lock); out_no_space: return ret; } @@ -1172,3 +1163,32 @@ out_no_ref_obj: vmw_fence_obj_unreference(&fence); return ret; } + +/** + * vmw_fence_tasklet - Fence manager tasklet entry point + * + * @data: The tasklet closure - A pointer to the fence manager cast to an + * unsigned long. + */ +static void vmw_fence_tasklet(unsigned long data) +{ + struct vmw_fence_manager *fman = (struct vmw_fence_manager *) data; + + spin_lock(&fman->lock); + __vmw_fences_update(fman); + spin_unlock(&fman->lock); + wake_up_all(&fman->dev_priv->fence_queue); +} + +/** + * vmw_fence_tasklet_schedule - Schedule a fence manager tasklet run + * + * @fman: Pointer to a fence manager + */ +void vmw_fence_tasklet_schedule(struct vmw_fence_manager *fman) +{ + if (!fman) + return; + + tasklet_schedule(&fman->tasklet); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h index 8be6c29..e55b2c9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h @@ -124,4 +124,6 @@ extern int vmw_event_fence_action_queue(struct drm_file *filee_priv, uint32_t *tv_sec, uint32_t *tv_usec, bool interruptible); +extern void vmw_fence_tasklet_schedule(struct vmw_fence_manager *fman); + #endif /* _VMWGFX_FENCE_H_ */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index ac3eccd..b0a6e65 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -48,10 +48,8 @@ irqreturn_t vmw_irq_handler(int irq, void *arg) return IRQ_NONE;
if (masked_status & (SVGA_IRQFLAG_ANY_FENCE | - SVGA_IRQFLAG_FENCE_GOAL)) { - vmw_fences_update(dev_priv->fman); - wake_up_all(&dev_priv->fence_queue); - } + SVGA_IRQFLAG_FENCE_GOAL)) + vmw_fence_tasklet_schedule(dev_priv->fman);
if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS) wake_up_all(&dev_priv->fifo_queue);
On Fri, Oct 30, 2015 at 02:42:44AM -0700, Thomas Hellstrom wrote:
Bottom halves are super-deprecated except for giant existing users like networking. I think the recommended way to do this is to either use threaded interrupts or work-queues. The reason for that seems to be that locking is funky around them, which is a major pain for RT. And RT is going mainline now for real. -Daniel
On 10/30/2015 11:23 AM, Daniel Vetter wrote:
Bottom halves are super-deprecated except for giant existing users like networking. I think the recommended way to do this is to either use threaded interrupts or work-queues. The reason for that seems to be that locking is funky around them, which is a major pain for RT. And RT is going mainline now for real. -Daniel
Thanks for the heads up. Unfortunately, work-queues introduce too much latency for this use-case. Given that we (vmwgfx) are already an existing user (albeit not a giant one :)), and that RT in a VM guest is somewhat unlikely, I wonder how much of an issue this is.
I'll read up on threaded interrupts.
/Thomas
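For reference, below is a minimal sketch of the threaded-interrupt alternative discussed above. It assumes the hard handler keeps the existing status read/ack from vmw_irq_handler() and defers the fence work to an irq thread instead of a tasklet; request_threaded_irq() and IRQ_WAKE_THREAD are the standard kernel API, while the vmw_irq_hard()/vmw_irq_thread() names are made up for illustration. This is not what the series actually does.

static irqreturn_t vmw_irq_hard(int irq, void *arg)
{
	struct drm_device *dev = (struct drm_device *)arg;
	struct vmw_private *dev_priv = vmw_priv(dev);
	u32 status, masked_status;

	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
	masked_status = status & dev_priv->irq_mask;

	if (!status)
		return IRQ_NONE;

	/* Ack the device so it can raise the next interrupt. */
	outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);

	if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS)
		wake_up_all(&dev_priv->fifo_queue);

	if (masked_status & (SVGA_IRQFLAG_ANY_FENCE |
			     SVGA_IRQFLAG_FENCE_GOAL))
		return IRQ_WAKE_THREAD;   /* run vmw_irq_thread() below */

	return IRQ_HANDLED;
}

static irqreturn_t vmw_irq_thread(int irq, void *arg)
{
	struct drm_device *dev = (struct drm_device *)arg;
	struct vmw_private *dev_priv = vmw_priv(dev);

	/* Process context; in this sketch the heavy fence processing
	 * happens here rather than in a tasklet. */
	vmw_fences_update(dev_priv->fman);
	wake_up_all(&dev_priv->fence_queue);
	return IRQ_HANDLED;
}

/* Registration, e.g. at driver load instead of drm_irq_install():
 *
 *	ret = request_threaded_irq(dev->pdev->irq, vmw_irq_hard,
 *				   vmw_irq_thread, IRQF_SHARED,
 *				   "vmwgfx", dev);
 */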
Relax locking with the goal of reducing the number of locking cycles and time spent with irqs disabled.
Signed-off-by: Thomas Hellstrom thellstrom@vmware.com Reviewed-by: Sinclair Yeh syeh@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 9 ++- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 23 ++------ drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 104 ++++++++++------------------------- 4 files changed, 39 insertions(+), 99 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index d1c34ab..a09cf85 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -643,7 +643,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) init_waitqueue_head(&dev_priv->fence_queue); init_waitqueue_head(&dev_priv->fifo_queue); dev_priv->fence_queue_waiters = 0; - atomic_set(&dev_priv->fifo_queue_waiters, 0); + dev_priv->fifo_queue_waiters = 0;
dev_priv->used_memory_size = 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 198c8b1..a8ae9df 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -440,13 +440,12 @@ struct vmw_private { spinlock_t waiter_lock; int fence_queue_waiters; /* Protected by waiter_lock */ int goal_queue_waiters; /* Protected by waiter_lock */ - int cmdbuf_waiters; /* Protected by irq_lock */ - int error_waiters; /* Protected by irq_lock */ - atomic_t fifo_queue_waiters; + int cmdbuf_waiters; /* Protected by waiter_lock */ + int error_waiters; /* Protected by waiter_lock */ + int fifo_queue_waiters; /* Protected by waiter_lock */ uint32_t last_read_seqno; - spinlock_t irq_lock; struct vmw_fence_manager *fman; - uint32_t irq_mask; + uint32_t irq_mask; /* Updates protected by waiter_lock */
/* * Device state diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index 0cbaf88..a8baf5f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -252,7 +252,6 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv, unsigned long timeout) { long ret = 1L; - unsigned long irq_flags;
if (likely(!vmw_fifo_is_full(dev_priv, bytes))) return 0; @@ -262,16 +261,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv, return vmw_fifo_wait_noirq(dev_priv, bytes, interruptible, timeout);
- spin_lock(&dev_priv->waiter_lock); - if (atomic_add_return(1, &dev_priv->fifo_queue_waiters) > 0) { - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - outl(SVGA_IRQFLAG_FIFO_PROGRESS, - dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); - dev_priv->irq_mask |= SVGA_IRQFLAG_FIFO_PROGRESS; - vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); - } - spin_unlock(&dev_priv->waiter_lock); + vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FIFO_PROGRESS, + &dev_priv->fifo_queue_waiters);
if (interruptible) ret = wait_event_interruptible_timeout @@ -287,14 +278,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv, else if (likely(ret > 0)) ret = 0;
- spin_lock(&dev_priv->waiter_lock); - if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) { - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - dev_priv->irq_mask &= ~SVGA_IRQFLAG_FIFO_PROGRESS; - vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); - } - spin_unlock(&dev_priv->waiter_lock); + vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FIFO_PROGRESS, + &dev_priv->fifo_queue_waiters);
return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index b0a6e65..12aaed7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -36,15 +36,13 @@ irqreturn_t vmw_irq_handler(int irq, void *arg) struct vmw_private *dev_priv = vmw_priv(dev); uint32_t status, masked_status;
- spin_lock(&dev_priv->irq_lock); status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); - masked_status = status & dev_priv->irq_mask; - spin_unlock(&dev_priv->irq_lock); + masked_status = status & READ_ONCE(dev_priv->irq_mask);
if (likely(status)) outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
- if (!masked_status) + if (!status) return IRQ_NONE;
if (masked_status & (SVGA_IRQFLAG_ANY_FENCE | @@ -188,65 +186,51 @@ out_err: return ret; }
-void vmw_seqno_waiter_add(struct vmw_private *dev_priv) +void vmw_generic_waiter_add(struct vmw_private *dev_priv, + u32 flag, int *waiter_count) { - spin_lock(&dev_priv->waiter_lock); - if (dev_priv->fence_queue_waiters++ == 0) { - unsigned long irq_flags; - - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - outl(SVGA_IRQFLAG_ANY_FENCE, - dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); - dev_priv->irq_mask |= SVGA_IRQFLAG_ANY_FENCE; + spin_lock_bh(&dev_priv->waiter_lock); + if ((*waiter_count)++ == 0) { + outl(flag, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + dev_priv->irq_mask |= flag; vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); } - spin_unlock(&dev_priv->waiter_lock); + spin_unlock_bh(&dev_priv->waiter_lock); }
-void vmw_seqno_waiter_remove(struct vmw_private *dev_priv) +void vmw_generic_waiter_remove(struct vmw_private *dev_priv, + u32 flag, int *waiter_count) { - spin_lock(&dev_priv->waiter_lock); - if (--dev_priv->fence_queue_waiters == 0) { - unsigned long irq_flags; - - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - dev_priv->irq_mask &= ~SVGA_IRQFLAG_ANY_FENCE; + spin_lock_bh(&dev_priv->waiter_lock); + if (--(*waiter_count) == 0) { + dev_priv->irq_mask &= ~flag; vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); } - spin_unlock(&dev_priv->waiter_lock); + spin_unlock_bh(&dev_priv->waiter_lock); }
+void vmw_seqno_waiter_add(struct vmw_private *dev_priv) +{ + vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_ANY_FENCE, + &dev_priv->fence_queue_waiters); +} + +void vmw_seqno_waiter_remove(struct vmw_private *dev_priv) +{ + vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_ANY_FENCE, + &dev_priv->fence_queue_waiters); +}
void vmw_goal_waiter_add(struct vmw_private *dev_priv) { - spin_lock(&dev_priv->waiter_lock); - if (dev_priv->goal_queue_waiters++ == 0) { - unsigned long irq_flags; - - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - outl(SVGA_IRQFLAG_FENCE_GOAL, - dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); - dev_priv->irq_mask |= SVGA_IRQFLAG_FENCE_GOAL; - vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); - } - spin_unlock(&dev_priv->waiter_lock); + vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, + &dev_priv->goal_queue_waiters); }
void vmw_goal_waiter_remove(struct vmw_private *dev_priv) { - spin_lock(&dev_priv->waiter_lock); - if (--dev_priv->goal_queue_waiters == 0) { - unsigned long irq_flags; - - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - dev_priv->irq_mask &= ~SVGA_IRQFLAG_FENCE_GOAL; - vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); - } - spin_unlock(&dev_priv->waiter_lock); + vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, + &dev_priv->goal_queue_waiters); }
int vmw_wait_seqno(struct vmw_private *dev_priv, @@ -303,7 +287,6 @@ void vmw_irq_preinstall(struct drm_device *dev) if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) return;
- spin_lock_init(&dev_priv->irq_lock); status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); } @@ -326,30 +309,3 @@ void vmw_irq_uninstall(struct drm_device *dev) status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); } - -void vmw_generic_waiter_add(struct vmw_private *dev_priv, - u32 flag, int *waiter_count) -{ - unsigned long irq_flags; - - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - if ((*waiter_count)++ == 0) { - outl(flag, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); - dev_priv->irq_mask |= flag; - vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); -} - -void vmw_generic_waiter_remove(struct vmw_private *dev_priv, - u32 flag, int *waiter_count) -{ - unsigned long irq_flags; - - spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); - if (--(*waiter_count) == 0) { - dev_priv->irq_mask &= ~flag; - vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); - } - spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); -}
Relax the required locking so that tasklets are disabled instead of irqs. Let the caller perform the locking through utility functions, which makes it possible to combine a number of register accesses within the same critical section. Implement this for capability reads and command buffer submission.
Signed-off-by: Thomas Hellstrom thellstrom@vmware.com Reviewed-by: Sinclair Yeh syeh@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 8 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 +-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 130 ++++++++++++++++++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 +- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 40 +++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 20 ++--- drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 2 +- 7 files changed, 163 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 8a76821..b8d2436 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -290,18 +290,20 @@ void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header) */ static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header) { - struct vmw_cmdbuf_man *man = header->man; + struct vmw_private *dev_priv = header->man->dev_priv; u32 val;
if (sizeof(header->handle) > 4) val = (header->handle >> 32); else val = 0; - vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val); + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, SVGA_REG_COMMAND_HIGH, val);
val = (header->handle & 0xFFFFFFFFULL); val |= header->cb_context & SVGA_CB_CONTEXT_MASK; - vmw_write(man->dev_priv, SVGA_REG_COMMAND_LOW, val); + __vmw_write(dev_priv, SVGA_REG_COMMAND_LOW, val); + vmw_hw_unlock(dev_priv, true);
return header->cb_header->status; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index a09cf85..2a5496a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -631,7 +631,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) ttm_lock_init(&dev_priv->reservation_sem); spin_lock_init(&dev_priv->hw_lock); spin_lock_init(&dev_priv->waiter_lock); - spin_lock_init(&dev_priv->cap_lock); spin_lock_init(&dev_priv->svga_lock);
for (i = vmw_res_context; i < vmw_res_max; ++i) { @@ -854,10 +853,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) }
if (dev_priv->has_mob) { - spin_lock(&dev_priv->cap_lock); - vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX); - dev_priv->has_dx = !!vmw_read(dev_priv, SVGA_REG_DEV_CAP); - spin_unlock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX); + dev_priv->has_dx = !!__vmw_read(dev_priv, SVGA_REG_DEV_CAP); + vmw_hw_unlock(dev_priv, true); }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a8ae9df..f7dcbcc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -385,7 +385,6 @@ struct vmw_private { bool has_gmr; bool has_mob; spinlock_t hw_lock; - spinlock_t cap_lock; bool has_dx;
/* @@ -546,34 +545,127 @@ static inline struct vmw_master *vmw_master(struct drm_master *master) return (struct vmw_master *) master->driver_priv; }
-/* - * The locking here is fine-grained, so that it is performed once - * for every read- and write operation. This is of course costly, but we - * don't perform much register access in the timing critical paths anyway. - * Instead we have the extra benefit of being sure that we don't forget - * the hw lock around register accesses. +/** + * vmw_hw_lock - Perform necessary locking for register access + * + * @dev_priv: Pointer to device private structure + * @lock_bh: Disable bottom halves. Always set to true unless irqs are disabled. */ -static inline void vmw_write(struct vmw_private *dev_priv, - unsigned int offset, uint32_t value) +static inline void vmw_hw_lock(struct vmw_private *dev_priv, + bool lock_bh) { - unsigned long irq_flags; + if (lock_bh) + spin_lock_bh(&dev_priv->hw_lock); + else + spin_lock(&dev_priv->hw_lock); +}
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags); +/** + * vmw_hw_unlock - Perform necessary unlocking after register access + * + * @dev_priv: Pointer to device private structure + * @unlock_bh: Re-enable bottom halves. Always set to true if it was set to true + * in the corresponding lock. + */ +static inline void vmw_hw_unlock(struct vmw_private *dev_priv, + bool unlock_bh) +{ + if (unlock_bh) + spin_unlock_bh(&dev_priv->hw_lock); + else + spin_unlock(&dev_priv->hw_lock); +} + +/** + * __vmw_write - Write a value to a device register with external locking. + * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * @value: Value to write + * + * Note that the caller must ensure that, when calling this function, + * @dev_priv::hw_lock is held and that we're safe against racing + * against tasklets. + */ +static inline void __vmw_write(struct vmw_private *dev_priv, + unsigned int offset, uint32_t value) +{ +#ifdef CONFIG_LOCKDEP + lockdep_assert_held_once(&dev_priv->hw_lock.rlock); + WARN_ON_ONCE(!(in_softirq() || irqs_disabled())); +#endif outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT); - spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags); }
-static inline uint32_t vmw_read(struct vmw_private *dev_priv, - unsigned int offset) +/** + * __vmw_read - Read a value from a device register with external locking. + * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * + * Note that the caller must ensure that, when calling this function, + * @dev_priv::hw_lock is held and that we're safe against racing + * against tasklets. + */ +static inline uint32_t __vmw_read(struct vmw_private *dev_priv, + unsigned int offset) { - unsigned long irq_flags; u32 val;
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags); +#ifdef CONFIG_LOCKDEP + lockdep_assert_held_once(&dev_priv->hw_lock.rlock); + WARN_ON_ONCE(!(in_softirq() || irqs_disabled())); +#endif outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT); - spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags); + + return val; +} + +/** + * vmw_write - Write a value to a device register + * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * @value: Value to write + * + * This function must not be called when irqs are disabled, since + * its call to vmw_hw_unlock will re-enable bottom halves. + */ +static inline void vmw_write(struct vmw_private *dev_priv, + unsigned int offset, uint32_t value) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(irqs_disabled()); +#endif + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, offset, value); + vmw_hw_unlock(dev_priv, true); +} + +/** + * vmw_read - Read a value from a device register + * + * @dev_priv: Pointer to the device private structure + * @offset: Device register offset + * + * This function must not be called when irqs are disabled, since + * its call to vmw_hw_unlock will re-enable bottom halves. + */ +static inline uint32_t vmw_read(struct vmw_private *dev_priv, + unsigned int offset) +{ + u32 val; + +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(irqs_disabled()); +#endif + vmw_hw_lock(dev_priv, true); + val = __vmw_read(dev_priv, offset); + vmw_hw_unlock(dev_priv, true);
return val; } @@ -722,8 +814,8 @@ extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes); extern void vmw_fifo_commit_flush(struct vmw_private *dev_priv, uint32_t bytes); extern int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno); -extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason); -extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason); +extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason, + bool lock_bh); extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv); extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv); extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index f40c36e..ac66aad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -149,7 +149,7 @@ static bool vmw_fence_enable_signaling(struct fence *f) if (seqno - fence->base.seqno < VMW_FENCE_WRAP) return false;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, false);
return true; } @@ -183,7 +183,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) if (likely(vmw_fence_obj_signaled(fence))) return timeout;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); vmw_seqno_waiter_add(dev_priv);
spin_lock_bh(f->lock); @@ -525,7 +525,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence) { struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); }
static void vmw_fence_destroy(struct vmw_fence_obj *fence) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index a8baf5f..a70cc88 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -49,10 +49,10 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv) if (!dev_priv->has_mob) return false;
- spin_lock(&dev_priv->cap_lock); - vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D); - result = vmw_read(dev_priv, SVGA_REG_DEV_CAP); - spin_unlock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D); + result = __vmw_read(dev_priv, SVGA_REG_DEV_CAP); + vmw_hw_unlock(dev_priv, true);
return (result != 0); } @@ -163,14 +163,32 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) return 0; }
-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason) +/** + * vmw_fifo_ping_host - Notify the device that there are device commands + * to process + * + * @dev_priv: Pointer to a device private struct. + * @reason: A valid reason to ping the host. See device headers. + * @lock_bh: Always set to true unless irqs are disabled, in which case + * it must be set to false. + * + * Aka the device "doorbell". The device has an optimization that allows + * it to avoid the relatively expensive register write to + * SVGA_REG_SYNC if the SVGA_FIFO_BUSY mmio register is set to 1, meaning + * that the doorbell has already been rung. The SVGA_FIFO_BUSY mmio register + * will be cleared by the device when a new write to SVGA_REG_SYNC is needed + * to notify the device of pending commands. + */ +void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason, + bool lock_bh) { u32 *fifo_mem = dev_priv->mmio_virt;
- preempt_disable(); - if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) - vmw_write(dev_priv, SVGA_REG_SYNC, reason); - preempt_enable(); + if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) { + vmw_hw_lock(dev_priv, lock_bh); + __vmw_write(dev_priv, SVGA_REG_SYNC, reason); + vmw_hw_unlock(dev_priv, lock_bh); + } }
void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) @@ -256,7 +274,7 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv, if (likely(!vmw_fifo_is_full(dev_priv, bytes))) return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL, true); if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) return vmw_fifo_wait_noirq(dev_priv, bytes, interruptible, timeout); @@ -490,7 +508,7 @@ static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED); mb(); up_write(&fifo_state->rwsem); - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); mutex_unlock(&fifo_state->fifo_mutex); }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index b8c6a03..8ce0886 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -159,14 +159,14 @@ static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce, (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32); compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS;
- spin_lock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); for (i = 0; i < max_size; ++i) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); compat_cap->pairs[i][0] = i; compat_cap->pairs[i][1] = vmw_mask_multisample - (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP)); + (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP)); } - spin_unlock(&dev_priv->cap_lock); + vmw_hw_unlock(dev_priv, true);
return 0; } @@ -216,13 +216,13 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, if (num > SVGA3D_DEVCAP_MAX) num = SVGA3D_DEVCAP_MAX;
- spin_lock(&dev_priv->cap_lock); + vmw_hw_lock(dev_priv, true); for (i = 0; i < num; ++i) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); + __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); *bounce32++ = vmw_mask_multisample - (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP)); + (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP)); } - spin_unlock(&dev_priv->cap_lock); + vmw_hw_unlock(dev_priv, true); } else if (gb_objects) { ret = vmw_fill_compat_cap(dev_priv, bounce, size); if (unlikely(ret != 0)) @@ -420,7 +420,7 @@ unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait) struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); return drm_poll(filp, wait); }
@@ -443,6 +443,6 @@ ssize_t vmw_fops_read(struct file *filp, char __user *buffer, struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true); return drm_read(filp, buffer, count, offset); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index 12aaed7..af6970b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -246,7 +246,7 @@ int vmw_wait_seqno(struct vmw_private *dev_priv, if (likely(vmw_seqno_passed(dev_priv, seqno))) return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE)) return vmw_fallback_wait(dev_priv, lazy, true, seqno,