lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Mon, 18 May 2015 21:02:15 +0200 From: Denys Vlasenko <dvlasenk@...hat.com> To: Christian König <christian.koenig@....com> Cc: Denys Vlasenko <dvlasenk@...hat.com>, Alex Deucher <alexander.deucher@....com>, linux-kernel@...r.kernel.org Subject: [PATCH v2] radeon: Deinline indirect register accessor functions This patch deinlines indirect register accessor functions. These functions perform two mmio accesses, framed by spin lock/unlock. Spin lock/unlock by itself takes more than 50 cycles in ideal case (if lock is exclusively cached on current CPU). With this .config: http://busybox.net/~vda/kernel_config, after uninlining these functions have sizes and callsite counts as follows: r600_uvd_ctx_rreg: 111 bytes, 4 callsites r600_uvd_ctx_wreg: 113 bytes, 5 callsites eg_pif_phy0_rreg: 106 bytes, 13 callsites eg_pif_phy0_wreg: 108 bytes, 13 callsites eg_pif_phy1_rreg: 107 bytes, 13 callsites eg_pif_phy1_wreg: 108 bytes, 13 callsites rv370_pcie_rreg: 111 bytes, 21 callsites rv370_pcie_wreg: 113 bytes, 24 callsites r600_rcu_rreg: 111 bytes, 16 callsites r600_rcu_wreg: 113 bytes, 25 callsites cik_didt_rreg: 106 bytes, 10 callsites cik_didt_wreg: 107 bytes, 10 callsites tn_smc_rreg: 106 bytes, 126 callsites tn_smc_wreg: 107 bytes, 116 callsites eg_cg_rreg: 107 bytes, 20 callsites eg_cg_wreg: 108 bytes, 52 callsites Functions r100_mm_rreg() and r100_mm_rreg() have a fast path and locked (slow) path. This patch deinlines only slow path. r100_mm_rreg_slow: 78 bytes, 2083 callsites r100_mm_wreg_slow: 81 bytes, 3570 callsites Reduction in code size is more than 65,000 bytes: text data bss dec hex filename 85740176 22294680 20627456 128662312 7ab3b28 vmlinux.before 85674192 22294776 20627456 128598664 7aa4288 vmlinux Signed-off-by: Denys Vlasenko <dvlasenk@...hat.com> Cc: Christian König <christian.koenig@....com> Cc: Alex Deucher <alexander.deucher@....com> Cc: linux-kernel@...r.kernel.org --- Changes in v2: only partially deinline r100_mm_r/wreg drivers/gpu/drm/radeon/r100.c | 22 ++++ drivers/gpu/drm/radeon/radeon.h | 218 ++++----------------------------- drivers/gpu/drm/radeon/radeon_device.c | 179 +++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 196 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 04f2514..238b13f 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4090,6 +4090,28 @@ int r100_init(struct radeon_device *rdev) return 0; } +uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg) +{ + unsigned long flags; + uint32_t ret; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); + return ret; +} + +void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); +} + u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) { if (reg < rdev->rio_mem_size) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5587603..d9a7c55 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2465,38 +2465,24 @@ int radeon_gpu_wait_for_idle(struct radeon_device *rdev); #define RADEON_MIN_MMIO_SIZE 0x10000 +uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg); +void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v); static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, bool always_indirect) { /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */ if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) return readl(((void __iomem *)rdev->rmmio) + reg); - else { - unsigned long flags; - uint32_t ret; - - spin_lock_irqsave(&rdev->mmio_idx_lock, flags); - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); - spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); - - return ret; - } + else + return r100_mm_rreg_slow(rdev, reg); } - static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, bool always_indirect) { if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) writel(v, ((void __iomem *)rdev->rmmio) + reg); - else { - unsigned long flags; - - spin_lock_irqsave(&rdev->mmio_idx_lock, flags); - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); - spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); - } + else + r100_mm_wreg_slow(rdev, reg, v); } u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); @@ -2582,182 +2568,22 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f) /* * Indirect registers accessor */ -static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg) -{ - unsigned long flags; - uint32_t r; - - spin_lock_irqsave(&rdev->pcie_idx_lock, flags); - WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); - r = RREG32(RADEON_PCIE_DATA); - spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); - return r; -} - -static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->pcie_idx_lock, flags); - WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); - WREG32(RADEON_PCIE_DATA, (v)); - spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); -} - -static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->smc_idx_lock, flags); - WREG32(TN_SMC_IND_INDEX_0, (reg)); - r = RREG32(TN_SMC_IND_DATA_0); - spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); - return r; -} - -static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->smc_idx_lock, flags); - WREG32(TN_SMC_IND_INDEX_0, (reg)); - WREG32(TN_SMC_IND_DATA_0, (v)); - spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); -} - -static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->rcu_idx_lock, flags); - WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); - r = RREG32(R600_RCU_DATA); - spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); - return r; -} - -static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->rcu_idx_lock, flags); - WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); - WREG32(R600_RCU_DATA, (v)); - spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); -} - -static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->cg_idx_lock, flags); - WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); - r = RREG32(EVERGREEN_CG_IND_DATA); - spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); - return r; -} - -static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->cg_idx_lock, flags); - WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); - WREG32(EVERGREEN_CG_IND_DATA, (v)); - spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); -} - -static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); - r = RREG32(EVERGREEN_PIF_PHY0_DATA); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); - return r; -} - -static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); - WREG32(EVERGREEN_PIF_PHY0_DATA, (v)); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); -} - -static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); - r = RREG32(EVERGREEN_PIF_PHY1_DATA); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); - return r; -} - -static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); - WREG32(EVERGREEN_PIF_PHY1_DATA, (v)); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); -} - -static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->uvd_idx_lock, flags); - WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); - r = RREG32(R600_UVD_CTX_DATA); - spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); - return r; -} - -static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->uvd_idx_lock, flags); - WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); - WREG32(R600_UVD_CTX_DATA, (v)); - spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); -} - - -static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->didt_idx_lock, flags); - WREG32(CIK_DIDT_IND_INDEX, (reg)); - r = RREG32(CIK_DIDT_IND_DATA); - spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); - return r; -} - -static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->didt_idx_lock, flags); - WREG32(CIK_DIDT_IND_INDEX, (reg)); - WREG32(CIK_DIDT_IND_DATA, (v)); - spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); -} +uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg); +void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg); +void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg); +void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg); +void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg); +void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg); +void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg); +void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg); +void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v); void r100_pll_errata_after_index(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index bd7519f..6712505 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -161,6 +161,185 @@ static void radeon_device_handle_px_quirks(struct radeon_device *rdev) rdev->flags &= ~RADEON_IS_PX; } +/* + * Indirect registers accessor + */ +uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg) +{ + unsigned long flags; + uint32_t r; + + spin_lock_irqsave(&rdev->pcie_idx_lock, flags); + WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); + r = RREG32(RADEON_PCIE_DATA); + spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); + return r; +} + +void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->pcie_idx_lock, flags); + WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); + WREG32(RADEON_PCIE_DATA, (v)); + spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); +} + +u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->smc_idx_lock, flags); + WREG32(TN_SMC_IND_INDEX_0, (reg)); + r = RREG32(TN_SMC_IND_DATA_0); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); + return r; +} + +void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->smc_idx_lock, flags); + WREG32(TN_SMC_IND_INDEX_0, (reg)); + WREG32(TN_SMC_IND_DATA_0, (v)); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); +} + +u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->rcu_idx_lock, flags); + WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); + r = RREG32(R600_RCU_DATA); + spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); + return r; +} + +void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->rcu_idx_lock, flags); + WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); + WREG32(R600_RCU_DATA, (v)); + spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); +} + +u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->cg_idx_lock, flags); + WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); + r = RREG32(EVERGREEN_CG_IND_DATA); + spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); + return r; +} + +void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->cg_idx_lock, flags); + WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); + WREG32(EVERGREEN_CG_IND_DATA, (v)); + spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); +} + +u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); + r = RREG32(EVERGREEN_PIF_PHY0_DATA); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); + return r; +} + +void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); + WREG32(EVERGREEN_PIF_PHY0_DATA, (v)); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); +} + +u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); + r = RREG32(EVERGREEN_PIF_PHY1_DATA); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); + return r; +} + +void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); + WREG32(EVERGREEN_PIF_PHY1_DATA, (v)); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); +} + +u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->uvd_idx_lock, flags); + WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); + r = RREG32(R600_UVD_CTX_DATA); + spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); + return r; +} + +void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->uvd_idx_lock, flags); + WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); + WREG32(R600_UVD_CTX_DATA, (v)); + spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); +} + +u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->didt_idx_lock, flags); + WREG32(CIK_DIDT_IND_INDEX, (reg)); + r = RREG32(CIK_DIDT_IND_DATA); + spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); + return r; +} + +void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->didt_idx_lock, flags); + WREG32(CIK_DIDT_IND_INDEX, (reg)); + WREG32(CIK_DIDT_IND_DATA, (v)); + spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); +} + /** * radeon_program_register_sequence - program an array of registers. * -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists