From cbbe555c62e9c37849dcfcfcf846cf30b6a61140 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Tue, 12 Apr 2022 20:49:09 +0000 Subject: [PATCH 01/10] Current work on Radeon 5450 card based on Coreforge's patch. --- drivers/gpu/drm/radeon/atom.c | 4 ++-- drivers/gpu/drm/radeon/radeon_bios.c | 7 +++++- drivers/gpu/drm/radeon/radeon_device.c | 31 ++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_fb.c | 32 +++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_object.c | 4 ++++ drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++- drivers/gpu/drm/radeon/radeon_vm.c | 3 ++- drivers/gpu/drm/radeon/rs600.c | 5 +++- drivers/video/fbdev/core/cfbcopyarea.c | 16 +++++++++++-- drivers/video/fbdev/core/cfbfillrect.c | 16 +++++++++++-- 10 files changed, 109 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index f15b20da5315c8..ad4c568c9209be 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -729,8 +729,8 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg) cjiffies = jiffies; if (time_after(cjiffies, ctx->last_jump_jiffies)) { cjiffies -= ctx->last_jump_jiffies; - if ((jiffies_to_msecs(cjiffies) > 5000)) { - DRM_ERROR("atombios stuck in loop for more than 5secs aborting\n"); + if ((jiffies_to_msecs(cjiffies) > 30000)) { + DRM_ERROR("atombios stuck in loop for more than 30secs aborting\n"); ctx->abort = true; } } else { diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 33121655d50bbe..76a81dc1b37c2e 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -101,7 +101,12 @@ static bool radeon_read_bios(struct radeon_device *rdev) pci_unmap_rom(rdev->pdev, bios); return false; } - memcpy_fromio(rdev->bios, bios, size); + //memcpy_fromio(rdev->bios, bios, size); + int pos; + for(pos = 0;pos < size; pos++){ + //memcpy_fromio(rdev->bios+pos,bios+pos,1); + rdev->bios[pos] = __raw_readb(bios+pos); + 
} pci_unmap_rom(rdev->pdev, bios); return true; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 4f0fbf66743160..909f35f750054c 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -446,6 +446,35 @@ void radeon_wb_fini(struct radeon_device *rdev) } } +//memset_io with only 32-bit accesses +void memset_io_pcie_wb(volatile void __iomem *dst, int c, size_t count) +{ + u32 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + //qc |= qc << 32; + mb(); + while (count && !IS_ALIGNED((unsigned long)dst, 8)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} + + /** * radeon_wb_init- Init Writeback driver info and allocate memory * @@ -490,7 +519,7 @@ int radeon_wb_init(struct radeon_device *rdev) } /* clear wb memory */ - memset((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); + memset_io_pcie_wb((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); /* disable event_write fences */ rdev->wb.use_event = false; /* disabled via module param */ diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index ca382fbf7a86a3..0fee4eac304b44 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -209,6 +209,36 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, return ret; } +//memset_io with only 32-bit accesses + void memset_io_pcie(volatile void __iomem *dst, int c, size_t count) + { + u64 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + //qc |= qc << 32; + + __iowmb(); + + while (count && !IS_ALIGNED((unsigned long)dst, 16)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} + static int radeonfb_create(struct 
drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { @@ -262,7 +292,7 @@ static int radeonfb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - memset_io(rbo->kptr, 0x0, radeon_bo_size(rbo)); + memset_io_pcie(rbo->kptr, 0x0, radeon_bo_size(rbo)); info->fbops = &radeonfb_ops; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 56ede9d63b12c5..b33e32f427062e 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -220,6 +220,10 @@ int radeon_bo_create(struct radeon_device *rdev, bo->flags &= ~RADEON_GEM_GTT_WC; #endif + //Write combining may cause issues on the raspberry pi + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); + bo->flags |= RADEON_GEM_GTT_UC; + radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ down_read(&rdev->pm.mclk_lock); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index a06d4cc2fb1c43..e0ddcfc60cdc0a 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -456,7 +456,8 @@ static int radeon_ttm_backend_bind(struct ttm_device *bdev, ttm->num_pages, bo_mem, ttm); } if (ttm->caching == ttm_cached) - flags |= RADEON_GART_PAGE_SNOOP; + printk("TTM Page would've been snooped\n"); + // flags |= RADEON_GART_PAGE_SNOOP; r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index bb53016f3138a2..a5065df377e7d6 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -952,7 +952,8 @@ int radeon_vm_bo_update(struct radeon_device *rdev, if (mem->mem_type == TTM_PL_TT) { bo_va->flags |= RADEON_VM_PAGE_SYSTEM; if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC))) - bo_va->flags |= RADEON_VM_PAGE_SNOOPED; + printk("VM Page 
would've been set to snooped\n"); + //bo_va->flags |= RADEON_VM_PAGE_SNOOPED; } else { addr += rdev->vm_manager.vram_base_offset; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index b87dd551e93977..58a21c1a5f7011 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -660,7 +660,10 @@ void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t entry) { void __iomem *ptr = (void *)rdev->gart.ptr; - writeq(entry, ptr + (i * 8)); + uint32_t high = entry >> 32; + writel(entry,ptr+(i*8)); + writel(high,ptr + (i*8) + 4); + //writeq(entry, ptr + (i * 8)); } int rs600_irq_set(struct radeon_device *rdev) diff --git a/drivers/video/fbdev/core/cfbcopyarea.c b/drivers/video/fbdev/core/cfbcopyarea.c index 6d4bfeecee3508..3e2369ff697fa1 100644 --- a/drivers/video/fbdev/core/cfbcopyarea.c +++ b/drivers/video/fbdev/core/cfbcopyarea.c @@ -34,10 +34,22 @@ # define FB_WRITEL fb_writel # define FB_READL fb_readl #else -# define FB_WRITEL fb_writeq -# define FB_READL fb_readq +# define FB_WRITEL fb_writel_writeq +# define FB_READL fb_readl_readq #endif +static void fb_writel_writeq(u64 val, volatile void __iomem *addr){ + fb_writel(val,addr); + fb_writel(val >> 32, addr + 4); +} + +static u64 fb_readl_readq(volatile void __iomem *addr){ + u64 val; + val = fb_readl(addr); + val |= (u64)fb_readl(addr + 4) << 32; /* widen first: shifting a 32-bit value by 32 is undefined and drops the high word */ + return val; +} + /* * Generic bitwise copy algorithm */ diff --git a/drivers/video/fbdev/core/cfbfillrect.c b/drivers/video/fbdev/core/cfbfillrect.c index ba9f58b2a5e86e..8dda2665ec7f2e 100644 --- a/drivers/video/fbdev/core/cfbfillrect.c +++ b/drivers/video/fbdev/core/cfbfillrect.c @@ -23,10 +23,22 @@ # define FB_WRITEL fb_writel # define FB_READL fb_readl #else -# define FB_WRITEL fb_writeq -# define FB_READL fb_readq +# define FB_WRITEL fb_writel_writeq +# define FB_READL fb_readl_readq #endif +static void fb_writel_writeq(u64 val, volatile void __iomem *addr){ + fb_writel(val,addr); + fb_writel(val >> 32, 
addr + 4); +} + +static u64 fb_readl_readq(volatile void __iomem *addr){ + u64 val; + val = fb_readl(addr); + val |= (u64)fb_readl(addr + 4) << 32; /* widen first: shifting a 32-bit value by 32 is undefined and drops the high word */ + return val; +} + /* * Aligned pattern fill using 32/64-bit memory accesses */ From 180b3175e3e0c3dd55634c0ac300dccfb662d465 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Wed, 13 Apr 2022 22:04:15 +0000 Subject: [PATCH 02/10] Fixing a few missing memset_io and memcpy_io calls. --- drivers/gpu/drm/radeon/radeon.h | 7 ++ drivers/gpu/drm/radeon/radeon_bios.c | 6 +- drivers/gpu/drm/radeon/radeon_device.c | 92 ++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_fb.c | 30 --------- drivers/gpu/drm/radeon/radeon_uvd.c | 4 +- 5 files changed, 104 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 895776c421d4de..37fbef271db338 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2237,6 +2237,13 @@ void radeon_agp_disable(struct radeon_device *rdev); int radeon_asic_init(struct radeon_device *rdev); +/* + * memcpy_io and memset_io functions that work on a raspberry pi 4 + */ +void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count); +void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count); +void memset_io_pcie(volatile void __iomem *dst, int c, size_t count); + /* * IOCTL. 
*/ diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 76a81dc1b37c2e..38a88f3297e890 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -72,7 +72,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev) iounmap(bios); return false; } - memcpy_fromio(rdev->bios, bios, size); + memcpy_fromio_pcie(rdev->bios, bios, size); iounmap(bios); return true; } @@ -81,6 +81,7 @@ static bool radeon_read_bios(struct radeon_device *rdev) { uint8_t __iomem *bios, val1, val2; size_t size; + int pos; rdev->bios = NULL; /* XXX: some cards may return 0 for rom size? ddx has a workaround */ @@ -102,7 +103,6 @@ static bool radeon_read_bios(struct radeon_device *rdev) return false; } //memcpy_fromio(rdev->bios, bios, size); - int pos; for(pos = 0;pos < size; pos++){ //memcpy_fromio(rdev->bios+pos,bios+pos,1); rdev->bios[pos] = __raw_readb(bios+pos); @@ -130,7 +130,7 @@ static bool radeon_read_platform_bios(struct radeon_device *rdev) if (!bios) goto free_bios; - memcpy_fromio(rdev->bios, bios, romlen); + memcpy_fromio_pcie(rdev->bios, bios, romlen); iounmap(bios); if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 909f35f750054c..666c3cd9aba913 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -112,6 +112,98 @@ static const char radeon_family_name[][16] = { "LAST", }; +/** + * DOC: memcpy_fromio_pcie + * + * like memcpy_fromio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 + */ + +void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)from, 8)) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } + + while (count >= 4) { + *(u32 *)to = __raw_readl(from); + from += 4; + to += 4; + count -= 4; + } + + while (count) { + *(u8 
*)to = __raw_readb(from); + from++; + to++; + count--; + } +} + +/** + * DOC: memcpy_toio_pcie + * + * like memcpy_toio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 + */ + +void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)to, 8)) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } + + while (count >= 4) { + __raw_writel(*(const u32 *)from, to); /* load exactly the 4 bytes being stored; a u64 load over-reads the source buffer */ + from += 4; + to += 4; + count -= 4; + } + + while (count) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } +} + +/** + * DOC: memset_io_pcie + * + * like memset_io, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 + */ + +void memset_io_pcie(volatile void __iomem *dst, int c, size_t count) +{ + u32 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + + while (count && !IS_ALIGNED((unsigned long)dst, 8)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} + #if defined(CONFIG_VGA_SWITCHEROO) bool radeon_has_atpx_dgpu_power_cntl(void); bool radeon_is_atpx_hybrid(void); diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 0fee4eac304b44..7165c67ec45d1f 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -209,36 +209,6 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, return ret; } -//memset_io with only 32-bit accesses - void memset_io_pcie(volatile void __iomem *dst, int c, size_t count) - { - u64 qc = (u8)c; - - qc |= qc << 8; - qc |= qc << 16; - //qc |= qc << 32; - - __iowmb(); - - while (count && !IS_ALIGNED((unsigned long)dst, 16)) { - __raw_writeb(c, dst); - dst++; - count--; - } - - while (count >= 4) { - __raw_writel(qc, dst); - dst += 4; - count -= 4; - } - - while (count) { - __raw_writeb(c, dst); - dst++; - 
count--; - } -} - static int radeonfb_create(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 2ea86919d95364..e3804be3680d8c 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -288,7 +288,7 @@ int radeon_uvd_resume(struct radeon_device *rdev) if (rdev->uvd.vcpu_bo == NULL) return -EINVAL; - memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); + memcpy_toio_pcie((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); size = radeon_bo_size(rdev->uvd.vcpu_bo); size -= rdev->uvd_fw->size; @@ -296,7 +296,7 @@ int radeon_uvd_resume(struct radeon_device *rdev) ptr = rdev->uvd.cpu_addr; ptr += rdev->uvd_fw->size; - memset_io((void __iomem *)ptr, 0, size); + memset_io_pcie((void __iomem *)ptr, 0, size); return 0; } From af1da8e3d57b543dd6fc70fdef85c6f92d7e659f Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Wed, 13 Apr 2022 22:47:31 +0000 Subject: [PATCH 03/10] Adding almost all the rest of the changes from Coreforge's branch. 
--- drivers/gpu/drm/radeon/evergreen_dma.c | 2 ++ drivers/gpu/drm/radeon/r600.c | 9 ++++--- drivers/gpu/drm/radeon/r600_dma.c | 4 ++++ drivers/gpu/drm/radeon/radeon.h | 27 +++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_cs.c | 2 ++ drivers/gpu/drm/radeon/radeon_device.c | 7 ++++-- drivers/gpu/drm/radeon/radeon_fb.c | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 17 +++++++++++++ drivers/gpu/drm/radeon/radeon_gem.c | 33 ++++++++++++++++++++------ drivers/gpu/drm/radeon/radeon_ib.c | 19 ++++++++++++++- drivers/gpu/drm/radeon/radeon_object.c | 1 + drivers/gpu/drm/radeon/radeon_ring.c | 10 +++++++- drivers/gpu/drm/radeon/radeon_ttm.c | 1 + drivers/gpu/drm/radeon/rs600.c | 4 ++-- 14 files changed, 121 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 52c79da1ecf571..af9100167fc102 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -155,6 +155,8 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, radeon_ring_unlock_commit(rdev, ring, false); radeon_sync_free(rdev, &sync, fence); + r600_dma_ring_test(rdev, ring); + return fence; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index ca3fcae2adb537..23f54a7394a0b1 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2625,6 +2625,7 @@ u32 r600_gfx_get_rptr(struct radeon_device *rdev, else rptr = RREG32(R600_CP_RB_RPTR); + mb(); //CHANGED return rptr; } @@ -3483,7 +3484,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) if (rdev->ih.ring_obj == NULL) { r = radeon_bo_create(rdev, rdev->ih.ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, + RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->ih.ring_obj); if (r) { DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r); @@ -3493,7 +3494,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) if (unlikely(r != 0)) return r; r = radeon_bo_pin(rdev->ih.ring_obj, 
- RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_DOMAIN_VRAM, &rdev->ih.gpu_addr); if (r) { radeon_bo_unreserve(rdev->ih.ring_obj); @@ -4038,8 +4039,10 @@ static u32 r600_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp; - if (rdev->wb.enabled) + if (rdev->wb.enabled) { wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); + mb(); + } else wptr = RREG32(IH_RB_WPTR); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 89ca2738c5d4c8..1f6e86c8268e76 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -57,6 +57,7 @@ uint32_t r600_dma_get_rptr(struct radeon_device *rdev, else rptr = RREG32(DMA_RB_RPTR); + mb(); //CHANGED return (rptr & 0x3fffc) >> 2; } @@ -245,6 +246,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, tmp = 0xCAFEDEAD; rdev->wb.wb[index/4] = cpu_to_le32(tmp); + mb(); //CHANGED r = radeon_ring_lock(rdev, ring, 4); if (r) { DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); @@ -258,6 +260,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); + mb(); //CHANGED if (tmp == 0xDEADBEEF) break; udelay(1); @@ -379,6 +382,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); + mb(); //CHANGED if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 37fbef271db338..3c133e0da0f244 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -370,6 +370,7 @@ struct radeon_fence_driver { atomic64_t last_seq; bool initialized, delayed_irq; struct delayed_work lockup_work; + dma_addr_t dma_addr; }; struct radeon_fence { @@ -668,6 +669,8 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, dma_addr_t *dma_addr, uint32_t flags); +void 
radeon_gart_sync_all_for_device(struct radeon_device *rdev); + /* * GPU MC structures, functions & helpers */ @@ -2344,6 +2347,11 @@ struct radeon_atcs { typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t); typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); +struct moved_bos_entry{ + struct radeon_bo* bo; + struct list_head list; +}; + struct radeon_device { struct device *dev; struct drm_device *ddev; @@ -2489,6 +2497,25 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; + struct radeon_bo *rick; + uint64_t rick_gpu; + void *rick_cpu; + uint64_t fb_gpu; + + // reading back shader code for debugging + struct radeon_bo* shader_read_bo; + uint64_t shader_read_gpu; + void* shader_read_cpu; + + // needed because of weird stuff + int numFSuses; + + // tracking moved BOs to move them back after CS execution + struct radeon_bo** moved_bos; // array of pointers to the BOs that were moved + int nMovedBos; // number of BOs moved (determines size of array) + struct mutex move_bos_mutex; + struct list_head move_bo_head; + bool trackMoves; // enables or disables tracking of the BO moves to avoid tracking while moving the BOs back after CS execution }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 9ed2b2700e0a56..8ad893c91aa2be 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -663,6 +663,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_device *rdev = dev->dev_private; struct radeon_cs_parser parser; int r; + INIT_LIST_HEAD(&rdev->move_bo_head); down_read(&rdev->exclusive_lock); if (!rdev->accel_working) { @@ -676,6 +677,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = -EAGAIN; return r; } + rdev->trackMoves = true; /* initialize parser */ memset(&parser, 0, sizeof(struct 
radeon_cs_parser)); parser.filp = filp; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 666c3cd9aba913..245d01b5f3df1b 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -582,7 +582,7 @@ int radeon_wb_init(struct radeon_device *rdev) if (rdev->wb.wb_obj == NULL) { r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, + RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->wb.wb_obj); if (r) { dev_warn(rdev->dev, "(%d) create WB bo failed\n", r); @@ -593,7 +593,7 @@ int radeon_wb_init(struct radeon_device *rdev) radeon_wb_fini(rdev); return r; } - r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT, + r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_VRAM, &rdev->wb.gpu_addr); if (r) { radeon_bo_unreserve(rdev->wb.wb_obj); @@ -1622,6 +1622,9 @@ int radeon_device_init(struct radeon_device *rdev, else DRM_INFO("radeon: acceleration disabled, skipping benchmarks\n"); } + + mutex_init(&rdev->move_bos_mutex); + return 0; failed: diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 7165c67ec45d1f..2557884a88f06c 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -189,7 +189,7 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, /* Only 27 bit offset for legacy CRTC */ ret = radeon_bo_pin_restricted(rbo, RADEON_GEM_DOMAIN_VRAM, ASIC_IS_AVIVO(rdev) ? 
0 : 1 << 27, - NULL); + &rdev->fb_gpu); if (ret) { radeon_bo_unreserve(rbo); goto out_unref; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 04109a2a6fd760..04e7ccb581569d 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -320,6 +320,23 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, return 0; } +/** + * syncs all bound pages for the card (workaround for incoherent systems) + * + */ +void radeon_gart_sync_all_for_device(struct radeon_device *rdev){ + int i; + printk("syncing all GART pages for device\n"); + for (i = 0; i < rdev->gart.num_gpu_pages; i++){ // loop over all gpu pages + if(rdev->gart.pages_entry[i] == rdev->dummy_page.entry){ + continue; // entry is just the dummy page, so it can be ignored + } + dma_sync_single_for_device(rdev->dev, rdev->gart.pages_entry[i] & 0xFFFFFFFFFFFFF000ULL, 4096, DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(rdev->dev, rdev->gart.pages_entry[i] & 0xFFFFFFFFFFFFF000ULL, 4096, DMA_BIDIRECTIONAL); + } + +} + /** * radeon_gart_init - init the driver info for managing the gart * diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index a36a4f2c76b097..ccc211f7c71cad 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -330,20 +330,39 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_create *args = data; struct drm_gem_object *gobj; + struct radeon_bo* rbo; uint32_t handle; int r; + u64 size; + __u32 flags; down_read(&rdev->exclusive_lock); /* create a gem object to contain this object in */ + size = args->size; + flags = args->flags; + if(1){ + flags &= ~RADEON_GEM_NO_CPU_ACCESS; + flags &= ~RADEON_GEM_GTT_WC; + flags |= RADEON_GEM_GTT_UC; + + args->initial_domain = RADEON_GEM_DOMAIN_GTT; + } args->size = roundup(args->size, PAGE_SIZE); r = 
radeon_gem_object_create(rdev, args->size, args->alignment, - args->initial_domain, args->flags, + args->initial_domain, flags, false, &gobj); if (r) { up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); return r; } + + rbo = gem_to_radeon_bo(gobj); + if(size == 48){ + // 12 dw, first shader + printk("first shader?\n"); + } + r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_put(gobj); @@ -396,7 +415,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, /* create a gem object to contain this object in */ r = radeon_gem_object_create(rdev, args->size, 0, - RADEON_GEM_DOMAIN_CPU, 0, + RADEON_GEM_DOMAIN_GTT, 0, false, &gobj); if (r) goto handle_lockup; @@ -420,7 +439,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; } - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT); + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); radeon_bo_unreserve(bo); mmap_read_unlock(current->mm); @@ -711,7 +730,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, * otherwise we will endup with broken userspace and we won't be able * to enable this feature without adding new interface */ - invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM; + invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED; if ((args->flags & invalid_flags)) { dev_err(dev->dev, "invalid flags 0x%08X vs 0x%08X\n", args->flags, invalid_flags); @@ -804,9 +823,9 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data, args->value = robj->initial_domain; break; case RADEON_GEM_OP_SET_INITIAL_DOMAIN: - robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_CPU); + robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM);// | + //RADEON_GEM_DOMAIN_GTT | + //RADEON_GEM_DOMAIN_CPU); break; 
default: r = -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 62b116727b4f50..3acdb6fa6f378c 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -30,6 +30,7 @@ #include #include "radeon.h" +#include "evergreend.h" /* * IB @@ -128,6 +129,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; + printk("IB scheduled, dumping %d DWORDs\n",ib->length_dw); + + if (0) { + radeon_ring_lock(rdev,ring,7); + radeon_ring_write(ring,PACKET3(PACKET3_CP_DMA,4)); + radeon_ring_write(ring,lower_32_bits(rdev->rick_gpu)); + radeon_ring_write(ring,upper_32_bits(rdev->rick_gpu) & 0xFF); + radeon_ring_write(ring,lower_32_bits(rdev->fb_gpu)); + radeon_ring_write(ring,upper_32_bits(rdev->fb_gpu) & 0xFF); + radeon_ring_write(ring,(1920*1080*4) & 0xFFFFF); + + radeon_ring_unlock_commit(rdev,ring,false); + printk("DMAd test image to FB\n"); + } + radeon_gart_sync_all_for_device(rdev); + if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ dev_err(rdev->dev, "couldn't schedule ib\n"); @@ -212,7 +229,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev) r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, RADEON_IB_POOL_SIZE*64*1024, RADEON_GPU_PAGE_SIZE, - RADEON_GEM_DOMAIN_GTT, 0); + RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC); } if (r) { return r; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index b33e32f427062e..13ffa93c3505e9 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -184,6 +184,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo->flags = flags; /* PCI GART is always snooped */ + /* Don't be so sure. 
TODO */ if (!(rdev->flags & RADEON_IS_PCIE)) bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 7e207276df374f..0b86054717f62c 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -31,6 +31,7 @@ #include #include "radeon.h" +#include "evergreend.h" /* * Rings @@ -177,6 +178,13 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, ring->nop); } mb(); + + __iowmb(); + dsb(sy); + int numdw; + int i; + numdw = ring->wptr - ring->wptr_old; + /* If we are emitting the HDP flush via MMIO, we need to do it after * all CPU writes to VRAM finished. */ @@ -390,7 +398,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, + RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC, NULL, NULL, &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index e0ddcfc60cdc0a..33544b5f78d002 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -516,6 +516,7 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } + // TODO: Highly suspect. 
if (rbo->flags & RADEON_GEM_GTT_UC) caching = ttm_uncached; else if (rbo->flags & RADEON_GEM_GTT_WC) diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 58a21c1a5f7011..0b62d1f9dc307f 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -651,8 +651,8 @@ uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags) addr |= R600_PTE_READABLE; if (flags & RADEON_GART_PAGE_WRITE) addr |= R600_PTE_WRITEABLE; - if (flags & RADEON_GART_PAGE_SNOOP) - addr |= R600_PTE_SNOOPED; + // if (flags & RADEON_GART_PAGE_SNOOP) // no snooping around + // addr |= R600_PTE_SNOOPED; return addr; } From 5b4656fa4a711f59190fc580ad176d337d380964 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Wed, 13 Apr 2022 22:57:10 +0000 Subject: [PATCH 04/10] Add one last missing change from Coreforge's repo. --- arch/arm64/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b7c81dacabf079..40a9205ab09016 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -87,7 +87,7 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(phys_addr))) return (void __iomem *)__phys_to_virt(phys_addr); - return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), + return __ioremap_caller(phys_addr, size, __pgprot(PROT_DEVICE_nGnRE), __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_cache); From 6b53bfef54265c221e81edfea5d9ef722615c051 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Thu, 14 Apr 2022 19:54:33 +0000 Subject: [PATCH 05/10] Debug info for memset_io_pcie_wb. 
--- drivers/gpu/drm/radeon/evergreen.c | 7 +++ drivers/gpu/drm/radeon/radeon_device.c | 65 +++++++++++++++++--------- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index eeb590d2dec2e7..d8f97ec975c67c 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5029,6 +5029,7 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; } + evergreen_gpu_init(rdev); /* allocate rlc buffers */ @@ -5044,11 +5045,17 @@ static int evergreen_startup(struct radeon_device *rdev) } } + /* TODO: Dying after here currently. */ + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* allocate wb buffer */ r = radeon_wb_init(rdev); if (r) return r; + /* TODO: Dying before here currently. */ + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 245d01b5f3df1b..eeb4e2cc9c3b46 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -541,29 +541,36 @@ void radeon_wb_fini(struct radeon_device *rdev) //memset_io with only 32-bit accesses void memset_io_pcie_wb(volatile void __iomem *dst, int c, size_t count) { - u32 qc = (u8)c; - - qc |= qc << 8; - qc |= qc << 16; - //qc |= qc << 32; - mb(); - while (count && !IS_ALIGNED((unsigned long)dst, 8)) { - __raw_writeb(c, dst); - dst++; - count--; - } - - while (count >= 4) { - __raw_writel(qc, dst); - dst += 4; - count -= 4; - } - - while (count) { - __raw_writeb(c, dst); - dst++; - count--; - } + u32 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + //qc |= qc << 32; + mb(); + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + + 
while (count && !IS_ALIGNED((unsigned long)dst, 8)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } } @@ -610,14 +617,23 @@ int radeon_wb_init(struct radeon_device *rdev) } } + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* clear wb memory */ memset_io_pcie_wb((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* disable event_write fences */ rdev->wb.use_event = false; + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* disabled via module param */ if (radeon_no_wb == 1) { rdev->wb.enabled = false; } else { + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); if (rdev->flags & RADEON_IS_AGP) { /* often unreliable on AGP */ rdev->wb.enabled = false; @@ -625,6 +641,7 @@ int radeon_wb_init(struct radeon_device *rdev) /* often unreliable on pre-r300 */ rdev->wb.enabled = false; } else { + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); rdev->wb.enabled = true; /* event_write fences are only available on r600+ */ if (rdev->family >= CHIP_R600) { @@ -632,12 +649,14 @@ int radeon_wb_init(struct radeon_device *rdev) } } } + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); /* always use writeback/events on NI, APUs */ if (rdev->family >= CHIP_PALM) { rdev->wb.enabled = true; rdev->wb.use_event = true; } + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); dev_info(rdev->dev, "WB %sabled\n", rdev->wb.enabled ? 
"en" : "dis"); return 0; From 2b2f1b9c9774d6955ca2c8ee13809ee26be7c4b6 Mon Sep 17 00:00:00 2001 From: Jeff Geerling Date: Thu, 14 Apr 2022 21:29:11 +0000 Subject: [PATCH 06/10] Use RADEON_GEM_GTT_UC in radeon_ttm_placement_from_domain. --- drivers/gpu/drm/radeon/radeon_object.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 13ffa93c3505e9..b6859869167556 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -109,29 +109,29 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) rbo->placements[c].fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; rbo->placements[c].mem_type = TTM_PL_VRAM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_VRAM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } if (domain & RADEON_GEM_DOMAIN_GTT) { rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_TT; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } if (domain & RADEON_GEM_DOMAIN_CPU) { rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_SYSTEM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } if (!c) { rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_SYSTEM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } rbo->placement.num_placement = c; From d351f5da6ae48107aa31f8e75a9be377686a7e86 Mon Sep 17 00:00:00 2001 From: Coreforge Date: Sun, 1 May 2022 21:33:53 +0200 Subject: [PATCH 07/10] set some memory allocations to uncached for bcm2711 --- drivers/gpu/drm/radeon/r600.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c 
b/drivers/gpu/drm/radeon/r600.c index 23f54a7394a0b1..2ca39a42066993 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3702,7 +3702,7 @@ int r600_irq_init(struct radeon_device *rdev) */ interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ - interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; + interrupt_cntl |= IH_REQ_NONSNOOP_EN; WREG32(INTERRUPT_CNTL, interrupt_cntl); WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 33544b5f78d002..8d2266ea17b22c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -295,7 +295,7 @@ static int radeon_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resourc return -EINVAL; mem->bus.offset += rdev->mc.aper_base; mem->bus.is_iomem = true; - mem->bus.caching = ttm_write_combined; + mem->bus.caching = ttm_uncached; #ifdef __alpha__ /* * Alpha: use bus.addr to hold the ioremap() return, @@ -523,7 +523,7 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo, caching = ttm_write_combined; else caching = ttm_cached; - + caching = ttm_uncached; if (ttm_sg_tt_init(>t->ttm, bo, page_flags, caching)) { kfree(gtt); return NULL; From 7fa710cda49a153d0350fd3d3c105fa29b5d0c93 Mon Sep 17 00:00:00 2001 From: Coreforge Date: Mon, 2 May 2022 21:31:45 +0200 Subject: [PATCH 08/10] removed most almost definitely unneeded changes (untested) --- drivers/gpu/drm/radeon/atom.c | 4 ++-- drivers/gpu/drm/radeon/evergreen.c | 7 ------- drivers/gpu/drm/radeon/evergreen_dma.c | 2 -- drivers/gpu/drm/radeon/radeon.h | 20 -------------------- drivers/gpu/drm/radeon/radeon_cs.c | 2 -- drivers/gpu/drm/radeon/radeon_device.c | 15 --------------- drivers/gpu/drm/radeon/radeon_ib.c | 14 -------------- 7 files changed, 2 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index 
ad4c568c9209be..f15b20da5315c8 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -729,8 +729,8 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg) cjiffies = jiffies; if (time_after(cjiffies, ctx->last_jump_jiffies)) { cjiffies -= ctx->last_jump_jiffies; - if ((jiffies_to_msecs(cjiffies) > 30000)) { - DRM_ERROR("atombios stuck in loop for more than 30secs aborting\n"); + if ((jiffies_to_msecs(cjiffies) > 5000)) { + DRM_ERROR("atombios stuck in loop for more than 5secs aborting\n"); ctx->abort = true; } } else { diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index d8f97ec975c67c..eeb590d2dec2e7 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5029,7 +5029,6 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; } - evergreen_gpu_init(rdev); /* allocate rlc buffers */ @@ -5045,17 +5044,11 @@ static int evergreen_startup(struct radeon_device *rdev) } } - /* TODO: Dying after here currently. */ - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - /* allocate wb buffer */ r = radeon_wb_init(rdev); if (r) return r; - /* TODO: Dying before here currently. 
*/ - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index af9100167fc102..52c79da1ecf571 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -155,8 +155,6 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, radeon_ring_unlock_commit(rdev, ring, false); radeon_sync_free(rdev, &sync, fence); - r600_dma_ring_test(rdev, ring); - return fence; } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3c133e0da0f244..7f8fbf1d09ee23 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2347,10 +2347,6 @@ struct radeon_atcs { typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t); typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); -struct moved_bos_entry{ - struct radeon_bo* bo; - struct list_head list; -}; struct radeon_device { struct device *dev; @@ -2497,25 +2493,9 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; - struct radeon_bo *rick; - uint64_t rick_gpu; - void *rick_cpu; - uint64_t fb_gpu; - - // reading back shader code for debugging - struct radeon_bo* shader_read_bo; - uint64_t shader_read_gpu; - void* shader_read_cpu; // needed because of weird stuff int numFSuses; - - // tracking moved BOs to move them back after CS execution - struct radeon_bo** moved_bos; // array of pointers to the BOs that were moved - int nMovedBos; // number of BOs moved (determines size of array) - struct mutex move_bos_mutex; - struct list_head move_bo_head; - bool trackMoves; // enables or disables tracking of the BO moves to avoid tracking while moving the BOs back after CS execution }; bool radeon_is_px(struct 
drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 8ad893c91aa2be..9ed2b2700e0a56 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -663,7 +663,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_device *rdev = dev->dev_private; struct radeon_cs_parser parser; int r; - INIT_LIST_HEAD(&rdev->move_bo_head); down_read(&rdev->exclusive_lock); if (!rdev->accel_working) { @@ -677,7 +676,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = -EAGAIN; return r; } - rdev->trackMoves = true; /* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index eeb4e2cc9c3b46..6dd41656146709 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -617,23 +617,14 @@ int radeon_wb_init(struct radeon_device *rdev) } } - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - /* clear wb memory */ memset_io_pcie_wb((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); - - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - /* disable event_write fences */ rdev->wb.use_event = false; - - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - /* disabled via module param */ if (radeon_no_wb == 1) { rdev->wb.enabled = false; } else { - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); if (rdev->flags & RADEON_IS_AGP) { /* often unreliable on AGP */ rdev->wb.enabled = false; @@ -641,7 +632,6 @@ int radeon_wb_init(struct radeon_device *rdev) /* often unreliable on pre-r300 */ rdev->wb.enabled = false; } else { - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); rdev->wb.enabled = true; /* event_write fences 
are only available on r600+ */ if (rdev->family >= CHIP_R600) { @@ -649,14 +639,12 @@ int radeon_wb_init(struct radeon_device *rdev) } } } - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); /* always use writeback/events on NI, APUs */ if (rdev->family >= CHIP_PALM) { rdev->wb.enabled = true; rdev->wb.use_event = true; } - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); dev_info(rdev->dev, "WB %sabled\n", rdev->wb.enabled ? "en" : "dis"); return 0; @@ -1641,9 +1629,6 @@ int radeon_device_init(struct radeon_device *rdev, else DRM_INFO("radeon: acceleration disabled, skipping benchmarks\n"); } - - mutex_init(&rdev->move_bos_mutex); - return 0; failed: diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 3acdb6fa6f378c..fecd9d4cac1531 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -129,20 +129,6 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; - printk("IB scheduled, dumping %d DWORDs\n",ib->length_dw); - - if (0) { - radeon_ring_lock(rdev,ring,7); - radeon_ring_write(ring,PACKET3(PACKET3_CP_DMA,4)); - radeon_ring_write(ring,lower_32_bits(rdev->rick_gpu)); - radeon_ring_write(ring,upper_32_bits(rdev->rick_gpu) & 0xFF); - radeon_ring_write(ring,lower_32_bits(rdev->fb_gpu)); - radeon_ring_write(ring,upper_32_bits(rdev->fb_gpu) & 0xFF); - radeon_ring_write(ring,(1920*1080*4) & 0xFFFFF); - - radeon_ring_unlock_commit(rdev,ring,false); - printk("DMAd test image to FB\n"); - } radeon_gart_sync_all_for_device(rdev); if (!ib->length_dw || !ring->ready) { From 33eba463eca325a5fb7dc84b6099ee3e4d36bc90 Mon Sep 17 00:00:00 2001 From: Coreforge Date: Tue, 3 May 2022 14:50:58 +0200 Subject: [PATCH 09/10] changed back some BO domains --- drivers/gpu/drm/radeon/r600.c | 4 ++-- drivers/gpu/drm/radeon/radeon_device.c | 4 ++-- 
drivers/gpu/drm/radeon/radeon_fb.c | 2 +- drivers/gpu/drm/radeon/radeon_gem.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 2ca39a42066993..e80d0fe6f92050 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3484,7 +3484,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) if (rdev->ih.ring_obj == NULL) { r = radeon_bo_create(rdev, rdev->ih.ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, + RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &rdev->ih.ring_obj); if (r) { DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r); @@ -3494,7 +3494,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) if (unlikely(r != 0)) return r; r = radeon_bo_pin(rdev->ih.ring_obj, - RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_DOMAIN_GTT, &rdev->ih.gpu_addr); if (r) { radeon_bo_unreserve(rdev->ih.ring_obj); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 6dd41656146709..10af4b01c64835 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -589,7 +589,7 @@ int radeon_wb_init(struct radeon_device *rdev) if (rdev->wb.wb_obj == NULL) { r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, + RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &rdev->wb.wb_obj); if (r) { dev_warn(rdev->dev, "(%d) create WB bo failed\n", r); @@ -600,7 +600,7 @@ int radeon_wb_init(struct radeon_device *rdev) radeon_wb_fini(rdev); return r; } - r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_VRAM, + r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT, &rdev->wb.gpu_addr); if (r) { radeon_bo_unreserve(rdev->wb.wb_obj); diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 2557884a88f06c..144fdef805a3a9 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -189,7 +189,7 @@ 
static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, /* Only 27 bit offset for legacy CRTC */ ret = radeon_bo_pin_restricted(rbo, RADEON_GEM_DOMAIN_VRAM, ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, - &rdev->fb_gpu); + 0); if (ret) { radeon_bo_unreserve(rbo); goto out_unref; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ccc211f7c71cad..88ecfe93d4961c 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -415,7 +415,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, /* create a gem object to contain this object in */ r = radeon_gem_object_create(rdev, args->size, 0, - RADEON_GEM_DOMAIN_GTT, 0, + RADEON_GEM_DOMAIN_CPU, 0, false, &gobj); if (r) goto handle_lockup; @@ -439,7 +439,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; } - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); radeon_bo_unreserve(bo); mmap_read_unlock(current->mm); From b3134bd0511c2676a96d0d9c018cc035f6877360 Mon Sep 17 00:00:00 2001 From: Coreforge Date: Tue, 3 May 2022 16:13:55 +0200 Subject: [PATCH 10/10] removed the rest of the unneeded changes --- arch/arm64/mm/ioremap.c | 2 +- drivers/gpu/drm/radeon/r600.c | 7 ++--- drivers/gpu/drm/radeon/r600_dma.c | 4 --- drivers/gpu/drm/radeon/radeon.h | 7 ----- drivers/gpu/drm/radeon/radeon_bios.c | 7 +---- drivers/gpu/drm/radeon/radeon_device.c | 38 +------------------------- drivers/gpu/drm/radeon/radeon_fb.c | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 17 ------------ drivers/gpu/drm/radeon/radeon_gem.c | 27 +++--------------- drivers/gpu/drm/radeon/radeon_ib.c | 3 -- drivers/gpu/drm/radeon/radeon_ring.c | 8 ------ 11 files changed, 10 insertions(+), 112 deletions(-) diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 40a9205ab09016..b7c81dacabf079 100644 
--- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -87,7 +87,7 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(phys_addr))) return (void __iomem *)__phys_to_virt(phys_addr); - return __ioremap_caller(phys_addr, size, __pgprot(PROT_DEVICE_nGnRE), + return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_cache); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e80d0fe6f92050..ca3fcae2adb537 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2625,7 +2625,6 @@ u32 r600_gfx_get_rptr(struct radeon_device *rdev, else rptr = RREG32(R600_CP_RB_RPTR); - mb(); //CHANGED return rptr; } @@ -3702,7 +3701,7 @@ int r600_irq_init(struct radeon_device *rdev) */ interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ - interrupt_cntl |= IH_REQ_NONSNOOP_EN; + interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; WREG32(INTERRUPT_CNTL, interrupt_cntl); WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); @@ -4039,10 +4038,8 @@ static u32 r600_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp; - if (rdev->wb.enabled) { + if (rdev->wb.enabled) wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); - mb(); - } else wptr = RREG32(IH_RB_WPTR); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 1f6e86c8268e76..89ca2738c5d4c8 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -57,7 +57,6 @@ uint32_t r600_dma_get_rptr(struct radeon_device *rdev, else rptr = RREG32(DMA_RB_RPTR); - mb(); //CHANGED return (rptr & 0x3fffc) >> 2; } @@ -246,7 +245,6 @@ int r600_dma_ring_test(struct radeon_device *rdev, tmp = 0xCAFEDEAD; rdev->wb.wb[index/4] = cpu_to_le32(tmp); - mb(); //CHANGED r = radeon_ring_lock(rdev, ring, 4); if (r) { DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, 
r); @@ -260,7 +258,6 @@ int r600_dma_ring_test(struct radeon_device *rdev, for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); - mb(); //CHANGED if (tmp == 0xDEADBEEF) break; udelay(1); @@ -382,7 +379,6 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); - mb(); //CHANGED if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7f8fbf1d09ee23..37fbef271db338 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -370,7 +370,6 @@ struct radeon_fence_driver { atomic64_t last_seq; bool initialized, delayed_irq; struct delayed_work lockup_work; - dma_addr_t dma_addr; }; struct radeon_fence { @@ -669,8 +668,6 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, dma_addr_t *dma_addr, uint32_t flags); -void radeon_gart_sync_all_for_device(struct radeon_device *rdev); - /* * GPU MC structures, functions & helpers */ @@ -2347,7 +2344,6 @@ struct radeon_atcs { typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t); typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); - struct radeon_device { struct device *dev; struct drm_device *ddev; @@ -2493,9 +2489,6 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; - - // needed because of weird stuff - int numFSuses; }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 38a88f3297e890..85cdec1f59ae73 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -81,7 +81,6 @@ static bool radeon_read_bios(struct radeon_device *rdev) { uint8_t __iomem *bios, val1, val2; size_t size; - int pos; rdev->bios = NULL; /* XXX: some cards may return 0 for rom size? 
ddx has a workaround */ @@ -102,11 +101,7 @@ static bool radeon_read_bios(struct radeon_device *rdev) pci_unmap_rom(rdev->pdev, bios); return false; } - //memcpy_fromio(rdev->bios, bios, size); - for(pos = 0;pos < size; pos++){ - //memcpy_fromio(rdev->bios+pos,bios+pos,1); - rdev->bios[pos] = __raw_readb(bios+pos); - } + memcpy_fromio_pcie(rdev->bios, bios, size); pci_unmap_rom(rdev->pdev, bios); return true; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 10af4b01c64835..680415b98ad6e1 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -538,42 +538,6 @@ void radeon_wb_fini(struct radeon_device *rdev) } } -//memset_io with only 32-bit accesses -void memset_io_pcie_wb(volatile void __iomem *dst, int c, size_t count) -{ - u32 qc = (u8)c; - - qc |= qc << 8; - qc |= qc << 16; - //qc |= qc << 32; - mb(); - - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - - while (count && !IS_ALIGNED((unsigned long)dst, 8)) { - __raw_writeb(c, dst); - dst++; - count--; - } - - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - - while (count >= 4) { - __raw_writel(qc, dst); - dst += 4; - count -= 4; - } - - printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); - - while (count) { - __raw_writeb(c, dst); - dst++; - count--; - } -} - - /** * radeon_wb_init- Init Writeback driver info and allocate memory * @@ -618,7 +582,7 @@ int radeon_wb_init(struct radeon_device *rdev) } /* clear wb memory */ - memset_io_pcie_wb((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); + memset_io_pcie((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); /* disable event_write fences */ rdev->wb.use_event = false; /* disabled via module param */ diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 144fdef805a3a9..7165c67ec45d1f 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ 
b/drivers/gpu/drm/radeon/radeon_fb.c @@ -189,7 +189,7 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, /* Only 27 bit offset for legacy CRTC */ ret = radeon_bo_pin_restricted(rbo, RADEON_GEM_DOMAIN_VRAM, ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, - 0); + NULL); if (ret) { radeon_bo_unreserve(rbo); goto out_unref; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 04e7ccb581569d..04109a2a6fd760 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -320,23 +320,6 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, return 0; } -/** - * syncs all bound pages for the card (workaround for incoherent systems) - * - */ -void radeon_gart_sync_all_for_device(struct radeon_device *rdev){ - int i; - printk("syncing all GART pages for device\n"); - for (i = 0; i < rdev->gart.num_gpu_pages; i++){ // loop over all gpu pages - if(rdev->gart.pages_entry[i] == rdev->dummy_page.entry){ - continue; // entry is just the dummy page, so it can be ignored - } - dma_sync_single_for_device(rdev->dev, rdev->gart.pages_entry[i] & 0xFFFFFFFFFFFFF000ULL, 4096, DMA_BIDIRECTIONAL); - dma_sync_single_for_cpu(rdev->dev, rdev->gart.pages_entry[i] & 0xFFFFFFFFFFFFF000ULL, 4096, DMA_BIDIRECTIONAL); - } - -} - /** * radeon_gart_init - init the driver info for managing the gart * diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 88ecfe93d4961c..7ec6a9140b12be 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -330,39 +330,20 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_create *args = data; struct drm_gem_object *gobj; - struct radeon_bo* rbo; uint32_t handle; int r; - u64 size; - __u32 flags; down_read(&rdev->exclusive_lock); /* create a gem object to contain this object in */ - size = args->size; - flags = 
args->flags; - if(1){ - flags &= ~RADEON_GEM_NO_CPU_ACCESS; - flags &= ~RADEON_GEM_GTT_WC; - flags |= RADEON_GEM_GTT_UC; - - args->initial_domain = RADEON_GEM_DOMAIN_GTT; - } args->size = roundup(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, args->alignment, - args->initial_domain, flags, + args->initial_domain, args->flags, false, &gobj); if (r) { up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); return r; } - - rbo = gem_to_radeon_bo(gobj); - if(size == 48){ - // 12 dw, first shader - printk("first shader?\n"); - } - r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_put(gobj); @@ -823,9 +804,9 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data, args->value = robj->initial_domain; break; case RADEON_GEM_OP_SET_INITIAL_DOMAIN: - robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM);// | - //RADEON_GEM_DOMAIN_GTT | - //RADEON_GEM_DOMAIN_CPU); + robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM | + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_CPU); break; default: r = -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index fecd9d4cac1531..342cbe4479a5ec 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -30,7 +30,6 @@ #include #include "radeon.h" -#include "evergreend.h" /* * IB @@ -129,8 +128,6 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; - radeon_gart_sync_all_for_device(rdev); - if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. 
*/ dev_err(rdev->dev, "couldn't schedule ib\n"); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 0b86054717f62c..4eab7460a9ce62 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -31,7 +31,6 @@ #include #include "radeon.h" -#include "evergreend.h" /* * Rings @@ -178,13 +177,6 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, ring->nop); } mb(); - - __iowmb(); - dsb(sy); - int numdw; - int i; - numdw = ring->wptr - ring->wptr_old; - /* If we are emitting the HDP flush via MMIO, we need to do it after * all CPU writes to VRAM finished. */