drm/radeon, fbdev: avoid 64-bit MMIO accesses (Raspberry Pi 4 PCIe workaround)

Adds memcpy_fromio_pcie(), memcpy_toio_pcie() and memset_io_pcie(), which
only issue 8-bit and 32-bit accesses, and uses them for the radeon BIOS
copies, the writeback page clear, the fbdev buffer clear and the UVD firmware
upload. 64-bit GART page-table and cfb framebuffer accesses are split into two
32-bit accesses. Buffer objects and TTM mappings are forced to uncached, and
GART/VM page snooping is disabled.

Notes:
 - memcpy_toio_pcie() loads the source 32 bits at a time so that it never
   reads past the end of the source buffer.
 - fb_readl_readq() widens the high word to u64 before shifting it by 32.

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 895776c421d4de..37fbef271db338 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2237,6 +2237,13 @@ void radeon_agp_disable(struct radeon_device *rdev);
 int radeon_asic_init(struct radeon_device *rdev);
 
 
+/*
+ * memcpy_io and memset_io functions that work on a raspberry pi 4
+ */
+void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count);
+void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count);
+void memset_io_pcie(volatile void __iomem *dst, int c, size_t count);
+
 /*
  * IOCTL.
  */
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 33121655d50bbe..85cdec1f59ae73 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -72,7 +72,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev)
 		iounmap(bios);
 		return false;
 	}
-	memcpy_fromio(rdev->bios, bios, size);
+	memcpy_fromio_pcie(rdev->bios, bios, size);
 	iounmap(bios);
 	return true;
 }
@@ -101,7 +101,7 @@ static bool radeon_read_bios(struct radeon_device *rdev)
 		pci_unmap_rom(rdev->pdev, bios);
 		return false;
 	}
-	memcpy_fromio(rdev->bios, bios, size);
+	memcpy_fromio_pcie(rdev->bios, bios, size);
 	pci_unmap_rom(rdev->pdev, bios);
 	return true;
 }
@@ -125,7 +125,7 @@ static bool radeon_read_platform_bios(struct radeon_device *rdev)
 	if (!bios)
 		goto free_bios;
 
-	memcpy_fromio(rdev->bios, bios, romlen);
+	memcpy_fromio_pcie(rdev->bios, bios, romlen);
 	iounmap(bios);
 
 	if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 4f0fbf66743160..680415b98ad6e1 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -112,6 +112,98 @@ static const char radeon_family_name[][16] = {
 	"LAST",
 };
 
+/**
+ * DOC: memcpy_fromio_pcie
+ *
+ * like memcpy_fromio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4
+ */
+
+void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count)
+{
+	while (count && !IS_ALIGNED((unsigned long)from, 8)) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+
+	while (count >= 4) {
+		*(u32 *)to = __raw_readl(from);
+		from += 4;
+		to += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+}
+
+/**
+ * DOC: memcpy_toio_pcie
+ *
+ * like memcpy_toio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4
+ */
+
+void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count)
+{
+	while (count && !IS_ALIGNED((unsigned long)to, 8)) {
+		__raw_writeb(*(const u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+
+	while (count >= 4) {
+		__raw_writel(*(const u32 *)from, to);
+		from += 4;
+		to += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		__raw_writeb(*(const u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+}
+
+/**
+ * DOC: memset_io_pcie
+ *
+ * like memset_io, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4
+ */
+
+void memset_io_pcie(volatile void __iomem *dst, int c, size_t count)
+{
+	u32 qc = (u8)c;
+
+	qc |= qc << 8;
+	qc |= qc << 16;
+
+	while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
+	}
+
+	while (count >= 4) {
+		__raw_writel(qc, dst);
+		dst += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
+	}
+}
+
 #if defined(CONFIG_VGA_SWITCHEROO)
 bool radeon_has_atpx_dgpu_power_cntl(void);
 bool radeon_is_atpx_hybrid(void);
@@ -490,7 +582,7 @@ int radeon_wb_init(struct radeon_device *rdev)
 	}
 
 	/* clear wb memory */
-	memset((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE);
+	memset_io_pcie((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE);
 	/* disable event_write fences */
 	rdev->wb.use_event = false;
 	/* disabled via module param */
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index ca382fbf7a86a3..7165c67ec45d1f 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -262,7 +262,7 @@ static int radeonfb_create(struct drm_fb_helper *helper,
 	/* setup helper */
 	rfbdev->helper.fb = fb;
 
-	memset_io(rbo->kptr, 0x0, radeon_bo_size(rbo));
+	memset_io_pcie(rbo->kptr, 0x0, radeon_bo_size(rbo));
 
 	info->fbops = &radeonfb_ops;
 
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index a36a4f2c76b097..7ec6a9140b12be 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -711,7 +711,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 	 * otherwise we will endup with broken userspace and we won't be able
 	 * to enable this feature without adding new interface
 	 */
-	invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM;
+	invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED;
 	if ((args->flags & invalid_flags)) {
 		dev_err(dev->dev, "invalid flags 0x%08X vs 0x%08X\n",
 			args->flags, invalid_flags);
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 62b116727b4f50..342cbe4479a5ec 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -212,7 +212,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
 		r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
 					      RADEON_IB_POOL_SIZE*64*1024,
 					      RADEON_GPU_PAGE_SIZE,
-					      RADEON_GEM_DOMAIN_GTT, 0);
+					      RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC);
 	}
 	if (r) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 56ede9d63b12c5..b6859869167556 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -109,29 +109,29 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 			rbo->placements[c].fpfn =
 				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 			rbo->placements[c].mem_type = TTM_PL_VRAM;
-			rbo->placements[c++].flags = 0;
+			rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
 		}
 
 		rbo->placements[c].fpfn = 0;
 		rbo->placements[c].mem_type = TTM_PL_VRAM;
-		rbo->placements[c++].flags = 0;
+		rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
 	}
 
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
 		rbo->placements[c].fpfn = 0;
 		rbo->placements[c].mem_type = TTM_PL_TT;
-		rbo->placements[c++].flags = 0;
+		rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
 	}
 
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
 		rbo->placements[c].fpfn = 0;
 		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
-		rbo->placements[c++].flags = 0;
+		rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
 	}
 	if (!c) {
 		rbo->placements[c].fpfn = 0;
 		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
-		rbo->placements[c++].flags = 0;
+		rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
 	}
 
 	rbo->placement.num_placement = c;
@@ -184,6 +184,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 
 	bo->flags = flags;
 	/* PCI GART is always snooped */
+	/* Don't be so sure. TODO */
 	if (!(rdev->flags & RADEON_IS_PCIE))
 		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
 
@@ -220,6 +221,10 @@ int radeon_bo_create(struct radeon_device *rdev,
 		bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
+	//Write combining may cause issues on the raspberry pi
+	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+	bo->flags |= RADEON_GEM_GTT_UC;
+
 	radeon_ttm_placement_from_domain(bo, domain);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 7e207276df374f..4eab7460a9ce62 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -390,7 +390,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
+				     RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC, NULL,
 				     NULL, &ring->ring_obj);
 		if (r) {
 			dev_err(rdev->dev, "(%d) ring create failed\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index a06d4cc2fb1c43..8d2266ea17b22c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -295,7 +295,7 @@ static int radeon_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resourc
 			return -EINVAL;
 		mem->bus.offset += rdev->mc.aper_base;
 		mem->bus.is_iomem = true;
-		mem->bus.caching = ttm_write_combined;
+		mem->bus.caching = ttm_uncached;
 #ifdef __alpha__
 		/*
 		 * Alpha: use bus.addr to hold the ioremap() return,
@@ -456,7 +456,8 @@ static int radeon_ttm_backend_bind(struct ttm_device *bdev,
 		     ttm->num_pages, bo_mem, ttm);
 	}
 	if (ttm->caching == ttm_cached)
-		flags |= RADEON_GART_PAGE_SNOOP;
+		printk("TTM Page would've been snooped\n");
+		// flags |= RADEON_GART_PAGE_SNOOP;
 	r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages,
 			     ttm->pages, gtt->ttm.dma_address, flags);
 	if (r) {
@@ -515,13 +516,14 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
 		return NULL;
 	}
 
+	// TODO: Highly suspect.
 	if (rbo->flags & RADEON_GEM_GTT_UC)
 		caching = ttm_uncached;
 	else if (rbo->flags & RADEON_GEM_GTT_WC)
 		caching = ttm_write_combined;
 	else
 		caching = ttm_cached;
-
+	caching = ttm_uncached;
 	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
 		kfree(gtt);
 		return NULL;
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 2ea86919d95364..e3804be3680d8c 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -288,7 +288,7 @@ int radeon_uvd_resume(struct radeon_device *rdev)
 	if (rdev->uvd.vcpu_bo == NULL)
 		return -EINVAL;
 
-	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
+	memcpy_toio_pcie((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
 
 	size = radeon_bo_size(rdev->uvd.vcpu_bo);
 	size -= rdev->uvd_fw->size;
@@ -296,7 +296,7 @@ int radeon_uvd_resume(struct radeon_device *rdev)
 	ptr = rdev->uvd.cpu_addr;
 	ptr += rdev->uvd_fw->size;
 
-	memset_io((void __iomem *)ptr, 0, size);
+	memset_io_pcie((void __iomem *)ptr, 0, size);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index bb53016f3138a2..a5065df377e7d6 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -952,7 +952,8 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		if (mem->mem_type == TTM_PL_TT) {
 			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
 			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
-				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;
+				printk("VM Page would've been set to snooped\n");
+				//bo_va->flags |= RADEON_VM_PAGE_SNOOPED;
 
 		} else {
 			addr += rdev->vm_manager.vram_base_offset;
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index b87dd551e93977..0b62d1f9dc307f 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -651,8 +651,8 @@ uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags)
 		addr |= R600_PTE_READABLE;
 	if (flags & RADEON_GART_PAGE_WRITE)
 		addr |= R600_PTE_WRITEABLE;
-	if (flags & RADEON_GART_PAGE_SNOOP)
-		addr |= R600_PTE_SNOOPED;
+	// if (flags & RADEON_GART_PAGE_SNOOP) // no snooping around
+	// 	addr |= R600_PTE_SNOOPED;
 	return addr;
 }
 
@@ -660,7 +660,10 @@ void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
 			 uint64_t entry)
 {
 	void __iomem *ptr = (void *)rdev->gart.ptr;
-	writeq(entry, ptr + (i * 8));
+	uint32_t high = entry >> 32;
+	writel(entry,ptr+(i*8));
+	writel(high,ptr + (i*8) + 4);
+	//writeq(entry, ptr + (i * 8));
 }
 
 int rs600_irq_set(struct radeon_device *rdev)
diff --git a/drivers/video/fbdev/core/cfbcopyarea.c b/drivers/video/fbdev/core/cfbcopyarea.c
index 6d4bfeecee3508..3e2369ff697fa1 100644
--- a/drivers/video/fbdev/core/cfbcopyarea.c
+++ b/drivers/video/fbdev/core/cfbcopyarea.c
@@ -34,10 +34,22 @@
 #  define FB_WRITEL fb_writel
 #  define FB_READL  fb_readl
 #else
-#  define FB_WRITEL fb_writeq
-#  define FB_READL  fb_readq
+#  define FB_WRITEL fb_writel_writeq
+#  define FB_READL  fb_readl_readq
 #endif
 
+static void fb_writel_writeq(u64 val, volatile void __iomem *addr){
+	fb_writel(val,addr);
+	fb_writel(val >> 32, addr + 4);
+}
+
+static u64 fb_readl_readq(volatile void __iomem *addr){
+	u64 val;
+	val = fb_readl(addr);
+	val |= (u64)fb_readl(addr + 4) << 32;
+	return val;
+}
+
     /*
      *  Generic bitwise copy algorithm
      */
diff --git a/drivers/video/fbdev/core/cfbfillrect.c b/drivers/video/fbdev/core/cfbfillrect.c
index ba9f58b2a5e86e..8dda2665ec7f2e 100644
--- a/drivers/video/fbdev/core/cfbfillrect.c
+++ b/drivers/video/fbdev/core/cfbfillrect.c
@@ -23,10 +23,22 @@
 #  define FB_WRITEL fb_writel
 #  define FB_READL  fb_readl
 #else
-#  define FB_WRITEL fb_writeq
-#  define FB_READL  fb_readq
+#  define FB_WRITEL fb_writel_writeq
+#  define FB_READL  fb_readl_readq
 #endif
 
+static void fb_writel_writeq(u64 val, volatile void __iomem *addr){
+	fb_writel(val,addr);
+	fb_writel(val >> 32, addr + 4);
+}
+
+static u64 fb_readl_readq(volatile void __iomem *addr){
+	u64 val;
+	val = fb_readl(addr);
+	val |= (u64)fb_readl(addr + 4) << 32;
+	return val;
+}
+
     /*
      *  Aligned pattern fill using 32/64-bit memory accesses
      */