Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accelerate framebuffer copyarea function with DMA and add ioctl to access it from userspace #313

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 159 additions & 3 deletions drivers/video/bcm2708_fb.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <linux/printk.h>
#include <linux/console.h>

#include <mach/dma.h>
#include <mach/platform.h>
#include <mach/vcio.h>

Expand Down Expand Up @@ -64,6 +65,11 @@ struct bcm2708_fb {
struct fbinfo_s *info;
dma_addr_t dma;
u32 cmap[16];
int dma_chan;
int dma_irq;
void __iomem *dma_chan_base;
void *cb_base; /* DMA control blocks */
dma_addr_t cb_handle;
};

#define to_bcm2708(info) container_of(info, struct bcm2708_fb, fb)
Expand Down Expand Up @@ -312,11 +318,133 @@ static void bcm2708_fb_fillrect(struct fb_info *info,
cfb_fillrect(info, rect);
}

/*
 * Fill in one DMA control block describing a 2D (TDMODE) transfer.
 *
 * @cb:         control block to populate; cb->next is NOT written here and
 *              must be set by the caller
 * @burst_size: value handed to BCM2708_DMA_BURST()
 * @dst:        DMA address of the first destination byte
 * @dst_stride: byte distance between the starts of consecutive destination
 *              rows; may be negative to walk rows backwards
 * @src:        DMA address of the first source byte
 * @src_stride: byte distance between the starts of consecutive source rows;
 *              may be negative
 * @w:          number of bytes in each row
 * @h:          number of rows
 */
static void set_dma_cb(struct bcm2708_dma_cb *cb,
		       int burst_size,
		       dma_addr_t dst,
		       int dst_stride,
		       dma_addr_t src,
		       int src_stride,
		       int w,
		       int h)
{
	/*
	 * The stride field stores the gap between the end of one row and the
	 * start of the next, i.e. "stride - w". The gaps are negative when
	 * rows are processed bottom to top, so do the bit packing in unsigned
	 * arithmetic: left-shifting a negative int is undefined in ISO C.
	 */
	u32 dst_gap = (u32)(dst_stride - w);
	u32 src_gap = (u32)(src_stride - w);

	cb->info = BCM2708_DMA_BURST(burst_size) | BCM2708_DMA_S_WIDTH |
		   BCM2708_DMA_S_INC | BCM2708_DMA_D_WIDTH |
		   BCM2708_DMA_D_INC | BCM2708_DMA_TDMODE;
	cb->dst = dst;
	cb->src = src;
	/*
	 * This is not really obvious from the DMA documentation,
	 * but the top 16 bits must be programmed to "height - 1"
	 * and not "height" in 2D mode.
	 */
	cb->length = ((u32)(h - 1) << 16) | w;
	/* destination gap in the upper half, source gap in the lower half */
	cb->stride = (dst_gap << 16) | (u16)src_gap;
	cb->pad[0] = 0;
	cb->pad[1] = 0;
}

/*
 * Copy a rectangle inside the framebuffer using the DMA engine.
 *
 * Requests that fail the sanity checks below are handed to cfb_copyarea()
 * instead. Otherwise a chain of DMA control blocks is built at fb->cb_base,
 * started with bcm_dma_start() and bcm_dma_wait_idle() is called before
 * returning.
 *
 * The software path is a fallback only: running cfb_copyarea() in addition
 * to the DMA path would apply an overlapping copy twice.
 */
static void bcm2708_fb_copyarea(struct fb_info *info,
				const struct fb_copyarea *region)
{
	struct bcm2708_fb *fb = to_bcm2708(info);
	struct bcm2708_dma_cb *cb = fb->cb_base;
	/* round the pixel depth up to whole bytes */
	int bytes_per_pixel = (info->var.bits_per_pixel + 7) >> 3;
	/* Channel 0 supports larger bursts and is a bit faster */
	int burst_size = (fb->dma_chan == 0) ? 10 : 4;

	/* Fallback to cfb_copyarea() if we don't like something */
	if (bytes_per_pixel > 4 ||
	    info->var.xres > 1920 || info->var.yres > 1200 ||
	    region->width <= 0 || region->width > info->var.xres ||
	    region->height <= 0 || region->height > info->var.yres ||
	    region->sx < 0 || region->sx >= info->var.xres ||
	    region->sy < 0 || region->sy >= info->var.yres ||
	    region->dx < 0 || region->dx >= info->var.xres ||
	    region->dy < 0 || region->dy >= info->var.yres ||
	    region->sx + region->width > info->var.xres ||
	    region->dx + region->width > info->var.xres ||
	    region->sy + region->height > info->var.yres ||
	    region->dy + region->height > info->var.yres) {
		cfb_copyarea(info, region);
		return;
	}

	if (region->dy == region->sy && region->dx > region->sx) {
		/*
		 * A difficult case of overlapped copy. Because DMA can't
		 * copy individual scanlines in backwards direction, we need
		 * two-pass processing. We do it by programming a chain of dma
		 * control blocks in the first 16K part of the buffer and use
		 * the remaining 48K as the intermediate temporary scratch
		 * buffer. The buffer size is sufficient to handle up to
		 * 1920x1200 resolution at 32bpp pixel depth.
		 */
		int y;
		dma_addr_t control_block_pa = fb->cb_handle;
		dma_addr_t scratchbuf = fb->cb_handle + 16 * 1024;
		int scanline_size = bytes_per_pixel * region->width;
		/* how many scanlines fit in the 48K scratch area */
		int scanlines_per_cb = (64 * 1024 - 16 * 1024) / scanline_size;

		for (y = 0; y < region->height; y += scanlines_per_cb) {
			dma_addr_t src =
				fb->fb.fix.smem_start +
				bytes_per_pixel * region->sx +
				(region->sy + y) * fb->fb.fix.line_length;
			dma_addr_t dst =
				fb->fb.fix.smem_start +
				bytes_per_pixel * region->dx +
				(region->dy + y) * fb->fb.fix.line_length;

			/* the last band may be shorter than a full one */
			if (region->height - y < scanlines_per_cb)
				scanlines_per_cb = region->height - y;

			/* pass 1: framebuffer -> tightly packed scratch */
			set_dma_cb(cb, burst_size, scratchbuf, scanline_size,
				   src, fb->fb.fix.line_length,
				   scanline_size, scanlines_per_cb);
			control_block_pa += sizeof(struct bcm2708_dma_cb);
			cb->next = control_block_pa;
			cb++;

			/* pass 2: scratch -> framebuffer destination */
			set_dma_cb(cb, burst_size, dst, fb->fb.fix.line_length,
				   scratchbuf, scanline_size,
				   scanline_size, scanlines_per_cb);
			control_block_pa += sizeof(struct bcm2708_dma_cb);
			cb->next = control_block_pa;
			cb++;
		}
		/* move the pointer back to the last dma control block */
		cb--;
	} else {
		/* A single dma control block is enough. */
		int sy, dy, stride;

		if (region->dy <= region->sy) {
			/* processing from top to bottom */
			dy = region->dy;
			sy = region->sy;
			stride = fb->fb.fix.line_length;
		} else {
			/* processing from bottom to top */
			dy = region->dy + region->height - 1;
			sy = region->sy + region->height - 1;
			stride = -fb->fb.fix.line_length;
		}
		set_dma_cb(cb, burst_size,
			   fb->fb.fix.smem_start + dy * fb->fb.fix.line_length +
			   bytes_per_pixel * region->dx,
			   stride,
			   fb->fb.fix.smem_start + sy * fb->fb.fix.line_length +
			   bytes_per_pixel * region->sx,
			   stride,
			   region->width * bytes_per_pixel,
			   region->height);
	}

	/* end of dma control blocks chain */
	cb->next = 0;

	bcm_dma_start(fb->dma_chan_base, fb->cb_handle);
	bcm_dma_wait_idle(fb->dma_chan_base);
}

static void bcm2708_fb_imageblit(struct fb_info *info,
Expand Down Expand Up @@ -359,7 +487,7 @@ static int bcm2708_fb_register(struct bcm2708_fb *fb)
fb->dma = dma;
}
fb->fb.fbops = &bcm2708_fb_ops;
fb->fb.flags = FBINFO_FLAG_DEFAULT;
fb->fb.flags = FBINFO_FLAG_DEFAULT | FBINFO_HWACCEL_COPYAREA;
fb->fb.pseudo_palette = fb->cmap;

strncpy(fb->fb.fix.id, bcm2708_name, sizeof(fb->fb.fix.id));
Expand Down Expand Up @@ -424,6 +552,28 @@ static int bcm2708_fb_probe(struct platform_device *dev)
}
memset(fb, 0, sizeof(struct bcm2708_fb));

fb->cb_base = dma_alloc_writecombine(&dev->dev, SZ_64K,
&fb->cb_handle, GFP_KERNEL);
if (!fb->cb_base) {
dev_err(&dev->dev, "cannot allocate DMA CBs\n");
ret = -ENOMEM;
goto free_fb;
}

pr_info("BCM2708FB: allocated DMA memory %08x\n",
fb->cb_handle);

ret = bcm_dma_chan_alloc(BCM_DMA_FEATURE_FAST,
&fb->dma_chan_base, &fb->dma_irq);
if (ret < 0) {
dev_err(&dev->dev, "couldn't allocate a DMA channel\n");
goto free_cb;
}
fb->dma_chan = ret;

pr_info("BCM2708FB: allocated DMA channel %d @ %p\n",
fb->dma_chan, fb->dma_chan_base);

fb->dev = dev;

ret = bcm2708_fb_register(fb);
Expand All @@ -432,6 +582,9 @@ static int bcm2708_fb_probe(struct platform_device *dev)
goto out;
}

free_cb:
dma_free_writecombine(&dev->dev, SZ_64K, fb->cb_base, fb->cb_handle);
free_fb:
kfree(fb);
free_region:
dev_err(&dev->dev, "probe failed, err %d\n", ret);
Expand All @@ -449,6 +602,9 @@ static int bcm2708_fb_remove(struct platform_device *dev)
iounmap(fb->fb.screen_base);
unregister_framebuffer(&fb->fb);

dma_free_writecombine(&dev->dev, SZ_64K, fb->cb_base, fb->cb_handle);
bcm_dma_chan_free(fb->dma_chan);

dma_free_coherent(NULL, PAGE_ALIGN(sizeof(*fb->info)), (void *)fb->info,
fb->dma);
kfree(fb);
Expand Down
30 changes: 30 additions & 0 deletions drivers/video/fbmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -1074,6 +1074,25 @@ fb_blank(struct fb_info *info, int blank)
return ret;
}

/*
 * Validate a copy rectangle received from userspace and hand it to the
 * driver's fb_copyarea hook while holding the fb_info lock.
 *
 * Returns 0 on success, -ENODEV if lock_fb_info() fails, or -EINVAL if the
 * source or destination rectangle does not fit inside
 * info->var.xres x info->var.yres.
 */
static int fb_copyarea_user(struct fb_info *info,
			    struct fb_copyarea *copy)
{
	int ret = 0;

	if (!lock_fb_info(info))
		return -ENODEV;
	/*
	 * The coordinates come straight from userspace, so a sum such as
	 * "dx + width" can wrap around and slip past a naive comparison.
	 * Bound the size first, then compare each origin against the space
	 * that is left; the subtraction cannot underflow once the size is
	 * known to fit.
	 */
	if (copy->width > info->var.xres ||
	    copy->height > info->var.yres ||
	    copy->dx > info->var.xres - copy->width ||
	    copy->sx > info->var.xres - copy->width ||
	    copy->dy > info->var.yres - copy->height ||
	    copy->sy > info->var.yres - copy->height) {
		ret = -EINVAL;
		goto out;
	}
	info->fbops->fb_copyarea(info, copy);
out:
	unlock_fb_info(info);
	return ret;
}

static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
unsigned long arg)
{
Expand All @@ -1084,6 +1103,7 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
struct fb_cmap cmap_from;
struct fb_cmap_user cmap;
struct fb_event event;
struct fb_copyarea copy;
void __user *argp = (void __user *)arg;
long ret = 0;

Expand Down Expand Up @@ -1191,6 +1211,15 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
console_unlock();
unlock_fb_info(info);
break;
case FBIOCOPYAREA:
if (info->flags & FBINFO_HWACCEL_COPYAREA) {
/* only provide this ioctl if it is accelerated */
if (copy_from_user(&copy, argp, sizeof(copy)))
return -EFAULT;
ret = fb_copyarea_user(info, &copy);
break;
}
/* fall through */
default:
if (!lock_fb_info(info))
return -ENODEV;
Expand Down Expand Up @@ -1343,6 +1372,7 @@ static long fb_compat_ioctl(struct file *file, unsigned int cmd,
case FBIOPAN_DISPLAY:
case FBIOGET_CON2FBMAP:
case FBIOPUT_CON2FBMAP:
case FBIOCOPYAREA:
arg = (unsigned long) compat_ptr(arg);
case FBIOBLANK:
ret = do_fb_ioctl(info, cmd, arg);
Expand Down
5 changes: 5 additions & 0 deletions include/linux/fb.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@
#define FBIOPUT_MODEINFO 0x4617
#define FBIOGET_DISPINFO 0x4618
#define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32)
/*
* HACK: use 'z' in order not to clash with any other ioctl numbers which might
* be concurrently added to the mainline kernel
*/
#define FBIOCOPYAREA _IOW('z', 0x21, struct fb_copyarea)

#define FB_TYPE_PACKED_PIXELS 0 /* Packed Pixels */
#define FB_TYPE_PLANES 1 /* Non interleaved planes */
Expand Down