Skip to content

Commit

Permalink
Optimize intra reference building
Browse files Browse the repository at this point in the history
-Add function with reduced logic for the most common case
  • Loading branch information
Arizer committed Nov 16, 2016
1 parent 02c9e37 commit c31207e
Showing 1 changed file with 171 additions and 41 deletions.
212 changes: 171 additions & 41 deletions src/intra.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,45 @@
#include "transform.h"
#include "videoframe.h"

// Lookup table: number of intra reference pixels to take from above a
// prediction unit, based on the unit's coordinate within an LCU.
// Indexed as [y / 4][x / 4], where x and y are luma coordinates relative to
// the LCU origin. Values are in luma pixels; the lookup site shifts the value
// right by one for chroma.
// Generated by "tools/generate_ref_pixel_tables.py".
static const uint8_t num_ref_pixels_top[16][16] = {
  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, // row  0
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row  1
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 }, // row  2
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row  3
  { 32, 28, 24, 20, 16, 12,  8,  4, 32, 28, 24, 20, 16, 12,  8,  4 }, // row  4
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row  5
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 }, // row  6
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row  7
  { 64, 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12,  8,  4 }, // row  8
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row  9
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 }, // row 10
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row 11
  { 32, 28, 24, 20, 16, 12,  8,  4, 32, 28, 24, 20, 16, 12,  8,  4 }, // row 12
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row 13
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 }, // row 14
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }  // row 15
};
// Lookup table: number of intra reference pixels to take from the left of a
// prediction unit, based on the unit's coordinate within an LCU.
// Indexed as [y / 4][x / 4], where x and y are luma coordinates relative to
// the LCU origin. Values are in luma pixels; the lookup site shifts the value
// right by one for chroma.
static const uint8_t num_ref_pixels_left[16][16] = {
  { 64,  4,  8,  4, 16,  4,  8,  4, 32,  4,  8,  4, 16,  4,  8,  4 }, // row  0
  { 60,  4,  4,  4, 12,  4,  4,  4, 28,  4,  4,  4, 12,  4,  4,  4 }, // row  1
  { 56,  4,  8,  4,  8,  4,  8,  4, 24,  4,  8,  4,  8,  4,  8,  4 }, // row  2
  { 52,  4,  4,  4,  4,  4,  4,  4, 20,  4,  4,  4,  4,  4,  4,  4 }, // row  3
  { 48,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4 }, // row  4
  { 44,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4 }, // row  5
  { 40,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row  6
  { 36,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4 }, // row  7
  { 32,  4,  8,  4, 16,  4,  8,  4, 32,  4,  8,  4, 16,  4,  8,  4 }, // row  8
  { 28,  4,  4,  4, 12,  4,  4,  4, 28,  4,  4,  4, 12,  4,  4,  4 }, // row  9
  { 24,  4,  8,  4,  8,  4,  8,  4, 24,  4,  8,  4,  8,  4,  8,  4 }, // row 10
  { 20,  4,  4,  4,  4,  4,  4,  4, 20,  4,  4,  4,  4,  4,  4,  4 }, // row 11
  { 16,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4 }, // row 12
  { 12,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4 }, // row 13
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }, // row 14
  {  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4 }  // row 15
};

int8_t kvz_intra_get_dir_luma_predictor(
const uint32_t x,
Expand Down Expand Up @@ -246,7 +285,7 @@ void kvz_intra_predict(
}


void kvz_intra_build_reference(
void kvz_intra_build_reference_any(
const int_fast8_t log2_width,
const color_t color,
const vector2d_t *const luma_px,
Expand All @@ -256,46 +295,6 @@ void kvz_intra_build_reference(
{
assert(log2_width >= 2 && log2_width <= 5);

// Tables for looking up the number of intra reference pixels based on
// prediction units coordinate within an LCU.
// generated by "tools/generate_ref_pixel_tables.py".
static const uint8_t num_ref_pixels_top[16][16] = {
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 32, 28, 24, 20, 16, 12, 8, 4, 32, 28, 24, 20, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 64, 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 32, 28, 24, 20, 16, 12, 8, 4, 32, 28, 24, 20, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }
};
static const uint8_t num_ref_pixels_left[16][16] = {
{ 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 },
{ 60, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 },
{ 56, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 },
{ 52, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 },
{ 48, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 },
{ 44, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 },
{ 40, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 36, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
{ 32, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 },
{ 28, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 },
{ 24, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 },
{ 20, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 },
{ 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 },
{ 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }
};

refs->filtered_initialized = false;
kvz_pixel *out_left_ref = &refs->ref.left[0];
kvz_pixel *out_top_ref = &refs->ref.top[0];
Expand Down Expand Up @@ -410,6 +409,137 @@ void kvz_intra_build_reference(
}
}

// Build the unfiltered intra reference pixels (left column and top row) for
// one block, using reduced logic: the top-left, top and left neighbours are
// read without any availability checks. kvz_intra_build_reference() selects
// this variant only when luma_px->x > 0 and luma_px->y > 0.
//
// log2_width  Log2 of the block width; asserted to be in the range 2..5.
// color       Colour plane; COLOR_Y reads the luma buffers, anything else is
//             treated as chroma (coordinates and counts shifted right by one).
// luma_px     Block position in luma pixels.
// pic_px      Compared against luma_px to limit how many reference pixels
//             are copied (presumably the picture dimensions - confirm).
// lcu         LCU holding the reconstruction and border reference buffers.
// refs        Output; refs->ref.left and refs->ref.top receive 2 * width + 1
//             values each (index 0 is the top-left corner), and
//             refs->filtered_initialized is cleared.
void kvz_intra_build_reference_inner(
  const int_fast8_t log2_width,
  const color_t color,
  const vector2d_t *const luma_px,
  const vector2d_t *const pic_px,
  const lcu_t *const lcu,
  kvz_intra_references *const refs)
{
  assert(log2_width >= 2 && log2_width <= 5);

  refs->filtered_initialized = false;
  kvz_pixel * __restrict dst_left = &refs->ref.left[0];
  kvz_pixel * __restrict dst_top = &refs->ref.top[0];

  const int chroma_shift = (color != COLOR_Y);
  const int_fast8_t width = 1 << log2_width;
  const int ref_len = width * 2;
  // Row stride of the LCU reconstruction buffer for this plane.
  const int rec_stride = LCU_WIDTH >> chroma_shift;

  // Position inside the LCU: first in luma pixels, then in the pixels of the
  // plane being processed.
  const vector2d_t lcu_px = {
    luma_px->x % LCU_WIDTH,
    luma_px->y % LCU_WIDTH
  };
  const vector2d_t px = {
    lcu_px.x >> chroma_shift,
    lcu_px.y >> chroma_shift,
  };

  // Select the buffers of the requested plane. The border buffers are entered
  // at element 1 so that index 0 corresponds to block coordinate 0 and
  // index -1 is the corner sample.
  const kvz_pixel * __restrict lcu_left;
  const kvz_pixel * __restrict lcu_top;
  const kvz_pixel * __restrict lcu_rec;
  if (color == 0) {
    lcu_left = &lcu->left_ref.y[1];
    lcu_top = &lcu->top_ref.y[1];
    lcu_rec = lcu->rec.y;
  } else if (color == 1) {
    lcu_left = &lcu->left_ref.u[1];
    lcu_top = &lcu->top_ref.u[1];
    lcu_rec = lcu->rec.u;
  } else {
    lcu_left = &lcu->left_ref.v[1];
    lcu_top = &lcu->top_ref.v[1];
    lcu_rec = lcu->rec.v;
  }

  // Source of the top reference: the reconstructed row just above the block,
  // or the LCU's 1D top border buffer when the block is on the LCU's top row.
  const kvz_pixel * __restrict top_src = (px.y != 0)
    ? &lcu_rec[px.x + (px.y - 1) * rec_stride]
    : &lcu_top[px.x];

  // Source of the left reference plus the top-left corner sample. On the
  // LCU's left column both come from the 1D left border buffer; otherwise the
  // left column is read from the reconstruction (one sample per row) and the
  // corner is the sample preceding the top source.
  const kvz_pixel * __restrict left_src;
  int left_step; // Distance between consecutive left reference samples.
  kvz_pixel corner;
  if (px.x != 0) {
    left_src = &lcu_rec[px.x - 1 + px.y * rec_stride];
    left_step = rec_stride;
    corner = top_src[-1];
  } else {
    left_src = &lcu_left[px.y];
    left_step = 1;
    corner = left_src[-1 * left_step];
  }
  dst_left[0] = corner;
  dst_top[0] = corner;

  // ---- Left reference ----

  // Number of usable pixels: table value for this position, capped by the
  // reference length and by the distance to pic_px->y.
  int avail_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> chroma_shift;
  avail_left = MIN(avail_left, ref_len);
  avail_left = MIN(avail_left, (pic_px->y - luma_px->y) >> chroma_shift);

  // Copy in groups of four; the first group is copied unconditionally.
  int pos = 0;
  do {
    for (int k = 0; k < 4; ++k) {
      dst_left[pos + k + 1] = left_src[(pos + k) * left_step];
    }
    pos += 4;
  } while (pos < avail_left);

  // Pad the remainder with the last copied pixel.
  kvz_pixel pad = dst_left[pos];
  while (pos < ref_len) {
    for (int k = 1; k <= 4; ++k) {
      dst_left[pos + k] = pad;
    }
    pos += 4;
  }

  // ---- Top reference ----

  // Same limits as for the left reference, but along the x axis.
  int avail_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> chroma_shift;
  avail_top = MIN(avail_top, ref_len);
  avail_top = MIN(avail_top, (pic_px->x - luma_px->x) >> chroma_shift);

  // The top source is contiguous, so each group of four is a block copy.
  pos = 0;
  do {
    memcpy(&dst_top[pos + 1], &top_src[pos], 4 * sizeof(kvz_pixel));
    pos += 4;
  } while (pos < avail_top);

  // Pad the remainder with the last copied pixel.
  pad = dst_top[pos];
  while (pos < ref_len) {
    for (int k = 1; k <= 4; ++k) {
      dst_top[pos + k] = pad;
    }
    pos += 4;
  }
}

// Build the intra reference pixels for a block by dispatching to one of two
// implementations. All arguments are forwarded unchanged.
//
// Blocks with luma_px->x > 0 and luma_px->y > 0 take the reduced-logic
// kvz_intra_build_reference_inner(); any block with a zero (or negative)
// coordinate goes through the general kvz_intra_build_reference_any().
void kvz_intra_build_reference(
  const int_fast8_t log2_width,
  const color_t color,
  const vector2d_t *const luma_px,
  const vector2d_t *const pic_px,
  const lcu_t *const lcu,
  kvz_intra_references *const refs)
{
  // Much of the logic is only needed for blocks on the top or left edge.
  const int on_edge = (luma_px->x <= 0) || (luma_px->y <= 0);

  if (on_edge) {
    kvz_intra_build_reference_any(log2_width, color, luma_px, pic_px, lcu, refs);
  } else {
    kvz_intra_build_reference_inner(log2_width, color, luma_px, pic_px, lcu, refs);
  }
}

void kvz_intra_recon_lcu_luma(
encoder_state_t *const state,
Expand Down

0 comments on commit c31207e

Please sign in to comment.