Skip to content

Commit

Permalink
Merge pull request #254 from brilliantlabsAR/jpeg-merging
Browse files Browse the repository at this point in the history
JPEG optimisations
  • Loading branch information
siliconwitch authored Sep 4, 2024
2 parents 1271c82 + 9376cb4 commit a97dfb1
Show file tree
Hide file tree
Showing 18 changed files with 23,805 additions and 25,321 deletions.
48,350 changes: 23,406 additions & 24,944 deletions source/fpga/fpga_application.h

Large diffs are not rendered by default.

139 changes: 139 additions & 0 deletions source/fpga/modules/camera/jpeg_encoder/jenc/bit_pack.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Authored by: Robert Metchev / Chips & Scripts (rmetchev@ieee.org)
*
* CERN Open Hardware Licence Version 2 - Permissive
*
* Copyright (C) 2024 Robert Metchev
*/
// bit_pack: packs a stream of variable-length bit fields into 32-bit words.
//
// Input beats carry in_nbits bits taken from the MSB end of in_data (the upper
// 32 bits are consumed first, then the lower 32). Stage 1 splits any beat wider
// than 32 bits into two sub-beats; stage 2 accumulates sub-beats in a 64-bit
// shift register and emits one 32-bit word whenever at least 32 bits are stored.
// A beat flagged in_tlast flushes the remainder: the final word is marked with
// out_tlast, its unused low bits are filled with ones, and out_nbytes reports
// how many bytes of it carry data.
//
// Handshake: a beat moves when valid is high and the matching hold is low.
// Reset is synchronous and active low (resetn is sampled inside posedge clk).
module bit_pack (
//packed code+coeff
// Input beat: payload is MSB-aligned in in_data.
input logic [63:0] in_data,
// Number of payload bits in in_data; values above 32 take two cycles to accept.
input logic [6:0] in_nbits,
// Marks the last beat of a packet.
input logic in_tlast,
input logic in_valid,
// Back-pressure to the producer (see the in_hold assignment below).
output logic in_hold,

// Output word; valid when out_valid is high.
output logic [31:0] out_data,
// 4 for a full word; on the out_tlast word, the rounded-up byte count of the remaining bits.
output logic [2:0] out_nbytes,
output logic out_tlast,
output logic out_valid,
// Back-pressure from the consumer; only acted on while out_valid is high.
input logic out_hold,

input logic clk,
input logic resetn
);

// 1.) 64-bit to 32-bit align: There will be extremely rarely more than 32 bits.
// Stall during 1st 32 bits.
// in32_* is the registered stage-1 output feeding stage 2.
logic [31:0] in32_data;
logic [5:0] in32_nbits;
logic in32_tlast;
logic in32_valid;
logic in32_hold;
// High for the cycle in which the lower half of a >32-bit beat is captured.
logic long_in;

// Stage-1 control registers. They advance unless stage 1 holds a valid
// sub-beat that stage 2 is refusing (in32_hold & in32_valid).
always @(posedge clk)
if (!resetn) begin
in32_valid <= 0;
long_in <= 0;
end
else if (~(in32_hold & in32_valid)) begin
// NOTE(review): during the long_in cycle this relies on the producer still
// driving in_valid for the held beat - confirm against the upstream module.
in32_valid <= in_valid;

// long_in toggles: set when the upper half of a wide beat is taken,
// cleared one advancing cycle later when the lower half is taken.
if (long_in)
long_in <= 0;
else if (in_valid & in_nbits > 32)
long_in <= 1;
end

// Stage-1 data registers (no reset; qualified by in32_valid).
always @(posedge clk)
if (~(in32_hold & in32_valid))
if (long_in) begin
// Second half of a wide beat: remaining bit count, lower 32 data bits,
// and the beat's real tlast.
// NOTE(review): in_nbits - 32 is truncated to 6 bits; presumably
// in_nbits never exceeds 64 - confirm.
in32_nbits <= in_nbits - 32;
in32_data <= in_data;
in32_tlast <= in_tlast;
end
else if (in_valid)
if (in_nbits > 32) begin
// First half of a wide beat: a full 32 bits from the upper word.
// tlast is withheld until the second half.
in32_nbits <= 32;
in32_data <= in_data >> 32;
in32_tlast <= 0;
end
else begin
// Beat fits in 32 bits: pass the upper word straight through.
in32_nbits <= in_nbits;
in32_data <= in_data >> 32;
in32_tlast <= in_tlast;
end

// Stall to split 32+ into 32 + remainder
// The producer is held while stage 1 is blocked, and for the first cycle of a
// wide beat so that the same beat is still present when its lower half is taken.
// NOTE(review): the second term is not qualified by in_valid, so in_hold can be
// high while no beat is offered - presumably harmless; confirm with the producer.
always_comb in_hold = (in32_hold & in32_valid) | (~long_in & in_nbits > 32); // goes out


// 2.) incoming: 32 bits max = 4 bytes
// send data when more than 31 bits in storage
// bit_count: number of valid bits currently held in bit_packer, counted from bit 63 down.
logic [5:0] bit_count, next_bit_count, next_bit_count_incr, next_bit_count_decr;
// bit_packer: 64-bit accumulator; the output word is its upper half.
logic [63:0] bit_packer, next_bit_packer, next_bit_packer_load;
logic [5:0] next_bit_packer_shift;
// tlast_cycle: a tlast sub-beat has been taken and the final word has not yet been retired.
logic tlast_cycle, next_tlast_cycle;
logic next_out_tlast;

// Output word is the upper 32 bits of the accumulator. On the final word the
// bit positions below the stored bits are ORed with ones (padding).
always_comb out_data = (bit_packer >> 32) | (out_tlast ? (32'hffffffff >> bit_count) : 0);
// Bits stored after this cycle: current, plus bits loaded, minus bits emitted.
always_comb next_bit_count = bit_count + next_bit_count_incr - next_bit_count_decr;
// Drop the emitted word (shift by 0 or 32), then OR the new sub-beat in directly
// below the bits that remain. The load is zero-extended to 64 bits, so a left
// shift of 32 puts it at the top of the accumulator.
// NOTE(review): because this is an OR merge, bits of in32_data below in32_nbits
// would corrupt later data - presumably the producer zeroes them; confirm.
always_comb next_bit_packer = (bit_packer << next_bit_packer_shift) | (next_bit_packer_load << (32 + next_bit_count_decr - bit_count));

always_comb begin
// How many bits leave the accumulator this cycle.
if (out_tlast) begin
// Final word: everything that is stored goes out.
next_bit_count_decr = bit_count;
next_bit_packer_shift = 32;
end
else if (bit_count >= 32) begin
// A full word is being presented.
next_bit_count_decr = 32;
next_bit_packer_shift = 32;
end
else begin
next_bit_count_decr = 0;
next_bit_packer_shift = 0;
end

// How many bits enter the accumulator this cycle.
if (in32_valid & ~in32_hold) begin
next_bit_count_incr = in32_nbits;
next_bit_packer_load = in32_data;
end
else begin
next_bit_count_incr = 0;
next_bit_packer_load = 0;
end

// Flush tracking: stays set from the tlast sub-beat until the cycle in which
// out_tlast is high.
// NOTE(review): a sub-beat with in32_tlast that is accepted in the same cycle
// out_tlast is high takes the first branch, so its tlast does not appear to be
// recorded - confirm a single-beat packet cannot directly follow a packet end.
if (tlast_cycle)
next_tlast_cycle = ~out_tlast;
else if (in32_valid)
next_tlast_cycle = in32_tlast;
else
next_tlast_cycle = tlast_cycle;
end

// Stage-2 state and registered outputs. Frozen while the consumer stalls a
// valid output word.
always @(posedge clk)
if (!resetn) begin
bit_count <= 0;
tlast_cycle <= 0;
out_tlast <= 0;
out_valid <= 0;
bit_packer <= 0;
end
else if (~(out_hold & out_valid)) begin
bit_count <= next_bit_count;
tlast_cycle <= next_tlast_cycle;

// The final word is flagged once a flush is pending and no more than 32 bits
// remain; a word is valid if it is the final one or a full 32 bits are stored.
out_tlast <= next_tlast_cycle & next_bit_count <= 32;
out_valid <= (next_tlast_cycle & next_bit_count <= 32) | next_bit_count >= 32; //always_comb out_valid = out_tlast | bit_count >= 32;

bit_packer <= next_bit_packer;
end

// Byte count of the presented word (not reset; qualified by out_valid):
// remaining bits rounded up to bytes on the final word, otherwise 4.
always @(posedge clk)
if (~(out_hold & out_valid))
out_nbytes <= (next_tlast_cycle & next_bit_count <= 32) ? (next_bit_count + 7) >> 3 : 4; // always_comb out_nbytes = out_tlast ? (bit_count + 7) >> 3 : 4;

// Stage 2 refuses new sub-beats while its output is stalled, and while a flush
// is pending but the final word has not yet been presented.
always_comb in32_hold = (out_hold & out_valid) | (tlast_cycle & ~out_tlast);

endmodule
80 changes: 0 additions & 80 deletions source/fpga/modules/camera/jpeg_encoder/jenc/bitpacker.sv

This file was deleted.

85 changes: 85 additions & 0 deletions source/fpga/modules/camera/jpeg_encoder/jenc/byte_pack.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Authored by: Robert Metchev / Chips & Scripts (rmetchev@ieee.org)
*
* CERN Open Hardware Licence Version 2 - Permissive
*
* Copyright (C) 2024 Robert Metchev
*/
// byte_pack: structural wrapper that chains three stages
//   bit_pack_0 : variable-length code+coefficient fields -> 32-bit words
//   ff00       : inserts 0x00 after 0xFF bytes (word may grow up to 8 bytes)
//   bit_pack_1 : re-packs the 0..8 byte groups into 32-bit output words
// Every stage uses the same valid/hold handshake; a hold is only forwarded
// upstream while the corresponding valid is high.
module byte_pack (
    //packed code+coeff
    input   logic [5:0]     codecoeff_length,   // number of valid bits in codecoeff
    input   logic [51:0]    codecoeff,          // placed in the top 52 bits of the packer input
    input   logic           codecoeff_tlast,
    input   logic           codecoeff_valid,
    output  logic           codecoeff_hold,

    output  logic [31:0]    out_data,
    output  logic           out_tlast,
    output  logic           out_valid,
    input   logic           out_hold,

    input   logic           clk,
    input   logic           resetn
);

    // ------------------------------------------------------------------
    // Link: bit_pack_0 -> ff00 (up to 4 bytes per word)
    // ------------------------------------------------------------------
    logic [31:0]    stage0_data;
    logic [2:0]     stage0_nbytes;
    logic           stage0_tlast;
    logic           stage0_valid;
    logic           stage0_hold;    // raw hold request coming back from ff00
    logic           stage0_stall;   // hold request gated by valid

    // ------------------------------------------------------------------
    // Link: ff00 -> bit_pack_1 (up to 8 bytes per word)
    // ------------------------------------------------------------------
    logic [63:0]    stage1_data;
    logic [3:0]     stage1_nbytes;
    logic           stage1_tlast;
    logic           stage1_valid;
    logic           stage1_hold;    // raw hold request coming back from bit_pack_1
    logic           stage1_stall;   // hold request gated by valid

    logic           out_stall;      // consumer hold gated by out_valid

    assign stage0_stall = stage0_hold & stage0_valid;
    assign stage1_stall = stage1_hold & stage1_valid;
    assign out_stall    = out_hold & out_valid;

    // Pack up to 52 bits into 4 byte words
    bit_pack bit_pack_0 (
        .clk        (clk),
        .resetn     (resetn),

        .in_data    ({codecoeff, 12'h0}),           // pad 52 -> 64 bits at the LSB end
        .in_nbits   ({1'b0, codecoeff_length}),     // widen 6 -> 7 bits
        .in_tlast   (codecoeff_tlast),
        .in_valid   (codecoeff_valid),
        .in_hold    (codecoeff_hold),

        .out_data   (stage0_data),
        .out_nbytes (stage0_nbytes),
        .out_tlast  (stage0_tlast),
        .out_valid  (stage0_valid),
        .out_hold   (stage0_stall)
    );

    // pad 0xFF with 0x00
    // The port list of ff00 is not visible here, so the wildcard connection
    // is kept for whatever ports are not named explicitly.
    ff00 ff00 (
        .in_data    (stage0_data),
        .in_nbytes  (stage0_nbytes),
        .in_tlast   (stage0_tlast),
        .in_valid   (stage0_valid),
        .in_hold    (stage0_hold),

        .out_data   (stage1_data),
        .out_nbytes (stage1_nbytes),
        .out_tlast  (stage1_tlast),
        .out_valid  (stage1_valid),
        .out_hold   (stage1_stall),

        .*
    );

    // Pack up to 8 bytes into 4 byte words
    bit_pack bit_pack_1 (
        .clk        (clk),
        .resetn     (resetn),

        .in_data    (stage1_data),
        .in_nbits   ({stage1_nbytes, 3'h0}),        // bytes -> bits
        .in_tlast   (stage1_tlast),
        .in_valid   (stage1_valid),
        .in_hold    (stage1_hold),

        .out_data   (out_data),
        .out_nbytes ( ),                            // always full 32 bits/4 bytes
        .out_tlast  (out_tlast),
        .out_valid  (out_valid),
        .out_hold   (out_stall)
    );

endmodule
Loading

0 comments on commit a97dfb1

Please sign in to comment.