Skip to content

Commit

Permalink
png: a variety of minor optimizations to the PNG writer (#3980)
Browse files Browse the repository at this point in the history
* Implement PNGOutput::write_scanlines; previously we only had
write_scanline. Sadly, being able to write multiple scanlines at once
reduces overhead by much less than I had hoped. But there's a bit of
savings, so I'll keep it.

* New compression recognized: "pngfast", which translates to the
Z_BEST_SPEED setting. I don't particularly recommend this, but it's
useful for benchmarking.

* New compression recognized: "none", which turns off compression. Also
not recommended, as it makes the files much larger. Mostly done for
benchmarking and other comparisons.

* Don't unconditionally copy user's data buffer to m_scratch. It's only
necessary if there's a data type conversion, stride shuffling, alpha
deassociation, or endian swapping. When none of those are needed, we can
avoid the extra allocation and copy, and so now we do. (Sigh, in real
use, we're almost always doing the alpha deassociation, so this rarely
is a savings.)

* Better documentation on the custom output attribute hint
`"png:filter"` noting that a non-default value can make it write PNG
files dramatically faster, but with the tradeoff of having much larger
files.

---------

Signed-off-by: Larry Gritz <lg@larrygritz.com>
  • Loading branch information
lgritz authored Sep 13, 2023
1 parent 1fa2bb1 commit 069e1d9
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 2 deletions.
7 changes: 7 additions & 0 deletions src/doc/builtinplugins.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,13 @@ control aspects of the writing itself:
(``PNG_FILTER_UP``), 64 (``PNG_FILTER_AVG``), or 128
(``PNG_FILTER_PAETH``).

**Important**: We have noticed that 8 (``PNG_FILTER_NONE``) is much
faster than the default of ``PNG_NO_FILTERS`` (sometimes 3x or more
faster), but it also makes the resulting files quite a bit larger
(sometimes 2x larger). If you need to optimize PNG write speed and are
willing to have larger PNG files on disk, you may want to use that value
for this attribute.

**Custom I/O Overrides**

PNG input and output both support the "custom I/O" feature via the special
Expand Down
21 changes: 21 additions & 0 deletions src/png.imageio/png_pvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,27 @@ write_row(png_structp& sp, png_byte* data)



/// Helper function - error-catching wrapper for png_write_row /
/// png_write_rows that writes `nrows` scanlines in one call.
///
/// `data` points to the first scanline; scanline `i` is read from
/// `data + i * ystride`, with `ystride` measured in bytes. Returns false if
/// libpng bailed out through its setjmp/longjmp error path, true otherwise.
/// A zero or negative `nrows` writes nothing and returns true.
inline bool
write_rows(png_structp& sp, png_byte* data, int nrows = 0, stride_t ystride = 0)
{
    // Nothing to do. This also keeps a zero or negative count away from the
    // stack allocation and the unsigned conversion below, where a negative
    // value would turn into an enormous size / row count.
    if (nrows <= 0)
        return true;

    if (setjmp(png_jmpbuf(sp))) {  // NOLINT(cert-err52-cpp)
        //error ("PNG library error");
        return false;
    }
    if (nrows == 1) {
        // Single row: no need to build a row-pointer table.
        png_write_row(sp, data);
    } else {
        // png_write_rows takes an array of row pointers rather than a base
        // pointer and a stride, so build that table on the stack.
        png_byte** ptrs = OIIO_ALLOCA(png_byte*, nrows);
        for (int i = 0; i < nrows; ++i)
            ptrs[i] = data + i * ystride;
        png_write_rows(sp, ptrs, png_uint_32(nrows));
    }
    return true;
}



/// Helper function - error-catching wrapper for png_write_end
inline void
write_end(png_structp& sp, png_infop& ip)
Expand Down
73 changes: 71 additions & 2 deletions src/png.imageio/pngoutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class PNGOutput final : public ImageOutput {
bool close() override;
bool write_scanline(int y, int z, TypeDesc format, const void* data,
stride_t xstride) override;
bool write_scanlines(int ybegin, int yend, int z, TypeDesc format,
const void* data, stride_t xstride = AutoStride,
stride_t ystride = AutoStride) override;
bool write_tile(int x, int y, int z, TypeDesc format, const void* data,
stride_t xstride, stride_t ystride,
stride_t zstride) override;
Expand All @@ -43,6 +46,7 @@ class PNGOutput final : public ImageOutput {
unsigned int m_dither;
int m_color_type; ///< PNG color model type
bool m_convert_alpha; ///< Do we deassociate alpha?
bool m_need_swap; ///< Do we need to swap bytes?
float m_gamma; ///< Gamma to use for alpha conversion
std::vector<unsigned char> m_scratch;
std::vector<png_text> m_pngtext;
Expand All @@ -55,6 +59,7 @@ class PNGOutput final : public ImageOutput {
m_png = NULL;
m_info = NULL;
m_convert_alpha = true;
m_need_swap = false;
m_gamma = 1.0;
m_pngtext.clear();
ioproxy_clear();
Expand Down Expand Up @@ -165,10 +170,18 @@ PNGOutput::open(const std::string& name, const ImageSpec& userspec,
png_set_compression_strategy(m_png, Z_RLE);
} else if (Strutil::iequals(compression, "fixed")) {
png_set_compression_strategy(m_png, Z_FIXED);
} else if (Strutil::iequals(compression, "pngfast")) {
png_set_compression_strategy(m_png, Z_DEFAULT_STRATEGY);
png_set_compression_level(m_png, Z_BEST_SPEED);
} else if (Strutil::iequals(compression, "none")) {
png_set_compression_strategy(m_png, Z_NO_COMPRESSION);
png_set_compression_level(m_png, 0);
} else {
png_set_compression_strategy(m_png, Z_DEFAULT_STRATEGY);
}

m_need_swap = (m_spec.format == TypeDesc::UINT16 && littleendian());

png_set_filter(m_png, 0,
spec().get_int_attribute("png:filter", PNG_NO_FILTERS));
// https://www.w3.org/TR/PNG-Encoders.html#E.Filter-selection
Expand Down Expand Up @@ -293,7 +306,7 @@ PNGOutput::write_scanline(int y, int z, TypeDesc format, const void* data,
m_spec.auto_stride(xstride, format, spec().nchannels);
const void* origdata = data;
data = to_native_scanline(format, data, xstride, m_scratch, m_dither, y, z);
if (data == origdata) {
if (data == origdata && (m_convert_alpha || m_need_swap)) {
m_scratch.assign((unsigned char*)data,
(unsigned char*)data + m_spec.scanline_bytes());
data = &m_scratch[0];
Expand All @@ -310,7 +323,7 @@ PNGOutput::write_scanline(int y, int z, TypeDesc format, const void* data,
}

// PNG is always big endian
if (littleendian() && m_spec.format == TypeDesc::UINT16)
if (m_need_swap)
swap_endian((unsigned short*)data, m_spec.width * m_spec.nchannels);

if (!PNG_pvt::write_row(m_png, (png_byte*)data)) {
Expand All @@ -323,6 +336,62 @@ PNGOutput::write_scanline(int y, int z, TypeDesc format, const void* data,



bool
PNGOutput::write_scanlines(int ybegin, int yend, int z, TypeDesc format,
const void* data, stride_t xstride, stride_t ystride)
{
#if 0
// For testing/benchmarking: just implement write_scanlines in terms of
// individual calls to write_scanline.
for (int y = ybegin ; y < yend; ++y) {
if (!write_scanline(y, z, format, data, xstride))
return false;
data = (const char*)data + ystride;
}
return true;
#else
stride_t zstride = AutoStride;
m_spec.auto_stride(xstride, ystride, zstride, format, m_spec.nchannels,
m_spec.width, m_spec.height);
const void* origdata = data;
data = to_native_rectangle(m_spec.x, m_spec.x + m_spec.width, ybegin, yend,
z, z + 1, format, data, xstride, ystride,
zstride, m_scratch);
size_t npixels = m_spec.width * (yend - ybegin);
size_t nvals = npixels * m_spec.nchannels;
if (data == origdata && (m_convert_alpha || m_need_swap)) {
m_scratch.assign((unsigned char*)data,
(unsigned char*)data + nvals * m_spec.format.size());
data = m_scratch.data();
}

// PNG specifically dictates unassociated (un-"premultiplied") alpha
if (m_convert_alpha) {
if (m_spec.format == TypeDesc::UINT16)
deassociateAlpha((unsigned short*)data, npixels, m_spec.nchannels,
m_spec.alpha_channel, m_gamma);
else
deassociateAlpha((unsigned char*)data, npixels, m_spec.nchannels,
m_spec.alpha_channel, m_gamma);
}

// PNG is always big endian
if (m_need_swap)
swap_endian((unsigned short*)data, nvals);

if (!PNG_pvt::write_rows(m_png, (png_byte*)data, yend - ybegin,
stride_t(m_spec.width) * m_spec.nchannels
* m_spec.format.size())) {
errorfmt("PNG library error");
return false;
}
#endif

return true;
}



bool
PNGOutput::write_tile(int x, int y, int z, TypeDesc format, const void* data,
stride_t xstride, stride_t ystride, stride_t zstride)
Expand Down

0 comments on commit 069e1d9

Please sign in to comment.