From ad4b53fe4c70f3a9f8110005d90ba5043e88ced9 Mon Sep 17 00:00:00 2001 From: Justin Israel Date: Tue, 30 May 2023 09:21:03 +1200 Subject: [PATCH] go/cpp: fix regex to handle complex extensions with single character component (refs #26) --- CHANGES | 1 + cpp/private/sequence_p.cpp | 13 ++++-- cpp/test/TestFunctions.cc | 20 ++++++++- fileseq.go | 24 +++++------ fileseq_test.go | 36 +++++++++-------- sequence.go | 63 +++++++++++++++-------------- testdata/complex_frame_only/1.a.jpg | 0 testdata/complex_frame_only/2.a.jpg | 0 testdata/complex_frame_only/3.a.jpg | 0 9 files changed, 93 insertions(+), 64 deletions(-) create mode 100644 testdata/complex_frame_only/1.a.jpg create mode 100644 testdata/complex_frame_only/2.a.jpg create mode 100644 testdata/complex_frame_only/3.a.jpg diff --git a/CHANGES b/CHANGES index 68f9e1b..a8ca9b9 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,7 @@ Changes: 2.11.2 ---------------- +* go/cpp: fix regex to handle parsing complex extensions with single character component ("file.1.a.ext") * go: remove print statement from FindSequenceOnDiskPad 2.11.1 diff --git a/cpp/private/sequence_p.cpp b/cpp/private/sequence_p.cpp index e50b219..b54ca5c 100644 --- a/cpp/private/sequence_p.cpp +++ b/cpp/private/sequence_p.cpp @@ -28,10 +28,15 @@ bool getSplitPatternMatch(SeqPatternMatch &match, const std::string &path) { // /film/shot/renders/hero_bty..exr // /film/shot/renders/hero_bty.%(UDIM)d.exr static const char* s_pattern = - R"(^(.*?))" // dir and basename + "^" + R"((.*?))" // dir and basename R"(([\d-][:xy\d,-]*)?)" // optional frame range R"(([#@]+|%\d*d|\$F\d*||%\(UDIM\)d))" // padding: chars, printf, houdini, udim - R"(((?:\.\w*[a-zA-Z]\w)*(?:\.[^.]+)?)$)"; // extension + "(" // extension + R"((?:\.\w*[a-zA-Z]\w?)*)" // optional leading alnum ext prefix (.foo.1bar) + R"((?:\.[^.]+)?)" // ext suffix + ")" + "$"; match.base.clear(); match.range.clear(); @@ -68,8 +73,8 @@ bool getSingleFrameMatch(SeqPatternMatch &match, const std::string &path, bool r static const string s_extension = // multiple extension parts: "(" - R"((?:\.\w*[a-zA-Z]\w)*)" // optional leading alnum ext prefix (.foo.1bar) - R"((?:\.[^.]+)?)" // ext suffix + R"((?:\.\w*[a-zA-Z]\w?)*)" // optional leading alnum ext prefix (.foo.1bar) + R"((?:\.[^.]+)?)" // ext suffix ")"; // Regular expression pattern for matching single file path names containing a frame. diff --git a/cpp/test/TestFunctions.cc b/cpp/test/TestFunctions.cc index d9cbccc..238f418 100644 --- a/cpp/test/TestFunctions.cc +++ b/cpp/test/TestFunctions.cc @@ -382,6 +382,14 @@ class TestFindSequenceOnDisk : public testing::Test { Case t = {PadStyleHash1, "testdata/_MISSING_.0010.tif", ""}; m_cases.push_back(t); } + { + Case t = {PadStyleHash1, "testdata/complex.@.tar.gz", "testdata/complex.5-7####.tar.gz"}; + m_cases.push_back(t); + } + { + Case t = {PadStyleHash1, "testdata/complex_frame_only/@.a.jpg", "testdata/complex_frame_only/1-3#.a.jpg"}; + m_cases.push_back(t); + } // PadStyleHash4 { @@ -428,6 +436,14 @@ class TestFindSequenceOnDisk : public testing::Test { Case t = {PadStyleHash4, "testdata/2frame/foo.#.ext", "testdata/2frame/foo.1,2#.ext"}; m_cases.push_back(t); } + { + Case t = {PadStyleHash4, "testdata/complex.@.tar.gz", "testdata/complex.5-7#.tar.gz"}; + m_cases.push_back(t); + } + { + Case t = {PadStyleHash4, "testdata/complex_frame_only/@.a.jpg", "testdata/complex_frame_only/1-3@.a.jpg"}; + m_cases.push_back(t); + } } std::vector m_cases; @@ -449,11 +465,11 @@ TEST_F( TestFindSequenceOnDisk, FindSeq ) { } if (!t.expected.empty()) { - EXPECT_EQ(t.expected, seq.string()); + EXPECT_EQ(t.expected, seq.string()) << "for test case #" << i; } // Sanity check - if (seq.index(0) == "" && !t.expected.empty()) { + if (seq.index(0).empty() && !t.expected.empty()) { ADD_FAILURE() << "Expected non-empty string for index(0) of input: " << t.input; } if (t.expected.empty()) { diff --git a/fileseq.go b/fileseq.go index c23d67e..b7313b6 100644 --- a/fileseq.go +++ b/fileseq.go @@ -2,20 +2,20 @@ Package fileseq is a library for parsing file sequence strings commonly used in VFX and animation applications. -Frame Range Shorthand +# Frame Range Shorthand Support for: - Standard: 1-10 - Comma Delimited: 1-10,10-20 - Chunked: 1-100x5 - Filled: 1-100y5 - Staggered: 1-100:3 (1-100x3, 1-100x2, 1-100) - Negative frame numbers: -10-100 - Padding: #=4 padded, @=single pad - Printf Syntax Padding: %04d=4 padded, %01d=1 padded - Houdini Syntax Padding: $F4=4 padding, $F=1 padded - Udim Syntax Padding: or %(UDIM)d, always 4 padded + Standard: 1-10 + Comma Delimited: 1-10,10-20 + Chunked: 1-100x5 + Filled: 1-100y5 + Staggered: 1-100:3 (1-100x3, 1-100x2, 1-100) + Negative frame numbers: -10-100 + Padding: #=4 padded, @=single pad + Printf Syntax Padding: %04d=4 padded, %01d=1 padded + Houdini Syntax Padding: $F4=4 padding, $F=1 padded + Udim Syntax Padding: or %(UDIM)d, always 4 padded */ package fileseq @@ -48,7 +48,7 @@ var ( extPatternStr = `` + `(?P` + // multiple extension parts: - `(?:\.\w*[a-zA-Z]\w)*` + // optional leading alnum ext prefix (.foo.1bar) + `(?:\.\w*[a-zA-Z]\w?)*` + // optional leading alnum ext prefix (.foo.1bar) `(?:\.[^.]+)?` + // ext suffix `)` diff --git a/fileseq_test.go b/fileseq_test.go index 24c957b..759c526 100644 --- a/fileseq_test.go +++ b/fileseq_test.go @@ -912,14 +912,16 @@ func TestFindSequenceOnDisk(t *testing.T) { name: "pad hash 1", mapper: PadStyleHash1, tests: map[string]string{ - "testdata/seqC.@@.tif": "testdata/seqC.-5-2,4-10,20-21,27-30##.tif", - "testdata/seqC.0010.tif": "", - "testdata/seqC.10.tif": "testdata/seqC.-5-2,4-10,20-21,27-30##.tif", - "testdata/seqB.####.jpg": "testdata/seqB.5-14,16-18,20####.jpg", - "testdata/seqB.16-18####.jpg": "testdata/seqB.5-14,16-18,20####.jpg", - "testdata/seqA.####.exr": "testdata/seqA.1,3-6,8-10####.exr", - "testdata/seqA.@@@@.exr": "testdata/seqA.1,3-6,8-10####.exr", - "testdata/seqA.@.jpg": "", + "testdata/seqC.@@.tif": "testdata/seqC.-5-2,4-10,20-21,27-30##.tif", + "testdata/seqC.0010.tif": "", + "testdata/seqC.10.tif": "testdata/seqC.-5-2,4-10,20-21,27-30##.tif", + "testdata/seqB.####.jpg": "testdata/seqB.5-14,16-18,20####.jpg", + "testdata/seqB.16-18####.jpg": "testdata/seqB.5-14,16-18,20####.jpg", + "testdata/seqA.####.exr": "testdata/seqA.1,3-6,8-10####.exr", + "testdata/seqA.@@@@.exr": "testdata/seqA.1,3-6,8-10####.exr", + "testdata/seqA.@.jpg": "", + "testdata/complex.@@@@.tar.gz": "testdata/complex.5-7####.tar.gz", + "testdata/complex_frame_only/@.a.jpg": "testdata/complex_frame_only/1-3#.a.jpg", "testdata/seqC.%02d.tif": "testdata/seqC.-5-2,4-10,20-21,27-30##.tif", "testdata/seqC.$F02.tif": "testdata/seqC.-5-2,4-10,20-21,27-30##.tif", @@ -938,14 +940,16 @@ func TestFindSequenceOnDisk(t *testing.T) { name: "pad hash 4", mapper: PadStyleHash4, tests: map[string]string{ - "testdata/seqC.@@.tif": "testdata/seqC.-5-2,4-10,20-21,27-30@@.tif", - "testdata/seqC.0010.tif": "", - "testdata/seqC.10.tif": "testdata/seqC.-5-2,4-10,20-21,27-30@@.tif", - "testdata/seqB.#.jpg": "testdata/seqB.5-14,16-18,20#.jpg", - "testdata/seqB.16-18#.jpg": "testdata/seqB.5-14,16-18,20#.jpg", - "testdata/seqA.#.exr": "testdata/seqA.1,3-6,8-10#.exr", - "testdata/seqA.@.exr": "", - "testdata/seqA.@.jpg": "", + "testdata/seqC.@@.tif": "testdata/seqC.-5-2,4-10,20-21,27-30@@.tif", + "testdata/seqC.0010.tif": "", + "testdata/seqC.10.tif": "testdata/seqC.-5-2,4-10,20-21,27-30@@.tif", + "testdata/seqB.#.jpg": "testdata/seqB.5-14,16-18,20#.jpg", + "testdata/seqB.16-18#.jpg": "testdata/seqB.5-14,16-18,20#.jpg", + "testdata/seqA.#.exr": "testdata/seqA.1,3-6,8-10#.exr", + "testdata/seqA.@.exr": "", + "testdata/seqA.@.jpg": "", + "testdata/complex.#.tar.gz": "testdata/complex.5-7#.tar.gz", + "testdata/complex_frame_only/@.a.jpg": "testdata/complex_frame_only/1-3@.a.jpg", "testdata/seqC.%02d.tif": "testdata/seqC.-5-2,4-10,20-21,27-30@@.tif", "testdata/seqC.$F02.tif": "testdata/seqC.-5-2,4-10,20-21,27-30@@.tif", diff --git a/sequence.go b/sequence.go index bbcf1f1..73e2aee 100644 --- a/sequence.go +++ b/sequence.go @@ -17,15 +17,16 @@ import ( // to dictate how much padding the actual file numbers have. // // Valid padding characters: -// @ - 1 pad width (@@@@ is equal to 4 padding) -// # - 4 pad width (## is equal to 8 padding) +// +// @ - 1 pad width (@@@@ is equal to 4 padding) +// # - 4 pad width (## is equal to 8 padding) // // Example paths and padding: -// /path/to/single_image.0100.jpg -// /path/to/image_foo.1-10x2#.jpg (i.e. 0001) -// /path/to/image_foo.1-10x2@.jpg (i.e. 1) -// /path/to/image_foo.1-10x2@@@.jpg (i.e. 001) // +// /path/to/single_image.0100.jpg +// /path/to/image_foo.1-10x2#.jpg (i.e. 0001) +// /path/to/image_foo.1-10x2@.jpg (i.e. 1) +// /path/to/image_foo.1-10x2@@@.jpg (i.e. 001) type FileSequence struct { basename string dir string @@ -42,12 +43,12 @@ type FileSequence struct { // If error is non-nil, then the given sequence string could not // be successfully parsed. // -// PadStyleDefault is used as the padding character formatter +// # PadStyleDefault is used as the padding character formatter // // Example paths: -// /path/to/image_foo.1-10x2#.jpg -// /path/to/single_image.0100.jpg // +// /path/to/image_foo.1-10x2#.jpg +// /path/to/single_image.0100.jpg func NewFileSequence(sequence string) (*FileSequence, error) { return NewFileSequencePad(sequence, PadStyleDefault) } @@ -62,15 +63,16 @@ func NewFileSequence(sequence string) (*FileSequence, error) { // order to convert between padding characters and their numeric width. // // Example path w/ PadStyleHash1: -// /path/to/image_foo.1-10x2#.jpg => /path/to/image_foo.1.jpg ... -// /path/to/image_foo.1-10x2@.jpg => /path/to/image_foo.1.jpg ... -// /path/to/image_foo.1-10x2##.jpg => /path/to/image_foo.01.jpg ... +// +// /path/to/image_foo.1-10x2#.jpg => /path/to/image_foo.1.jpg ... +// /path/to/image_foo.1-10x2@.jpg => /path/to/image_foo.1.jpg ... +// /path/to/image_foo.1-10x2##.jpg => /path/to/image_foo.01.jpg ... // // Example path w/ PadStyleHash4: -// /path/to/image_foo.1-10x2#.jpg => /path/to/image_foo.0001.jpg ... -// /path/to/image_foo.1-10x2@.jpg => /path/to/image_foo.1.jpg ... -// /path/to/image_foo.1-10x2##.jpg => /path/to/image_foo.00000001.jpg ... // +// /path/to/image_foo.1-10x2#.jpg => /path/to/image_foo.0001.jpg ... +// /path/to/image_foo.1-10x2@.jpg => /path/to/image_foo.1.jpg ... +// /path/to/image_foo.1-10x2##.jpg => /path/to/image_foo.00000001.jpg ... func NewFileSequencePad(sequence string, style PadStyle) (*FileSequence, error) { var dir, basename, pad, ext string var frameSet *FrameSet @@ -157,21 +159,21 @@ Format returns the file sequence as a formatted string according to the given template. Utilizes Go text/template format syntax. Available functions include: - dir - the directory name. - base - the basename of the sequence (leading up to the frame range). - ext - the file extension of the sequence including leading period. - startf - the start frame. - endf - the end frame. - len - the length of the frame range. - pad - the detected padding characters (i.e. # , @@@ , ...). - frange - the frame range. - inverted - the inverted frame range. (returns empty string if none) - zfill - the int width of the frame padding + + dir - the directory name. + base - the basename of the sequence (leading up to the frame range). + ext - the file extension of the sequence including leading period. + startf - the start frame. + endf - the end frame. + len - the length of the frame range. + pad - the detected padding characters (i.e. # , @@@ , ...). + frange - the frame range. + inverted - the inverted frame range. (returns empty string if none) + zfill - the int width of the frame padding Example: {{dir}}{{base}}{{frange}}{{pad}}{{ext}} - */ func (s *FileSequence) Format(tpl string) (string, error) { c := map[string]interface{}{ @@ -919,11 +921,12 @@ func findSequencesInList(paths []*fileItem, opts ...FileOption) (FileSequences, // is returned. // // Example: -// // Find matches with any frame value -// FindSequenceOnDisk("/path/to/seq.#.ext") // -// // Find matches specifically having 4-padded frames -// FindSequenceOnDisk("/path/to/seq.#.ext", StrictPadding) +// // Find matches with any frame value +// FindSequenceOnDisk("/path/to/seq.#.ext") +// +// // Find matches specifically having 4-padded frames +// FindSequenceOnDisk("/path/to/seq.#.ext", StrictPadding) func FindSequenceOnDisk(pattern string, opts ...FileOption) (*FileSequence, error) { return FindSequenceOnDiskPad(pattern, PadStyleDefault, opts...) } diff --git a/testdata/complex_frame_only/1.a.jpg b/testdata/complex_frame_only/1.a.jpg new file mode 100644 index 0000000..e69de29 diff --git a/testdata/complex_frame_only/2.a.jpg b/testdata/complex_frame_only/2.a.jpg new file mode 100644 index 0000000..e69de29 diff --git a/testdata/complex_frame_only/3.a.jpg b/testdata/complex_frame_only/3.a.jpg new file mode 100644 index 0000000..e69de29