Skip to content

Commit

Permalink
[issue-479] fix parsing of snippet ranges
Browse files Browse the repository at this point in the history
The previous regular expression for ranges prevented lines containing ":" from being parsed as LINE; they were parsed as RANGE instead, and the remainder after ":\d+" was truncated and ignored. This led to problems when parsing a package version like "1:2.36.1-8+deb11u1". Instead of defining a separate lexer function for RANGE, we now check whether a LINE token matches the RANGE pattern. This allows LINEs to contain the RANGE pattern without being assigned the RANGE token.

Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed Feb 20, 2023
1 parent b729417 commit 58e691e
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 10 deletions.
9 changes: 5 additions & 4 deletions spdx/parsers/lexers/tagvalue.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from ply import lex

Expand Down Expand Up @@ -170,10 +171,6 @@ def t_CHKSUM(self, t):
t.value = t.value[1:].strip()
return t

def t_RANGE(self, t):
r":\s*\d+:\d+"
t.value = t.value[1:].strip()
return t

def t_DOC_REF_ID(self, t):
r":\s*DocumentRef-([A-Za-z0-9\+\.\-]+)"
Expand Down Expand Up @@ -221,6 +218,10 @@ def t_LINE_OR_KEYWORD_VALUE(self, t):
t.value = t.value[1:].strip()
if t.value in self.reserved.keys():
t.type = self.reserved[t.value]
return t
range_pattern = re.compile("\d+:\d(?!\D)")
if range_pattern.match(t.value):
t.type = "RANGE"
else:
t.type = "LINE"
return t
Expand Down
12 changes: 6 additions & 6 deletions tests/test_tag_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
package_str = '\n'.join([
'PackageName: Test',
'SPDXID: SPDXRef-Package',
'PackageVersion: Version 0.9.2',
'PackageVersion: 1:2.36.1-8+deb11u1',
'PackageDownloadLocation: http://example.com/test',
'FilesAnalyzed: True',
'PackageSummary: <text>Test package</text>',
Expand Down Expand Up @@ -104,7 +104,7 @@
'SnippetLicenseConcluded: Apache-2.0',
'LicenseInfoInSnippet: Apache-2.0',
'SnippetByteRange: 310:420',
'SnippetLineRange: 5:23',
'SnippetLineRange: 5:7',
])

annotation_str = '\n'.join([
Expand Down Expand Up @@ -195,7 +195,7 @@ def test_package(self):
self.token_assert_helper(self.l.token(), 'SPDX_ID', 'SPDXID', 2)
self.token_assert_helper(self.l.token(), 'LINE', 'SPDXRef-Package', 2)
self.token_assert_helper(self.l.token(), 'PKG_VERSION', 'PackageVersion', 3)
self.token_assert_helper(self.l.token(), 'LINE', 'Version 0.9.2', 3)
self.token_assert_helper(self.l.token(), 'LINE', '1:2.36.1-8+deb11u1', 3)
self.token_assert_helper(self.l.token(), 'PKG_DOWN', 'PackageDownloadLocation', 4)
self.token_assert_helper(self.l.token(), 'LINE', 'http://example.com/test', 4)
self.token_assert_helper(self.l.token(), 'PKG_FILES_ANALYZED', 'FilesAnalyzed', 5)
Expand Down Expand Up @@ -275,7 +275,7 @@ def test_snippet(self):
self.token_assert_helper(self.l.token(), 'SNIPPET_BYTE_RANGE', 'SnippetByteRange', 9)
self.token_assert_helper(self.l.token(), 'RANGE', '310:420', 9)
self.token_assert_helper(self.l.token(), 'SNIPPET_LINE_RANGE', 'SnippetLineRange', 10)
self.token_assert_helper(self.l.token(), 'RANGE', '5:23', 10)
self.token_assert_helper(self.l.token(), 'RANGE', '5:7', 10)

def test_annotation(self):
data = annotation_str
Expand Down Expand Up @@ -337,7 +337,7 @@ def test_package(self):
assert not error
assert document.package.name == 'Test'
assert document.package.spdx_id == 'SPDXRef-Package'
assert document.package.version == 'Version 0.9.2'
assert document.package.version == '1:2.36.1-8+deb11u1'
assert len(document.package.licenses_from_files) == 2
assert (document.package.conc_lics.identifier == 'LicenseRef-2.0 AND Apache-2.0')
assert document.package.files_analyzed is True
Expand Down Expand Up @@ -408,4 +408,4 @@ def test_snippet(self):
assert document.snippet[-1].byte_range[0] == 310
assert document.snippet[-1].byte_range[1] == 420
assert document.snippet[-1].line_range[0] == 5
assert document.snippet[-1].line_range[1] == 23
assert document.snippet[-1].line_range[1] == 7

0 comments on commit 58e691e

Please sign in to comment.