small changes to tag-value lexer/parser #534

Merged
6 changes: 3 additions & 3 deletions src/spdx/parser/tagvalue/lexer.py
@@ -14,7 +14,7 @@
from ply.lex import TOKEN


class SPDXLexer(object):
class SPDXLexer:
reserved = {
# Top level fields
"SPDXVersion": "DOC_VERSION",
@@ -107,7 +107,7 @@ class SPDXLexer(object):
"UNKNOWN_TAG",
"ORGANIZATION_VALUE",
"PERSON_VALUE",
"DATE",
"ISO8601_DATE",
Collaborator:
As there was recently a discussion about the date not being exactly ISO8601, I am wondering if we should really name it like that.

Member:
Let's be clear: it is an ISO-8601 formatted date.

It doesn't allow all variants of ISO-8601, which include 20230328T122341.123 or 20230328T12XXXX or 2023-33. You would find very few implementations handling all these variants.
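
For concreteness, a minimal sketch (an illustration, not code from this PR) that checks the variants mentioned above against the pattern used by t_ISO8601_DATE, with the leading ":\s*" tag separator stripped; only the single YYYY-MM-DDThh:mm:ssZ profile passes:

# Illustrative only: the date pattern from t_ISO8601_DATE above, minus its
# leading ":\s*" tag separator, tried against the variants from the comment.
import re

ISO8601_PROFILE = re.compile(r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ")

for sample in [
    "2023-03-28T12:23:41Z",  # accepted: the only profile the lexer recognizes
    "20230328T122341.123",   # rejected: basic format with fractional seconds
    "20230328T12XXXX",       # rejected: reduced precision
    "2023-33",               # rejected: year-week variant
]:
    print(sample, "->", bool(ISO8601_PROFILE.fullmatch(sample)))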

"LINE",
"CHECKSUM"
] + list(reserved.values())
@@ -158,7 +158,7 @@ def t_PERSON_VALUE(self, t):
return t

@TOKEN(r":\s*\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ")
def t_DATE(self, t):
def t_ISO8601_DATE(self, t):
t.value = t.value[1:].strip()
return t

12 changes: 6 additions & 6 deletions src/spdx/parser/tagvalue/parser.py
@@ -44,7 +44,7 @@
Snippet="SnippetSPDXID", Package="PackageName", ExtractedLicensingInfo="LicenseID")


class Parser(object):
class Parser:
tokens: List[str]
logger: Logger
current_element: Dict[str, Any]
@@ -169,7 +169,7 @@ def p_generic_value(self, p):
if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
set_value(p, self.current_element)

@grammar_rule("unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
@grammar_rule("unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
"| UNKNOWN_TAG")
def p_unknown_tag(self, p):
self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")
@@ -252,7 +252,7 @@ def p_external_document_ref(self, p):
def p_creator(self, p):
self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))

@grammar_rule("created : CREATED DATE")
@grammar_rule("created : CREATED ISO8601_DATE")
def p_created(self, p):
set_value(p, self.creation_info, method_to_apply=datetime_from_str)

@@ -384,8 +384,8 @@ def p_primary_package_purpose(self, p):
if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])

@grammar_rule("built_date : BUILT_DATE DATE\n release_date : RELEASE_DATE DATE\n "
"valid_until_date : VALID_UNTIL_DATE DATE")
@grammar_rule("built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
"valid_until_date : VALID_UNTIL_DATE ISO8601_DATE")
def p_package_dates(self, p):
if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
set_value(p, self.current_element, method_to_apply=datetime_from_str)
@@ -428,7 +428,7 @@ def p_annotator(self, p):
self.initialize_new_current_element(Annotation)
set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)

@grammar_rule("annotation_date : ANNOTATION_DATE DATE")
@grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
def p_annotation_date(self, p):
if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
set_value(p, self.current_element, method_to_apply=datetime_from_str)
10 changes: 5 additions & 5 deletions tests/spdx/parser/tagvalue/test_tag_value_lexer.py
@@ -120,7 +120,7 @@ def test_tokenization_of_creation_info(lexer):
token_assert_helper(lexer.token(), "CREATOR", "Creator", 2)
token_assert_helper(lexer.token(), "ORGANIZATION_VALUE", "Organization: Acme.", 2)
token_assert_helper(lexer.token(), "CREATED", "Created", 3)
token_assert_helper(lexer.token(), "DATE", "2010-02-03T00:00:00Z", 3)
token_assert_helper(lexer.token(), "ISO8601_DATE", "2010-02-03T00:00:00Z", 3)
token_assert_helper(lexer.token(), "CREATOR_COMMENT", "CreatorComment", 4)
token_assert_helper(lexer.token(), "TEXT", "<text>Sample Comment</text>", 4)

@@ -205,11 +205,11 @@ def test_tokenization_of_package(lexer):
token_assert_helper(lexer.token(), "PRIMARY_PACKAGE_PURPOSE", "PrimaryPackagePurpose", 23)
token_assert_helper(lexer.token(), "LINE", "OPERATING-SYSTEM", 23)
token_assert_helper(lexer.token(), "BUILT_DATE", "BuiltDate", 24)
token_assert_helper(lexer.token(), "DATE", "2020-01-01T12:00:00Z", 24)
token_assert_helper(lexer.token(), "ISO8601_DATE", "2020-01-01T12:00:00Z", 24)
token_assert_helper(lexer.token(), "RELEASE_DATE", "ReleaseDate", 25)
token_assert_helper(lexer.token(), "DATE", "2021-01-01T12:00:00Z", 25)
token_assert_helper(lexer.token(), "ISO8601_DATE", "2021-01-01T12:00:00Z", 25)
token_assert_helper(lexer.token(), "VALID_UNTIL_DATE", "ValidUntilDate", 26)
token_assert_helper(lexer.token(), "DATE", "2022-01-01T12:00:00Z", 26)
token_assert_helper(lexer.token(), "ISO8601_DATE", "2022-01-01T12:00:00Z", 26)


def test_tokenization_of_unknown_tag(lexer):
@@ -269,7 +269,7 @@ def test_tokenization_of_annotation(lexer):
token_assert_helper(lexer.token(), "ANNOTATOR", "Annotator", 1)
token_assert_helper(lexer.token(), "PERSON_VALUE", "Person: Jane Doe()", 1)
token_assert_helper(lexer.token(), "ANNOTATION_DATE", "AnnotationDate", 2)
token_assert_helper(lexer.token(), "DATE", "2010-01-29T18:30:22Z", 2)
token_assert_helper(lexer.token(), "ISO8601_DATE", "2010-01-29T18:30:22Z", 2)
token_assert_helper(lexer.token(), "ANNOTATION_COMMENT", "AnnotationComment", 3)
token_assert_helper(lexer.token(), "TEXT", "<text>Document level annotation</text>", 3)
token_assert_helper(lexer.token(), "ANNOTATION_TYPE", "AnnotationType", 4)