diff --git a/spm/private/modulemap_parser/BUILD.bazel b/spm/private/modulemap_parser/BUILD.bazel index d865ec9..b0d30da 100644 --- a/spm/private/modulemap_parser/BUILD.bazel +++ b/spm/private/modulemap_parser/BUILD.bazel @@ -62,6 +62,7 @@ bzl_library( srcs = ["collect_module.bzl"], visibility = ["//spm:__subpackages__"], deps = [ + ":collect_module_attribute", ":collect_module_members", ":collection_results", ":declarations", @@ -70,10 +71,52 @@ bzl_library( ], ) +bzl_library( + name = "collect_module_attribute", + srcs = ["collect_module_attribute.bzl"], + visibility = ["//spm:__subpackages__"], + deps = [ + ":collection_results", + ":tokens", + ], +) + bzl_library( name = "collect_module_members", srcs = ["collect_module_members.bzl"], visibility = ["//spm:__subpackages__"], + deps = [ + ":collect_export_declaration", + ":collect_header_declaration", + ":collect_link_declaration", + ":collect_submodule", + ":collect_umbrella_dir_declaration", + ":collection_results", + ":declarations", + ":errors", + ":tokens", + "@bazel_skylib//lib:sets", + ], +) + +bzl_library( + name = "collect_submodule", + srcs = ["collect_submodule.bzl"], + visibility = ["//spm:__subpackages__"], + deps = [ + ":collect_module_attribute", + ":collect_submodule_members", + ":collection_results", + ":declarations", + ":errors", + ":tokens", + ], +) + +bzl_library( + name = "collect_submodule_members", + srcs = ["collect_submodule_members.bzl"], + visibility = ["//spm:__subpackages__"], deps = [ ":collect_export_declaration", ":collect_header_declaration", diff --git a/spm/private/modulemap_parser/collect_module.bzl b/spm/private/modulemap_parser/collect_module.bzl index f074b38..e461a52 100644 --- a/spm/private/modulemap_parser/collect_module.bzl +++ b/spm/private/modulemap_parser/collect_module.bzl @@ -1,51 +1,15 @@ """Defintion for collect_module.""" +load(":collect_module_attribute.bzl", "collect_module_attribute") load(":collect_module_members.bzl", "collect_module_members") load(":collection_results.bzl", 
"collection_results") load(":declarations.bzl", "declarations") load(":errors.bzl", "errors") load(":tokens.bzl", "tokens", rws = "reserved_words", tts = "token_types") -# MARK: - Attribute Collection - -def _collect_attribute(parsed_tokens): - """Collect a module attribute. - - Spec: https://clang.llvm.org/docs/Modules.html#attributes - - Syntax: - attributes: - attribute attributesopt - - attribute: - '[' identifier ']' - - Args: - parsed_tokens: A `list` of tokens. - - Returns: - A `tuple` where the first item is the collection result and the second is an - error `struct` as returned from errors.create(). - """ - tlen = len(parsed_tokens) - - _open_token, err = tokens.get_as(parsed_tokens, 0, tts.square_bracket_open, count = tlen) - if err != None: - return None, err - - attrib_token, err = tokens.get_as(parsed_tokens, 1, tts.identifier, count = tlen) - if err != None: - return None, err - - _open_token, err = tokens.get_as(parsed_tokens, 2, tts.square_bracket_close, count = tlen) - if err != None: - return None, err - - return collection_results.new([attrib_token.value], 3), None - # MARK: - Module Collection -def collect_module(parsed_tokens, is_submodule = False, prefix_tokens = []): +def collect_module(parsed_tokens, prefix_tokens = []): """Collect a module declaration. Spec: https://clang.llvm.org/docs/Modules.html#module-declaration @@ -55,14 +19,12 @@ def collect_module(parsed_tokens, is_submodule = False, prefix_tokens = []): Args: parsed_tokens: A `list` of tokens. - is_submodule: A `bool` that designates whether the module is a child of another module. prefix_tokens: A `list` of tokens that have already been collected, but not applied. Returns: A `tuple` where the first item is the collection result and the second is an error `struct` as returned from errors.create(). 
""" - explicit = False framework = False attributes = [] members = [] @@ -73,9 +35,7 @@ def collect_module(parsed_tokens, is_submodule = False, prefix_tokens = []): # Process the prefix tokens for token in prefix_tokens: if token.type == tts.reserved and token.value == rws.explicit: - if not is_submodule: - return None, errors.new("The explicit qualifier can only exist on submodules.") - explicit = True + return None, errors.new("The explicit qualifier can only exist on submodules.") elif token.type == tts.reserved and token.value == rws.framework: framework = True @@ -122,7 +82,7 @@ def collect_module(parsed_tokens, is_submodule = False, prefix_tokens = []): break elif tokens.is_a(token, tts.square_bracket_open): - collect_result, err = _collect_attribute(parsed_tokens[idx:]) + collect_result, err = collect_module_attribute(parsed_tokens[idx:]) if err != None: return None, err attributes.extend(collect_result.declarations) @@ -139,7 +99,7 @@ def collect_module(parsed_tokens, is_submodule = False, prefix_tokens = []): # Create the declaration decl = declarations.module( module_id = module_id_token.value, - explicit = explicit, + explicit = False, framework = framework, attributes = attributes, members = members, diff --git a/spm/private/modulemap_parser/collect_module_attribute.bzl b/spm/private/modulemap_parser/collect_module_attribute.bzl new file mode 100644 index 0000000..3b50d17 --- /dev/null +++ b/spm/private/modulemap_parser/collect_module_attribute.bzl @@ -0,0 +1,41 @@ +"""Defintion for collect_module_attribute.""" + +load(":collection_results.bzl", "collection_results") +load(":tokens.bzl", "tokens", tts = "token_types") + +# MARK: - Attribute Collection + +def collect_module_attribute(parsed_tokens): + """Collect a module attribute. + + Spec: https://clang.llvm.org/docs/Modules.html#attributes + + Syntax: + attributes: + attribute attributesopt + + attribute: + '[' identifier ']' + + Args: + parsed_tokens: A `list` of tokens. 
+ + Returns: + A `tuple` where the first item is the collection result and the second is an + error `struct` as returned from errors.create(). + """ + tlen = len(parsed_tokens) + + _open_token, err = tokens.get_as(parsed_tokens, 0, tts.square_bracket_open, count = tlen) + if err != None: + return None, err + + attrib_token, err = tokens.get_as(parsed_tokens, 1, tts.identifier, count = tlen) + if err != None: + return None, err + + _open_token, err = tokens.get_as(parsed_tokens, 2, tts.square_bracket_close, count = tlen) + if err != None: + return None, err + + return collection_results.new([attrib_token.value], 3), None diff --git a/spm/private/modulemap_parser/collect_module_members.bzl b/spm/private/modulemap_parser/collect_module_members.bzl index aae9076..14b0486 100644 --- a/spm/private/modulemap_parser/collect_module_members.bzl +++ b/spm/private/modulemap_parser/collect_module_members.bzl @@ -4,8 +4,10 @@ load("@bazel_skylib//lib:sets.bzl", "sets") load(":collect_export_declaration.bzl", "collect_export_declaration") load(":collect_header_declaration.bzl", "collect_header_declaration") load(":collect_link_declaration.bzl", "collect_link_declaration") +load(":collect_submodule.bzl", "collect_submodule") load(":collect_umbrella_dir_declaration.bzl", "collect_umbrella_dir_declaration") load(":collection_results.bzl", "collection_results") +load(":declarations.bzl", "declaration_types") load(":errors.bzl", "errors") load(":tokens.bzl", "tokens", rws = "reserved_words", tts = "token_types") @@ -37,6 +39,7 @@ def collect_module_members(parsed_tokens): skip_ahead = 0 collect_result = None prefix_tokens = [] + umbrella_decl = None for idx in range(consumed_count, tlen - consumed_count): consumed_count += 1 if skip_ahead > 0: @@ -63,7 +66,7 @@ def collect_module_members(parsed_tokens): elif tokens.is_a(token, tts.newline): if len(prefix_tokens) > 0: return None, errors.new( - "Unexpected prefix tokens found before end of line. 
tokens: %s" % (prefix_tokens), + "Unexpected prefix tokens found encountering newline before end of line. tokens: %s" % (prefix_tokens), ) elif tokens.is_a(token, tts.reserved, rws.umbrella): @@ -80,13 +83,19 @@ def collect_module_members(parsed_tokens): else: if len(prefix_tokens) > 0: return None, errors.new( - "Unexpected prefix tokens found before end of line. tokens: %" % + "Unexpected prefix tokens found encountering umbrella dir before end of line. tokens: %s" % (prefix_tokens), ) collect_result, err = collect_umbrella_dir_declaration(parsed_tokens[idx:]) + if err == None and len(collect_result.declarations) == 1: + umbrella_decl = collect_result.declarations[0] elif tokens.is_a(token, tts.reserved, rws.header): collect_result, err = collect_header_declaration(parsed_tokens[idx:], prefix_tokens) + if (err == None and + len(collect_result.declarations) == 1 and + collect_result.declarations[0].decl_type == declaration_types.umbrella_header): + umbrella_decl = collect_result.declarations[0] prefix_tokens = [] elif tokens.is_a(token, tts.reserved, rws.export): @@ -95,6 +104,10 @@ def collect_module_members(parsed_tokens): elif tokens.is_a(token, tts.reserved, rws.link): collect_result, err = collect_link_declaration(parsed_tokens[idx:]) + elif tokens.is_a(token, tts.reserved, rws.module): + collect_result, err = collect_submodule(parsed_tokens[idx:], prefix_tokens = prefix_tokens, umbrella_decl = umbrella_decl) + prefix_tokens = [] + elif tokens.is_a(token, tts.reserved) and sets.contains(_unsupported_module_members, token.value): return None, errors.new("Unsupported module member token. 
token: %s" % (token)) diff --git a/spm/private/modulemap_parser/collect_submodule.bzl b/spm/private/modulemap_parser/collect_submodule.bzl new file mode 100644 index 0000000..a7c4549 --- /dev/null +++ b/spm/private/modulemap_parser/collect_submodule.bzl @@ -0,0 +1,121 @@ +"""Definition for collect_submodule.""" + +load(":collect_module_attribute.bzl", "collect_module_attribute") +load(":collect_submodule_members.bzl", "collect_submodule_members") +load(":collection_results.bzl", "collection_results") +load(":declarations.bzl", "declarations") +load(":errors.bzl", "errors") +load(":tokens.bzl", "tokens", ops = "operators", rws = "reserved_words", tts = "token_types") + +# MARK: - Module Collection + +def collect_submodule(parsed_tokens, prefix_tokens = [], umbrella_decl = None): + """Collect a submodule declaration. + + Spec: https://clang.llvm.org/docs/Modules.html#submodule-declaration + + Syntax: + explicitopt frameworkopt module module-id attributesopt '{' module-member* '}' + + Args: + parsed_tokens: A `list` of tokens. + prefix_tokens: A `list` of tokens that have already been collected, but not applied. + umbrella_decl: A `declaration` of type `umbrella`, in the case of an inferred declaration. + + Returns: + A `tuple` where the first item is the collection result and the second is an + error `struct` as returned from errors.create(). + """ + explicit = False + framework = False + attributes = [] + members = [] + consumed_count = 0 + + tlen = len(parsed_tokens) + + # Process the prefix tokens + for token in prefix_tokens: + if token.type == tts.reserved and token.value == rws.explicit: + explicit = True + + elif token.type == tts.reserved and token.value == rws.framework: + framework = True + + else: + return None, errors.new( + "Unexpected prefix token collecting module declaration. 
token: %s" % (token), + ) + + _module_token, err = tokens.get_as(parsed_tokens, 0, tts.reserved, rws.module, count = tlen) + if err != None: + return None, err + consumed_count += 1 + + module_id_token, err = tokens.get_as(parsed_tokens, 1, tts.identifier, count = tlen) + if err == None: + module_id = module_id_token.value + else: + if umbrella_decl == None: + return None, err + + # A submodule without its next token as an identifier may be an inferred submodule. + # Such a submodule gets its members from the umbrella declaration provided in the + # parent module's members. + _, i_err = tokens.get_as(parsed_tokens, 1, tts.operator, ops.asterisk, count = tlen) + if i_err != None: + return None, i_err + module_id = umbrella_decl.path + + consumed_count += 1 + + # Collect the attributes and module members + skip_ahead = 0 + collect_result = None + for idx in range(consumed_count, tlen - consumed_count): + consumed_count += 1 + if skip_ahead > 0: + skip_ahead -= 1 + continue + + collect_result = None + err = None + + # Get next token + token, err = tokens.get(parsed_tokens, idx, count = tlen) + if err != None: + return None, err + + # Process the token + if tokens.is_a(token, tts.curly_bracket_open): + collect_result, err = collect_submodule_members(parsed_tokens[idx:]) + if err != None: + return None, err + members.extend(collect_result.declarations) + consumed_count += collect_result.count - 1 + break + + elif tokens.is_a(token, tts.square_bracket_open): + collect_result, err = collect_module_attribute(parsed_tokens[idx:]) + if err != None: + return None, err + attributes.extend(collect_result.declarations) + + else: + return None, errors.new( + "Unexpected token collecting attributes and module members. token: %s" % (token), + ) + + # Handle index advancement. 
+ if collect_result: + skip_ahead = collect_result.count - 1 + + # Create the declaration + decl = declarations.module( + module_id = module_id, + explicit = explicit, + framework = framework, + attributes = attributes, + members = members, + ) + return collection_results.new([decl], consumed_count), None diff --git a/spm/private/modulemap_parser/collect_submodule_members.bzl b/spm/private/modulemap_parser/collect_submodule_members.bzl new file mode 100644 index 0000000..716c1d9 --- /dev/null +++ b/spm/private/modulemap_parser/collect_submodule_members.bzl @@ -0,0 +1,112 @@ +"""Definition for collect_submodule_members.""" + +load("@bazel_skylib//lib:sets.bzl", "sets") +load(":collect_export_declaration.bzl", "collect_export_declaration") +load(":collect_header_declaration.bzl", "collect_header_declaration") +load(":collect_link_declaration.bzl", "collect_link_declaration") +load(":collect_umbrella_dir_declaration.bzl", "collect_umbrella_dir_declaration") +load(":collection_results.bzl", "collection_results") +load(":errors.bzl", "errors") +load(":tokens.bzl", "tokens", rws = "reserved_words", tts = "token_types") + +_unsupported_module_members = sets.make([ + rws.config_macros, + rws.conflict, + rws.requires, + rws.use, +]) + +def collect_submodule_members(parsed_tokens): + """Collect submodule members from the parsed tokens. + + Args: + parsed_tokens: A `list` of tokens. + + Returns: + Collection results. 
+ """ + tlen = len(parsed_tokens) + members = [] + consumed_count = 0 + + _open_members_token, err = tokens.get_as(parsed_tokens, 0, tts.curly_bracket_open, count = tlen) + if err != None: + return None, err + consumed_count += 1 + + skip_ahead = 0 + collect_result = None + prefix_tokens = [] + for idx in range(consumed_count, tlen - consumed_count): + consumed_count += 1 + if skip_ahead > 0: + skip_ahead -= 1 + continue + + collect_result = None + + # Get next token + token, err = tokens.get(parsed_tokens, idx, count = tlen) + if err != None: + return None, err + + # Process token + + if tokens.is_a(token, tts.curly_bracket_close): + if len(prefix_tokens) > 0: + return None, errors.new( + "Unexpected prefix tokens found at end of module member block. tokens: %s" % + (prefix_tokens), + ) + break + + elif tokens.is_a(token, tts.newline): + if len(prefix_tokens) > 0: + return None, errors.new( + "Unexpected prefix tokens found before end of SUBMODULE line. tokens: %s" % (prefix_tokens), + ) + + elif tokens.is_a(token, tts.reserved, rws.umbrella): + # The umbrella word can appear for umbrella headers or umbrella directories. + # If the next token is header, then it is an umbrella header. Otherwise, it is an umbrella + # directory. + next_idx = idx + 1 + next_token, err = tokens.get(parsed_tokens, next_idx, count = tlen) + if err != None: + return None, err + if tokens.is_a(next_token, tts.reserved, rws.header): + prefix_tokens.append(token) + + else: + if len(prefix_tokens) > 0: + return None, errors.new( + "Unexpected prefix tokens found before end of line. 
tokens: %s" % + (prefix_tokens), + ) + collect_result, err = collect_umbrella_dir_declaration(parsed_tokens[idx:]) + + elif tokens.is_a(token, tts.reserved, rws.header): + collect_result, err = collect_header_declaration(parsed_tokens[idx:], prefix_tokens) + prefix_tokens = [] + + elif tokens.is_a(token, tts.reserved, rws.export): + collect_result, err = collect_export_declaration(parsed_tokens[idx:]) + + elif tokens.is_a(token, tts.reserved, rws.link): + collect_result, err = collect_link_declaration(parsed_tokens[idx:]) + + elif tokens.is_a(token, tts.reserved) and sets.contains(_unsupported_module_members, token.value): + return None, errors.new("Unsupported module member token. token: %s" % (token)) + + else: + # Store any unrecognized tokens as prefix tokens to be processed later + prefix_tokens.append(token) + + # Handle index advancement. + if err != None: + return None, err + if collect_result: + members.extend(collect_result.declarations) + skip_ahead = collect_result.count - 1 + + return collection_results.new(members, consumed_count), None diff --git a/test/modulemap_parser/collect_module_tests.bzl b/test/modulemap_parser/collect_module_tests.bzl index 4b21e2b..9c7b44a 100644 --- a/test/modulemap_parser/collect_module_tests.bzl +++ b/test/modulemap_parser/collect_module_tests.bzl @@ -126,6 +126,123 @@ def _collect_module_test(ctx): ], ) + do_parse_test( + env, + "module with submodule", + text = """ + module MyModule { + module A { + header "A.h" + } + } + """, + expected = [ + declarations.module( + module_id = "MyModule", + framework = False, + explicit = False, + attributes = [], + members = [ + declarations.module(module_id = "A", members = [ + struct(attribs = None, decl_type = "single_header", path = "A.h", private = False, textual = False), + ]), + ], + ), + ], + ) + + do_parse_test( + env, + "module with explicit submodule", + text = """ + module MyModule { + umbrella "MyLib" + explicit module * { + export * + } + } + """, + expected = [ + 
declarations.module( + module_id = "MyModule", + framework = False, + explicit = False, + attributes = [], + members = [ + declarations.umbrella_directory("MyLib"), + declarations.module(module_id = "MyLib", explicit = True, members = [ + struct(decl_type = "export", identifiers = [], wildcard = True), + ]), + ], + ), + ], + ) + + do_parse_test( + env, + "module with explicit submodule and umbrella header", + text = """ + module MyModule { + umbrella header "header.h" + explicit module * { + export * + } + } + """, + expected = [ + declarations.module( + module_id = "MyModule", + framework = False, + explicit = False, + attributes = [], + members = [ + declarations.umbrella_header(path = "header.h"), + declarations.module(module_id = "header.h", explicit = True, members = [ + struct(decl_type = "export", identifiers = [], wildcard = True), + ]), + ], + ), + ], + ) + + do_parse_test( + env, + "standard submodule", + text = """ + module MyModule { + header "header.h" + module A { + header "A.h" + export * + } + } + """, + expected = [ + declarations.module( + module_id = "MyModule", + framework = False, + explicit = False, + attributes = [], + members = [ + struct(attribs = None, decl_type = "single_header", path = "header.h", private = False, textual = False), + declarations.module(module_id = "A", members = [ + struct(attribs = None, decl_type = "single_header", path = "A.h", private = False, textual = False), + struct(decl_type = "export", identifiers = [], wildcard = True), + ]), + ], + ), + ], + ) + + do_failing_parse_test( + env, + "explicit module", + text = """ + explicit module MyModule {} + """, + expected_err = "The explicit qualifier can only exist on submodules.", + ) + do_failing_parse_test( env, "module with unexpected qualifier",