feat: CloudFormation docs in schema (#2816)

aws · Jan 26, 2023 · 9c4f0da · 9c4f0da
1 parent 243740d
commit 9c4f0da
Show file tree

Hide file tree

Showing 6 changed files with 235,330 additions and 155,646 deletions.
diff --git a/Makefile b/Makefile
@@ -47,10 +47,22 @@ prepare-companion-stack:
 
 update-schema-data:
 	mkdir -p .tmp
+
+	# Update and parse SAM docs
 	rm -rf .tmp/aws-sam-developer-guide
-	git clone https://github.com/awsdocs/aws-sam-developer-guide.git .tmp/aws-sam-developer-guide
+	git clone --depth 1 https://github.com/awsdocs/aws-sam-developer-guide.git .tmp/aws-sam-developer-guide
 	bin/parse_docs.py .tmp/aws-sam-developer-guide/doc_source > samtranslator/schema/docs.json
-	curl -o samtranslator/schema/cloudformation.schema.json https://raw.githubusercontent.com/awslabs/goformation/master/schema/cloudformation.schema.json
+
+	# Update and parse CloudFormation docs (HTTPS clone, like the SAM docs above, so no SSH key is needed)
+	rm -rf .tmp/aws-cloudformation-user-guide
+	git clone --depth 1 https://github.com/awsdocs/aws-cloudformation-user-guide.git .tmp/aws-cloudformation-user-guide
+	bin/parse_docs.py --cfn .tmp/aws-cloudformation-user-guide/doc_source > samtranslator/schema/cloudformation-docs.json
+
+	# Update CloudFormation schema
+	curl -o .tmp/cloudformation.schema.json https://raw.githubusercontent.com/awslabs/goformation/master/schema/cloudformation.schema.json
+
+	# Add CloudFormation docs to CloudFormation schema
+	python bin/add_docs_cfn_schema.py --schema .tmp/cloudformation.schema.json --docs samtranslator/schema/cloudformation-docs.json > samtranslator/schema/cloudformation.schema.json
 
 schema:
 	python samtranslator/schema/schema.py --sam-schema samtranslator/schema/sam.schema.json --cfn-schema samtranslator/schema/cloudformation.schema.json --unified-schema samtranslator/schema/schema.json

diff --git a/bin/add_docs_cfn_schema.py b/bin/add_docs_cfn_schema.py
@@ -0,0 +1,88 @@
+"""
+Script to augment CloudFormation JSON schema with Markdown documentation.
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Iterator
+
+
+def log(s: str) -> None:
+    # Diagnostics go to stderr; stdout carries only the JSON schema printed by main().
+    print(s, file=sys.stderr)
+
+
+def guess_slugs(s: str) -> Iterator[str]:
+    """
+    Guesses likely documentation page "slugs" from a schema definition name.
+
+    Used to map a definition in the JSON schema to the corresponding documentation
+    blurb. There are probably more sensible ways to do this.
+
+    For example, the definition for AWS::EC2::CapacityReservation.TagSpecification
+    corresponds to aws-properties-ec2-capacityreservation-tagspecification in the
+    documentation: https://github.com/awsdocs/aws-cloudformation-user-guide/blob/main/doc_source/aws-properties-ec2-capacityreservation-tagspecification.md
+
+    Typically a definition with only the resource name corresponds to aws-resource-*,
+    but there are exceptions such as AWS::S3::Bucket, which starts with aws-properties-*:
+    https://github.com/awsdocs/aws-cloudformation-user-guide/blob/main/doc_source/aws-properties-s3-bucket.md
+    """
+    slug = s.split("::", 1)[1].lower().replace(".", "-").replace("::", "-")
+    yield "aws-properties-" + slug
+    yield "aws-resource-" + slug
+
+
+def main() -> None:
+    """
+    Adds documentation to the properties of a CloudFormation JSON schema.
+
+    Reads the schema (--schema) and the parsed docs (--docs), sets
+    markdownDescription and title on every property that has a docs entry under
+    a guessed slug, and prints the resulting schema as JSON to stdout.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--schema",
+        help="CloudFormation JSON schema",
+        type=Path,
+        required=True,
+    )
+    parser.add_argument(
+        "--docs",
+        help="CloudFormation documentation (as generated by parse_docs.py)",
+        type=Path,
+        required=True,
+    )
+    args = parser.parse_args()
+
+    schema = json.loads(args.schema.read_text())
+    # Docs JSON is expected to hold a top-level "properties" object: slug -> {property name -> Markdown}
+    docs = json.loads(args.docs.read_text())["properties"]
+
+    # Assumes schema is from GoFormation and has consistent structure
+    # TODO: Use a more generic walk
+    # TODO: Use a more robust cross-referencing method than guessing slugs, doesn't always work
+    #       e.g. AWS::EC2::Instance.Ebs corresponds to aws-properties-ec2-blockdev-template
+    for def_name, def_schema in schema["definitions"].items():
+        if not def_name.startswith("AWS::"):
+            log(f"Skipping {def_name}: does not start with AWS::")
+            continue
+        # If e.g. AWS::S3::Bucket, we only look under Properties
+        # TODO: Support resource attributes et al.
+        props = def_schema["properties"] if "." in def_name else def_schema["properties"]["Properties"]["properties"]
+        # Every guessed slug is tried; if more than one is in docs, the later one overwrites the earlier
+        for slug in guess_slugs(def_name):
+            if slug not in docs:
+                log(f"Skipping {def_name}: {slug} not in docs")
+                continue
+            for prop_name, prop_schema in props.items():
+                if prop_name not in docs[slug]:
+                    log(f"Skipping {def_name}: {prop_name} not in {slug} docs")
+                    continue
+                prop_schema["markdownDescription"] = docs[slug][prop_name]
+                # GoFormation schema doesn't include it, so VS Code defaults to something unrelated (e.g. "Resources")
+                prop_schema["title"] = prop_name
+
+    print(
+        json.dumps(
+            schema,
+            indent=2,
+            sort_keys=True,
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bin/parse_docs.py b/bin/parse_docs.py
@@ -17,11 +17,14 @@
 from typing import Iterator, Tuple, Dict
 
 
-def parse(s: str) -> Iterator[Tuple[str, str]]:
-    """Parse an AWS SAM docs page in Markdown format, yielding each property."""
+def parse(s: str, cfn_docs: bool) -> Iterator[Tuple[str, str]]:
+    """Parse an AWS docs page in Markdown format, yielding each property."""
     parts = s.split("\n\n")
     for part in parts:
-        if part.startswith(" `"):
+        # TODO: More robust matching against properties? This might skip or include wrong sections
+        sam_prop = not cfn_docs and part.startswith(" `")
+        cfn_prop = cfn_docs and re.match(r"`\w+`  <a .+", part)
+        if sam_prop or cfn_prop:
             name = part.split("`")[1]
             yield name, part.strip()
 
@@ -33,10 +36,13 @@ def fix_markdown_code_link(s: str) -> str:
 
 
 def remove_first_line(s: str) -> str:
-    return s.split("\n", 1)[1]
+    try:
+        return s.split("\n", 1)[1]
+    except IndexError:
+        return ""
 
 
-def convert_to_full_path(description: str) -> str:
+def convert_to_full_path(description: str, prefix: str) -> str:
     pattern = re.compile("\(([#\.a-zA-Z0-9_-]+)\)")
     matched_content = pattern.findall(description)
 
@@ -45,25 +51,29 @@ def convert_to_full_path(description: str) -> str:
             url = path.split(".")[0] + ".html"
             if "#" in path:
                 url += "#" + path.split("#")[1]
-            description = description.replace(
-                path, f"https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/{url}"
-            )
+            description = description.replace(path, prefix + url)
     return description
 
 
 def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("dir", type=Path)
+    parser.add_argument("--cfn", action="store_true")
     args = parser.parse_args()
 
     props: Dict[str, Dict[str, str]] = {}
     for path in args.dir.glob("*.md"):
-        for name, description in parse(path.read_text()):
+        for name, description in parse(path.read_text(), args.cfn):
             if path.stem not in props:
                 props[path.stem] = {}
             description = remove_first_line(description)  # Remove property name; already in the schema title
             description = fix_markdown_code_link(description)
-            description = convert_to_full_path(description)
+            prefix = (
+                "https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/"
+                if args.cfn
+                else "https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/"
+            )
+            description = convert_to_full_path(description, prefix)
             props[path.stem][name] = description
 
     print(