Skip to content

Commit

Permalink
feat: CloudFormation docs in schema (#2816)
Browse files Browse the repository at this point in the history
  • Loading branch information
hoffa authored Jan 26, 2023
1 parent 243740d commit 9c4f0da
Show file tree
Hide file tree
Showing 6 changed files with 235,330 additions and 155,646 deletions.
16 changes: 14 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,22 @@ prepare-companion-stack:

update-schema-data:
mkdir -p .tmp

# Update and parse SAM docs
rm -rf .tmp/aws-sam-developer-guide
git clone https://github.com/awsdocs/aws-sam-developer-guide.git .tmp/aws-sam-developer-guide
git clone --depth 1 https://github.com/awsdocs/aws-sam-developer-guide.git .tmp/aws-sam-developer-guide
bin/parse_docs.py .tmp/aws-sam-developer-guide/doc_source > samtranslator/schema/docs.json
curl -o samtranslator/schema/cloudformation.schema.json https://raw.githubusercontent.com/awslabs/goformation/master/schema/cloudformation.schema.json

# Update and parse CloudFormation docs
rm -rf .tmp/aws-cloudformation-user-guide
git clone --depth 1 git@github.com:awsdocs/aws-cloudformation-user-guide.git .tmp/aws-cloudformation-user-guide
bin/parse_docs.py --cfn .tmp/aws-cloudformation-user-guide/doc_source > samtranslator/schema/cloudformation-docs.json

# Update CloudFormation schema
curl -o .tmp/cloudformation.schema.json https://raw.githubusercontent.com/awslabs/goformation/master/schema/cloudformation.schema.json

# Add CloudFormation docs to CloudFormation schema
python bin/add_docs_cfn_schema.py --schema .tmp/cloudformation.schema.json --docs samtranslator/schema/cloudformation-docs.json > samtranslator/schema/cloudformation.schema.json

schema:
python samtranslator/schema/schema.py --sam-schema samtranslator/schema/sam.schema.json --cfn-schema samtranslator/schema/cloudformation.schema.json --unified-schema samtranslator/schema/schema.json
Expand Down
88 changes: 88 additions & 0 deletions bin/add_docs_cfn_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
Script to augment CloudFormation JSON schema with Markdown documentation.
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Iterator


def log(s: str) -> None:
    """Emit a diagnostic line on standard error, keeping stdout free for the JSON output."""
    stream = sys.stderr
    print(s, file=stream)


def guess_slugs(s: str) -> Iterator[str]:
    """
    Guess likely documentation page "slugs" from a schema definition name.

    Used to map a definition in the JSON schema to the corresponding documentation
    blurb. There are probably more sensible ways to do this.

    The leading namespace (everything up to and including the first "::") is
    dropped, the remainder is lowercased, and every "." and "::" becomes "-".
    Two candidates are yielded, in this order: "aws-properties-<slug>" and then
    "aws-resource-<slug>".

    For example, the definition for AWS::EC2::CapacityReservation.TagSpecification
    corresponds to aws-properties-ec2-capacityreservation-tagspecification in the
    documentation: https://github.com/awsdocs/aws-cloudformation-user-guide/blob/main/doc_source/aws-properties-ec2-capacityreservation-tagspecification.md

    Typically a definition with only the resource name corresponds to aws-resource-*,
    but there are exceptions such as AWS::S3::Bucket, which starts with aws-properties-*:
    https://github.com/awsdocs/aws-cloudformation-user-guide/blob/main/doc_source/aws-properties-s3-bucket.md

    If ``s`` contains no "::" there is nothing to derive a slug from, so nothing
    is yielded.
    """
    _, separator, remainder = s.partition("::")
    if not separator:
        # No namespace separator: not a "Namespace::Service::Type" style name.
        return
    slug = remainder.lower().replace(".", "-").replace("::", "-")
    yield "aws-properties-" + slug
    yield "aws-resource-" + slug


def main() -> None:
    """
    Add Markdown documentation to a CloudFormation JSON schema and print the result.

    Command-line arguments:
      --schema  Path to the CloudFormation JSON schema.
      --docs    Path to the documentation JSON (as generated by parse_docs.py);
                its top-level "properties" object maps a documentation page slug
                to a mapping of property name -> Markdown description.

    For every schema definition whose name starts with "AWS::", each property that
    has a matching entry in the docs gets a "markdownDescription" (the Markdown
    blurb) and a "title" (the property name). The augmented schema is written to
    stdout as indented JSON with sorted keys; everything that is skipped is
    reported on stderr via log().
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--schema",
        help="CloudFormation JSON schema",
        type=Path,
        required=True,
    )
    parser.add_argument(
        "--docs",
        help="CloudFormation documentation (as generated by parse_docs.py)",
        type=Path,
        required=True,
    )
    args = parser.parse_args()

    # Decode explicitly as UTF-8: without an encoding argument read_text() uses the
    # platform locale encoding, which can mis-decode or reject non-ASCII JSON.
    schema = json.loads(args.schema.read_text(encoding="utf-8"))
    docs = json.loads(args.docs.read_text(encoding="utf-8"))["properties"]

    # Assumes schema is from GoFormation and has consistent structure
    # TODO: Use a more generic walk
    # TODO: Use a more robust cross-referencing method than guessing slugs, doesn't always work
    # e.g. AWS::EC2::Instance.Ebs corresponds to aws-properties-ec2-blockdev-template
    for def_name, def_schema in schema["definitions"].items():
        if not def_name.startswith("AWS::"):
            log(f"Skipping {def_name}: does not start with AWS::")
            continue
        # A "." marks a property type (e.g. AWS::S3::Bucket.Tag), whose properties
        # sit directly on the definition. A bare resource such as AWS::S3::Bucket
        # nests them one level down, so we only look under Properties.
        # TODO: Support resource attributes et al.
        if "." in def_name:
            props = def_schema["properties"]
        else:
            props = def_schema["properties"]["Properties"]["properties"]
        # Every guessed slug is tried; if more than one is present in the docs,
        # a later slug's description overwrites an earlier one.
        for slug in guess_slugs(def_name):
            if slug not in docs:
                log(f"Skipping {def_name}: {slug} not in docs")
                continue
            slug_docs = docs[slug]
            for prop_name, prop_schema in props.items():
                if prop_name not in slug_docs:
                    log(f"Skipping {def_name}: {prop_name} not in {slug} docs")
                    continue
                prop_schema["markdownDescription"] = slug_docs[prop_name]
                # GoFormation schema doesn't include it, so VS Code defaults to something unrelated (e.g. "Resources")
                prop_schema["title"] = prop_name

    print(
        json.dumps(
            schema,
            indent=2,
            sort_keys=True,
        )
    )


# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
30 changes: 20 additions & 10 deletions bin/parse_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@
from typing import Iterator, Tuple, Dict


def parse(s: str) -> Iterator[Tuple[str, str]]:
"""Parse an AWS SAM docs page in Markdown format, yielding each property."""
def parse(s: str, cfn_docs: bool) -> Iterator[Tuple[str, str]]:
"""Parse an AWS docs page in Markdown format, yielding each property."""
parts = s.split("\n\n")
for part in parts:
if part.startswith(" `"):
# TODO: More robust matching against properties? This might skip or include wrong sections
sam_prop = not cfn_docs and part.startswith(" `")
cfn_prop = cfn_docs and re.match(r"`\w+` <a .+", part)
if sam_prop or cfn_prop:
name = part.split("`")[1]
yield name, part.strip()

Expand All @@ -33,10 +36,13 @@ def fix_markdown_code_link(s: str) -> str:


def remove_first_line(s: str) -> str:
return s.split("\n", 1)[1]
try:
return s.split("\n", 1)[1]
except IndexError:
return ""


def convert_to_full_path(description: str) -> str:
def convert_to_full_path(description: str, prefix: str) -> str:
pattern = re.compile("\(([#\.a-zA-Z0-9_-]+)\)")
matched_content = pattern.findall(description)

Expand All @@ -45,25 +51,29 @@ def convert_to_full_path(description: str) -> str:
url = path.split(".")[0] + ".html"
if "#" in path:
url += "#" + path.split("#")[1]
description = description.replace(
path, f"https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/{url}"
)
description = description.replace(path, prefix + url)
return description


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("dir", type=Path)
parser.add_argument("--cfn", action="store_true")
args = parser.parse_args()

props: Dict[str, Dict[str, str]] = {}
for path in args.dir.glob("*.md"):
for name, description in parse(path.read_text()):
for name, description in parse(path.read_text(), args.cfn):
if path.stem not in props:
props[path.stem] = {}
description = remove_first_line(description) # Remove property name; already in the schema title
description = fix_markdown_code_link(description)
description = convert_to_full_path(description)
prefix = (
"https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/"
if args.cfn
else "https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/"
)
description = convert_to_full_path(description, prefix)
props[path.stem][name] = description

print(
Expand Down
Loading

0 comments on commit 9c4f0da

Please sign in to comment.