#!/usr/bin/python
"""Remove Markdown files that break the Docusaurus build until it succeeds.

This has to be done stage by stage because some errors only occur after
other ones are fixed: every round runs the build, deletes the files named in
the build logs, records them in docs/RemovedData.md and builds again.
"""
import os
import re
import sys

docusaurusPath = '/docusaurus/' + os.environ['WEBSITE_NAME'] + '/'
# `stdout` receives the build's standard output and `stderr` its standard
# error (see the redirections in runSaurus).
stdout = docusaurusPath + 'errorlog.txt'
stderr = docusaurusPath + 'linkslog.txt'
removedDataPath = docusaurusPath + 'docs/RemovedData.md'
brokenLinksPath = docusaurusPath + 'docs/BrokenLinks.md'


def getProblematicMds(log):
    """Map every Markdown file blamed in *log* to the error text naming it.

    The log is cut into chunks that each start at "Error" or "SyntaxError"
    and every chunk is searched for a file path. If the log contains no such
    chunk, the whole log is searched instead.

    Returns a dict {file path: error chunk}.
    """
    problematicMds = {}

    def extractData(error):
        # Record the file named in one error chunk, if there is one.
        queryErrorPath = "REGEXQUERYWHICHDISCLOSESCOMPANYSENSITIVEDATA"
        querySyntaxErrorPath = r'SyntaxError:.*\.md'
        resultErrorPath = re.search(queryErrorPath, error)
        if resultErrorPath is not None:
            # Drop the first 4 matched characters and prefix the site
            # directory to get the file path.
            path = docusaurusPath + resultErrorPath.group()[4:]
            problematicMds[path] = error
            return
        resultSyntaxErrorPath = re.search(querySyntaxErrorPath, error)
        if resultSyntaxErrorPath is not None:
            # [13:] skips "SyntaxError:" plus the character following it.
            problematicMds[resultSyntaxErrorPath.group()[13:]] = error

    queryErr = "(Error|SyntaxError)(.*?)((?=Error)|(?=SyntaxError))"
    matches = re.findall(queryErr, log, flags=re.S | re.M)
    errors = [match[0] + match[1] for match in matches]
    if not errors:
        extractData(log)
        return problematicMds

    for error in errors:
        extractData(error)
    # queryErr only closes a chunk when another error follows, so it never
    # catches the last error: treat everything after the last caught chunk
    # as that error's message.
    lastCaught = errors[-1]
    extractData(log[log.rindex(lastCaught) + len(lastCaught):])
    return problematicMds


def deleteProblematicMds(foundMds):
    """Delete the given Markdown files from disk.

    Entries that do not start with the site directory get it prefixed, and
    entries without a '.md' suffix get one appended, before deletion.

    Returns the number of files that were actually deleted.
    """
    # Same value as docusaurusPath without the trailing slash; entries that
    # need the prefix are expected to bring their own leading slash.
    siteRoot = docusaurusPath.rstrip('/')
    deleted = 0
    for md in foundMds:
        if not md.startswith(docusaurusPath):
            md = siteRoot + md
        if not md.endswith('.md'):
            md += '.md'
        if os.path.exists(md):
            os.remove(md)
            print('deleted md: ' + md + ' ')
            deleted += 1
        else:
            print('cannot delet md: ' + md + ' ')
    return deleted


def createAppendMdOfRemoved(foundMds, header):
    """Append a section listing the removed files to docs/RemovedData.md.

    foundMds is either a dict {path: error message}, written with the error
    message in a fenced block, or an iterable of paths, written as a bullet
    list. The file gets a title line when it is created by this call.
    """
    isNewFile = not os.path.exists(removedDataPath)
    with open(removedDataPath, 'w' if isNewFile else 'a') as f:
        if isNewFile:
            f.write('# Removed files/ directories \n')
        f.write(header + '\n')
        if isinstance(foundMds, dict):
            for path, errormsg in foundMds.items():
                # NOTE(review): the text this script was recovered from had
                # line breaks at the start and near the end of this literal,
                # possibly where markup (e.g. collapsible HTML tags) was
                # stripped - confirm against the original file.
                f.write('\n' + path + '\n\n```\n' + errormsg + '\n```\n\n'
                        + '\n\n')
        else:
            for md in foundMds:
                # Keep only the part from the last '/' on.
                f.write('* ' + md[md.rfind('/'):] + ' \n')


def createMdOfBrokenLinks(log):
    """Write docs/BrokenLinks.md as a table of the broken links in *log*.

    Nothing is written when the log contains no broken-link warning.
    Warnings from which no file or link can be extracted are skipped.
    """
    queryWarningMessage = r'warn(.*?)in(.*?)\.md(.*?)\.md'
    queryBrokenLink = r':.*?"'
    queryContainedFile = r"/docusaurus/.*'"
    brokenLinks = re.findall(queryWarningMessage, log)
    if not brokenLinks:
        return
    with open(brokenLinksPath, "w") as f:
        f.write("## Broken links\n|File path|Broken link|\n|--|--|\n")
        for link in brokenLinks:
            # The queries run on the printed form of the tuple of captured
            # groups, which is what the slice offsets below refer to.
            warning = str(link)
            fileMatch = re.search(queryContainedFile, warning)
            linkMatch = re.search(queryBrokenLink, warning)
            if fileMatch is None or linkMatch is None:
                print('skipping unparsable broken link warning: ' + warning)
                continue
            # NOTE(review): 40 presumably is the length of a fixed path
            # prefix in front of the interesting part - confirm.
            containedFile = fileMatch.group()[40:-1]
            brokenLink = linkMatch.group()[3:-1]
            f.write('|' + containedFile + '|' + brokenLink + '|\n')


def _abortWithoutProgress():
    # Rebuilding after a round that deleted nothing would only repeat the
    # same failure, so stop with a failure status instead.
    print('no file could be deleted, aborting instead of rebuilding again')
    sys.exit(1)


def mdFormatProblem(log):
    """Delete the files blamed for format errors in *log*, then rebuild."""
    # TODO: dont always write the full log, filter it for the specific files
    # that have problems
    print('Problems in MD files.. deleting')
    problematicMds = getProblematicMds(log)
    deleted = deleteProblematicMds(problematicMds.keys())
    createAppendMdOfRemoved(problematicMds, '## Markdown format problems')
    if not deleted:
        _abortWithoutProgress()
    print('starting build process again..')
    runSaurus()


def removeFilesByQuery(log, msg, query):
    """Delete every file captured by regex *query* in *log*, then rebuild.

    msg describes the kind of problem; it is printed and used as the section
    heading in docs/RemovedData.md.
    """
    print('Having ' + msg)
    problematicMds = re.findall(query, log, flags=re.S | re.M)
    deleted = deleteProblematicMds(problematicMds)
    createAppendMdOfRemoved(problematicMds, '## ' + msg)
    if not deleted:
        _abortWithoutProgress()
    print('starting build process again')
    runSaurus()


def runSaurus():
    """Run the build once and react to its outcome.

    The build output is redirected into the two log files, the broken-links
    report is refreshed, and on failure the offending files are removed by
    one of the handlers, which then call this function again.
    """
    result = os.system('yarn run build 2> ' + stderr + ' > ' + stdout)
    with open(stdout, "r") as logFile:
        outlog = logFile.read()
    with open(stderr, "r") as logFile:
        errlog = logFile.read()
    createMdOfBrokenLinks(errlog)
    print('Return value from building: ' + str(result))

    # error 2 = manual abortion, useful for local debugging to intercept an
    # infinite loop
    # NOTE(review): on Unix os.system returns a wait status, where 2 would
    # mean "terminated by signal 2" - confirm on the target platform.
    if result == 2:
        sys.exit(0)
    if result == 0:
        print('Successful build!')
        return

    if 'Client: Compiled successfully' in outlog:
        if 'could not render static page with path' in errlog:
            # case: all mds SEEM to be formatted correctly according to
            # docusaurus, still not running through bc of 'Docusaurus
            # Node/SSR could not render static page with path ...' errors
            removeFilesByQuery(
                errlog,
                'forbidden file content problems',
                r'could not render static page with path "(.*?)(?=")')
        else:
            # still throws an error but compiles and produces the static
            # html files successfully. is smth missing?
            print('Successful build!')
    elif 'Error while parsing Markdown frontmatter' in errlog:
        # another kind of error which is caused by some yaml translation
        removeFilesByQuery(
            errlog,
            'errors while parsing Markdown frontmatter',
            r'doc metadatas for doc at path "(.*?)(?=" in version)')
    else:
        # some mds have an incorrect format
        mdFormatProblem(outlog)


if __name__ == "__main__":
    runSaurus()