Skip to content

Commit

Permalink
Merge pull request #557 from Fortran-FOSS-Programmers/refactor-generi…
Browse files Browse the repository at this point in the history
…c-source

Specify `extra_filetypes` as a dict in toml settings files
  • Loading branch information
ZedThree authored Aug 17, 2023
2 parents 6ecad90 + ca40b12 commit d7d5056
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 116 deletions.
11 changes: 0 additions & 11 deletions ford/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,17 +320,6 @@ def parse_arguments(
proj_data.normalise_paths(directory)

proj_data.creation_date = datetime.now().strftime(proj_data.creation_date)
# Parse file extensions and comment characters for extra filetypes
extdict = {}
for ext in proj_data.extra_filetypes:
sp = ext.split()
if len(sp) < 2:
continue
if len(sp) == 2:
extdict[sp[0]] = sp[1] # (comment_char) only
else:
extdict[sp[0]] = (sp[1], sp[2]) # (comment_char and lexer_str)
proj_data.extra_filetypes = extdict

# Make sure no src_dir is contained within output_dir
for srcdir in proj_data.src_dir:
Expand Down
47 changes: 45 additions & 2 deletions ford/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,27 @@ def convert_to_bool(name: str, option: List[str]) -> bool:
)


@dataclass
class ExtraFileType:
    """Settings describing one additional (non-Fortran) file type.

    Attributes:
        extension: File extension this entry applies to; used as the key
            when a collection of file types is stored as a dict.
        comment: String that introduces a comment in files of this type.
        lexer: Optional name of the lexer to use for syntax highlighting;
            ``None`` when no lexer was specified.
    """

    extension: str
    comment: str
    lexer: Optional[str] = None

    @classmethod
    def from_string(cls, string: str):
        """Create an instance from an ``"extension comment [lexer]"`` string.

        The string is split on whitespace and must yield either two fields
        (extension and comment) or three (extension, comment and lexer).

        Raises:
            ValueError: if ``string`` does not contain two or three
                whitespace-separated fields.
        """
        parts = string.split()
        # Inclusive range check: anything other than two or three fields is
        # malformed. (A chained ``3 < n < 2`` can never be true, which would
        # let bad input through or fail later with an IndexError.)
        if not 2 <= len(parts) <= 3:
            raise ValueError(
                f"Unexpected format for 'extra_filetype': expected 'extension comment [lexer]', got {string!r}"
            )

        # The fields are positional and in declaration order, so two parts
        # leave ``lexer`` at its default of ``None``.
        return cls(*parts)


@dataclass
class ProjectSettings:
alias: Dict[str, str] = field(default_factory=dict)
Expand All @@ -99,7 +120,7 @@ class ProjectSettings:
)
external: Dict[str, str] = field(default_factory=dict)
externalize: bool = False
extra_filetypes: list = field(default_factory=list)
extra_filetypes: Dict[str, ExtraFileType] = field(default_factory=dict)
extra_mods: list = field(default_factory=list)
extra_vartypes: list = field(default_factory=list)
facebook: Optional[str] = None
Expand Down Expand Up @@ -204,6 +225,19 @@ def __post_init__(self):
f"{first} ('{first_mark}') and {second} ('{second_mark}') are the same"
)

if isinstance(self.extra_filetypes, list):
try:
self.extra_filetypes = {
filetype.extension: filetype for filetype in self.extra_filetypes
}
except AttributeError:
file_types = [
ExtraFileType(**file_type) for file_type in self.extra_filetypes
]
self.extra_filetypes = {
file_type.extension: file_type for file_type in file_types
}

@classmethod
def from_markdown_metadata(cls, meta: Dict[str, Any]):
return cls(**convert_types_from_metapreprocessor(cls, meta))
Expand Down Expand Up @@ -311,7 +345,16 @@ def convert_types_from_metapreprocessor(cls: Type, settings: Dict[str, Any]):
if isinstance(value, str):
value = [value]

settings[key] = _parse_to_dict(value, name=key)
# Get rid of any empty strings
value = [v for v in value if v]

if get_args(default_type) == (str, ExtraFileType):
file_types = [ExtraFileType.from_string(string) for string in value]
settings[key] = {
file_type.extension: file_type for file_type in file_types
}
else:
settings[key] = _parse_to_dict(value, name=key)

for key in keys_to_drop:
settings.pop(key)
Expand Down
219 changes: 116 additions & 103 deletions ford/sourceform.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
)
from itertools import chain
from urllib.parse import quote
import sys

import toposort
from pygments import highlight
Expand All @@ -56,6 +57,7 @@
from ford.intrinsics import INTRINSICS
from ford._markdown import MetaMarkdown
from ford.settings import ProjectSettings, EntitySettings
from ford._typing import PathLike

if TYPE_CHECKING:
from ford.fortran_project import Project
Expand Down Expand Up @@ -2648,139 +2650,150 @@ class GenericSource(FortranBase):
not be analyzed, but documentation can be extracted.
"""

def __init__(self, filename, settings: ProjectSettings):
def __init__(self, filename: PathLike, settings: ProjectSettings):
self.obj = "sourcefile"
self.parobj = None
self.parent = None
self.hierarchy = []
self.settings = settings
self.num_lines = 0
extra_filetypes = settings.extra_filetypes[filename.split(".")[-1]]
comchar = extra_filetypes[0]
if len(extra_filetypes) > 1:
self.lexer_str = extra_filetypes[1]
else:
self.lexer_str = None
docmark = settings.docmark
predocmark = settings.predocmark
docmark_alt = settings.docmark_alt
predocmark_alt = settings.predocmark_alt
self.path = filename.strip()
self.name = os.path.basename(self.path)
self.raw_src = pathlib.Path(self.path).read_text(encoding=settings.encoding)
filename = pathlib.Path(filename)
extra_filetypes = settings.extra_filetypes[str(filename.suffix)[1:]]

self.path = filename
self.name = self.path.name
self.raw_src = self.path.read_text(encoding=settings.encoding)
# TODO: Get line numbers to display properly
if self.lexer_str is None:
if extra_filetypes.lexer is None:
lexer = guess_lexer_for_filename(self.name, self.raw_src)
else:
import pygments.lexers

lexer = getattr(pygments.lexers, self.lexer_str)
lexer = getattr(pygments.lexers, extra_filetypes.lexer)
self.src = highlight(
self.raw_src, lexer, HtmlFormatter(lineanchors="ln", cssclass="hl")
)
com_re = re.compile(
r"^((?!{0}|[\"']).|(\'[^']*')|(\"[^\"]*\"))*({0}.*)$".format(
re.escape(comchar)
)

self.comment = extra_filetypes.comment
comchar = re.escape(extra_filetypes.comment)
self.com_re = re.compile(
r"^((?!{0}|[\"']).|(\'[^']*')|(\"[^\"]*\"))*({0}.*)$".format(comchar)
)
if docmark == docmark_alt != "":
raise Exception("Error: docmark and docmark_alt are the same.")
if docmark == predocmark_alt != "":
raise Exception("Error: docmark and predocmark_alt are the same.")
if docmark_alt == predocmark != "":
raise Exception("Error: docmark_alt and predocmark are the same.")
if predocmark == predocmark_alt != "":
raise Exception("Error: predocmark and predocmark_alt are the same.")
if len(predocmark) != 0:
doc_re = re.compile(
r"^((?!{0}|[\"']).|('[^']*')|(\"[^\"]*\"))*({0}(?:{1}|{2}).*)$".format(
re.escape(comchar), re.escape(docmark), re.escape(predocmark)
)
)
else:
doc_re = re.compile(
r"^((?!{0}|[\"']).|('[^']*')|(\"[^\"]*\"))*({0}{1}.*)$".format(
re.escape(comchar), re.escape(docmark)
)
)
if len(docmark_alt) != 0 and len(predocmark_alt) != 0:
doc_alt_re = re.compile(
r"^((?!{0}|[\"']).|('[^']*')|(\"[^\"]*\"))*({0}(?:{1}|{2}).*)$".format(
re.escape(comchar),
re.escape(docmark_alt),
re.escape(predocmark_alt),
)
)
elif len(docmark_alt) != 0:
doc_alt_re = re.compile(
r"^((?!{0}|[\"']).|('[^']*')|(\"[^\"]*\"))*({0}{1}.*)$".format(
re.escape(comchar), re.escape(docmark_alt)
)

docmark = settings.docmark
predocmark = settings.predocmark
docmark_alt = settings.docmark_alt
predocmark_alt = settings.predocmark_alt

docmark_re_bit = (
f"(?:{re.escape(settings.docmark)}|{re.escape(settings.predocmark)})"
if settings.predocmark
else re.escape(settings.docmark)
)

self.doc_re = re.compile(
r"^((?!{0}|[\"']).|('[^']*')|(\"[^\"]*\"))*({0}{1}.*)$".format(
comchar, docmark_re_bit
)
elif len(predocmark_alt) != 0:
doc_alt_re = re.compile(
)

if _docmark_alt := self._docmark_alt(settings):
self.doc_alt_re: Optional[re.Pattern] = re.compile(
r"^((?!{0}|[\"']).|('[^']*')|(\"[^\"]*\"))*({0}{1}.*)$".format(
re.escape(comchar), re.escape(predocmark_alt)
comchar, _docmark_alt
)
)
else:
doc_alt_re = None
self.doc_alt_re = None

self.doc_comment = extra_filetypes.comment + docmark
self.doc_comment_alt = extra_filetypes.comment + docmark_alt
self.predoc_comment = extra_filetypes.comment + predocmark
self.predoc_comment_alt = extra_filetypes.comment + predocmark_alt

self.parse_file(settings.encoding)

self.read_metadata()

@staticmethod
def _docmark_alt(settings: ProjectSettings) -> str:
    """Return the regex fragment matching the alternative doc markers.

    Each of ``settings.docmark_alt`` and ``settings.predocmark_alt`` that is
    non-empty is regex-escaped. When both are set they are combined into a
    non-capturing alternation; when only one is set, its escaped form is
    returned on its own; when neither is set the result is an empty string.
    """
    escaped = [
        re.escape(mark)
        for mark in (settings.docmark_alt, settings.predocmark_alt)
        if mark
    ]
    if len(escaped) == 2:
        return f"(?:{escaped[0]}|{escaped[1]})"
    return escaped[0] if escaped else ""

def parse_file(self, encoding: str = "utf-8"):
self.doc_list = []
prevdoc = False
docalt = False
for line in open(filename, "r", encoding=settings.encoding):
line = line.strip()
if doc_alt_re:
match = doc_alt_re.match(line)
else:
match = False
if match:
prevdoc = True
docalt = True
doc = match.group(4)
if doc.startswith(comchar + docmark_alt):
doc = doc[len(comchar + docmark_alt) :].strip()
else:
doc = doc[len(comchar + predocmark_alt) :].strip()
self.doc_list.append(doc)
continue
match = doc_re.match(line)
if match:
prevdoc = True
if docalt:
docalt = False
doc = match.group(4)
if doc.startswith(comchar + docmark):
doc = doc[len(comchar + docmark) :].strip()
else:
doc = doc[len(comchar + predocmark) :].strip()
self.doc_list.append(doc)
continue
match = com_re.match(line)
if match:
if docalt:
if match.start(4) == 0:
doc = match.group(4)
doc = doc[len(comchar) :].strip()
self.doc_list.append(doc)
else:
with open(self.path, "r", encoding=encoding) as lines:
for line in lines:
line = line.strip()
if self.doc_alt_re and (match := self.doc_alt_re.match(line)):
prevdoc = True
docalt = True
self.doc_list.append(
remove_prefixes(
match.group(4),
self.doc_comment_alt,
self.predoc_comment_alt,
)
)
continue

if match := self.doc_re.match(line):
prevdoc = True
if docalt:
docalt = False
elif prevdoc:
prevdoc = False
self.doc_list.append("")
continue
# if not including any comment...
if prevdoc:
self.doc_list.append("")
prevdoc = False
docalt = False

self.read_metadata()
self.doc_list.append(
remove_prefixes(
match.group(4), self.doc_comment, self.predoc_comment
)
)
continue

if match := self.com_re.match(line):
if docalt:
if match.start(4) == 0:
self.doc_list.append(
remove_prefixes(match.group(4), self.comment)
)
else:
docalt = False
elif prevdoc:
prevdoc = False
self.doc_list.append("")
continue

# if not including any comment...
if prevdoc:
self.doc_list.append("")
prevdoc = False
docalt = False

# Always returns an empty string; ``total`` and ``total_all`` are accepted
# but not used by this implementation.
# NOTE(review): presumably this overrides a base-class hook that describes
# line counts for the rendered page — confirm against FortranBase.
def lines_description(self, total, total_all=0):
return ""


def remove_prefixes(string: str, prefix1: str, prefix2: Optional[str] = None) -> str:
    """Strip one leading marker from ``string`` and trim whitespace.

    ``prefix1`` and ``prefix2`` are treated as alternatives: if ``string``
    starts with ``prefix1`` that prefix is removed; otherwise, if ``prefix2``
    is given (and non-empty) and ``string`` starts with it, ``prefix2`` is
    removed. At most one prefix is ever removed, so text that happens to
    begin with the other marker after the first has been stripped is kept
    intact.

    Args:
        string: Text to process.
        prefix1: First candidate prefix.
        prefix2: Optional second candidate prefix, tried only when
            ``prefix1`` did not match.

    Returns:
        ``string`` without the matched prefix and with leading and trailing
        whitespace removed.
    """
    if string.startswith(prefix1):
        string = string[len(prefix1) :]
    elif prefix2 and string.startswith(prefix2):
        # ``elif`` rather than a second ``if``: stripping both in sequence
        # would eat the start of the remaining text whenever it begins with
        # the second marker.
        string = string[len(prefix2) :]
    return string.strip()


_can_have_contains = (
FortranModule,
FortranProgram,
Expand Down
Loading

0 comments on commit d7d5056

Please sign in to comment.