Skip to content

Commit

Permalink
Merge pull request #15 from FontysVenlo/improve-comments
Browse files Browse the repository at this point in the history
Improve comments
  • Loading branch information
Bonajo authored Sep 24, 2024
2 parents fd2a915 + 80b2cd7 commit e4cfb63
Show file tree
Hide file tree
Showing 23 changed files with 172 additions and 62 deletions.
8 changes: 5 additions & 3 deletions codestripper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ def add_commandline_arguments(parser: argparse.ArgumentParser) -> None:
# Add optional arguments
parser.add_argument("-x", "--exclude", action="append",
help="files to include for code stripping (glob)", default=[])
parser.add_argument("-c", "--comment", action="store",
help="comment symbol(s) for the given language", default="//")
parser.add_argument("-c", "--comment", action="append",
help="comment symbol(s) for the given language, usage: <extension>:<comment> (e.g. .java://")
parser.add_argument("-v", "--verbosity", action="count", help="increase output verbosity", default=0)
parser.add_argument("-o", "--output", action="store",
help="output directory to store the stripped files", default="out")
Expand Down Expand Up @@ -44,4 +44,6 @@ def main() -> None:
cwd = get_working_directory(args.working_directory)
files = FileUtils(args.include, args.exclude, cwd, args.recursive, logger_name).get_matching_files()
# Strip all the files
strip_files(files, cwd, args.comment, args.output, args.dry_run, args.fail_on_error)

strip_files(files, cwd, comments=args.comment, output=args.output, dry_run=args.dry_run,
fail_on_error=args.fail_on_error)
25 changes: 21 additions & 4 deletions codestripper/code_stripper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,29 @@
import os.path
import shutil
from pathlib import Path
from typing import Union, Iterable, List
from typing import Union, Iterable, List, Optional

from codestripper.errors import InvalidTagError, TokenizerError
from codestripper.tags import IgnoreFileError
from codestripper.tags.tag import Tag, RangeTag
from codestripper.tokenizer import Tokenizer
from codestripper.utils import get_working_directory
from codestripper.utils.comments import comments_mapping, Comment

logger = logging.getLogger("codestripper")


def strip_files(files: Iterable[str], working_directory: Union[str, None] = None, comment: str = "//",
def strip_files(files: Iterable[str], working_directory: Union[str, None] = None, * ,comments: Optional[List[str]] = None,
output: Union[Path, str] = "out", dry_run: bool = False, fail_on_error: bool = False) -> List[str]:

if comments is not None:
for comment in comments:
parts = comment.split(":")
if len(parts) == 2:
comments_mapping[parts[0]] = Comment(parts[1])
else:
comments_mapping[parts[0]] = Comment(parts[1], parts[2])

cwd = get_working_directory(working_directory)
out = os.path.join(os.getcwd(), output)
if os.path.isdir(out):
Expand All @@ -26,7 +36,14 @@ def strip_files(files: Iterable[str], working_directory: Union[str, None] = None
content = handle.read()
if content is not None:
try:
stripped = CodeStripper(content, comment).strip()
_, file_extension = os.path.splitext(file)
file_extension = file_extension.lower()
if not file_extension in comments_mapping:
logger.error(f"Unknown extension: '{file_extension}', "
f"please specify which comment to use for this file extension.")
continue
com = comments_mapping[file_extension]
stripped = CodeStripper(content, com).strip()
except IgnoreFileError:
logger.info(f"File '{file}' is ignored, because of ignore tag")
continue
Expand All @@ -50,7 +67,7 @@ def strip_files(files: Iterable[str], working_directory: Union[str, None] = None

class CodeStripper:

def __init__(self, content: str, comment: str) -> None:
def __init__(self, content: str, comment: Comment) -> None:
self.content = content
self.comment = comment

Expand Down
2 changes: 1 addition & 1 deletion codestripper/tags/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class AddTag(SingleTag):
regex = r'cs:add:(.*?)$'
regex = r'cs:add:(.*)?'

def __init__(self, data: TagData) -> None:
super().__init__(data)
Expand Down
4 changes: 2 additions & 2 deletions codestripper/tags/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class LegacyOpenTag(RangeOpenTag):
regex = r'Start Solution::replacewith::(.*?)$'
regex = r'Start Solution::replacewith::(.*)'

def __init__(self, data: TagData) -> None:
super().__init__(LegacyRangeTag, data)
Expand All @@ -20,7 +20,7 @@ def execute(self, content: str) -> Union[str, None]:


class LegacyCloseTag(RangeCloseTag):
regex = r'End Solution::replacewith::(.*?)$'
regex = r'End Solution::replacewith::(.*)'

def __init__(self, data: TagData) -> None:
super().__init__(LegacyRangeTag, data)
Expand Down
2 changes: 1 addition & 1 deletion codestripper/tags/remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

class RemoveTag(SingleTag):

regex = r'cs:remove\s*?$'
regex = r'cs:remove(?!:)(.*)?'

def __init__(self, data: TagData):
super().__init__(data)
Expand Down
2 changes: 1 addition & 1 deletion codestripper/tags/replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


class ReplaceTag(SingleTag):
regex = r'cs:replace:(.*?)$'
regex = r'cs:replace:(.*?)'

def __init__(self, data: TagData) -> None:
super().__init__(data)
Expand Down
4 changes: 3 additions & 1 deletion codestripper/tags/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from dataclasses import dataclass
from typing import Type, Union, List, Pattern, Iterable, Optional

from codestripper.utils.comments import Comment


@dataclass
class TagData:
Expand All @@ -13,7 +15,7 @@ class TagData:
regex_end: int
parameter_start: int
parameter_end: int
comment: str
comment: Comment

def __repr__(self) -> str:
return (f"{self.line}, line ({self.line_number}): {self.line_start}:{self.line_end},"
Expand Down
6 changes: 3 additions & 3 deletions codestripper/tags/uncomment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@


class UncommentOpenTag(RangeOpenTag):
regex = r'cs:uncomment:start.*?$'
regex = r'cs:uncomment:start(.*)?'

def __init__(self, data: TagData) -> None:
super().__init__(UncommentRangeTag, data)


class UncommentCloseTag(RangeCloseTag):
regex = 'cs:uncomment:end.*?$'
regex = 'cs:uncomment:end(.*)?'

def __init__(self, data: TagData) -> None:
super().__init__(UncommentRangeTag, data)
Expand All @@ -27,7 +27,7 @@ def __init__(self, open_tag: RangeOpenTag, close_tag: RangeCloseTag):
def execute(self, content: str) -> Union[str, None]:
if UncommentRangeTag.regex is None:
whitespace = r"(?P<whitespace>\s*)"
UncommentRangeTag.regex = re.compile(f"{whitespace}{self.open_tag.data.comment}")
UncommentRangeTag.regex = re.compile(f"{whitespace}{self.open_tag.data.comment.open}")
range = content[self.start:self.end]
replacement = UncommentRangeTag.regex.sub(r"\g<whitespace>", range)
return replacement
21 changes: 14 additions & 7 deletions codestripper/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from codestripper.tags import ReplaceTag, UncommentCloseTag, IgnoreFileTag, RemoveOpenTag, RemoveCloseTag, \
UncommentOpenTag, LegacyOpenTag, LegacyCloseTag, RemoveTag, AddTag
from codestripper.tags.tag import SingleTag, Tag, RangeOpenTag, RangeCloseTag, RangeTag, TagData
from codestripper.utils.comments import Comment

default_tags: Set[Type[SingleTag]] = {
IgnoreFileTag,
Expand All @@ -25,30 +26,36 @@
CreateTagMapping = Dict[str, CreateTagLambda]


def calculate_mappings(tags: Set[Type[SingleTag]], comment: str) -> Tuple[CreateTagMapping, Pattern]:
def calculate_mappings(tags: Set[Type[SingleTag]], comment: Comment) -> Tuple[CreateTagMapping, Pattern]:
strings = [r"(?P<newline>\n)"]
mappings = {}
for tag in tags:
name = f"{tag.__name__}"
mappings[name] = lambda data, constructor=tag: constructor(data)
strings.append(f"(?P<{name}>{comment}{tag.regex})")
reg = f"(?P<{name}>{re.escape(comment.open)}{tag.regex})"
if comment.close is not None:
reg += re.escape(comment.close)
strings.append(reg)
regex = re.compile("|".join(strings), flags=re.MULTILINE)
return mappings, regex # type: ignore


class Tokenizer:
mapping_cache: Dict[str, Tuple[CreateTagMapping, Pattern]] = {}
mappings: CreateTagMapping = {}
regex: Pattern = re.compile("")
comment: str = ""
comment: Comment

def __init__(self, content: str, comment: str) -> None:
def __init__(self, content: str, comment: Comment) -> None:
self.content = content
self.ordered_tags: List[Tag] = []
self.open_stack: List[RangeOpenTag] = []
self.range_stack: Dict[int, Optional[List[Tag]]] = {}
if len(Tokenizer.mappings) == 0 or Tokenizer.comment != comment:
Tokenizer.mappings, Tokenizer.regex = calculate_mappings(default_tags, comment)
Tokenizer.comment = comment
Tokenizer.comment = comment
if not str(comment) in Tokenizer.mapping_cache:
Tokenizer.mapping_cache[str(comment)] = calculate_mappings(default_tags, comment)
Tokenizer.mappings = Tokenizer.mapping_cache[str(comment)][0]
Tokenizer.regex = Tokenizer.mapping_cache[str(comment)][1]
self.group_count = self.regex.groups

def tokenize(self) -> List[Tag]:
Expand Down
26 changes: 26 additions & 0 deletions codestripper/utils/comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from dataclasses import dataclass
from typing import Optional, Dict


@dataclass(frozen=True)
class Comment:
    """Delimiters that mark a comment in one language.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    """

    # Token that starts a comment, e.g. "//", "#" or "<!--".
    open: str
    # Token that ends a comment, e.g. "-->"; None when no closing token is given.
    close: Optional[str] = None

# Default comment delimiters keyed by file extension (leading dot, lower case).
# Only ".xml" and ".ml" define a closing token; every other entry has only an
# opening token (close is None).
# NOTE(review): ".m" is mapped to "%" — presumably MATLAB/Octave; Objective-C
# sources share that extension but use "//" — confirm the intended language.
comments_mapping: Dict[str, Comment] = {
    ".java": Comment("//"),
    ".cs": Comment("//"),
    ".js": Comment("//"),
    ".php": Comment("//"),
    ".swift": Comment("//"),
    ".xml": Comment("<!--", "-->"),
    ".tex": Comment("%"),
    ".m": Comment("%"),
    ".sql": Comment("--"),
    ".lua": Comment("--"),
    ".ml": Comment("(*", "*)"),
    ".r": Comment("#"),
    ".py": Comment("#"),
    ".ps1": Comment("#"),
    ".rb": Comment("#")
}
2 changes: 1 addition & 1 deletion codestripper/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __convert_to_paths_set(self, file_names: Iterable[str], recursive=True) -> S
return files

def get_matching_files(self) -> Iterable[str]:
"""Get files that fullfill requirements, match included and do not match excluded"""
"""Get files that fulfill requirements, match included and do not match excluded"""
os.chdir(self.cwd)
included_files = self.__convert_to_paths_set(self.included, self.recursive)
self.logger.debug(f"Included files are: {included_files}")
Expand Down
Empty file added py.typed
Empty file.
23 changes: 19 additions & 4 deletions tests/tags/test_add.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from codestripper.code_stripper import CodeStripper
from codestripper.utils.comments import Comment


def test_add_should_add():
Expand All @@ -12,26 +13,40 @@ def test_add_should_add():
//TODO
"""
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Add should add the replacement"


def test_add_should_add_closing():
    """An add tag in a comment with a closing token is replaced by its argument.

    The whole tag, including the "<!--" and "-->" delimiters, is expected to
    disappear and leave only the text after "cs:add:".
    """
    case = """
public class AssessmentResult {
<!--cs:add://TODO-->
"""
    expected = """
public class AssessmentResult {
//TODO
"""
    output = CodeStripper(case, Comment("<!--", "-->")).strip()
    assert output == expected, "Add should add the replacement"

def test_add_valid():
case = "//cs:add"
expected = "//cs:add"
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Add should only trigger with valid tag"


def test_add_without_replacement():
case = " //cs:add:"
expected = " "
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Add without replacement keeps whitelines"


def test_add_with_content_before():
case = """asd//cs:add:test"""
expected = """asdtest"""
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Replace should replace with empty string keeping whitespace"
10 changes: 8 additions & 2 deletions tests/tags/test_ignore_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from codestripper.code_stripper import CodeStripper
from codestripper.errors import InvalidTagError
from codestripper.tags import IgnoreFileError
from codestripper.utils.comments import Comment


def test_invalid_tag():
Expand All @@ -11,12 +12,17 @@ def test_invalid_tag():
//cs:ignore
"""
with pytest.raises(InvalidTagError) as ex:
CodeStripper(case, "//").strip()
CodeStripper(case, Comment("//")).strip()
message = str(ex)
assert message.__contains__("IgnoreFileTag")


def test_ignored_file():
case = "//cs:ignore"
with pytest.raises(IgnoreFileError):
CodeStripper(case, "//").strip()
CodeStripper(case, Comment("//")).strip()

def test_ignored_file_closing():
    """An ignore tag wrapped in "<!--" / "-->" delimiters must raise IgnoreFileError."""
    case = "<!--cs:ignore-->"
    with pytest.raises(IgnoreFileError):
        CodeStripper(case, Comment("<!--", "-->")).strip()
5 changes: 3 additions & 2 deletions tests/tags/test_legacy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from codestripper.code_stripper import CodeStripper
from codestripper.utils.comments import Comment


def test_legacy_should_remove():
Expand All @@ -9,7 +10,7 @@ def test_legacy_should_remove():
"""
expected = """
"""
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Legacy should remove contents inbetween tags"


Expand All @@ -23,5 +24,5 @@ def test_legacy_should_replace():
start
end
"""
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Legacy should replace on start and end"
5 changes: 3 additions & 2 deletions tests/tags/test_remove.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from codestripper.code_stripper import CodeStripper
from codestripper.utils.comments import Comment


def test_remove_range():
Expand All @@ -9,7 +10,7 @@ def test_remove_range():
"""
expected = """
"""
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Remove should remove all"


Expand All @@ -19,5 +20,5 @@ def test_remove_single():
"""
expected = """
"""
output = CodeStripper(case, "//").strip()
output = CodeStripper(case, Comment("//")).strip()
assert output == expected, "Remove should remove single line"
Loading

0 comments on commit e4cfb63

Please sign in to comment.