Skip to content

Commit bbb541f

Browse files
KateVishnyaЕкатерина Московская
andauthored
Updating processing the url and repo_url (#22)
* update (includes.py): path processing * upgate (includes.py): update processing repo_url * update: add test * update (readme.md): add note for repo_url * update readme and test * resolve conflict * update: deleted print * fix: added error handling * fix: full links Co-authored-by: Екатерина Московская <ekaterina.markova@rt.ru>
1 parent 333b08e commit bbb541f

File tree

3 files changed

+127
-16
lines changed

3 files changed

+127
-16
lines changed

README.md

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,28 @@ Text below is taken from a remote repository on branch develop.
110110
<include repo_url="https://github.com/foo/bar.git" revision="develop" path="path/to/doc.md"></include>
111111
```
112112

113+
To include text from an HTTP(S) URL, use the `url` attribute:
114+
115+
```markdown
116+
Text below is taken from a remote repository on branch develop.
117+
118+
<include url="https://github.com/foo/bar/path/to/doc.md"></include>
119+
```
120+
121+
> **Note**
122+
>
123+
> For projects hosted on GitHub, you must specify the full path to the raw file and omit the `.md` file extension.
124+
125+
``` markdown
126+
<include url="https://github.com/path/to/doc/raw/master/doc" nohead="true"></include>
127+
```
128+
129+
> For projects hosted on GitLab, you must specify the full path to the raw file, including the `.md` file extension.
130+
131+
```markdown
132+
<include url="https://gitlab.com/path/to/doc/raw/master/doc.md" nohead="true"></include>
133+
```
134+
113135
To include a code snippet, use `wrap_code` and `code_language` attributes:
114136

115137
```markdown
@@ -118,8 +140,6 @@ wrap_code="triple_backticks" code_language="yaml">
118140
</include>
119141
```
120142

121-
122-
123143
#### Attributes
124144

125145
`src`
@@ -134,13 +154,18 @@ wrap_code="triple_backticks" code_language="yaml">
134154
`path`
135155
: Path to the file inside the remote Git repository.
136156

137-
> **Note**
138-
>
139-
> If you are using the new syntax, the `src` attribute is required to include a local file, `url` is required to include a remote file, and the `repo_url` and `path` attributes are required to include a file from a remote Git repository. All other attributes are optional.
157+
> **Note**
158+
>
159+
> This parameter is required!
160+
> Its absence will cause Foliant to work incorrectly.
140161

141-
> **Note**
142-
>
143-
> Foliant 1.0.9 supports the processing of attribute values as YAML. You can precede the values of attributes by the `!path`, `!project_path`, and `!rel_path` modifiers (i.e. YAML tags). These modifiers can be useful in the `src`, `path`, and `project_root` attributes.
162+
> **Note**
163+
>
164+
> If you are using the new syntax, the `src` attribute is required to include a local file, `url` is required to include a remote file, and the `repo_url` and `path` attributes are required to include a file from a remote Git repository. All other attributes are optional.
165+
166+
> **Note**
167+
>
168+
> Foliant 1.0.9 supports the processing of attribute values as YAML. You can precede the values of attributes by the `!path`, `!project_path`, and `!rel_path` modifiers (i.e. YAML tags). These modifiers can be useful in the `src`, `path`, and `project_root` attributes.
144169

145170
`revision`
146171
: Revision of the Git repository.

foliant/preprocessors/includes.py

Lines changed: 69 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,20 @@ def _find_file(
7474

7575
return result
7676

77+
78+
def create_full_link(self, repo_url: str, revision: str, path: str) -> str:
    '''Build the web URL of the directory that holds a file included
    from a remote Git repository.

    The returned URL always ends with ``/``. The link-rewriting code that
    consumes this value drops everything after the last ``/`` before
    appending a relative link target, so the trailing slash is what keeps
    the directory of a nested ``path`` in the resulting link.

    :param repo_url: URL of the repository; a trailing ``/`` and a trailing
        ``.git`` suffix are removed
    :param revision: Revision (branch, tag, commit) to link to; when empty,
        the ``master`` branch is used
    :param path: Path to the included file inside the repository; ``None``
        or an empty string is treated as the repository root

    :returns: URL of the directory containing the file, ending with ``/``
    '''

    # Normalise the repository URL so that "…/bar.git", "…/bar.git/" and
    # "…/bar/" all produce the same base.
    repo_url = repo_url.rstrip('/')

    if repo_url.endswith('.git'):
        repo_url = repo_url[:-4]

    if revision:
        prefix = f'{repo_url}/tree/{revision}/'

    else:
        # NOTE(review): this is a GitLab-style route with a hard-coded
        # default branch, unlike the '/tree/' route used above; kept as is
        # to preserve existing behaviour -- confirm whether it is intended.
        prefix = f'{repo_url}/-/blob/master/'

    # Directory part of the file path ('' for a file in the repository root).
    directory = (path or '').lstrip('/').rpartition('/')[0]

    if directory:
        return f'{prefix}{directory}/'

    return prefix
90+
7791
def _download_file_from_url(self, url: str) -> Path:
7892
'''Download file as the content of resource located at specified URL.
7993
Place downloaded file into the cache directory with a unique name.
@@ -107,7 +121,7 @@ def _download_file_from_url(self, url: str) -> Path:
107121

108122
response = urllib.request.urlopen(url)
109123
charset = 'utf-8'
110-
124+
111125
if response.headers['Content-Type']:
112126
charset_match = re.search(r'(^|[\s;])charset=(?P<charset>[^\s;]+)', response.headers['Content-Type'])
113127

@@ -117,10 +131,31 @@ def _download_file_from_url(self, url: str) -> Path:
117131
self.logger.debug(f'Detected source charset: {charset}')
118132

119133
downloaded_content = response.read().decode(charset)
120-
134+
121135
self._downloaded_dir_path.mkdir(parents=True, exist_ok=True)
122136

137+
# Beginning of the code block that converts relative paths in links to full links
138+
dict_new_link = {}
139+
regexp_find_link = re.compile('\[.+?\]\(.+?\)')
140+
regexp_find_path = re.compile('\(.+?\)')
141+
142+
old_found_link = regexp_find_link.findall(downloaded_content)
143+
144+
for line in old_found_link:
145+
exceptions_simbols = re.findall(r'http|@|:',line)
146+
if exceptions_simbols:
147+
continue
148+
else:
149+
relative_path = regexp_find_path.findall(line)
150+
sub_relative_path = re.findall(r'\[.+?\]', line)
151+
dict_new_link[line] = sub_relative_path[0] + '(' + url.rpartition('/')[0].replace('raw', 'blob')+'/'+ relative_path[0].partition('(')[2]
152+
153+
for line in dict_new_link:
154+
downloaded_content = downloaded_content.replace(line, dict_new_link[line])
155+
# End of the conversion code block
156+
123157
with open(downloaded_file_path, 'w', encoding='utf8') as downloaded_file:
158+
124159
downloaded_file.write(downloaded_content)
125160

126161
else:
@@ -652,7 +687,8 @@ def _process_include(
652687
to_id: str or None = None,
653688
to_end: bool = False,
654689
sethead: int or None = None,
655-
nohead: bool = False
690+
nohead: bool = False,
691+
include_link: str or None = None
656692
) -> str:
657693
'''Replace a local include statement with the file content. Necessary
658694
adjustments are applied to the content: cut between certain headings,
@@ -677,10 +713,32 @@ def _process_include(
677713
f'Included file path: {included_file_path}, from heading: {from_heading}, ' +
678714
f'to heading: {to_heading}, sethead: {sethead}, nohead: {nohead}'
679715
)
680-
716+
681717
with open(included_file_path, encoding='utf8') as included_file:
682718
included_content = included_file.read()
683719

720+
# Beginning of the code block that converts relative paths in links to full links
721+
if include_link:
722+
dict_new_link = {}
723+
regexp_find_link = re.compile('\[.+?\]\(.+?\)')
724+
regexp_find_path = re.compile('\(.+?\)')
725+
726+
old_found_link = regexp_find_link.findall(included_content)
727+
728+
for line in old_found_link:
729+
exceptions_simbols = re.findall(r'http|@|:',line)
730+
if exceptions_simbols:
731+
continue
732+
else:
733+
relative_path = regexp_find_path.findall(line)
734+
sub_relative_path = re.findall(r'\[.+?\]', line)
735+
dict_new_link[line] = sub_relative_path[0] + '(' + include_link.rpartition('/')[0].replace('raw', 'blob')+'/'+ relative_path[0].partition('(')[2]
736+
737+
for line in dict_new_link:
738+
included_content = included_content.replace(line, dict_new_link[line])
739+
# End of the conversion code block
740+
741+
684742
if self.config.get('escape_code', False):
685743
if isinstance(self.config['escape_code'], dict):
686744
escapecode_options = self.config['escape_code'].get('options', {})
@@ -736,7 +794,7 @@ def _process_include(
736794
'!rel_path',
737795
included_file_path.parent
738796
)
739-
797+
740798
return included_content
741799

742800
def process_includes(
@@ -926,8 +984,10 @@ def process_includes(
926984
repo_path / options.get('project_root', '')
927985
).resolve()
928986

929-
self.logger.debug(f'Set new current project root path: {current_project_root_path}')
987+
include_link = self.create_full_link(options.get('repo_url'), options.get('revision'), options.get('path'))
930988

989+
self.logger.debug(f'Set new current project root path: {current_project_root_path}')
990+
931991
processed_content_part = self._process_include(
932992
included_file_path=included_file_path,
933993
project_root_path=current_project_root_path,
@@ -937,7 +997,8 @@ def process_includes(
937997
to_id=options.get('to_id'),
938998
to_end=options.get('to_end'),
939999
sethead=current_sethead,
940-
nohead=options.get('nohead')
1000+
nohead=options.get('nohead'),
1001+
include_link=include_link
9411002
)
9421003

9431004
elif options.get('url'):
@@ -1094,7 +1155,7 @@ def apply(self):
10941155
for source_file_path in self.working_dir.rglob(source_files_extension):
10951156
with open(source_file_path, encoding='utf8') as source_file:
10961157
source_content = source_file.read()
1097-
1158+
10981159
processed_content = self.process_includes(
10991160
source_file_path,
11001161
source_content,

test/test_includes.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from unittest import TestCase
66
from foliant_test.preprocessor import PreprocessorTestFramework
77
from .utils import data_file_content
8+
import urllib.request
89

910

1011
logging.disable(logging.CRITICAL)
@@ -28,6 +29,18 @@ def test_src(self):
2829
input_mapping=input_map,
2930
expected_mapping=expected_map,
3031
)
32+
33+
def test_url(self):
    '''Check that an include given by the ``url`` attribute with
    ``nohead="true"`` is replaced by the referenced file's content.

    The expected output is the title followed by the content of the
    ``../LICENSE`` data file.
    '''
    self.ptf.test_preprocessor(
        input_mapping={
            'index.md': '# My title\n\n<include url="https://github.com/foliant-docs/foliantcontrib.includes/raw/master/LICENSE" nohead="true"></include>',
        },
        expected_mapping={
            'index.md': f'# My title\n\n{data_file_content("../LICENSE")}',
        },
    )
3144

3245
def test_repo_path(self):
3346
input_map = {
@@ -40,6 +53,18 @@ def test_repo_path(self):
4053
input_mapping=input_map,
4154
expected_mapping=expected_map,
4255
)
56+
57+
def test_include_link(self):
    '''Check that an include given by ``repo_url``, ``revision`` and
    ``path`` is replaced by the referenced file's content.

    The expected output is the title followed by the content of the
    ``../LICENSE`` data file.
    '''
    self.ptf.test_preprocessor(
        input_mapping={
            'index.md': '# My title\n\n<include repo_url="https://github.com/foliant-docs/foliantcontrib.includes" revision="master" path="LICENSE"></include>',
        },
        expected_mapping={
            'index.md': f'# My title\n\n{data_file_content("../LICENSE")}',
        },
    )
4368

4469
def test_nohead(self):
4570
input_map = {

0 commit comments

Comments
 (0)