Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH pandoc parser minor improvements and support older pandoc #284

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
else pip install "scikit-learn~=${{ matrix.sklearn_version }}";
fi
if [ ${{ matrix.os }} == "ubuntu-latest" ];
then wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb && sudo dpkg -i pandoc-2.19.2-1-amd64.deb;
then sudo apt install pandoc && pandoc --version;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

woohoo :D

fi
python --version
pip --version
Expand Down
54 changes: 46 additions & 8 deletions skops/card/_markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self):
"RawInline": self._raw_inline,
"RawBlock": self._raw_block,
"SoftBreak": self._soft_break,
"LineBreak": self._line_break,
"Para": self._para,
"Header": self._header,
"Image": self._image,
Expand Down Expand Up @@ -124,6 +125,9 @@ def _soft_break(self, value) -> str:
incr = 0 if not self._indent_trace else self._indent_trace[-1]
return "\n" + self._get_indent(incr=incr)

def _line_break(self, value) -> str:
return "\n"

def _make_content(self, content):
parts = []
for item in content:
Expand Down Expand Up @@ -185,7 +189,14 @@ def _code(item: tuple[Any, str]) -> str:
_, txt = item
return f"`{txt}`"

def _table_cols(self, items) -> list[str]:
def _table_cols_old(self, items) -> list[str]:
columns = []
for (content,) in items:
column = self.__call__(content)
columns.append(column)
return columns

def _table_cols_new(self, items) -> list[str]: # pragma: no cover
columns = []
fn = self.__call__
for item in items:
Expand All @@ -194,7 +205,20 @@ def _table_cols(self, items) -> list[str]:
columns.append(column)
return columns

def _table_body(self, items) -> list[list[str]]:
def _table_body_old(self, items) -> list[list[str]]:
body = []
for row_items in items:
row = []
for col_row_item in row_items:
if not col_row_item:
content = ""
else:
content = col_row_item[0]
row.append(self.__call__(content))
body.append(row)
return body

def _table_body_new(self, items) -> list[list[str]]: # pragma: no cover
body = []
fn = self.__call__
for _, row_items in items:
Expand All @@ -205,20 +229,34 @@ def _table_body(self, items) -> list[list[str]]:
body.append(row)
return body

def _table(self, item) -> str:
def _table_old(self, item) -> tuple[list[str], list[list[str]]]:
# pandoc < 2.10
_, _, _, thead, tbody = item
columns = self._table_cols_old(thead)
body = self._table_body_old(tbody)
return columns, body

def _table_new(self, item) -> tuple[list[str], list[list[str]]]: # pragma: no cover
# pandoc >= 2.10
# attr capt specs thead tbody tfoot
_, _, _, thead, tbody, _ = item

# header
(_, thead_bodies) = thead
(attr, thead_body) = thead_bodies[0] # multiple headers?

columns = self._table_cols(thead_body)

columns = self._table_cols_new(thead_body)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have expected these lines to be covered.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the table change actually happens after 2.9, not 2.5 as I first suspected. Therefore, given that we now install pandoc via apt on CI, which uses 2.9, the old table implementation is covered, not the new one. I thus moved around the pragmas to the new implementation.

# rows
# attr rhc hd bd
_, _, _, trows = tbody[0] # multiple groups of rows?
body = self._table_body(trows)
body = self._table_body_new(trows)
return columns, body

def _table(self, item) -> str:
if len(item) == 6: # pragma: no cover
# pandoc >= 2.10
columns, body = self._table_new(item)
else:
# pandoc < 2.10
columns, body = self._table_old(item)

table: Mapping[str, Sequence[Any]]
if not body:
Expand Down
2 changes: 1 addition & 1 deletion skops/card/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from ._markup import Markdown, PandocItem

PANDOC_MIN_VERSION = "2.9.0"
PANDOC_MIN_VERSION = "2.0"


class PandocParser:
Expand Down
5 changes: 5 additions & 0 deletions skops/card/tests/examples/toy-example.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,8 @@ The "id" tag may change in order
<div class="warning" somekey key="with value" id="123">
<p>Divs are possible</p>
</div>

## Line breaks

A text with
a LineBreak item.
3 changes: 3 additions & 0 deletions skops/card/tests/examples/toy-example.md.diff
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,6 @@
-</div>
+
+<div id="123" class="warning" somekey key="with value"><p>Divs are possible</p></div>
@@ -186 +218 @@
-A text with
+A text with