Skip to content

Commit

Permalink
[TVMScript] Text underlining in DocPrinter based on Doc's source_paths (
Browse files Browse the repository at this point in the history
apache#12344)

This adds an ability to print a "diagnostic marker" based on a given ObjectPath. For example, say we are printing a fragment of TIR like
```
for i in T.serial(10):
    a[i] = 5
```
and we would like to bring the user's attention to the bound of the loop:
```
for i in T.serial(10):
                  ^^
    a[i] = 5
```
In this case we would give the doc printer an object path that represents this loop bound, i.e. something like `path_to_underline=ObjectPath.root().attr("extent")`

Tracking issue: apache#11912
  • Loading branch information
gbonik authored and Mikael Sevenier committed Aug 12, 2022
1 parent 12fbe20 commit 32b34cc
Show file tree
Hide file tree
Showing 6 changed files with 718 additions and 17 deletions.
11 changes: 8 additions & 3 deletions include/tvm/script/printer/doc_printer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,15 @@ namespace printer {
* This function unpacks the DocPrinterOptions into function arguments
* to be FFI friendly.
*
* \param doc the doc to be converted
* \param indent_spaces the number of spaces used for indention
* \param doc Doc to be converted
* \param indent_spaces Number of spaces used for indentation
* \param print_line_numbers Whether to print line numbers
* \param num_context_lines Number of context lines to print around the underlined text
* \param path_to_underline Object path to be underlined
*/
String DocToPythonScript(Doc doc, int indent_spaces = 4);
String DocToPythonScript(Doc doc, int indent_spaces = 4, bool print_line_numbers = false,
int num_context_lines = -1,
Optional<ObjectPath> path_to_underline = NullOpt);

} // namespace printer
} // namespace script
Expand Down
22 changes: 20 additions & 2 deletions python/tvm/script/printer/doc_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,19 @@
# under the License.
"""Functions to print doc into text format"""

from typing import Optional
from tvm.runtime.object_path import ObjectPath
from . import _ffi_api
from .doc import Doc


def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
def to_python_script(
doc: Doc,
indent_spaces: int = 4,
print_line_numbers: bool = False,
num_context_lines: Optional[int] = None,
path_to_underline: Optional[ObjectPath] = None,
) -> str:
"""Convert Doc into Python script.
Parameters
Expand All @@ -29,10 +37,20 @@ def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
The doc to convert into Python script
indent_spaces : int
The number of indent spaces to use in the output
print_line_numbers: bool
Whether to print line numbers
num_context_lines : Optional[int]
Number of context lines to print around the underlined text
path_to_underline : Optional[ObjectPath]
Object path to be underlined
Returns
-------
script : str
The text representation of Doc in Python syntax
"""
return _ffi_api.DocToPythonScript(doc, indent_spaces) # type: ignore # pylint: disable=no-member
if num_context_lines is None:
num_context_lines = -1
return _ffi_api.DocToPythonScript( # type: ignore
doc, indent_spaces, print_line_numbers, num_context_lines, path_to_underline
)
261 changes: 258 additions & 3 deletions src/script/printer/base_doc_printer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,256 @@ namespace tvm {
namespace script {
namespace printer {

// NOTE(review): this page lists 3 deletions for this file and shows no +/- markers; this
// int-taking constructor appears to be one of the deleted lines, replaced by the
// DocPrinterOptions-taking constructor further down. It only stores the indent width.
DocPrinter::DocPrinter(int indent_spaces) : indent_spaces_(indent_spaces) {}
namespace {

// NOTE(review): appears to be a deleted line of the diff (same signature as the Append
// overload defined further down, which forwards to the two-argument form instead).
// This older body prints the doc directly without any underline bookkeeping.
void DocPrinter::Append(const Doc& doc) { PrintDoc(doc); }
void SortAndMergeSpans(std::vector<ByteSpan>* spans) {
if (spans->empty()) {
return;
}
std::sort(spans->begin(), spans->end());
auto last = spans->begin();
for (auto cur = spans->begin() + 1; cur != spans->end(); ++cur) {
if (cur->first > last->second) {
*++last = *cur;
} else if (cur->second > last->second) {
last->second = cur->second;
}
}
spans->erase(++last, spans->end());
}

/*!
 * \brief Count the printable characters of `text` inside the byte range `span`.
 *
 * \param text The text being measured.
 * \param span Half-open byte range [first, second) into `text`.
 * \return Number of bytes in the range for which isprint() is true. An inverted
 *         range (first > second) yields 0.
 */
size_t GetTextWidth(const std::string& text, const ByteSpan& span) {
  // FIXME: this only works for ASCII characters.
  // To do this "correctly", we need to parse UTF-8 into codepoints
  // and call wcwidth() or equivalent for every codepoint.
  size_t ret = 0;
  // `<` rather than `!=` so an inverted span cannot run past the end of the buffer.
  for (size_t i = span.first; i < span.second; ++i) {
    // isprint() has undefined behaviour for negative arguments other than EOF, and plain
    // `char` may be signed, so bytes >= 0x80 must go through unsigned char first.
    if (isprint(static_cast<unsigned char>(text[i]))) {
      ret += 1;
    }
  }
  return ret;
}

/*! \brief Subtract `distance` from `pos`, saturating at 0 instead of wrapping around. */
size_t MoveBack(size_t pos, size_t distance) {
  if (pos < distance) {
    return 0;
  }
  return pos - distance;
}

/*!
 * \brief Add `distance` to `pos`, saturating at `max`.
 *
 * The remaining room is computed first so that `pos + distance` is never evaluated
 * when it would exceed `max`.
 */
size_t MoveForward(size_t pos, size_t distance, size_t max) {
  const size_t room = max - pos;
  if (distance > room) {
    return max;
  }
  return pos + distance;
}

/*!
 * \brief Find the index of the line that contains byte offset `byte_pos`.
 *
 * \param byte_pos Byte offset to look up.
 * \param line_starts Ascending byte offsets at which each line begins.
 * \return Index of the last entry of `line_starts` that is <= `byte_pos`.
 */
size_t GetLineIndex(size_t byte_pos, const std::vector<size_t>& line_starts) {
  // First line start strictly greater than byte_pos; the line before it is the answer.
  const auto first_after =
      std::upper_bound(line_starts.cbegin(), line_starts.cend(), byte_pos);
  const auto starts_at_or_before = first_after - line_starts.cbegin();
  return starts_at_or_before - 1;
}

using UnderlineIter = std::vector<ByteSpan>::const_iterator;

/*!
 * \brief Return the span at `*next_underline` and advance the iterator past it.
 *
 * \param next_underline In/out cursor into the underline list.
 * \param end_underline End of the underline list.
 * \return The next span, or a sentinel span whose both offsets are the maximum size_t
 *         value when the cursor is already at `end_underline` (the cursor is then left
 *         untouched).
 */
ByteSpan PopNextUnderline(UnderlineIter* next_underline, UnderlineIter end_underline) {
  if (*next_underline != end_underline) {
    const ByteSpan popped = **next_underline;
    ++*next_underline;
    return popped;
  }
  constexpr size_t kNoPosition = std::numeric_limits<size_t>::max();
  return {kNoPosition, kNoPosition};
}

/*!
 * \brief Append a contiguous range of lines of `text` to `*out`, drawing '^' markers
 *        on an extra row below every line that intersects an underline.
 *
 * \param lines_range Half-open range [first, second) of line indices to print.
 * \param underlines Iterator range of the spans to draw; they are consumed in order.
 * \param text The full text.
 * \param line_starts Byte offset at which each line of `text` begins.
 * \param options Only `print_line_numbers` is read here.
 * \param line_number_width Width of the line-number column; also used to indent the
 *        marker row so that it lines up with the text.
 * \param out Destination string.
 */
void PrintChunk(const std::pair<size_t, size_t>& lines_range,
const std::pair<UnderlineIter, UnderlineIter>& underlines, const std::string& text,
const std::vector<size_t>& line_starts, const DocPrinterOptions& options,
size_t line_number_width, std::string* out) {
UnderlineIter next_underline = underlines.first;
// When the list is exhausted PopNextUnderline yields a span at the maximum size_t
// offset, which makes the `current_underline.first < line_end` test below fail.
ByteSpan current_underline = PopNextUnderline(&next_underline, underlines.second);

for (size_t line_idx = lines_range.first; line_idx < lines_range.second; ++line_idx) {
if (options.print_line_numbers) {
// 1-based line number followed by one space, left-padded up to line_number_width.
std::string line_num_str = std::to_string(line_idx + 1);
line_num_str.push_back(' ');
for (size_t i = line_num_str.size(); i < line_number_width; ++i) {
out->push_back(' ');
}
*out += line_num_str;
}

// A line runs up to the start of the next line (or the end of the text for the last
// entry), so whatever separates the two lines in `text` is copied along with it.
size_t line_start = line_starts.at(line_idx);
size_t line_end =
line_idx + 1 == line_starts.size() ? text.size() : line_starts.at(line_idx + 1);
out->append(text.begin() + line_start, text.begin() + line_end);

// State for the marker row of this line:
//   printed_underline   - the marker row has been started (indent already emitted)
//   line_pos            - byte offset up to which the marker row has been laid out
//   printed_extra_caret - a forced caret was emitted for a zero-width span and one
//                         column still has to be taken back to stay aligned
bool printed_underline = false;
size_t line_pos = line_start;
bool printed_extra_caret = 0;
while (current_underline.first < line_end) {
if (!printed_underline) {
// Skip the line-number column so carets align with the text above.
*out += std::string(line_number_width, ' ');
printed_underline = true;
}

// Only the part of the underline that lies on this line is drawn now.
size_t underline_end_for_line = std::min(line_end, current_underline.second);
size_t num_spaces = GetTextWidth(text, {line_pos, current_underline.first});
if (num_spaces > 0 && printed_extra_caret) {
// Compensate for the forced caret by dropping one space of padding.
num_spaces -= 1;
printed_extra_caret = false;
}
*out += std::string(num_spaces, ' ');

size_t num_carets = GetTextWidth(text, {current_underline.first, underline_end_for_line});
if (num_carets == 0 && !printed_extra_caret) {
// Special case: when underlining an empty or unprintable string, make sure to print
// at least one caret still.
num_carets = 1;
printed_extra_caret = true;
} else if (num_carets > 0 && printed_extra_caret) {
// Compensate for the forced caret by dropping one caret of this span.
num_carets -= 1;
printed_extra_caret = false;
}
*out += std::string(num_carets, '^');

// Advance past what was drawn. If the span continues beyond this line its remainder
// stays in current_underline and is drawn under the following line(s).
line_pos = current_underline.first = underline_end_for_line;
if (current_underline.first == current_underline.second) {
current_underline = PopNextUnderline(&next_underline, underlines.second);
}
}

if (printed_underline) {
// Terminate the marker row.
out->push_back('\n');
}
}
}

/*!
 * \brief Append a "(... N lines skipped ...)" marker line to `*out`.
 *
 * Nothing is appended when `num_lines_skipped` is 0.
 */
void PrintCut(size_t num_lines_skipped, std::string* out) {
  if (num_lines_skipped == 0) {
    return;
  }
  std::ostringstream message;
  message << "(... " << num_lines_skipped << " lines skipped ...)\n";
  out->append(message.str());
}

/*!
 * \brief Compute the range of lines to print for one underline, including context.
 *
 * \param underline Half-open byte span to be underlined.
 * \param line_starts Byte offset at which each line begins.
 * \param num_lines Number of printable lines; upper bound for the returned range.
 * \param options `num_context_lines` is the number of extra lines shown on each side.
 * \return Half-open range [first_line, end_line) of line indices.
 */
std::pair<size_t, size_t> GetLinesForUnderline(const ByteSpan& underline,
                                               const std::vector<size_t>& line_starts,
                                               size_t num_lines, const DocPrinterOptions& options) {
  size_t first_line_of_underline = GetLineIndex(underline.first, line_starts);
  size_t first_line_of_chunk = MoveBack(first_line_of_underline, options.num_context_lines);

  // An empty span has no last byte: `second - 1` would wrap around for a span at offset 0
  // and would select the previous line for a span sitting at a line start, leaving the
  // line that actually carries the caret out of the chunk. Use the span start instead.
  size_t last_byte_of_underline =
      underline.second > underline.first ? underline.second - 1 : underline.first;
  // Clamp so that MoveForward never sees a position beyond `num_lines` (its `max - pos`
  // would wrap around), e.g. for an empty span at the very end of the text.
  size_t end_line_of_underline =
      std::min(GetLineIndex(last_byte_of_underline, line_starts) + 1, num_lines);
  size_t end_line_of_chunk =
      MoveForward(end_line_of_underline, options.num_context_lines, num_lines);

  return {first_line_of_chunk, end_line_of_chunk};
}

// Cutting out a single line is not worth it: printing that line as is reads better
// than something like "(... 1 line skipped ...)".
constexpr size_t kMinLinesToCutOut = 2;

/*!
 * \brief Extend `*cur_chunk` to cover `new_chunk` when the gap between them is too small
 *        to be worth cutting out.
 *
 * \param cur_chunk Chunk being accumulated; its end is updated on a successful merge.
 * \param new_chunk Chunk that follows `*cur_chunk`.
 * \return true if the chunks were merged, false if `*cur_chunk` was left untouched.
 */
bool TryMergeChunks(std::pair<size_t, size_t>* cur_chunk,
                    const std::pair<size_t, size_t>& new_chunk) {
  const bool gap_too_small = new_chunk.first < cur_chunk->second + kMinLinesToCutOut;
  if (gap_too_small) {
    cur_chunk->second = new_chunk.second;
  }
  return gap_too_small;
}

/*!
 * \brief Number of lines in `text` that are worth printing.
 *
 * \param text The full text.
 * \param line_starts Byte offset at which each line begins; must not be empty.
 * \return `line_starts.size()`, minus one when the last entry points at the very end of
 *         `text` (a final empty line doesn't count as a line).
 */
size_t GetNumLines(const std::string& text, const std::vector<size_t>& line_starts) {
  const bool last_line_is_empty = line_starts.back() == text.size();
  return last_line_is_empty ? line_starts.size() - 1 : line_starts.size();
}

/*!
 * \brief Width of the line-number column.
 *
 * \return 0 when line numbers are disabled; otherwise the number of decimal digits of
 *         `num_lines` plus one for the separating space.
 */
size_t GetLineNumberWidth(size_t num_lines, const DocPrinterOptions& options) {
  if (!options.print_line_numbers) {
    return 0;
  }
  const std::string widest_number = std::to_string(num_lines);
  return widest_number.size() + 1;
}

/*!
 * \brief Produce the final printout: line numbers, underline markers and, when there are
 *        underlines, only the chunks of lines around them.
 *
 * Without underlines every line is printed. Otherwise each underline contributes a chunk
 * of lines (see GetLinesForUnderline); chunks separated by fewer than kMinLinesToCutOut
 * lines are merged, and every omitted run of lines is replaced by a
 * "(... N lines skipped ...)" marker.
 *
 * \param text The full text.
 * \param line_starts Byte offset at which each line of `text` begins.
 * \param options Printing options, forwarded to the helpers.
 * \param underlines Spans to underline.
 *        NOTE(review): the merge logic looks like it expects these sorted and
 *        non-overlapping (GetString runs SortAndMergeSpans first) - confirm for other callers.
 * \return The decorated text.
 */
std::string DecorateText(const std::string& text, const std::vector<size_t>& line_starts,
const DocPrinterOptions& options,
const std::vector<ByteSpan>& underlines) {
size_t num_lines = GetNumLines(text, line_starts);
size_t line_number_width = GetLineNumberWidth(num_lines, options);

std::string ret;
if (underlines.empty()) {
// Nothing to highlight: print everything, passing an empty iterator range.
PrintChunk({0, num_lines}, {underlines.begin(), underlines.begin()}, text, line_starts, options,
line_number_width, &ret);
return ret;
}

// last_end_line: end of the last chunk already written to `ret`.
// cur_chunk: chunk being accumulated; it is only printed once no further merge is possible.
size_t last_end_line = 0;
std::pair<size_t, size_t> cur_chunk =
GetLinesForUnderline(underlines[0], line_starts, num_lines, options);
if (cur_chunk.first < kMinLinesToCutOut) {
// Too few leading lines to be worth cutting: start the first chunk at line 0.
cur_chunk.first = 0;
}

auto first_underline_in_cur_chunk = underlines.begin();
for (auto underline_it = underlines.begin() + 1; underline_it != underlines.end();
++underline_it) {
std::pair<size_t, size_t> new_chunk =
GetLinesForUnderline(*underline_it, line_starts, num_lines, options);

if (!TryMergeChunks(&cur_chunk, new_chunk)) {
// The gap is large enough: flush the accumulated chunk with the underlines it owns,
// then start accumulating a new chunk from this underline.
PrintCut(cur_chunk.first - last_end_line, &ret);
PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underline_it}, text, line_starts,
options, line_number_width, &ret);
last_end_line = cur_chunk.second;
cur_chunk = new_chunk;
first_underline_in_cur_chunk = underline_it;
}
}

// Flush the final chunk. PrintCut emits nothing when the skipped count is 0.
PrintCut(cur_chunk.first - last_end_line, &ret);
if (num_lines - cur_chunk.second < kMinLinesToCutOut) {
// Too few trailing lines to be worth cutting: extend the chunk to the end.
cur_chunk.second = num_lines;
}
PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underlines.end()}, text, line_starts,
options, line_number_width, &ret);
PrintCut(num_lines - cur_chunk.second, &ret);
return ret;
}

} // anonymous namespace

/*!
 * \brief Create a printer with the given options.
 *
 * The line-start table is seeded with offset 0, so it is never empty.
 */
DocPrinter::DocPrinter(const DocPrinterOptions& options) : options_(options) {
  line_starts_.emplace_back(0);
}

/*! \brief Print `doc` without requesting any underline. */
void DocPrinter::Append(const Doc& doc) {
  // Forward to the two-argument overload with no path to underline.
  Append(doc, NullOpt);
}

/*!
 * \brief Print `doc` and record the spans that should be underlined for it.
 *
 * \param doc Doc to print.
 * \param path_to_underline Object path whose printed text should be underlined, if any.
 */
void DocPrinter::Append(const Doc& doc, Optional<ObjectPath> path_to_underline) {
  // Reset the per-call underline search state before printing.
  path_to_underline_ = path_to_underline;
  current_max_path_length_ = 0;
  current_underline_candidates_.clear();

  PrintDoc(doc);

  // Keep the candidates that survived for this doc alongside those of earlier calls.
  for (const auto& candidate : current_underline_candidates_) {
    underlines_.push_back(candidate);
  }
}

/*!
 * \brief Return everything printed so far as a decorated string.
 *
 * Trailing spaces are stripped, a final newline is added when missing, and the text is
 * then passed through DecorateText together with the sorted and merged underline spans.
 */
String DocPrinter::GetString() const {
  std::string text = output_.str();

  // Remove any trailing indentation
  while (!text.empty() && text.back() == ' ') {
    text.pop_back();
  }

  if (!text.empty() && text.back() != '\n') {
    text.push_back('\n');
  }

  // A leftover `return text;` used to sit here and made the decoration below
  // unreachable, so line numbers and underlines were never produced.

  // Work on a copy: this method is const and must not reorder the recorded spans.
  std::vector<ByteSpan> underlines = underlines_;
  SortAndMergeSpans(&underlines);
  return DecorateText(text, line_starts_, options_, underlines);
}

void DocPrinter::PrintDoc(const Doc& doc) {
size_t start_pos = output_.tellp();

if (const auto* doc_node = doc.as<LiteralDocNode>()) {
PrintTypedDoc(GetRef<LiteralDoc>(doc_node));
} else if (const auto* doc_node = doc.as<IdDocNode>()) {
Expand Down Expand Up @@ -84,6 +321,24 @@ void DocPrinter::PrintDoc(const Doc& doc) {
LOG(FATAL) << "Do not know how to print " << doc->GetTypeKey();
throw;
}

size_t end_pos = output_.tellp();
for (const ObjectPath& path : doc->source_paths) {
MarkSpan({start_pos, end_pos}, path);
}
}

/*!
 * \brief Consider `span` as an underline candidate for the doc that carries `path`.
 *
 * Only paths that are a prefix of the requested path qualify, and only the candidates
 * with the longest such path seen so far are kept.
 *
 * \param span Byte range of the output produced for the doc.
 * \param path Source path attached to the doc.
 */
void DocPrinter::MarkSpan(const ByteSpan& span, const ObjectPath& path) {
  if (!path_to_underline_.defined()) {
    return;
  }

  const auto path_length = path->Length();
  if (path_length < current_max_path_length_) {
    // A longer matching path has already been recorded.
    return;
  }
  if (!path->IsPrefixOf(path_to_underline_.value())) {
    return;
  }

  if (path_length > current_max_path_length_) {
    // Strictly longer match: earlier, shorter candidates are discarded.
    current_max_path_length_ = path_length;
    current_underline_candidates_.clear();
  }
  current_underline_candidates_.push_back(span);
}

} // namespace printer
Expand Down
Loading

0 comments on commit 32b34cc

Please sign in to comment.