Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse simple programs (integer, string, float, or rational) with Prism #25

Merged
merged 5 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions main/options/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ struct StopAfterOptions {
Phase flag;
};

// One entry of the `--parser` lookup table: pairs the textual value accepted on
// the command line with the Parser enumerator it selects.
struct ParserOptions {
// Text the user passes to `--parser`.
string option;
// Parser enumerator selected by `option`.
Parser flag;
};

const vector<StopAfterOptions> stop_after_options({
{"init", Phase::INIT},
{"parser", Phase::PARSER},
Expand All @@ -175,6 +180,11 @@ const vector<StopAfterOptions> stop_after_options({
{"inferencer", Phase::INFERENCER},
});

// Every value accepted by `--parser`, with the Parser enumerator each one maps to.
// extractParser scans this table both to resolve the flag and to list the valid
// values in its error message.
const vector<ParserOptions> parser_options({
{"sorbet", Parser::SORBET},
{"prism", Parser::PRISM},
});

core::TrackUntyped text2TrackUntyped(string_view key, spdlog::logger &logger) {
if (key == "") {
return core::TrackUntyped::Everywhere;
Expand Down Expand Up @@ -548,6 +558,9 @@ buildOptions(const vector<pipeline::semantic_extension::SemanticExtensionProvide
options.add_options("dev")(
"force-hashing", "Forces Sorbet to calculate file hashes when run from CLI. Useful for profiling purposes.");

options.add_options("dev")("parser", "Which parser to use", cxxopts::value<string>()->default_value("sorbet"),
"{sorbet, prism}");

for (auto &provider : semanticExtensionProviders) {
provider->injectOptions(options);
}
Expand Down Expand Up @@ -625,6 +638,22 @@ Phase extractStopAfter(cxxopts::ParseResult &raw, shared_ptr<spdlog::logger> log
return Phase::INIT;
}

// Resolves the value of the `--parser` command line flag to a Parser enumerator.
//
// The value is looked up in `parser_options`. When it matches no entry, an error
// listing every valid value is logged and Parser::SORBET is returned as the fallback.
Parser extractParser(cxxopts::ParseResult &raw, shared_ptr<spdlog::logger> logger) {
    string requested = raw["parser"].as<string>();

    for (auto &candidate : parser_options) {
        if (candidate.option != requested) {
            continue;
        }
        return candidate.flag;
    }

    // Nothing matched: gather the accepted spellings so the error message can list them.
    vector<string_view> validNames;
    for (auto &candidate : parser_options) {
        validNames.emplace_back(candidate.option);
    }
    logger->error("Unknown --parser option: {}\nValid values: {}", requested, fmt::join(validNames, ", "));

    return Parser::SORBET;
}

// Given a path, strips any trailing forward slashes (/) at the end of the path.
string_view stripTrailingSlashes(string_view path) {
while (path.back() == '/') {
Expand Down Expand Up @@ -783,6 +812,7 @@ void readOptions(Options &opts,
throw EarlyReturnWithCode(1);
}
opts.stopAfterPhase = extractStopAfter(raw, logger);
opts.parser = extractParser(raw, logger);

opts.silenceErrors = raw["quiet"].as<bool>();
opts.autocorrect = raw["autocorrect"].as<bool>();
Expand Down Expand Up @@ -1156,6 +1186,7 @@ void readOptions(Options &opts,
fmt::print("Sorbet typechecker {}\n", sorbet_full_version_string);
throw EarlyReturnWithCode(0);
}

} catch (cxxopts::OptionParseException &e) {
logger->info("{}. To see all available options pass `--help`.", e.what());
throw EarlyReturnWithCode(1);
Expand Down
6 changes: 6 additions & 0 deletions main/options/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ enum class Phase {
INFERENCER,
};

// Identifies which parser implementation is used to parse source files.
// NOTE(review): the enumerator order determines the underlying integer values;
// confirm nothing depends on them before reordering.
enum class Parser {
// The Prism-based parser.
PRISM,
// Sorbet's own parser.
SORBET,
};

struct AutogenConstCacheConfig {
// A file which contains a cache that can be used to potentially skip autogen
std::string cacheFile;
Expand All @@ -127,6 +132,7 @@ constexpr size_t MAX_CACHE_SIZE_BYTES = 1L * 1024 * 1024 * 1024; // 1 GiB
struct Options {
Printers print;
Phase stopAfterPhase = Phase::INFERENCER;
Parser parser = Parser::SORBET;
bool noStdlib = false;

// Should we monitor STDOUT for HUP and exit if it hangs up. This is a
Expand Down
250 changes: 243 additions & 7 deletions main/pipeline/pipeline.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@
#include "rewriter/rewriter.h"

extern "C" {
#include "prism.h"
#include "prism.h"
}
#include "core/LocOffsets.h"
#include <iostream>

using namespace std;

Expand Down Expand Up @@ -122,6 +124,7 @@ unique_ptr<parser::Node> runParser(core::GlobalState &gs, core::FileRef file, co
auto settings = parser::Parser::Settings{traceLexer, traceParser, indentationAware};
nodes = parser::Parser::run(gs, file, settings);
}

if (print.ParseTree.enabled) {
print.ParseTree.fmt("{}\n", nodes->toStringWithTabs(gs, 0));
}
Expand All @@ -137,6 +140,234 @@ unique_ptr<parser::Node> runParser(core::GlobalState &gs, core::FileRef file, co
return nodes;
}

// Converts a Prism source location into a core::LocOffsets.
//
// Prism stores a location as a pair of pointers (`start`, `end`); both are turned
// into offsets by subtracting `parser->start`, then narrowed to 32 bits.
//
// This runs once per converted node, so it must not write to stdout: the debug
// `std::cout` tracing that used to live here polluted the program's output and
// flushed the stream twice per node.
core::LocOffsets locOffset(pm_location_t *loc, pm_parser_t *parser) {
    const auto locStart = static_cast<uint32_t>(loc->start - parser->start);
    const auto locEnd = static_cast<uint32_t>(loc->end - parser->start);

    return core::LocOffsets{locStart, locEnd};
}

// Translates a Prism AST node into the corresponding Sorbet parser node.
//
// Only program, statements, float, integer, rational and string nodes are
// translated; every other node type yields a null pointer. A statements node with
// no children also yields a null pointer.
//
// `node`   - Prism node to translate.
// `parser` - Prism parser that produced `node`; used to turn Prism locations into offsets.
// `gs`     - global state into which string contents are entered as names.
const unique_ptr<parser::Node> convertPrismToSorbet(pm_node_t *node, pm_parser_t *parser, core::GlobalState &gs) {
    switch (PM_NODE_TYPE(node)) {
        case PM_FLOAT_NODE: {
            auto floatNode = reinterpret_cast<pm_float_node *>(node);
            pm_location_t *loc = &floatNode->base.location;

            // NOTE(review): std::to_string renders a double with six fixed decimal places, so
            // the literal's original spelling and full precision are not preserved here.
            return make_unique<parser::Float>(locOffset(loc, parser), std::to_string(floatNode->value));
        }
        case PM_INTEGER_NODE: {
            auto intNode = reinterpret_cast<pm_integer_node *>(node);
            pm_location_t *loc = &intNode->base.location;

            // Will only work for positive, 32-bit integers
            return make_unique<parser::Integer>(locOffset(loc, parser), std::to_string(intNode->value.value));
        }
        case PM_PROGRAM_NODE: {
            pm_statements_node *stmts = (reinterpret_cast<pm_program_node *>(node))->statements;
            return convertPrismToSorbet(&stmts->base, parser, gs);
        }
        case PM_RATIONAL_NODE: {
            auto *rationalNode = reinterpret_cast<pm_rational_node *>(node);
            pm_location_t *loc = &rationalNode->base.location;

            // The value is the source text of the numeric part of the literal.
            const uint8_t *start = rationalNode->numeric->location.start;
            const uint8_t *end = rationalNode->numeric->location.end;

            std::string value = std::string(reinterpret_cast<const char *>(start), end - start);

            return make_unique<parser::Rational>(locOffset(loc, parser), value);
        }
        case PM_STATEMENTS_NODE: {
            pm_node_list *body = &(reinterpret_cast<pm_statements_node *>(node))->body;
            // An empty statement list has no first element to read.
            if (body->size == 0) {
                return nullptr;
            }

            // TODO: Handle multiple statements
            pm_node *first = body->nodes[0];

            return convertPrismToSorbet(first, parser, gs);
        }
        case PM_STRING_NODE: {
            auto strNode = reinterpret_cast<pm_string_node *>(node);
            pm_location_t *loc = &strNode->base.location;

            auto unescaped = &strNode->unescaped;
            auto source =
                std::string(reinterpret_cast<const char *>(pm_string_source(unescaped)), pm_string_length(unescaped));

            // TODO: handle different string encodings
            return make_unique<parser::String>(locOffset(loc, parser), gs.enterNameUTF8(source));
        }
        case PM_ALIAS_GLOBAL_VARIABLE_NODE:
        case PM_ALIAS_METHOD_NODE:
        case PM_ALTERNATION_PATTERN_NODE:
        case PM_AND_NODE:
        case PM_ARGUMENTS_NODE:
        case PM_ARRAY_NODE:
        case PM_ARRAY_PATTERN_NODE:
        case PM_ASSOC_NODE:
        case PM_ASSOC_SPLAT_NODE:
        case PM_BACK_REFERENCE_READ_NODE:
        case PM_BEGIN_NODE:
        case PM_BLOCK_ARGUMENT_NODE:
        case PM_BLOCK_LOCAL_VARIABLE_NODE:
        case PM_BLOCK_NODE:
        case PM_BLOCK_PARAMETER_NODE:
        case PM_BLOCK_PARAMETERS_NODE:
        case PM_BREAK_NODE:
        case PM_CALL_AND_WRITE_NODE:
        case PM_CALL_NODE:
        case PM_CALL_OPERATOR_WRITE_NODE:
        case PM_CALL_OR_WRITE_NODE:
        case PM_CALL_TARGET_NODE:
        case PM_CAPTURE_PATTERN_NODE:
        case PM_CASE_MATCH_NODE:
        case PM_CASE_NODE:
        case PM_CLASS_NODE:
        case PM_CLASS_VARIABLE_AND_WRITE_NODE:
        case PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE:
        case PM_CLASS_VARIABLE_OR_WRITE_NODE:
        case PM_CLASS_VARIABLE_READ_NODE:
        case PM_CLASS_VARIABLE_TARGET_NODE:
        case PM_CLASS_VARIABLE_WRITE_NODE:
        case PM_CONSTANT_AND_WRITE_NODE:
        case PM_CONSTANT_OPERATOR_WRITE_NODE:
        case PM_CONSTANT_OR_WRITE_NODE:
        case PM_CONSTANT_PATH_AND_WRITE_NODE:
        case PM_CONSTANT_PATH_NODE:
        case PM_CONSTANT_PATH_OPERATOR_WRITE_NODE:
        case PM_CONSTANT_PATH_OR_WRITE_NODE:
        case PM_CONSTANT_PATH_TARGET_NODE:
        case PM_CONSTANT_PATH_WRITE_NODE:
        case PM_CONSTANT_READ_NODE:
        case PM_CONSTANT_TARGET_NODE:
        case PM_CONSTANT_WRITE_NODE:
        case PM_DEF_NODE:
        case PM_DEFINED_NODE:
        case PM_ELSE_NODE:
        case PM_EMBEDDED_STATEMENTS_NODE:
        case PM_EMBEDDED_VARIABLE_NODE:
        case PM_ENSURE_NODE:
        case PM_FALSE_NODE:
        case PM_FIND_PATTERN_NODE:
        case PM_FLIP_FLOP_NODE:
        case PM_FOR_NODE:
        case PM_FORWARDING_ARGUMENTS_NODE:
        case PM_FORWARDING_PARAMETER_NODE:
        case PM_FORWARDING_SUPER_NODE:
        case PM_GLOBAL_VARIABLE_AND_WRITE_NODE:
        case PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE:
        case PM_GLOBAL_VARIABLE_OR_WRITE_NODE:
        case PM_GLOBAL_VARIABLE_READ_NODE:
        case PM_GLOBAL_VARIABLE_TARGET_NODE:
        case PM_GLOBAL_VARIABLE_WRITE_NODE:
        case PM_HASH_NODE:
        case PM_HASH_PATTERN_NODE:
        case PM_IF_NODE:
        case PM_IMAGINARY_NODE:
        case PM_IMPLICIT_NODE:
        case PM_IMPLICIT_REST_NODE:
        case PM_IN_NODE:
        case PM_INDEX_AND_WRITE_NODE:
        case PM_INDEX_OPERATOR_WRITE_NODE:
        case PM_INDEX_OR_WRITE_NODE:
        case PM_INDEX_TARGET_NODE:
        case PM_INSTANCE_VARIABLE_AND_WRITE_NODE:
        case PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE:
        case PM_INSTANCE_VARIABLE_OR_WRITE_NODE:
        case PM_INSTANCE_VARIABLE_READ_NODE:
        case PM_INSTANCE_VARIABLE_TARGET_NODE:
        case PM_INSTANCE_VARIABLE_WRITE_NODE:
        case PM_INTERPOLATED_MATCH_LAST_LINE_NODE:
        case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
        case PM_INTERPOLATED_STRING_NODE:
        case PM_INTERPOLATED_SYMBOL_NODE:
        case PM_INTERPOLATED_X_STRING_NODE:
        case PM_IT_PARAMETERS_NODE:
        case PM_KEYWORD_HASH_NODE:
        case PM_KEYWORD_REST_PARAMETER_NODE:
        case PM_LAMBDA_NODE:
        case PM_LOCAL_VARIABLE_AND_WRITE_NODE:
        case PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE:
        case PM_LOCAL_VARIABLE_OR_WRITE_NODE:
        case PM_LOCAL_VARIABLE_READ_NODE:
        case PM_LOCAL_VARIABLE_TARGET_NODE:
        case PM_LOCAL_VARIABLE_WRITE_NODE:
        case PM_MATCH_LAST_LINE_NODE:
        case PM_MATCH_PREDICATE_NODE:
        case PM_MATCH_REQUIRED_NODE:
        case PM_MATCH_WRITE_NODE:
        case PM_MISSING_NODE:
        case PM_MODULE_NODE:
        case PM_MULTI_TARGET_NODE:
        case PM_MULTI_WRITE_NODE:
        case PM_NEXT_NODE:
        case PM_NIL_NODE:
        case PM_NO_KEYWORDS_PARAMETER_NODE:
        case PM_NUMBERED_PARAMETERS_NODE:
        case PM_NUMBERED_REFERENCE_READ_NODE:
        case PM_OPTIONAL_KEYWORD_PARAMETER_NODE:
        case PM_OPTIONAL_PARAMETER_NODE:
        case PM_OR_NODE:
        case PM_PARAMETERS_NODE:
        case PM_PARENTHESES_NODE:
        case PM_PINNED_EXPRESSION_NODE:
        case PM_PINNED_VARIABLE_NODE:
        case PM_POST_EXECUTION_NODE:
        case PM_PRE_EXECUTION_NODE:
        case PM_RANGE_NODE:
        case PM_REDO_NODE:
        case PM_REGULAR_EXPRESSION_NODE:
        case PM_REQUIRED_KEYWORD_PARAMETER_NODE:
        case PM_REQUIRED_PARAMETER_NODE:
        case PM_RESCUE_MODIFIER_NODE:
        case PM_RESCUE_NODE:
        case PM_REST_PARAMETER_NODE:
        case PM_RETRY_NODE:
        case PM_RETURN_NODE:
        case PM_SELF_NODE:
        case PM_SHAREABLE_CONSTANT_NODE:
        case PM_SINGLETON_CLASS_NODE:
        case PM_SOURCE_ENCODING_NODE:
        case PM_SOURCE_FILE_NODE:
        case PM_SOURCE_LINE_NODE:
        case PM_SPLAT_NODE:
        case PM_SUPER_NODE:
        case PM_SYMBOL_NODE:
        case PM_TRUE_NODE:
        case PM_UNDEF_NODE:
        case PM_UNLESS_NODE:
        case PM_UNTIL_NODE:
        case PM_WHEN_NODE:
        case PM_WHILE_NODE:
        case PM_X_STRING_NODE:
        case PM_YIELD_NODE:
        case PM_SCOPE_NODE:
            // Not translated yet.
            return nullptr;
    }

    // The cases above are listed explicitly (no `default`) so the compiler can flag node
    // types that are missing. This return covers a type value outside the enumeration so
    // control never runs off the end of the function.
    return nullptr;
}

// Parses the contents of `file` with Prism and converts the resulting tree into
// Sorbet parser nodes via convertPrismToSorbet.
//
// The name table of `gs` is unfrozen for the duration of the call. The Prism tree
// and parser are released before returning. `print`, `traceLexer` and `traceParser`
// are accepted but not used by this function.
unique_ptr<parser::Node> runPrismParser(core::GlobalState &gs, core::FileRef file, const options::Printers &print,
                                        bool traceLexer, bool traceParser) {
    auto contents = file.data(gs).source();

    core::UnfreezeNameTable unfreezeNames(gs);

    pm_parser_t prism;
    const auto *bytes = reinterpret_cast<const uint8_t *>(contents.data());
    pm_parser_init(&prism, bytes, contents.size(), nullptr);

    pm_node_t *program = pm_parse(&prism);
    std::unique_ptr<parser::Node> converted = convertPrismToSorbet(program, &prism, gs);

    // Release the Prism tree first, then the parser that owns its backing storage.
    pm_node_destroy(&prism, program);
    pm_parser_free(&prism);

    return converted;
}

ast::ExpressionPtr runDesugar(core::GlobalState &gs, core::FileRef file, unique_ptr<parser::Node> parseTree,
const options::Printers &print) {
Timer timeit(gs.tracer(), "runDesugar", {{"file", string(file.data(gs).path())}});
Expand Down Expand Up @@ -194,6 +425,8 @@ ast::ExpressionPtr desugarOne(const options::Options &opts, core::GlobalState &g
ast::ParsedFile indexOne(const options::Options &opts, core::GlobalState &lgs, core::FileRef file,
ast::ExpressionPtr tree) {
auto &print = opts.print;
auto &parser = opts.parser;

ast::ParsedFile rewritten{nullptr, file};

Timer timeit(lgs.tracer(), "indexOne", {{"file", string(file.data(lgs).path())}});
Expand All @@ -203,7 +436,15 @@ ast::ParsedFile indexOne(const options::Options &opts, core::GlobalState &lgs, c
if (file.data(lgs).strictLevel == core::StrictLevel::Ignore) {
return emptyParsedFile(file);
}
auto parseTree = runParser(lgs, file, print, opts.traceLexer, opts.traceParser);

unique_ptr<parser::Node> parseTree;

if (parser == options::Parser::SORBET) {
parseTree = runParser(lgs, file, print, opts.traceLexer, opts.traceParser);
} else if (parser == options::Parser::PRISM) {
parseTree = runPrismParser(lgs, file, print, opts.traceLexer, opts.traceParser);
} // Any other option would have been handled in the options parser

if (opts.stopAfterPhase == options::Phase::PARSER) {
return emptyParsedFile(file);
}
Expand Down Expand Up @@ -607,11 +848,6 @@ vector<ast::ParsedFile> index(core::GlobalState &gs, absl::Span<core::FileRef> f
vector<ast::ParsedFile> ret;
vector<ast::ParsedFile> empty;

// TODO: remove once we are actually parsing with prism
cout << "***"
<< pm_version()
<< "***";

if (opts.stopAfterPhase == options::Phase::INIT) {
return empty;
}
Expand Down