Skip to content

Commit

Permalink
Add TextRange to Identifier (#8)
Browse files Browse the repository at this point in the history
## Summary

This PR adds `TextRange` to `Identifier`. Right now, the AST only
includes ranges for identifiers in certain cases (`Expr::Name`,
`Keyword`, etc.), namely when the identifier comprises an entire AST
node. In Ruff, we do additional ad-hoc lexing to extract identifiers
from source code.

One frequent example: given a function `def f(): ...`, we lex to find
the range of `f`, for use in diagnostics.

Another: `except ValueError as e`, for which the AST doesn't include a
range for `e`.

Note that, as an optimization, we avoid storing the `TextRange` for
`Expr::Name`, since it's already included.
  • Loading branch information
charliermarsh authored Jun 20, 2023
1 parent f0d200c commit ed3b4eb
Show file tree
Hide file tree
Showing 82 changed files with 3,868 additions and 4,207 deletions.
6 changes: 6 additions & 0 deletions ast/asdl_rs.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,12 @@ def visitField(self, field, parent, vis, depth, constructor=None):
if typ == "Int":
typ = BUILTIN_INT_NAMES.get(field.name, typ)
name = rust_field(field.name)

# Use a String, rather than an Identifier, for the `id` field of `Expr::Name`.
# Names already include a range, so there's no need to duplicate the span.
if name == "id":
typ = "String"

self.emit(f"{vis}{name}: {typ},", depth)

def visitProduct(self, product, type, depth):
Expand Down
52 changes: 27 additions & 25 deletions ast/src/builtin.rs
Original file line number Diff line number Diff line change
@@ -1,86 +1,87 @@
//! `builtin_types` in asdl.py and Attributed

use rustpython_parser_core::text_size::TextRange;

use crate::bigint::BigInt;
use crate::Ranged;

pub type String = std::string::String;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Identifier(String);
pub struct Identifier {
id: String,
range: TextRange,
}

impl Identifier {
#[inline]
pub fn new(s: impl Into<String>) -> Self {
Self(s.into())
pub fn new(id: impl Into<String>, range: TextRange) -> Self {
Self {
id: id.into(),
range,
}
}
}

impl Identifier {
#[inline]
pub fn as_str(&self) -> &str {
self.0.as_str()
self.id.as_str()
}
}

impl std::cmp::PartialEq<str> for Identifier {
impl PartialEq<str> for Identifier {
#[inline]
fn eq(&self, other: &str) -> bool {
self.0 == other
self.id == other
}
}

impl std::cmp::PartialEq<String> for Identifier {
impl PartialEq<String> for Identifier {
#[inline]
fn eq(&self, other: &String) -> bool {
&self.0 == other
&self.id == other
}
}

impl std::ops::Deref for Identifier {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.0.as_str()
self.id.as_str()
}
}

impl AsRef<str> for Identifier {
#[inline]
fn as_ref(&self) -> &str {
self.0.as_str()
self.id.as_str()
}
}

impl AsRef<String> for Identifier {
#[inline]
fn as_ref(&self) -> &String {
&self.0
&self.id
}
}

impl std::fmt::Display for Identifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
self.id.fmt(f)
}
}

impl From<Identifier> for String {
#[inline]
fn from(id: Identifier) -> String {
id.0
fn from(identifier: Identifier) -> String {
identifier.id
}
}

impl From<String> for Identifier {
#[inline]
fn from(id: String) -> Self {
Self(id)
}
}

impl<'a> From<&'a str> for Identifier {
#[inline]
fn from(id: &'a str) -> Identifier {
id.to_owned().into()
impl Ranged for Identifier {
fn range(&self) -> TextRange {
self.range
}
}

Expand Down Expand Up @@ -207,6 +208,7 @@ impl std::fmt::Display for Constant {
#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_is_macro() {
let none = Constant::None;
Expand Down
2 changes: 1 addition & 1 deletion ast/src/gen/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1563,7 +1563,7 @@ impl<R> From<ExprStarred<R>> for Ast<R> {
#[derive(Clone, Debug, PartialEq)]
pub struct ExprName<R = TextRange> {
pub range: R,
pub id: Identifier,
pub id: String,
pub ctx: ExprContext,
}

Expand Down
4 changes: 2 additions & 2 deletions ast/src/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ impl<R> Expr<R> {
#[cfg(target_arch = "x86_64")]
static_assertions::assert_eq_size!(crate::Expr, [u8; 72]);
#[cfg(target_arch = "x86_64")]
static_assertions::assert_eq_size!(crate::Stmt, [u8; 136]);
static_assertions::assert_eq_size!(crate::Stmt, [u8; 144]);
#[cfg(target_arch = "x86_64")]
static_assertions::assert_eq_size!(crate::Pattern, [u8; 96]);
#[cfg(target_arch = "x86_64")]
static_assertions::assert_eq_size!(crate::ExceptHandler, [u8; 64]);
static_assertions::assert_eq_size!(crate::ExceptHandler, [u8; 72]);
4 changes: 2 additions & 2 deletions parser/src/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ pub(crate) fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentLis
Some((start, end, name)) => {
// Check for duplicate keyword arguments in the call.
if let Some(keyword_name) = &name {
if !keyword_names.insert(keyword_name.clone()) {
if !keyword_names.insert(keyword_name.to_string()) {
return Err(LexicalError {
error: LexicalErrorType::DuplicateKeywordArgumentError(
keyword_name.to_string(),
Expand All @@ -103,7 +103,7 @@ pub(crate) fn parse_args(func_args: Vec<FunctionArgument>) -> Result<ArgumentLis
}

keywords.push(ast::Keyword {
arg: name.map(ast::Identifier::new),
arg: name.map(|name| ast::Identifier::new(name, TextRange::new(start, end))),
value,
range: TextRange::new(start, end),
});
Expand Down
5 changes: 4 additions & 1 deletion parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ impl Parse for ast::Identifier {
) -> Result<Self, ParseError> {
let expr = ast::Expr::parse_tokens(lxr, source_path)?;
match expr {
ast::Expr::Name(name) => Ok(name.id),
ast::Expr::Name(name) => {
let range = name.range();
Ok(ast::Identifier::new(name.id, range))
}
expr => Err(ParseError {
error: ParseErrorType::InvalidToken,
offset: expr.range().start(),
Expand Down
20 changes: 10 additions & 10 deletions parser/src/python.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ ImportAsNames: Vec<ast::Alias> = {
<location:@L> "(" <i:OneOrMore<ImportAsAlias<Identifier>>> ","? ")" <end_location:@R> => i,
<location:@L> "*" <end_location:@R> => {
// Star import all
vec![ast::Alias { name: ast::Identifier::new("*"), asname: None, range: (location..end_location).into() }]
vec![ast::Alias { name: ast::Identifier::new("*", (location..end_location).into()), asname: None, range: (location..end_location).into() }]
},
};

Expand All @@ -278,14 +278,14 @@ ImportAsAlias<I>: ast::Alias = {

// A name like abc or abc.def.ghi
DottedName: ast::Identifier = {
<n:name> => ast::Identifier::new(n),
<n:name> <n2: ("." Identifier)+> => {
<location:@L> <n:name> <end_location:@R> => ast::Identifier::new(n, (location..end_location).into()),
<location:@L> <n:name> <n2: ("." Identifier)+> <end_location:@R> => {
let mut r = n.to_string();
for x in n2 {
r.push('.');
r.push_str(x.1.as_str());
}
ast::Identifier::new(r)
ast::Identifier::new(r, (location..end_location).into())
},
};

Expand Down Expand Up @@ -563,8 +563,8 @@ CapturePattern: ast::Pattern = {
}

MatchName: ast::Expr = {
<location:@L> <name:Identifier> <end_location:@R> => ast::Expr::Name(
ast::ExprName { id: name, ctx: ast::ExprContext::Load, range: (location..end_location).into() },
<location:@L> <id:Identifier> <end_location:@R> => ast::Expr::Name(
ast::ExprName { id: id.into(), ctx: ast::ExprContext::Load, range: (location..end_location).into() },
),
}

Expand Down Expand Up @@ -1189,7 +1189,7 @@ NamedExpression: ast::Expr = {
ast::Expr::NamedExpr(
ast::ExprNamedExpr {
target: Box::new(ast::Expr::Name(
ast::ExprName { id, ctx: ast::ExprContext::Store, range: (location..end_location).into() },
ast::ExprName { id: id.into(), ctx: ast::ExprContext::Store, range: (location..end_location).into() },
)),
range: (location..value.end()).into(),
value: Box::new(value),
Expand Down Expand Up @@ -1405,8 +1405,8 @@ Atom<Goal>: ast::Expr = {
<location:@L> <value:Constant> <end_location:@R> => ast::Expr::Constant(
ast::ExprConstant { value, kind: None, range: (location..end_location).into() }
),
<location:@L> <name:Identifier> <end_location:@R> => ast::Expr::Name(
ast::ExprName { id: name, ctx: ast::ExprContext::Load, range: (location..end_location).into() }
<location:@L> <id:Identifier> <end_location:@R> => ast::Expr::Name(
ast::ExprName { id: id.into(), ctx: ast::ExprContext::Load, range: (location..end_location).into() }
),
<location:@L> "[" <e:ListLiteralValues?> "]"<end_location:@R> => {
let elts = e.unwrap_or_default();
Expand Down Expand Up @@ -1641,7 +1641,7 @@ Constant: ast::Constant = {
};

Identifier: ast::Identifier = {
<s:name> => ast::Identifier::new(s)
<location:@L> <s:name> <end_location:@R> => ast::Identifier::new(s, (location..end_location).into())
};

// Hook external lexer:
Expand Down
Loading

0 comments on commit ed3b4eb

Please sign in to comment.