Skip to content

Commit 9b8a2d1

Browse files
xitepiffyio
andauthored
Extract source comments (#2107)
Co-authored-by: Ifeanyi Ubah <ify1992@yahoo.com>
1 parent 1b842d3 commit 9b8a2d1

File tree

5 files changed

+459
-7
lines changed

5 files changed

+459
-7
lines changed

src/ast/comments.rs

Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
// Licensed under the Apache License, Version 2.0 (the "License");
2+
// you may not use this file except in compliance with the License.
3+
// You may obtain a copy of the License at
4+
//
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software
8+
// distributed under the License is distributed on an "AS IS" BASIS,
9+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
// See the License for the specific language governing permissions and
11+
// limitations under the License.
12+
13+
//! Provides a representation of source code comments in parsed SQL code.
14+
//!
15+
//! See [Comments::find] for an example.
16+
17+
#[cfg(not(feature = "std"))]
18+
use alloc::{string::String, vec::Vec};
19+
20+
use core::{
21+
ops::{Bound, Deref, RangeBounds},
22+
slice,
23+
};
24+
25+
use crate::tokenizer::{Location, Span};
26+
27+
/// An opaque container for the comments extracted from parsed SQL source code.
///
/// `offer` only appends a comment whose span compares strictly greater than
/// the span of the last stored one, so the entries stay sorted; `find` relies
/// on that ordering for its binary searches.
28+
#[derive(Default, Debug)]
29+
pub struct Comments(Vec<CommentWithSpan>);
30+
31+
impl Comments {
32+
/// Accepts `comment` if it is the first one or is located strictly after the
33+
/// last accepted comment. In other words, this method will skip the
34+
/// comment if it is coming out of order (as encountered in the parsed
35+
/// source code).
36+
pub(crate) fn offer(&mut self, comment: CommentWithSpan) {
37+
// An empty container accepts anything (`unwrap_or(true)`); otherwise the
// new span must compare strictly greater than the most recently stored
// one. Rejected comments are silently dropped.
if self
38+
.0
39+
.last()
40+
.map(|last| last.span < comment.span)
41+
.unwrap_or(true)
42+
{
43+
self.0.push(comment);
44+
}
45+
}
46+
47+
/// Finds comments starting within the given location range. The order of
48+
/// the iterator reflects the order of the comments as encountered in the parsed
49+
/// source code.
50+
///
51+
/// # Example
52+
/// ```rust
53+
/// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location};
54+
///
55+
/// let sql = r#"/*
56+
/// header comment ...
57+
/// ... spanning multiple lines
58+
/// */
59+
///
60+
/// -- first statement
61+
/// SELECT 'hello' /* world */ FROM DUAL;
62+
///
63+
/// -- second statement
64+
/// SELECT 123 FROM DUAL;
65+
///
66+
/// -- trailing comment
67+
/// "#;
68+
///
69+
/// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap();
70+
///
71+
/// // all comments appearing before line seven, i.e. before the first statement itself
72+
/// assert_eq!(
73+
/// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
74+
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
75+
///
76+
/// // all comments appearing within the first statement
77+
/// assert_eq!(
78+
/// &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::<Vec<_>>(),
79+
/// &[" world "]);
80+
///
81+
/// // all comments appearing within or after the first statement
82+
/// assert_eq!(
83+
/// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
84+
/// &[" world ", " second statement\n", " trailing comment\n"]);
85+
/// ```
86+
///
87+
/// The [Spanned](crate::ast::Spanned) trait allows you to access location
88+
/// information for certain AST nodes.
89+
pub fn find<R: RangeBounds<Location>>(&self, range: R) -> Iter<'_> {
90+
// Translate both range bounds into positions within the stored comments:
// `start` is an inclusive index, `end` an exclusive one.
let (start, end) = (
91+
self.start_index(range.start_bound()),
92+
self.end_index(range.end_bound()),
93+
);
94+
// Both indices must be valid slice positions (checked in debug builds only).
debug_assert!((0..=self.0.len()).contains(&start));
95+
debug_assert!((0..=self.0.len()).contains(&end));
96+
// A reversed range (start > end) would panic when slicing, so yield an
// empty iterator for it instead.
97+
Iter(if start <= end {
98+
self.0[start..end].iter()
99+
} else {
100+
self.0[0..0].iter()
101+
})
102+
}
103+
104+
/// Finds the index of the first comment selected by the lower bound
/// `location`: the first comment starting at (`Included`) or strictly after
/// (`Excluded`) the given location, or `0` for `Unbounded`.
105+
///
106+
/// The returned index is _inclusive_ and within the range of `0..=self.0.len()`.
107+
fn start_index(&self, location: Bound<&Location>) -> usize {
108+
// NOTE(review): `binary_search_by` may return any matching index when
// several comments share the same start location; the arithmetic below
// assumes start locations are unique — confirm `offer` guarantees this.
match location {
109+
Bound::Included(location) => {
110+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
111+
// A comment starts exactly at `location`: it is part of the range.
Ok(i) => i,
112+
// No exact hit: `i` is the insertion point, i.e. the first
// comment starting after `location`.
Err(i) => i,
113+
}
114+
}
115+
Bound::Excluded(location) => {
116+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
117+
// A comment starts exactly at `location`: skip it.
Ok(i) => i + 1,
118+
Err(i) => i,
119+
}
120+
}
121+
Bound::Unbounded => 0,
122+
}
123+
}
124+
125+
/// Finds the index one past the last comment selected by the upper bound
/// `location`: the first comment starting strictly after (`Included`) or at
/// or after (`Excluded`) the given location, or `self.0.len()` for
/// `Unbounded`.
126+
///
127+
/// The returned index is _exclusive_ and within the range of `0..=self.0.len()`.
128+
fn end_index(&self, location: Bound<&Location>) -> usize {
129+
// NOTE(review): as with the start bound, this relies on comment start
// locations being unique, since `binary_search_by` may return any of
// several equal matches — confirm.
match location {
130+
Bound::Included(location) => {
131+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
132+
// A comment starts exactly at `location`: keep it in the range by
// placing the exclusive end just behind it.
Ok(i) => i + 1,
133+
Err(i) => i,
134+
}
135+
}
136+
Bound::Excluded(location) => {
137+
match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
138+
// A comment starting exactly at `location` is left out: the
// exclusive end is that comment's own index.
Ok(i) => i,
139+
Err(i) => i,
140+
}
141+
}
142+
Bound::Unbounded => self.0.len(),
143+
}
144+
}
145+
}
146+
147+
/// Unwraps the container into its underlying vector of comments, keeping the
/// stored order.
impl From<Comments> for Vec<CommentWithSpan> {
148+
fn from(comments: Comments) -> Self {
149+
comments.0
150+
}
151+
}
152+
153+
/// A source code comment together with the span it occupies in the source.
154+
#[derive(Debug, Clone, PartialEq, Eq)]
155+
pub struct CommentWithSpan {
156+
/// The source code comment itself
157+
pub comment: Comment,
158+
/// The span of the comment including its markers
159+
pub span: Span,
160+
}
161+
162+
/// Dereferences to the wrapped [`Comment`], so its methods (e.g. `as_str`) can
/// be called directly on a `CommentWithSpan`.
impl Deref for CommentWithSpan {
163+
type Target = Comment;
164+
165+
fn deref(&self) -> &Self::Target {
166+
&self.comment
167+
}
168+
}
169+
170+
/// A unified type for the different source code comment formats.
171+
#[derive(Debug, Clone, PartialEq, Eq)]
172+
pub enum Comment {
173+
/// A single line comment, typically introduced with a prefix and spanning
174+
/// until end-of-line or end-of-file in the source code.
175+
///
/// `prefix` holds the marker that introduced the comment (e.g. `--`) and
/// `content` the text following that marker.
///
176+
/// Note: `content` will include the terminating new-line character, if any.
177+
SingleLine { content: String, prefix: String },
178+
179+
/// A multi-line comment, typically enclosed in `/* .. */` markers. The
180+
/// string represents the content excluding the markers.
181+
MultiLine(String),
182+
}
183+
184+
impl Comment {
185+
/// Retrieves the content of the comment as a string slice.
///
/// For both variants this is the comment text only; the `prefix` of a
/// single-line comment is not part of the returned slice.
186+
pub fn as_str(&self) -> &str {
187+
match self {
188+
Comment::SingleLine { content, prefix: _ } => content.as_str(),
189+
Comment::MultiLine(content) => content.as_str(),
190+
}
191+
}
192+
}
193+
194+
/// Dereferences to the comment's content, i.e. the same slice that
/// `Comment::as_str` returns.
impl Deref for Comment {
195+
type Target = str;
196+
197+
fn deref(&self) -> &Self::Target {
198+
self.as_str()
199+
}
200+
}
201+
202+
/// An opaque iterator over the comments served by [Comments::find].
///
/// It wraps a slice iterator over the stored comments and yields them by
/// reference.
203+
pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>);
204+
205+
/// Forwards iteration to the wrapped slice iterator.
impl<'a> Iterator for Iter<'a> {
206+
type Item = &'a CommentWithSpan;
207+
208+
fn next(&mut self) -> Option<Self::Item> {
209+
self.0.next()
210+
}
211+
}
212+
213+
#[cfg(test)]
214+
mod tests {
215+
use super::*;
216+
217+
// Exercises `Comments::find` with open-ended, closed and reversed location
// ranges against a small hand-built set of comments.
#[test]
218+
fn test_find() {
219+
let comments = {
220+
// The fixture mirrors the comments of this source text:
//
// ```
221+
// -- abc
222+
// /* hello */--, world
223+
// /* def
224+
// ghi
225+
// jkl
226+
// */
227+
// ```
228+
let mut c = Comments(Vec::new());
229+
c.offer(CommentWithSpan {
230+
comment: Comment::SingleLine {
231+
content: " abc".into(),
232+
prefix: "--".into(),
233+
},
234+
span: Span::new((1, 1).into(), (1, 7).into()),
235+
});
236+
c.offer(CommentWithSpan {
237+
comment: Comment::MultiLine(" hello ".into()),
238+
span: Span::new((2, 3).into(), (2, 14).into()),
239+
});
240+
c.offer(CommentWithSpan {
241+
comment: Comment::SingleLine {
242+
content: ", world".into(),
243+
prefix: "--".into(),
244+
},
245+
span: Span::new((2, 14).into(), (2, 21).into()),
246+
});
247+
c.offer(CommentWithSpan {
248+
comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()),
249+
span: Span::new((3, 3).into(), (7, 1).into()),
250+
});
251+
c
252+
};
253+
254+
// Helper: runs `Comments::find` and collects the contents of the
// matching comments so they can be compared against string literals.
fn find<R: RangeBounds<Location>>(comments: &Comments, range: R) -> Vec<&str> {
255+
comments.find(range).map(|c| c.as_str()).collect::<Vec<_>>()
256+
}
257+
258+
// ~ end-points only --------------------------------------------------
259+
assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new());
260+
assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]);
261+
// An exclusive end at the exact start of " hello " leaves it out ...
assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]);
262+
// ... while an inclusive end at the same location keeps it.
assert_eq!(
263+
find(&comments, ..=Location::new(2, 3)),
264+
vec![" abc", " hello "]
265+
);
266+
// NOTE(review): this assertion is identical to the preceding one; it
// possibly was meant to cover a different bound — confirm.
assert_eq!(
267+
find(&comments, ..=Location::new(2, 3)),
268+
vec![" abc", " hello "]
269+
);
270+
assert_eq!(
271+
find(&comments, ..Location::new(2, 15)),
272+
vec![" abc", " hello ", ", world"]
273+
);
274+
275+
// ~ start-points only ------------------------------------------------
276+
assert_eq!(
277+
find(&comments, Location::new(1000, 1000)..),
278+
Vec::<&str>::new()
279+
);
280+
assert_eq!(
281+
find(&comments, Location::new(2, 14)..),
282+
vec![", world", " def\n ghi\n jkl\n"]
283+
);
284+
assert_eq!(
285+
find(&comments, Location::new(2, 15)..),
286+
vec![" def\n ghi\n jkl\n"]
287+
);
288+
assert_eq!(
289+
find(&comments, Location::new(0, 0)..),
290+
vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
291+
);
292+
assert_eq!(
293+
find(&comments, Location::new(1, 1)..),
294+
vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
295+
);
296+
297+
// ~ ranges -----------------------------------------------------------
298+
// A reversed range must produce no comments rather than panic.
assert_eq!(
299+
find(&comments, Location::new(2, 1)..Location::new(1, 1)),
300+
Vec::<&str>::new()
301+
);
302+
assert_eq!(
303+
find(&comments, Location::new(1, 1)..Location::new(2, 3)),
304+
vec![" abc"]
305+
);
306+
assert_eq!(
307+
find(&comments, Location::new(1, 1)..=Location::new(2, 3)),
308+
vec![" abc", " hello "]
309+
);
310+
assert_eq!(
311+
find(&comments, Location::new(1, 1)..=Location::new(2, 10)),
312+
vec![" abc", " hello "]
313+
);
314+
assert_eq!(
315+
find(&comments, Location::new(1, 1)..=Location::new(2, 14)),
316+
vec![" abc", " hello ", ", world"]
317+
);
318+
assert_eq!(
319+
find(&comments, Location::new(1, 1)..Location::new(2, 15)),
320+
vec![" abc", " hello ", ", world"]
321+
);
322+
323+
// ~ find everything --------------------------------------------------
324+
assert_eq!(
325+
find(&comments, ..),
326+
vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
327+
);
328+
}
329+
}

src/ast/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ mod query;
136136
mod spans;
137137
pub use spans::Spanned;
138138

139+
pub mod comments;
139140
mod trigger;
140141
mod value;
141142

src/ast/spans.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use core::iter;
2828
use crate::tokenizer::Span;
2929

3030
use super::{
31-
dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
31+
comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation,
3232
AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget,
3333
AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef,
3434
ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements,
@@ -2477,6 +2477,12 @@ impl Spanned for OutputClause {
24772477
}
24782478
}
24792479

2480+
/// The span of a comment is the one stored alongside it in its `span` field.
impl Spanned for comments::CommentWithSpan {
2481+
fn span(&self) -> Span {
2482+
self.span
2483+
}
2484+
}
2485+
24802486
#[cfg(test)]
24812487
pub mod tests {
24822488
use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect};

0 commit comments

Comments
 (0)