Skip to content

Commit 4003771

Browse files
committed
syntax: add 'std' feature
In effect, this adds support for 'no_std' by depending only on 'core' and 'alloc'. There is currently still some benefit to enabling 'std' support, namely, getting the 'std::error::Error' trait impls for the various error types. (Although, it seems like the 'Error' trait is finally going to get moved to 'core'.) Otherwise, the only 'std' things we use are in tests, for tweaking stack sizes. This is the first step in an effort to make 'regex' itself work without depending on 'std'. Doing the same for 'regex' itself will be more precarious, since it uses things like 'HashMap' and 'Mutex' that we'll need to find a way around. Getting around 'HashMap' is easy (just use 'BTreeMap'), but figuring out how to synchronize the threadpool will be interesting. Ref #476, Ref #477
1 parent d258c29 commit 4003771

18 files changed

+245
-179
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ finite automata and guarantees linear time matching on all inputs.
1414
categories = ["text-processing"]
1515
autotests = false
1616
exclude = ["/scripts/*", "/.github/*"]
17-
edition = "2018"
17+
edition = "2021"
1818
rust-version = "1.60.0"
1919

2020
[workspace]

regex-syntax/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ rust-version = "1.60.0"
1414
# Features are documented in the "Crate features" section of the crate docs:
1515
# https://docs.rs/regex-syntax/*/#crate-features
1616
[features]
17-
default = ["unicode"]
17+
default = ["std", "unicode"]
18+
std = []
1819

1920
unicode = [
2021
"unicode-age",

regex-syntax/src/ast/mod.rs

+17-15
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
Defines an abstract syntax for regular expressions.
33
*/
44

5-
use std::cmp::Ordering;
6-
use std::fmt;
5+
use core::cmp::Ordering;
6+
7+
use alloc::{boxed::Box, string::String, vec, vec::Vec};
78

89
pub use crate::ast::visitor::{visit, Visitor};
910

@@ -174,23 +175,24 @@ pub enum ErrorKind {
174175
UnsupportedLookAround,
175176
}
176177

178+
#[cfg(feature = "std")]
177179
impl std::error::Error for Error {}
178180

179-
impl fmt::Display for Error {
180-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
181+
impl core::fmt::Display for Error {
182+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
181183
crate::error::Formatter::from(self).fmt(f)
182184
}
183185
}
184186

185-
impl fmt::Display for ErrorKind {
186-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
187+
impl core::fmt::Display for ErrorKind {
188+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
187189
use self::ErrorKind::*;
188190
match *self {
189191
CaptureLimitExceeded => write!(
190192
f,
191193
"exceeded the maximum number of \
192194
capturing groups ({})",
193-
::std::u32::MAX
195+
u32::MAX
194196
),
195197
ClassEscapeInvalid => {
196198
write!(f, "invalid escape sequence found in character class")
@@ -283,8 +285,8 @@ pub struct Span {
283285
pub end: Position,
284286
}
285287

286-
impl fmt::Debug for Span {
287-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288+
impl core::fmt::Debug for Span {
289+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
288290
write!(f, "Span({:?}, {:?})", self.start, self.end)
289291
}
290292
}
@@ -316,8 +318,8 @@ pub struct Position {
316318
pub column: usize,
317319
}
318320

319-
impl fmt::Debug for Position {
320-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321+
impl core::fmt::Debug for Position {
322+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
321323
write!(
322324
f,
323325
"Position(o: {:?}, l: {:?}, c: {:?})",
@@ -497,8 +499,8 @@ impl Ast {
497499
///
498500
/// This implementation uses constant stack space and heap space proportional
499501
/// to the size of the `Ast`.
500-
impl fmt::Display for Ast {
501-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
502+
impl core::fmt::Display for Ast {
503+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
502504
use crate::ast::print::Printer;
503505
Printer::new().print(self, f)
504506
}
@@ -1315,7 +1317,7 @@ pub enum Flag {
13151317
/// space but heap space proportional to the depth of the `Ast`.
13161318
impl Drop for Ast {
13171319
fn drop(&mut self) {
1318-
use std::mem;
1320+
use core::mem;
13191321

13201322
match *self {
13211323
Ast::Empty(_)
@@ -1365,7 +1367,7 @@ impl Drop for Ast {
13651367
/// stack space but heap space proportional to the depth of the `ClassSet`.
13661368
impl Drop for ClassSet {
13671369
fn drop(&mut self) {
1368-
use std::mem;
1370+
use core::mem;
13691371

13701372
match *self {
13711373
ClassSet::Item(ref item) => match *item {

regex-syntax/src/ast/parse.rs

+30-27
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,26 @@
22
This module provides a regular expression parser.
33
*/
44

5-
use std::borrow::Borrow;
6-
use std::cell::{Cell, RefCell};
7-
use std::mem;
8-
use std::result;
9-
10-
use crate::ast::{self, Ast, Position, Span};
11-
use crate::either::Either;
12-
13-
use crate::is_meta_character;
14-
15-
type Result<T> = result::Result<T, ast::Error>;
5+
use core::{
6+
borrow::Borrow,
7+
cell::{Cell, RefCell},
8+
mem,
9+
};
10+
11+
use alloc::{
12+
boxed::Box,
13+
string::{String, ToString},
14+
vec,
15+
vec::Vec,
16+
};
17+
18+
use crate::{
19+
ast::{self, Ast, Position, Span},
20+
either::Either,
21+
is_meta_character,
22+
};
23+
24+
type Result<T> = core::result::Result<T, ast::Error>;
1625

1726
/// A primitive is an expression with no sub-expressions. This includes
1827
/// literals, assertions and non-set character classes. This representation
@@ -1533,9 +1542,6 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
15331542
/// Assuming the preconditions are met, this routine can never fail.
15341543
#[inline(never)]
15351544
fn parse_octal(&self) -> ast::Literal {
1536-
use std::char;
1537-
use std::u32;
1538-
15391545
assert!(self.parser().octal);
15401546
assert!('0' <= self.char() && self.char() <= '7');
15411547
let start = self.pos();
@@ -1600,9 +1606,6 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
16001606
&self,
16011607
kind: ast::HexLiteralKind,
16021608
) -> Result<ast::Literal> {
1603-
use std::char;
1604-
use std::u32;
1605-
16061609
let mut scratch = self.parser().scratch.borrow_mut();
16071610
scratch.clear();
16081611

@@ -1646,9 +1649,6 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
16461649
&self,
16471650
kind: ast::HexLiteralKind,
16481651
) -> Result<ast::Literal> {
1649-
use std::char;
1650-
use std::u32;
1651-
16521652
let mut scratch = self.parser().scratch.borrow_mut();
16531653
scratch.clear();
16541654

@@ -2146,7 +2146,7 @@ impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
21462146
let new = self.depth.checked_add(1).ok_or_else(|| {
21472147
self.p.error(
21482148
span.clone(),
2149-
ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2149+
ast::ErrorKind::NestLimitExceeded(u32::MAX),
21502150
)
21512151
})?;
21522152
let limit = self.p.parser().nest_limit;
@@ -2297,11 +2297,14 @@ fn specialize_err<T>(
22972297

22982298
#[cfg(test)]
22992299
mod tests {
2300-
use std::ops::Range;
2300+
use core::ops::Range;
2301+
2302+
use alloc::format;
23012303

2302-
use super::{Parser, ParserBuilder, ParserI, Primitive};
23032304
use crate::ast::{self, Ast, Position, Span};
23042305

2306+
use super::*;
2307+
23052308
// Our own assert_eq, which has slightly better formatting (but honestly
23062309
// still kind of crappy).
23072310
macro_rules! assert_eq {
@@ -4272,7 +4275,7 @@ bar
42724275
Ok(Primitive::Literal(ast::Literal {
42734276
span: span(0..pat.len()),
42744277
kind: ast::LiteralKind::Octal,
4275-
c: ::std::char::from_u32(i).unwrap(),
4278+
c: char::from_u32(i).unwrap(),
42764279
}))
42774280
);
42784281
}
@@ -4347,7 +4350,7 @@ bar
43474350
Ok(Primitive::Literal(ast::Literal {
43484351
span: span(0..pat.len()),
43494352
kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4350-
c: ::std::char::from_u32(i).unwrap(),
4353+
c: char::from_u32(i).unwrap(),
43514354
}))
43524355
);
43534356
}
@@ -4378,7 +4381,7 @@ bar
43784381
#[test]
43794382
fn parse_hex_four() {
43804383
for i in 0..65536 {
4381-
let c = match ::std::char::from_u32(i) {
4384+
let c = match char::from_u32(i) {
43824385
None => continue,
43834386
Some(c) => c,
43844387
};
@@ -4442,7 +4445,7 @@ bar
44424445
#[test]
44434446
fn parse_hex_eight() {
44444447
for i in 0..65536 {
4445-
let c = match ::std::char::from_u32(i) {
4448+
let c = match char::from_u32(i) {
44464449
None => continue,
44474450
Some(c) => c,
44484451
};

regex-syntax/src/ast/print.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
This module provides a regular expression printer for `Ast`.
33
*/
44

5-
use std::fmt;
5+
use core::fmt;
66

7-
use crate::ast::visitor::{self, Visitor};
8-
use crate::ast::{self, Ast};
7+
use crate::ast::{
8+
self,
9+
visitor::{self, Visitor},
10+
Ast,
11+
};
912

1013
/// A builder for constructing a printer.
1114
///
@@ -395,9 +398,12 @@ impl<W: fmt::Write> Writer<W> {
395398

396399
#[cfg(test)]
397400
mod tests {
398-
use super::Printer;
401+
use alloc::string::String;
402+
399403
use crate::ast::parse::ParserBuilder;
400404

405+
use super::*;
406+
401407
fn roundtrip(given: &str) {
402408
roundtrip_with(|b| b, given);
403409
}

regex-syntax/src/ast/visitor.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::fmt;
1+
use alloc::{vec, vec::Vec};
22

33
use crate::ast::{self, Ast};
44

@@ -475,8 +475,8 @@ impl<'a> ClassInduct<'a> {
475475
}
476476
}
477477

478-
impl<'a> fmt::Debug for ClassFrame<'a> {
479-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
478+
impl<'a> core::fmt::Debug for ClassFrame<'a> {
479+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
480480
let x = match *self {
481481
ClassFrame::Union { .. } => "Union",
482482
ClassFrame::Binary { .. } => "Binary",
@@ -487,8 +487,8 @@ impl<'a> fmt::Debug for ClassFrame<'a> {
487487
}
488488
}
489489

490-
impl<'a> fmt::Debug for ClassInduct<'a> {
491-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
490+
impl<'a> core::fmt::Debug for ClassInduct<'a> {
491+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
492492
let x = match *self {
493493
ClassInduct::Item(it) => match *it {
494494
ast::ClassSetItem::Empty(_) => "Item(Empty)",

regex-syntax/src/error.rs

+13-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
use std::cmp;
2-
use std::fmt;
3-
use std::result;
1+
use core::{cmp, fmt, result};
42

5-
use crate::ast;
6-
use crate::hir;
3+
use alloc::{
4+
format,
5+
string::{String, ToString},
6+
vec,
7+
vec::Vec,
8+
};
9+
10+
use crate::{ast, hir};
711

812
/// A type alias for dealing with errors returned by this crate.
913
pub type Result<T> = result::Result<T, Error>;
@@ -35,6 +39,7 @@ impl From<hir::Error> for Error {
3539
}
3640
}
3741

42+
#[cfg(feature = "std")]
3843
impl std::error::Error for Error {}
3944

4045
impl fmt::Display for Error {
@@ -266,11 +271,13 @@ impl<'p> Spans<'p> {
266271
}
267272

268273
fn repeat_char(c: char, count: usize) -> String {
269-
::std::iter::repeat(c).take(count).collect()
274+
core::iter::repeat(c).take(count).collect()
270275
}
271276

272277
#[cfg(test)]
273278
mod tests {
279+
use alloc::string::ToString;
280+
274281
use crate::ast::parse::Parser;
275282

276283
fn assert_panic_message(pattern: &str, expected_msg: &str) {

regex-syntax/src/hir/interval.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
use std::char;
2-
use std::cmp;
3-
use std::fmt::Debug;
4-
use std::slice;
5-
use std::u8;
1+
use core::{char, cmp, fmt::Debug, slice};
2+
3+
use alloc::vec::Vec;
64

75
use crate::unicode;
86

0 commit comments

Comments
 (0)