Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expose new crate features for optionally shrinking regex #613

Merged
merged 15 commits into from
Sep 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
1.3.0 (2019-09-02)
==================
This release adds a plethora of new crate features that permit users of regex
to shrink its size considerably, in exchange for giving up either functionality
(such as Unicode support) or runtime performance. When all such features are
disabled, the dependency tree for `regex` shrinks to exactly 1 crate
(`regex-syntax`). More information about the new crate features can be
[found in the docs](https://docs.rs/regex/*/#crate-features).

Note that while this is a new minor version release, the minimum supported
Rust version for this crate remains at `1.28.0`.

New features:

* [FEATURE #474](https://github.com/rust-lang/regex/issues/474):
The `use_std` feature has been deprecated in favor of the `std` feature.
The `use_std` feature will be removed in regex 2. Until then, `use_std` will
remain as an alias for the `std` feature.
* [FEATURE #583](https://github.com/rust-lang/regex/issues/583):
Add a substantial number of crate features for shrinking `regex`.


1.2.1 (2019-08-03)
==================
This release does a bit of house cleaning. Namely:
Expand Down
120 changes: 95 additions & 25 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,105 @@ members = [
"bench", "regex-capi", "regex-debug", "regex-syntax",
]

[dependencies]
[lib]
# There are no benchmarks in the library code itself
bench = false
# Doc tests fail when some features aren't present. The easiest way to work
# around this is to disable automatic doc testing, but explicitly test them
# with `cargo test --doc`.
doctest = false

# Features are documented in the "Crate features" section of the crate docs:
# https://docs.rs/regex/*/#crate-features
[features]
# By default everything is enabled: standard library support, all performance
# optimizations and all Unicode data.
default = ["std", "perf", "unicode"]

# ECOSYSTEM FEATURES

# The 'std' feature permits the regex crate to use the standard library. This
# is intended to support future use cases where the regex crate may be able
# to compile without std, and instead just rely on 'core' and 'alloc' (for
# example). Currently, this isn't supported, and removing the 'std' feature
# will prevent regex from compiling.
std = []
# The 'use_std' feature is DEPRECATED. It will be removed in regex 2. Until
# then, it is an alias for the 'std' feature.
use_std = ["std"]


# PERFORMANCE FEATURES

# Enables all performance features.
perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
# Enables fast caching. (If disabled, caching is still used, but is slower.)
# Pulls in the optional 'thread_local' dependency.
perf-cache = ["thread_local"]
# Enables use of a lazy DFA when possible.
perf-dfa = []
# Enables aggressive use of inlining.
perf-inline = []
# Enables literal optimizations. Pulls in the optional 'aho-corasick' and
# 'memchr' dependencies.
perf-literal = ["aho-corasick", "memchr"]


# UNICODE DATA FEATURES

# Enables all Unicode features. This expands if new Unicode features are added.
unicode = [
"unicode-age",
"unicode-bool",
"unicode-case",
"unicode-gencat",
"unicode-perl",
"unicode-script",
"unicode-segment",
]
# Each feature below forwards to the same-named feature of 'regex-syntax'.
# Enables use of the `Age` property, e.g., `\p{Age:3.0}`.
unicode-age = ["regex-syntax/unicode-age"]
# Enables use of a smattering of boolean properties, e.g., `\p{Emoji}`.
unicode-bool = ["regex-syntax/unicode-bool"]
# Enables Unicode-aware case insensitive matching, e.g., `(?i)β`.
unicode-case = ["regex-syntax/unicode-case"]
# Enables Unicode general categories, e.g., `\p{Letter}` or `\pL`.
unicode-gencat = ["regex-syntax/unicode-gencat"]
# Enables Unicode-aware Perl classes corresponding to `\w`, `\s` and `\d`.
unicode-perl = ["regex-syntax/unicode-perl"]
# Enables Unicode scripts and script extensions, e.g., `\p{Greek}`.
unicode-script = ["regex-syntax/unicode-script"]
# Enables Unicode segmentation properties, e.g., `\p{gcb=Extend}`.
unicode-segment = ["regex-syntax/unicode-segment"]


# UNSTABLE FEATURES (requires Rust nightly)

# A blanket feature that governs whether unstable features are enabled or not.
# Unstable features are disabled by default, and typically rely on unstable
# features in rustc itself.
unstable = ["pattern"]

# Enable to use the unstable pattern traits defined in std. This is enabled
# by default if the unstable feature is enabled.
pattern = []

# For very fast prefix literal matching.
aho-corasick = "0.7.6"
[dependencies.aho-corasick]
version = "0.7.6"
optional = true

# For skipping along search text quickly when a leading byte is known.
memchr = "2.2.1"
[dependencies.memchr]
version = "2.2.1"
optional = true

# For managing regex caches quickly across multiple threads.
thread_local = "0.3.6"
[dependencies.thread_local]
version = "0.3.6"
optional = true

# For parsing regular expressions.
regex-syntax = { path = "regex-syntax", version = "0.6.11" }
[dependencies.regex-syntax]
path = "regex-syntax"
version = "0.6.11"
default-features = false

[dev-dependencies]
# For examples.
Expand All @@ -44,26 +134,6 @@ rand = "0.6.5"
# To check README's example
doc-comment = "0.3"

[features]
default = ["use_std"]
# The 'use_std' feature permits the regex crate to use the standard library.
# This is intended to support future use cases where the regex crate may be
# able to compile without std, and instead just rely on 'core' and 'alloc'
# (for example). Currently, this isn't supported, and removing the 'use_std'
# feature will prevent regex from compiling.
use_std = []
# A blanket feature that governs whether unstable features are enabled or not.
# Unstable features are disabled by default, and typically rely on unstable
# features in rustc itself.
unstable = ["pattern"]
# Enable to use the unstable pattern traits defined in std. This is enabled
# by default if the unstable feature is enabled.
pattern = []

[lib]
# There are no benchmarks in the library code itself
bench = false

# Run the test suite on the default behavior of Regex::new.
# This includes a mish mash of NFAs and DFAs, which are chosen automatically
# based on the regex. We test both of the NFA implementations by forcing their
Expand Down
31 changes: 29 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ linear time with respect to the size of the regular expression and search text.
Much of the syntax and implementation is inspired
by [RE2](https://github.com/google/re2).

[![Build Status](https://travis-ci.com/rust-lang/regex.svg?branch=master)](https://travis-ci.com/rust-lang/regex)
[![Build status](https://travis-ci.com/rust-lang/regex.svg?branch=master)](https://travis-ci.com/rust-lang/regex)
[![Build status](https://ci.appveyor.com/api/projects/status/github/rust-lang/regex?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/regex)
[![Coverage Status](https://coveralls.io/repos/github/rust-lang/regex/badge.svg?branch=master)](https://coveralls.io/github/rust-lang/regex?branch=master)
[![](https://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
Expand Down Expand Up @@ -201,9 +201,36 @@ recommended for general use.
[Documentation `regex-syntax`.](https://docs.rs/regex-syntax)


### Crate features

This crate comes with several features that permit tweaking the trade-off
between binary size, compilation time and runtime performance. Users of this
crate can selectively disable Unicode tables, or disable any of the various
optimizations performed by this crate.

When all of these features are disabled, runtime match performance may be much
worse, but if you're matching on short strings, or if high performance isn't
necessary, then such a configuration is perfectly serviceable. To disable
all such features, use the following `Cargo.toml` dependency configuration:

```toml
[dependencies.regex]
version = "1.3"
default-features = false
# regex currently requires the standard library, so you must re-enable it.
features = ["std"]
```

This will reduce the dependency tree of `regex` down to a single crate
(`regex-syntax`).

The full set of features one can disable is listed
[in the "Crate features" section of the documentation](https://docs.rs/regex/*/#crate-features).


### Minimum Rust version policy

This crate's minimum supported `rustc` version is `1.28.1`.
This crate's minimum supported `rustc` version is `1.28.0`.

The current **tentative** policy is that the minimum Rust version required
to use this crate can be increased in minor version updates. For example, if
Expand Down
48 changes: 25 additions & 23 deletions ci/script.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/sh

# vim: tabstop=2 shiftwidth=2 softtabstop=2

# This is the main CI script for testing the regex crate and its sub-crates.

set -ex
Expand All @@ -18,26 +20,33 @@ if [ "$TRAVIS_RUST_VERSION" = "$MSRV" ]; then
exit
fi

# Run tests. If we have nightly, then enable our nightly features.
# Right now there are no nightly features, but that may change in the
# future.
CARGO_TEST_EXTRA_FLAGS=""
if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
CARGO_TEST_EXTRA_FLAGS=""
# Check formatting, but make sure we use the stable version of rustfmt.
if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then
rustup component add rustfmt
cargo fmt --all -- --check
fi
cargo test --verbose ${CARGO_TEST_EXTRA_FLAGS}

# Run the random tests in release mode, as this is faster.
RUST_REGEX_RANDOM_TEST=1 \
cargo test --release --verbose \
${CARGO_TEST_EXTRA_FLAGS} --test crates-regex
# Only run the full test suite on one job to keep build times lower.
if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then
./test

# Run the random tests in release mode, as this is faster.
RUST_REGEX_RANDOM_TEST=1 cargo test --release --verbose --test crates-regex
else
cargo test --verbose --test default
fi

# Run a test that confirms the shootout benchmarks are correct.
ci/run-shootout-test

# Run tests on regex-syntax crate.
cargo test --verbose --manifest-path regex-syntax/Cargo.toml
cargo doc --verbose --manifest-path regex-syntax/Cargo.toml
# Only run the full test suite on one job, to conserve resources.
if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then
(cd regex-syntax && ./test)
else
cargo test --verbose --manifest-path regex-syntax/Cargo.toml
fi

# Run tests on regex-capi crate.
ci/test-regex-capi
Expand All @@ -50,17 +59,10 @@ if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then

# Test minimal versions.
#
# For now, we remove this check, because it doesn't seem possible to convince
# some maintainers of *core* crates that this is a worthwhile test to add.
# In particular, this test uncovers any *incorrect* dependency specification
# in the chain of dependencies.
#
# We might consider figuring out how to migrate off of rand in order to get
# this check working. (This will be hard, since it either requires dropping
# quickcheck or migrating quickcheck off of rand, which is just probably
# not practical.)
#
# So frustrating.
# rand has started putting the minimal version check in their CI, so we
# should be able to re-enable this soon. This will require upgrading to
# rand 0.7, which breaks our MSRV since it relies on Rust 2018 features in
# order to read the Cargo.toml.
# cargo +nightly generate-lockfile -Z minimal-versions
# cargo build --verbose
# cargo test --verbose
Expand Down
22 changes: 22 additions & 0 deletions regex-syntax/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,25 @@ documentation = "https://docs.rs/regex-syntax"
homepage = "https://github.com/rust-lang/regex"
description = "A regular expression parser."
workspace = ".."

# Features are documented in the "Crate features" section of the crate docs:
# https://docs.rs/regex-syntax/*/#crate-features
[features]
# All Unicode data is enabled by default, via the `unicode` umbrella feature.
default = ["unicode"]

# Umbrella feature: enables every individual Unicode feature declared below.
unicode = [
"unicode-age",
"unicode-bool",
"unicode-case",
"unicode-gencat",
"unicode-perl",
"unicode-script",
"unicode-segment",
]
# The features below are plain flags: none of them enables another feature or
# a dependency. NOTE(review): the descriptions mirror the same-named features
# in the `regex` crate's manifest -- confirm against the regex-syntax docs.
# The `Age` property, e.g., `\p{Age:3.0}`.
unicode-age = []
# A smattering of boolean properties, e.g., `\p{Emoji}`.
unicode-bool = []
# Unicode-aware case insensitive matching, e.g., `(?i)β`.
unicode-case = []
# Unicode general categories, e.g., `\p{Letter}` or `\pL`.
unicode-gencat = []
# Unicode-aware Perl classes corresponding to `\w`, `\s` and `\d`.
unicode-perl = []
# Unicode scripts and script extensions, e.g., `\p{Greek}`.
unicode-script = []
# Unicode segmentation properties, e.g., `\p{gcb=Extend}`.
unicode-segment = []
Loading