diff --git a/NEWS.md b/NEWS.md index bab64a2d195ac..fb6d79ec78836 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,11 @@ New language features difference between `public` and `export` is that `public` names do not become available when `using` a package/module. ([#50105]) * `ScopedValue` implement dynamic scope with inheritance across tasks ([#50958]). +* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for + regional annotations to be attached to an underlying string. This type is + particularly useful for holding styling information, and is used extensively + in the new `StyledStrings` standard library. There is also a new `AnnotatedChar` + type, that is the equivalent new `AbstractChar` type. Language changes ---------------- @@ -51,6 +56,17 @@ New library features Standard library changes ------------------------ +#### StyledStrings + +* A new standard library for handling styling in a more comprehensive and structured way. +* The new `Faces` struct serves as a container for text styling information + (think typeface, as well as color and decoration), and comes with a framework + to provide a convenient, extensible (via `addface!`), and customisable (with a + user's `Faces.toml` and `loadfaces!`) approach to + styled content. +* The new `@styled_str` string macro provides a convenient way of creating a + `AnnotatedString` with various faces or other attributes applied. + #### Package Manager #### LinearAlgebra diff --git a/base/client.jl b/base/client.jl index 7339bf0870990..d55c2695e11e8 100644 --- a/base/client.jl +++ b/base/client.jl @@ -271,6 +271,10 @@ function exec_options(opts) interactiveinput = (repl || is_interactive::Bool) && isa(stdin, TTY) is_interactive::Bool |= interactiveinput + # load terminfo in for styled printing + term_env = get(ENV, "TERM", @static Sys.iswindows() ? 
"" : "dumb") + global current_terminfo = load_terminfo(term_env) + # load ~/.julia/config/startup.jl file if startup try @@ -416,11 +420,9 @@ function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_f end end # TODO cleanup REPL_MODULE_REF - if !fallback_repl && interactive && isassigned(REPL_MODULE_REF) invokelatest(REPL_MODULE_REF[]) do REPL term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb") - global current_terminfo = load_terminfo(term_env) term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr) banner == :no || Base.banner(term, short=banner==:short) if term.term_type == "dumb" diff --git a/base/exports.jl b/base/exports.jl index 81296f7d34b18..b6f7ea0d6ad35 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1089,8 +1089,15 @@ public Generator, ImmutableDict, OneTo, + AnnotatedString, + AnnotatedChar, UUID, +# Annotated strings + annotatedstring, + annotate!, + annotations, + # Semaphores Semaphore, acquire, diff --git a/base/regex.jl b/base/regex.jl index 3e161806c50ea..78eefa1741b0c 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -212,14 +212,18 @@ julia> hr "11" ``` """ -struct RegexMatch <: AbstractMatch - match::SubString{String} - captures::Vector{Union{Nothing,SubString{String}}} +struct RegexMatch{S<:AbstractString} <: AbstractMatch + match::SubString{S} + captures::Vector{Union{Nothing,SubString{S}}} offset::Int offsets::Vector{Int} regex::Regex end +RegexMatch(match::SubString{S}, captures::Vector{Union{Nothing,SubString{S}}}, + offset::Union{Int, UInt}, offsets::Vector{Int}, regex::Regex) where {S<:AbstractString} = + RegexMatch{S}(match, captures, offset, offsets, regex) + """ keys(m::RegexMatch) -> Vector @@ -423,9 +427,35 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, return result end +function _annotatedmatch(m::RegexMatch{S}, str::AnnotatedString{S}) where {S<:AbstractString} + RegexMatch{AnnotatedString{S}}( + (@inbounds SubString{AnnotatedString{S}}( + 
str, m.match.offset, m.match.ncodeunits, Val(:noshift))), + Union{Nothing,SubString{AnnotatedString{S}}}[ + if !isnothing(cap) + (@inbounds SubString{AnnotatedString{S}}( + str, cap.offset, cap.ncodeunits, Val(:noshift))) + end for cap in m.captures], + m.offset, m.offsets, m.regex) +end + +function match(re::Regex, str::AnnotatedString) + m = match(re, str.string) + if !isnothing(m) + _annotatedmatch(m, str) + end +end + +function match(re::Regex, str::AnnotatedString, idx::Integer, add_opts::UInt32=UInt32(0)) + m = match(re, str.string, idx, add_opts) + if !isnothing(m) + _annotatedmatch(m, str) + end +end + match(r::Regex, s::AbstractString) = match(r, s, firstindex(s)) match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError( - "regex matching is only available for the String type; use String(s) to convert" + "regex matching is only available for the String and AnnotatedString types; use String(s) to convert" )) findnext(re::Regex, str::Union{String,SubString}, idx::Integer) = _findnext_re(re, str, idx, C_NULL) @@ -671,18 +701,19 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) end end -struct RegexMatchIterator +struct RegexMatchIterator{S <: AbstractString} regex::Regex - string::String + string::S overlap::Bool - function RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) - new(regex, string, ovr) - end + RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) = + new{String}(regex, String(string), ovr) + RegexMatchIterator(regex::Regex, string::AnnotatedString, ovr::Bool=false) = + new{AnnotatedString{String}}(regex, AnnotatedString(String(string.string), string.annotations), ovr) end compile(itr::RegexMatchIterator) = (compile(itr.regex); itr) -eltype(::Type{RegexMatchIterator}) = RegexMatch -IteratorSize(::Type{RegexMatchIterator}) = SizeUnknown() +eltype(::Type{<:RegexMatchIterator}) = RegexMatch +IteratorSize(::Type{<:RegexMatchIterator}) = SizeUnknown() function 
iterate(itr::RegexMatchIterator, (offset,prevempty)=(1,false)) opts_nonempty = UInt32(PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART) @@ -727,7 +758,7 @@ julia> rx = r"a.a" r"a.a" julia> m = eachmatch(rx, "a1a2a3a") -Base.RegexMatchIterator(r"a.a", "a1a2a3a", false) +Base.RegexMatchIterator{String}(r"a.a", "a1a2a3a", false) julia> collect(m) 2-element Vector{RegexMatch}: diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl new file mode 100644 index 0000000000000..2df0ece80da1f --- /dev/null +++ b/base/strings/annotated.jl @@ -0,0 +1,388 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + AnnotatedString{S <: AbstractString} <: AbstractString + +A string with metadata, in the form of annotated regions. + +More specifically, this is a simple wrapper around any other +[`AbstractString`](@ref) that allows for regions of the wrapped string to be +annotated with labeled values. + +```text + C + ┌──────┸─────────┐ + "this is an example annotated string" + └──┰────────┼─────┘ │ + A └─────┰─────────┘ + B +``` + +The above diagram represents a `AnnotatedString` where three ranges have been +annotated (labeled `A`, `B`, and `C`). Each annotation holds a label (`Symbol`) +and a value (`Any`), paired together as a `Pair{Symbol, <:Any}`. + +Labels do not need to be unique, the same region can hold multiple annotations +with the same label. + +See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref), +[`annotations`](@ref), and [`annotate!`](@ref). + +!!! warning + While the constructors are part of the Base public API, the fields + of `AnnotatedString` are not. This is to allow for potential future + changes in the implementation of this type. Instead use the + [`annotations`](@ref), and [`annotate!`](@ref) getter/setter + functions. 
+ +# Constructors + +```julia +AnnotatedString(s::S<:AbstractString) -> AnnotatedString{S} +AnnotatedString(s::S<:AbstractString, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, <:Any}}}) +``` + +An AnnotatedString can also be created with [`annotatedstring`](@ref), which acts much +like [`string`](@ref) but preserves any annotations present in the arguments. + +# Example + +```julia-repl +julia> AnnotatedString("this is an example annotated string", + [(1:18, :A => 1), (12:28, :B => 2), (18:35, :C => 3)]) +"this is an example annotated string" +``` +""" +struct AnnotatedString{S <: AbstractString} <: AbstractString + string::S + annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}} +end + +""" + AnnotatedChar{S <: AbstractChar} <: AbstractChar + +A Char with annotations. + +More specifically, this is a simple wrapper around any other +[`AbstractChar`](@ref), which holds a list of arbitrary labeled annotations +(`Pair{Symbol, <:Any}`) with the wrapped character. + +See also: [`AnnotatedString`](@ref), [`annotatedstring`](@ref), `annotations`, +and `annotate!`. + +!!! warning + While the constructors are part of the Base public API, the fields + of `AnnotatedChar` are not. This is to allow for potential future + changes in the implementation of this type. Instead use the + [`annotations`](@ref), and [`annotate!`](@ref) getter/setter + functions. 
+ +# Constructors + +```julia +AnnotatedChar(s::S) -> AnnotatedChar{S} +AnnotatedChar(s::S, annotations::Vector{Pair{Symbol, <:Any}}) +``` + +# Examples + +```julia-repl +julia> AnnotatedChar('j', [:label => 1]) +'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase) +``` +""" +struct AnnotatedChar{C <: AbstractChar} <: AbstractChar + char::C + annotations::Vector{Pair{Symbol, Any}} +end + +## Constructors ## + +# When called with overly-specialised arguments + +AnnotatedString(s::AbstractString, annots::Vector{<:Tuple{UnitRange{Int}, <:Pair{Symbol, <:Any}}}) = + AnnotatedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}(annots)) + +AnnotatedChar(c::AbstractChar, annots::Vector{<:Pair{Symbol, <:Any}}) = + AnnotatedChar(c, Vector{Pair{Symbol, Any}}(annots)) + +# Constructors to avoid recursive wrapping + +AnnotatedString(s::AnnotatedString, annots::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}) = + AnnotatedString(s.string, vcat(s.annotations, annots)) + +AnnotatedChar(c::AnnotatedChar, annots::Vector{Pair{Symbol, Any}}) = + AnnotatedChar(c.char, vcat(c.annotations, annots)) + +String(s::AnnotatedString{String}) = s.string # To avoid pointless overhead + +## Conversion/promotion ## + +convert(::Type{AnnotatedString}, s::AnnotatedString) = s +convert(::Type{AnnotatedString{S}}, s::S) where {S <: AbstractString} = + AnnotatedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()) +convert(::Type{AnnotatedString}, s::S) where {S <: AbstractString} = + convert(AnnotatedString{S}, s) +AnnotatedString(s::S) where {S <: AbstractString} = convert(AnnotatedString{S}, s) + +convert(::Type{AnnotatedChar}, c::AnnotatedChar) = c +convert(::Type{AnnotatedChar{C}}, c::C) where { C <: AbstractChar } = + AnnotatedChar{C}(c, Vector{Pair{Symbol, Any}}()) +convert(::Type{AnnotatedChar}, c::C) where { C <: AbstractChar } = + convert(AnnotatedChar{C}, c) + +AnnotatedChar(c::AbstractChar) = convert(AnnotatedChar, c) +AnnotatedChar(c::UInt32) = convert(AnnotatedChar, 
Char(c)) +AnnotatedChar{C}(c::UInt32) where {C <: AbstractChar} = convert(AnnotatedChar, C(c)) + +promote_rule(::Type{<:AnnotatedString}, ::Type{<:AbstractString}) = AnnotatedString + +## AbstractString interface ## + +ncodeunits(s::AnnotatedString) = ncodeunits(s.string) +codeunits(s::AnnotatedString) = codeunits(s.string) +codeunit(s::AnnotatedString) = codeunit(s.string) +codeunit(s::AnnotatedString, i::Integer) = codeunit(s.string, i) +isvalid(s::AnnotatedString, i::Integer) = isvalid(s.string, i) +@propagate_inbounds iterate(s::AnnotatedString, i::Integer=firstindex(s)) = + if i <= lastindex(s.string); (s[i], nextind(s, i)) end +eltype(::Type{<:AnnotatedString{S}}) where {S} = AnnotatedChar{eltype(S)} +firstindex(s::AnnotatedString) = firstindex(s.string) +lastindex(s::AnnotatedString) = lastindex(s.string) + +function getindex(s::AnnotatedString, i::Integer) + @boundscheck checkbounds(s, i) + @inbounds if isvalid(s, i) + AnnotatedChar(s.string[i], annotations(s, i)) + else + string_index_err(s, i) + end +end + +## AbstractChar interface ## + +ncodeunits(c::AnnotatedChar) = ncodeunits(c.char) +codepoint(c::AnnotatedChar) = codepoint(c.char) + +# Avoid the iteration fallback with comparison +cmp(a::AnnotatedString, b::AbstractString) = cmp(a.string, b) +cmp(a::AbstractString, b::AnnotatedString) = cmp(a, b.string) +# To avoid method ambiguity +cmp(a::AnnotatedString, b::AnnotatedString) = cmp(a.string, b.string) + +==(a::AnnotatedString, b::AnnotatedString) = + a.string == b.string && a.annotations == b.annotations + +==(a::AnnotatedString, b::AbstractString) = isempty(a.annotations) && a.string == b +==(a::AbstractString, b::AnnotatedString) = isempty(b.annotations) && a == b.string + +""" + annotatedstring(values...) + +Create a `AnnotatedString` from any number of `values` using their +[`print`](@ref)ed representation. 
+ +This acts like [`string`](@ref), but takes care to preserve any annotations +present (in the form of [`AnnotatedString`](@ref) or [`AnnotatedChar`](@ref) values). + +See also [`AnnotatedString`](@ref) and [`AnnotatedChar`](@ref). + +## Examples + +```julia-repl +julia> annotatedstring("now a AnnotatedString") +"now a AnnotatedString" + +julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label => 1)]), ", and unannotated") +"annotated, and unannotated" +``` +""" +function annotatedstring(xs...) + isempty(xs) && return AnnotatedString("") + size = mapreduce(_str_sizehint, +, xs) + s = IOContext(IOBuffer(sizehint=size), :color => true) + annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() + for x in xs + if x isa AnnotatedString + for (region, annot) in x.annotations + push!(annotations, (s.io.size .+ (region), annot)) + end + print(s, x.string) + elseif x isa SubString{<:AnnotatedString} + for (region, annot) in x.string.annotations + start, stop = first(region), last(region) + if start <= x.offset + x.ncodeunits && stop > x.offset + rstart = s.io.size + max(0, start - x.offset - 1) + 1 + rstop = s.io.size + min(stop, x.offset + x.ncodeunits) - x.offset + push!(annotations, (rstart:rstop, annot)) + end + end + print(s, SubString(x.string.string, x.offset, x.ncodeunits, Val(:noshift))) + elseif x isa AnnotatedChar + for annot in x.annotations + push!(annotations, (1+s.io.size:1+s.io.size, annot)) + end + print(s, x.char) + else + print(s, x) + end + end + str = String(resize!(s.io.data, s.io.size)) + AnnotatedString(str, annotations) +end + +annotatedstring(s::AnnotatedString) = s +annotatedstring(c::AnnotatedChar) = + AnnotatedString(string(c.char), [(1:ncodeunits(c), annot) for annot in c.annotations]) + +AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s) + +""" + annotatedstring_optimize!(str::AnnotatedString) + +Merge contiguous identical annotations in `str`. 
+""" +function annotatedstring_optimize!(s::AnnotatedString) + last_seen = Dict{Pair{Symbol, Any}, Int}() + i = 1 + while i <= length(s.annotations) + region, keyval = s.annotations[i] + prev = get(last_seen, keyval, 0) + if prev > 0 + lregion, _ = s.annotations[prev] + if last(lregion) + 1 == first(region) + s.annotations[prev] = + setindex(s.annotations[prev], + first(lregion):last(region), + 1) + deleteat!(s.annotations, i) + else + delete!(last_seen, keyval) + end + else + last_seen[keyval] = i + i += 1 + end + end + s +end + +function repeat(str::AnnotatedString, r::Integer) + r == 0 && return one(AnnotatedString) + r == 1 && return str + unannot = repeat(str.string, r) + annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() + len = ncodeunits(str) + fullregion = firstindex(str):lastindex(str) + for (region, annot) in str.annotations + if region == fullregion + push!(annotations, (firstindex(unannot):lastindex(unannot), annot)) + end + end + for offset in 0:len:(r-1)*len + for (region, annot) in str.annotations + if region != fullregion + push!(annotations, (region .+ offset, annot)) + end + end + end + AnnotatedString(unannot, annotations) |> annotatedstring_optimize! +end + +repeat(str::SubString{<:AnnotatedString}, r::Integer) = + repeat(AnnotatedString(str), r) + +function repeat(c::AnnotatedChar, r::Integer) + str = repeat(c.char, r) + fullregion = firstindex(str):lastindex(str) + AnnotatedString(str, [(fullregion, annot) for annot in c.annotations]) +end + +function reverse(s::AnnotatedString) + lastind = lastindex(s) + AnnotatedString(reverse(s.string), + [(UnitRange(1 + lastind - last(region), + 1 + lastind - first(region)), + annot) + for (region, annot) in s.annotations]) +end + +# TODO optimise? 
+reverse(s::SubString{<:AnnotatedString}) = reverse(AnnotatedString(s)) + +# TODO implement `replace(::AnnotatedString, ...)` + +## End AbstractString interface ## + +""" + annotate!(str::AnnotatedString, [range::UnitRange{Int}], label::Symbol => value) + annotate!(str::SubString{AnnotatedString}, [range::UnitRange{Int}], label::Symbol => value) + +Annotate a `range` of `str` (or the entire string) with a labeled value (`label` => `value`). +To remove existing `label` annotations, use a value of `nothing`. +""" +function annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) + label, val = labelval + indices = searchsorted(s.annotations, (range,), by=first) + if val === nothing + labelindex = filter(i -> first(s.annotations[i][2]) === label, indices) + for index in Iterators.reverse(labelindex) + deleteat!(s.annotations, index) + end + else + insert!(s.annotations, last(indices) + 1, (range, Pair{Symbol, Any}(label, val))) + end + s +end + +annotate!(ss::AnnotatedString, @nospecialize(labelval::Pair{Symbol, <:Any})) = + annotate!(ss, firstindex(ss):lastindex(ss), labelval) + +annotate!(s::SubString{<:AnnotatedString}, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (annotate!(s.string, s.offset .+ (range), labelval); s) + +annotate!(s::SubString{<:AnnotatedString}, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (annotate!(s.string, s.offset .+ (1:s.ncodeunits), labelval); s) + +""" + annotate!(char::AnnotatedChar, label::Symbol => value) + +Annotate `char` with the pair `label => value`. +""" +annotate!(c::AnnotatedChar, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (push!(c.annotations, labelval); c) + +""" + annotations(str::AnnotatedString, [position::Union{Integer, UnitRange}]) + annotations(str::SubString{AnnotatedString}, [position::Union{Integer, UnitRange}]) + +Get all annotations that apply to `str`. 
Should `position` be provided, only +annotations that overlap with `position` will be returned. + +See also: `annotate!`. +""" +annotations(s::AnnotatedString) = s.annotations + +annotations(s::SubString{<:AnnotatedString}) = + annotations(s.string, s.offset+1:s.offset+s.ncodeunits) + +function annotations(s::AnnotatedString, pos::UnitRange{<:Integer}) + # TODO optimise + annots = filter(label -> !isempty(intersect(pos, first(label))), + s.annotations) + last.(annots) +end + +annotations(s::AnnotatedString, pos::Integer) = annotations(s, pos:pos) + +annotations(s::SubString{<:AnnotatedString}, pos::Integer) = + annotations(s.string, s.offset + pos) +annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) = + annotations(s.string, first(pos)+s.offset:last(pos)+s.offset) + +""" + annotations(chr::AnnotatedChar) + +Get all annotations of `chr`. +""" +annotations(c::AnnotatedChar) = c.annotations diff --git a/base/strings/basic.jl b/base/strings/basic.jl index d2bc157aefd94..330cff1cf8f00 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -241,9 +241,10 @@ end """ *(s::Union{AbstractString, AbstractChar}, t::Union{AbstractString, AbstractChar}...) -> AbstractString -Concatenate strings and/or characters, producing a [`String`](@ref). This is equivalent -to calling the [`string`](@ref) function on the arguments. Concatenation of built-in -string types always produces a value of type `String` but other string types may choose +Concatenate strings and/or characters, producing a [`String`](@ref) or +[`AnnotatedString`](@ref) (as appropriate). This is equivalent to calling the +[`string`](@ref) or [`annotatedstring`](@ref) function on the arguments. Concatenation of built-in string +types always produces a value of type `String` but other string types may choose to return a string of a different type as appropriate. 
# Examples @@ -255,7 +256,15 @@ julia> 'j' * "ulia" "julia" ``` """ -(*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...) = string(s1, ss...) +function (*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...) + isannotated = s1 isa AnnotatedString || s1 isa AnnotatedChar || + any(s -> s isa AnnotatedString || s isa AnnotatedChar, ss) + if isannotated + annotatedstring(s1, ss...) + else + string(s1, ss...) + end +end one(::Union{T,Type{T}}) where {T<:AbstractString} = convert(T, "") @@ -309,7 +318,8 @@ end ==(a::AbstractString, b::AbstractString) -> Bool Test whether two strings are equal character by character (technically, Unicode -code point by code point). +code point by code point). Should either string be a [`AnnotatedString`](@ref) the +string properties must match too. # Examples ```jldoctest diff --git a/base/strings/io.jl b/base/strings/io.jl index 987a64798d3da..c45d0ac84640e 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -353,9 +353,31 @@ function join(io::IO, iterator, delim="") end end -join(iterator) = sprint(join, iterator) -join(iterator, delim) = sprint(join, iterator, delim) -join(iterator, delim, last) = sprint(join, iterator, delim, last) +# TODO: If/when we have `AnnotatedIO`, we can revisit this and +# implement it more nicely. +function join_annotated(iterator, delim="", last=delim) + xs = zip(iterator, Iterators.repeated(delim)) |> Iterators.flatten |> collect + xs = xs[1:end-1] + if length(xs) > 1 + xs[end-1] = last + end + annotatedstring(xs...)::AnnotatedString{String} +end + +function _join_maybe_annotated(args...) + if any(function (arg) + t = eltype(arg) + !(t == Union{}) && (t <: AnnotatedString || t <: AnnotatedChar) + end, args) + join_annotated(args...) + else + sprint(join, args...) 
+ end +end + +join(iterator) = _join_maybe_annotated(iterator) +join(iterator, delim) = _join_maybe_annotated(iterator, delim) +join(iterator, delim, last) = _join_maybe_annotated(iterator, delim, last) ## string escaping & unescaping ## @@ -764,3 +786,26 @@ function String(chars::AbstractVector{<:AbstractChar}) end end end + +function AnnotatedString(chars::AbstractVector{C}) where {C<:AbstractChar} + str = if C <: AnnotatedChar + String(getfield.(chars, :char)) + else + sprint(sizehint=length(chars)) do io + for c in chars + print(io, c) + end + end + end + props = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[] + point = 1 + for c in chars + if c isa AnnotatedChar + for prop in c.annotations + push!(props, (point:point, prop)) + end + end + point += ncodeunits(c) + end + AnnotatedString(str, props) +end diff --git a/base/strings/strings.jl b/base/strings/strings.jl index d995d8535e24b..8dae311f475b4 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -1,5 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +include("strings/annotated.jl") include("strings/search.jl") include("strings/unicode.jl") diff --git a/base/strings/substring.jl b/base/strings/substring.jl index 792925f24b12b..dfd8770b08d47 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -36,9 +36,18 @@ struct SubString{T<:AbstractString} <: AbstractString end return new(s, i-1, nextind(s,j)-i) end + function SubString{T}(s::T, i::Int, j::Int, ::Val{:noshift}) where T<:AbstractString + @boundscheck begin + si, sj = i + 1, prevind(s, j + i + 1) + @inbounds isvalid(s, si) || string_index_err(s, si) + @inbounds isvalid(s, sj) || string_index_err(s, sj) + end + new(s, i, j) + end end @propagate_inbounds SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j) +@propagate_inbounds SubString(s::T, i::Int, j::Int, v::Val{:noshift}) where {T<:AbstractString} = SubString{T}(s, i, j, v) @propagate_inbounds SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s)) = SubString(s, Int(i), Int(j)) @propagate_inbounds SubString(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, first(r), last(r)) diff --git a/base/strings/util.jl b/base/strings/util.jl index 890afaf62b2ee..fae40cb568842 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -458,13 +458,15 @@ function lpad( s::Union{AbstractChar,AbstractString}, n::Integer, p::Union{AbstractChar,AbstractString}=' ', -) :: String +) + stringfn = if any(isa.((s, p), Union{AnnotatedString, AnnotatedChar, SubString{<:AnnotatedString}})) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int - m ≤ 0 && return string(s) + m ≤ 0 && return stringfn(s) l = textwidth(p) q, r = divrem(m, l) - r == 0 ? string(p^q, s) : string(p^q, first(p, r), s) + r == 0 ? 
stringfn(p^q, s) : stringfn(p^q, first(p, r), s) end """ @@ -488,13 +490,15 @@ function rpad( s::Union{AbstractChar,AbstractString}, n::Integer, p::Union{AbstractChar,AbstractString}=' ', -) :: String +) + stringfn = if any(isa.((s, p), Union{AnnotatedString, AnnotatedChar, SubString{<:AnnotatedString}})) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int - m ≤ 0 && return string(s) + m ≤ 0 && return stringfn(s) l = textwidth(p) q, r = divrem(m, l) - r == 0 ? string(s, p^q) : string(s, p^q, first(p, r)) + r == 0 ? stringfn(s, p^q) : stringfn(s, p^q, first(p, r)) end """ diff --git a/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/md5 b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/md5 new file mode 100644 index 0000000000000..5d9fdfb8ebd7e --- /dev/null +++ b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/md5 @@ -0,0 +1 @@ +311f5b6b7e109fea852303ec09324b00 diff --git a/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/sha512 b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/sha512 new file mode 100644 index 0000000000000..8a3c1c9238514 --- /dev/null +++ b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/sha512 @@ -0,0 +1 @@ +fbf4b2fdde4fd2c2bb321b915d9833ea34952aec1bf9fdcf51e229e6bae5fc0fbfd30db31e410c634955144c6a4295289a313185621e2c5f16b06f22a049739f diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index 979ba1157fb23..2504f3dbd583a 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -17,6 +17,11 @@ Core.String(::AbstractString) Base.SubString Base.LazyString Base.@lazy_str +Base.AnnotatedString +Base.AnnotatedChar +Base.annotatedstring +Base.annotations +Base.annotate! 
Base.transcode Base.unsafe_string Base.ncodeunits(::AbstractString) diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index 9c4fde5b8a701..ec146125024b8 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -1203,3 +1203,51 @@ Notice that the first two backslashes appear verbatim in the output, since they precede a quote character. However, the next backslash character escapes the backslash that follows it, and the last backslash escapes a quote, since these backslashes appear before a quote. + + +## [Annotated Strings](@id man-annotated-strings) + +It is sometimes useful to be able to hold metadata relating to regions of a +string. An [`AnnotatedString`](@ref Base.AnnotatedString) wraps another string and +allows for regions of it to be annotated with labelled values (`:label => value`). +All generic string operations are applied to the underlying string. However, +when possible, styling information is preserved. This means you can manipulate an +[`AnnotatedString`](@ref Base.AnnotatedString) —taking substrings, padding them, +concatenating them with other strings— and the metadata annotations will "come +along for the ride". + +This string type is fundamental to the [StyledStrings stdlib](@ref +stdlib-styledstrings), which uses `:face`-labelled annotations to hold styling +information. + +When concatenating an [`AnnotatedString`](@ref Base.AnnotatedString), take care to use +[`annotatedstring`](@ref Base.annotatedstring) instead of [`string`](@ref) if you want +to keep the string annotations. 
+ +```jldoctest +julia> str = Base.AnnotatedString("hello there", + [(1:5, :word => :greeting), (7:11, :label => 1)]) +"hello there" + +julia> length(str) +11 + +julia> lpad(str, 14) +" hello there" + +julia> typeof(lpad(str, 7)) +Base.AnnotatedString{String} + +julia> str2 = Base.AnnotatedString(" julia", [(2:6, :face => :magenta)]) +" julia" + +julia> Base.annotatedstring(str, str2) +"hello there julia" + +julia> str * str2 == Base.annotatedstring(str, str2) # *-concatenation still works +true +``` + +The annotations of a [`AnnotatedString`](@ref Base.AnnotatedString) can be accessed +and modified via the [`annotations`](@ref Base.annotations) and +[`annotate!`](@ref Base.annotate!) functions. diff --git a/pkgimage.mk b/pkgimage.mk index 9a91488955420..0b46531cfa137 100644 --- a/pkgimage.mk +++ b/pkgimage.mk @@ -70,6 +70,7 @@ $(eval $(call stdlib_builder,Serialization,)) $(eval $(call stdlib_builder,Sockets,)) $(eval $(call stdlib_builder,Unicode,)) $(eval $(call stdlib_builder,Profile,)) +$(eval $(call stdlib_builder,StyledStrings,)) # 1-depth packages $(eval $(call stdlib_builder,GMP_jll,Artifacts Libdl)) diff --git a/stdlib/.gitignore b/stdlib/.gitignore index f76eb3df57145..ce744aa43d9f5 100644 --- a/stdlib/.gitignore +++ b/stdlib/.gitignore @@ -25,6 +25,8 @@ /LazyArtifacts /Distributed-* /Distributed +/StyledStrings-* +/StyledStrings /*_jll/StdlibArtifacts.toml /*/Manifest.toml /*.image diff --git a/stdlib/Makefile b/stdlib/Makefile index 6b09344ac422d..1c8a2849d75f1 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -47,7 +47,7 @@ STDLIBS = Artifacts Base64 CRC32c Dates FileWatching \ $(JLL_NAMES) STDLIBS_EXT = Pkg Statistics LazyArtifacts LibCURL DelimitedFiles Downloads ArgTools \ - Tar NetworkOptions SuiteSparse SparseArrays SHA Distributed + Tar NetworkOptions SuiteSparse SparseArrays StyledStrings SHA Distributed $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z)))) diff --git 
a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version new file mode 100644 index 0000000000000..bca2e9dca3e67 --- /dev/null +++ b/stdlib/StyledStrings.version @@ -0,0 +1,4 @@ +STYLEDSTRINGS_BRANCH = main +STYLEDSTRINGS_SHA1 = 61e7b105b157b40807ed0b4840166a25b0948549 +STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git +STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1 diff --git a/stdlib/stdlib.mk b/stdlib/stdlib.mk index 99bdefc66fa90..696b24a8f8bf1 100644 --- a/stdlib/stdlib.mk +++ b/stdlib/stdlib.mk @@ -6,7 +6,7 @@ INDEPENDENT_STDLIBS := \ ArgTools Base64 CRC32c Dates DelimitedFiles Distributed Downloads Future \ InteractiveUtils LazyArtifacts LibGit2 LibCURL Logging Markdown Mmap \ NetworkOptions Profile Printf Pkg REPL Serialization SharedArrays SparseArrays \ - Statistics Tar Test TOML Unicode UUIDs \ + Statistics StyledStrings Tar Test TOML Unicode UUIDs \ dSFMT_jll GMP_jll libLLVM_jll LLD_jll LLVMLibUnwind_jll LibUnwind_jll LibUV_jll \ LibCURL_jll LibSSH2_jll LibGit2_jll nghttp2_jll MozillaCACerts_jll MbedTLS_jll \ MPFR_jll OpenLibm_jll PCRE2_jll p7zip_jll Zlib_jll diff --git a/test/choosetests.jl b/test/choosetests.jl index 221a49b710d8b..beed4e15a58df 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -151,7 +151,7 @@ function choosetests(choices = []) filtertests!(tests, "unicode", ["unicode/utf8"]) filtertests!(tests, "strings", ["strings/basic", "strings/search", "strings/util", - "strings/io", "strings/types"]) + "strings/io", "strings/types", "strings/annotated"]) # do subarray before sparse but after linalg filtertests!(tests, "subarray") filtertests!(tests, "compiler", [ diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl new file mode 100644 index 0000000000000..324c1ccb495f6 --- /dev/null +++ b/test/strings/annotated.jl @@ -0,0 +1,99 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +@testset "AnnotatedString" begin + str = Base.AnnotatedString("some string") + @test str == Base.AnnotatedString(str.string, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]) + @test length(str) == 11 + @test ncodeunits(str) == 11 + @test eltype(str) == Base.AnnotatedChar{eltype(str.string)} + @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[]) + @test str[1:4] isa SubString{typeof(str)} + @test str[1:4] == Base.AnnotatedString("some") + @test "a" * str == Base.AnnotatedString("asome string") + @test str * "a" == Base.AnnotatedString("some stringa") + @test str * str == Base.AnnotatedString("some stringsome string") + Base.annotate!(str, 1:4, :thing => 0x01) + Base.annotate!(str, 5:11, :other => 0x02) + Base.annotate!(str, 1:11, :all => 0x03) + @test str[3:4] == SubString(str, 3, 4) + @test Base.AnnotatedString(str[3:4]) == + Base.AnnotatedString("me", [(1:2, :thing => 0x01), (1:2, :all => 0x03)]) + @test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) + @test str != Base.AnnotatedString("some string") + @test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (5:5, :other => 0x02), (11:11, :all => 0x03)]) + @test str != Base.AnnotatedString("some string", [(1:4, :thing => 0x11), (1:11, :all => 0x13), (5:11, :other => 0x12)]) + @test str != Base.AnnotatedString("some thingg", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) + let allstrings = + ['a', Base.AnnotatedChar('a'), Base.AnnotatedChar('a', [:aaa => 0x04]), + "a string", Base.AnnotatedString("a string"), + Base.AnnotatedString("a string", [(1:2, :hmm => '%')])] + for str1 in repeat(allstrings, 2) + for str2 in repeat(allstrings, 2) + @test String(str1 * str2) == + String(string(str1, str2)) == + String(string(str1)) * String(string(str2)) + @test Base.annotatedstring(str1 * str2) == + Base.annotatedstring(str1, str2) == + 
Base.annotatedstring(str1) * Base.annotatedstring(str2) + end + end + end + # @test collect(Base.eachstyle(str)) == + # [("some", [:thing => 0x01, :all => 0x03]), + # (" string", [:all => 0x03, :other => 0x02])] + @test ==(Base.annotatedstring_optimize!( + Base.AnnotatedString("abc", [(1:1, :val => 1), + (2:2, :val => 2), + (2:2, :val => 1), + (3:3, :val => 2)])), + Base.AnnotatedString("abc", [(1:2, :val => 1), + (2:3, :val => 2)])) +end + +@testset "AnnotatedChar" begin + chr = Base.AnnotatedChar('c') + @test chr == Base.AnnotatedChar(chr.char, Pair{Symbol, Any}[]) + str = Base.AnnotatedString("hmm", [(1:1, :attr => "h0h0"), + (1:2, :attr => "h0m1"), + (2:3, :attr => "m1m2")]) + @test str[1] == Base.AnnotatedChar('h', Pair{Symbol, Any}[:attr => "h0h0"]) + @test str[2] == Base.AnnotatedChar('m', Pair{Symbol, Any}[:attr => "h0m1", :attr => "m1m2"]) + @test str[3] == Base.AnnotatedChar('m', Pair{Symbol, Any}[:attr => "m1m2"]) +end + +@testset "Styling preservation" begin + str = Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (5:11, :other => 0x02)]) + @test match(r".e", str).match == str[3:4] + @test match(r"(.e)", str).captures == [str[3:4]] + let m0 = match(r"(.)e", str) + m1 = first(eachmatch(r"(.)e", str)) + for f in fieldnames(RegexMatch) + @test getfield(m0, f) == getfield(m1, f) + end + end + @test lpad(str, 12) == + Base.AnnotatedString(" some string", [(2:5, :thing => 0x01), + (2:12, :all => 0x03), + (6:12, :other => 0x02)]) + @test rpad(str, 12) == + Base.AnnotatedString("some string ", [(1:4, :thing => 0x01), + (1:11, :all => 0x03), + (5:11, :other => 0x02)]) + str1 = Base.AnnotatedString("test", [(1:4, :label => 5)]) + str2 = Base.AnnotatedString("case", [(2:3, :label => "oomph")]) + @test join([str1, str1], Base.AnnotatedString(" ")) == + Base.AnnotatedString("test test", + [(1:4, :label => 5), + (6:9, :label => 5)]) + @test join([str1, str1], Base.AnnotatedString(" ", [(1:1, :label => 2)])) == + 
Base.AnnotatedString("test test", + [(1:4, :label => 5), + (5:5, :label => 2), + (6:9, :label => 5)]) + @test repeat(str1, 2) == Base.AnnotatedString("testtest", [(1:8, :label => 5)]) + @test repeat(str2, 2) == Base.AnnotatedString("casecase", [(2:3, :label => "oomph"), + (6:7, :label => "oomph")]) + @test repeat(str1[1], 3) == Base.AnnotatedString("ttt", [(1:3, :label => 5)]) + @test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label => 5)]) + @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")]) +end