Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce a Shortstring based Name type #324

Closed
wants to merge 11 commits into from
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
ShortStrings = "63221d1c-8677-4ff0-9126-0ff0817b4975"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[compat]
EzXML = "0.9.1, 1"
Mocking = "0.7"
RecipesBase = "0.7, 0.8, 1"
ShortStrings = "0.3.7"
julia = "1"

[extras]
Expand Down
1 change: 1 addition & 0 deletions dev/ShortStrings
Submodule ShortStrings added at 65ca23
3 changes: 3 additions & 0 deletions src/TimeZones.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ module TimeZones

using Dates
using Printf
using ShortStrings
using Serialization
using RecipesBase: RecipesBase, @recipe
using ShortStrings: ShortString15
using Unicode

import Dates: TimeZone, UTC
Expand Down Expand Up @@ -55,6 +57,7 @@ include("indexable_generator.jl")

include("class.jl")
include("utcoffset.jl")
include(joinpath("types", "name.jl"))
include(joinpath("types", "timezone.jl"))
include(joinpath("types", "fixedtimezone.jl"))
include(joinpath("types", "variabletimezone.jl"))
Expand Down
10 changes: 5 additions & 5 deletions src/arithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function Base.:(-)(zdt::ZonedDateTime, p::TimePeriod)
return ZonedDateTime(DateTime(zdt, UTC) - p, timezone(zdt); from_utc=true)
end

function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::DatePeriod)
function broadcasted(::typeof(+), r::StepRange{<:ZonedDateTime}, p::DatePeriod)
start, step, stop = first(r), Base.step(r), last(r)

# Since the local time + period can result in an invalid local datetime when working with
Expand All @@ -41,10 +41,10 @@ function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::DatePeriod)
return StepRange(start, step, stop)
end

function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::TimePeriod)
function broadcasted(::typeof(+), r::StepRange{<:ZonedDateTime}, p::TimePeriod)
return StepRange(r.start + p, r.step, r.stop + p)
end

broadcasted(::typeof(+), p::Period, r::StepRange{ZonedDateTime}) = broadcasted(+, r, p)
broadcasted(::typeof(-), r::StepRange{ZonedDateTime}, p::Period) = broadcasted(+, r, -p)
broadcasted(::typeof(-), p::Period, r::StepRange{ZonedDateTime}) = broadcasted(-, r, p)
broadcasted(::typeof(+), p::Period, r::StepRange{<:ZonedDateTime}) = broadcasted(+, r, p)
broadcasted(::typeof(-), r::StepRange{<:ZonedDateTime}, p::Period) = broadcasted(+, r, -p)
broadcasted(::typeof(-), p::Period, r::StepRange{<:ZonedDateTime}) = broadcasted(-, r, p)
14 changes: 7 additions & 7 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,17 @@ function Base.show(io::IO, tz::VariableTimeZone)

# Compact printing of a custom time zone which is non-constructable
elseif get(io, :compact, false)
print(io, VariableTimeZone, "(")
show(io, tz.name)
print(io, ", ...)")
print(io, VariableTimeZone, "(\"")
print(io, tz.name)
print(io, "\", ...)")

# Verbose printing which should print a fully constructable `VariableTimeZone`.
else
# Force `:compact => false` to force the transition vector printing into
# long form.
print(io, VariableTimeZone, "(")
show(io, tz.name)
print(io, ", ")
print(io, VariableTimeZone, "(\"")
print(io, tz.name)
print(io, "\", ")
show(IOContext(io, :compact => false), tz.transitions)
print(io, ", ")
show(io, tz.cutoff)
Expand Down Expand Up @@ -128,7 +128,7 @@ Base.show(io::IO, ::MIME"text/plain", zdt::ZonedDateTime) = print(io, zdt)

# https://github.com/JuliaLang/julia/pull/33290
if VERSION >= v"1.5.0-DEV.224"
Base.typeinfo_implicit(::Type{ZonedDateTime}) = true
Base.typeinfo_implicit(::Type{<:ZonedDateTime}) = true
end

# Use compact printing on certain element types for Julia versions before:
Expand Down
8 changes: 6 additions & 2 deletions src/types/fixedtimezone.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Ideally would always use ShortString15, but its `hash` is broken on 32-bit systems.
# https://github.com/JuliaString/MurmurHash3.jl/issues/12
const FixedTimeZoneName = Int === Int64 ? ShortString15 : String

const FIXED_TIME_ZONE_REGEX = r"""
^(?|
Z
Expand Down Expand Up @@ -30,7 +34,7 @@ const FIXED_TIME_ZONE_REGEX = r"""
A `TimeZone` with a constant offset for all of time.
"""
struct FixedTimeZone <: TimeZone
name::String
name::FixedTimeZoneName
offset::UTCOffset
end

Expand Down Expand Up @@ -72,7 +76,7 @@ UTC+15:45:21
function FixedTimeZone(s::AbstractString)
s == "Z" && return UTC_ZERO

m = match(FIXED_TIME_ZONE_REGEX, s)
m = match(FIXED_TIME_ZONE_REGEX, String(s))
m === nothing && throw(ArgumentError("Unrecognized time zone: $s"))

coefficient = m[:sign] == "-" ? -1 : 1
Expand Down
59 changes: 59 additions & 0 deletions src/types/name.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# A time zone name split on "/" into at most three fixed-size parts, e.g.
# "America/Argentina/ComodRivadavia" -> region / locality1 / locality2.
# Parts that are not present are stored as empty strings, so a two-part name has an
# empty `locality2` and a one-part name has both localities empty.
struct SName
region::ShortString15
locality1::ShortString15
locality2::ShortString15
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe should be a subtype of AbstractString?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was considering it.
But it is a lot of work to subtype AbstractString. Especially because we need to match equality and thus hash with String.
And this is an internal type that is used for an internal field.
And the main operation that matters is comparing for equality with other SNames

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just use a larger ShortString and skip this custom type altogether? If ShortString63 is larger than you want you could always tweak the size by defining a primitive type and use ShortString{T}. This gets you the string functionality without you having to write it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you think that is viable then sure.
Based on your earlier comments, which I reiterated here #271 (comment),
I thought it wasn't.
In particular due to the BitInteger serialization one (which I have now realized has been fixed).

I also thought that they would be slower, as I have found that they are much slower than expected once they get large, but it seems that doesn't really kick in for ShortString63.
It is a bit slower for some operations but a bit faster for others.

Benchmarks

== false

julia> @btime x==y setup=(x=convert(TimeZones.SName, "America/Winnipeg"); y=convert(TimeZones.SName, "America/Argentina/ComodRivadavia"))
  1.504 ns (0 allocations: 0 bytes)
false

julia> @btime x==y setup=(x=ShortString63("America/Winnipeg"); y=ShortString63("America/Argentina/ComodRivadavia"))
  2.117 ns (0 allocations: 0 bytes)
false

== true

(basically exactly the same as the false case)

julia> @btime x==y setup=(x=convert(TimeZones.SName, "America/Winnipeg"); y=convert(TimeZones.SName, "America/Winnipeg"))
  1.505 ns (0 allocations: 0 bytes)
true

julia> @btime x==y setup=(x=ShortString63("America/Winnipeg"); y=ShortString63("America/Winnipeg"))
  2.115 ns (0 allocations: 0 bytes)
true

hash

julia> @btime hash(x)==hash(y) setup=(x=convert(TimeZones.SName, "America/Winnipeg"); y=convert(TimeZones.SName, "America/Argentina/ComodRivadavia"))
  34.256 ns (0 allocations: 0 bytes);
  
julia> @btime hash(x)==hash(y) setup=(x=ShortString63("America/Winnipeg"); y=ShortString63("America/Argentina/ComodRivadavia"));
  27.598 ns (0 allocations: 0 bytes)

So I will make that change, since it is simpler


# Write `name` to `io` as its non-empty parts joined by "/".
# Printing stops at the first empty locality: `locality2` is only written when
# `locality1` is also present.
function Base.print(io::IO, name::SName)
    print(io, name.region)
    isempty(name.locality1) && return nothing
    print(io, "/", name.locality1)
    isempty(name.locality2) && return nothing
    print(io, "/", name.locality2)
    return nothing
end

# Converting an `SName` to a `String` yields its printed "/"-joined form.
function Base.convert(::Type{String}, name::SName)
    return string(name)
end
# Strict conversion from a string to an `SName`.
# Delegates to `try_convert` and throws a `DomainError` when the string cannot be
# represented (i.e. `try_convert` returned `nothing`).
function Base.convert(::Type{SName}, str::AbstractString)
    converted = try_convert(SName, str)
    converted === nothing || return converted
    throw(DomainError(str, "Timezone must have 3 or fewer parts, all with length < 16"))
end

# An `SName` is already in the requested form, so it is handed back unchanged.
function try_convert(::Type{SName}, name::SName)
    return name
end
# Fallback for when the name type is plain `String` (`Name` is `String` on 32-bit).
# Accepts any `AbstractString` (e.g. a `SubString`), since callers such as `istimezone`
# pass arbitrary string types; a `String` argument is returned as-is.
try_convert(::Type{String}, name::AbstractString) = String(name)
# Attempt to build an `SName` from a "/"-separated string.
#
# Returns `nothing` (instead of throwing) when the string cannot be represented:
# either it has more than three parts, or some part does not fit in a `ShortString15`.
# Missing trailing parts are filled with empty short strings.
function try_convert(::Type{SName}, str::AbstractString)
    parts = split(str, "/")
    (length(parts) <= 3) || return nothing
    # The capacity of a `ShortString15` is measured in code units (bytes), not
    # characters, so use `ncodeunits` here: a part with multi-byte characters can have
    # `length(part) < 16` and still be too large to store.
    all(ncodeunits(part) < 16 for part in parts) || return nothing
    return if length(parts) == 3
        SName(parts[1], parts[2], parts[3])
    elseif length(parts) == 2
        SName(parts[1], parts[2], ss15"")
    else
        SName(parts[1], ss15"", ss15"")
    end
end


# Only the region is inspected: region being empty implies all empty.
function Base.isempty(name::SName)
    return isempty(name.region)
end

# Split a string time zone name into its "/"-separated components.
function name_parts(str::AbstractString)
    return split(str, "/")
end
# Collect the non-empty components of an `SName` into a vector, in order.
# The region is always included; `locality2` is only included when `locality1` is
# also present.
function name_parts(name::SName)
    # TODO: this could be faster by returning an iterator, but it is not really
    # performance critical.
    components = [name.region]
    isempty(name.locality1) && return components
    push!(components, name.locality1)
    isempty(name.locality2) || push!(components, name.locality2)
    return components
end

# `Name` is the type used to store time zone names (for example, the keys of
# `TIME_ZONE_CACHE`). Short strings are broken on 32-bit (their `hash` misbehaves), so
# plain `String` is used there and `SName` everywhere else.
# TODO: https://github.com/JuliaString/MurmurHash3.jl/issues/12
const Name = Int === Int32 ? String : SName
22 changes: 14 additions & 8 deletions src/types/timezone.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const TIME_ZONE_CACHE = Dict{String,Tuple{TimeZone,Class}}()
const TIME_ZONE_CACHE = Dict{Name,Tuple{TimeZone,Class}}()

"""
TimeZone(str::AbstractString) -> TimeZone
Expand Down Expand Up @@ -41,11 +41,15 @@ US/Pacific (UTC-8/UTC-7)
TimeZone(::AbstractString, ::Class)

function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT))
return TimeZone(convert(Name, str), mask)
end

function TimeZone(name::Name, mask::Class=Class(:DEFAULT))
str = string(name)
# Note: If the class `mask` does not match the time zone we'll still load the
# information into the cache to ensure the result is consistent.
tz, class = get!(TIME_ZONE_CACHE, str) do
tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...)

tz, class = get!(TIME_ZONE_CACHE, name) do
tz_path = joinpath(TZData.COMPILED_DIR, name_parts(name)...)
if isfile(tz_path)
open(deserialize, tz_path, "r")
elseif occursin(FIXED_TIME_ZONE_REGEX, str)
Expand Down Expand Up @@ -91,19 +95,21 @@ end

Check whether a string is a valid for constructing a `TimeZone` with the provided `mask`.
"""
function istimezone(str::AbstractString, mask::Class=Class(:DEFAULT))
function istimezone(str::Union{AbstractString, Name}, mask::Class=Class(:DEFAULT))
# Start by performing quick FIXED class test
if mask & Class(:FIXED) != Class(:NONE) && occursin(FIXED_TIME_ZONE_REGEX, str)
if mask & Class(:FIXED) != Class(:NONE) && occursin(FIXED_TIME_ZONE_REGEX, string(str))
return true
end
name = try_convert(Name, str)
name isa Nothing && return false

# Perform more expensive checks against pre-compiled time zones
tz, class = get(TIME_ZONE_CACHE, str) do
tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...)
tz_path = joinpath(TZData.COMPILED_DIR, name_parts(name)...)

if isfile(tz_path)
# Cache the data since we're already performing the deserialization
TIME_ZONE_CACHE[str] = open(deserialize, tz_path, "r")
TIME_ZONE_CACHE[name] = open(deserialize, tz_path, "r")
else
nothing, Class(:NONE)
end
Expand Down
9 changes: 4 additions & 5 deletions src/types/variabletimezone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@ Base.isless(a::Transition, b::Transition) = isless(a.utc_datetime, b.utc_datetim
A `TimeZone` with an offset that changes over time.
"""
struct VariableTimeZone <: TimeZone
name::String
name::Name
transitions::Vector{Transition}
cutoff::Union{DateTime,Nothing}

function VariableTimeZone(name::AbstractString, transitions::Vector{Transition}, cutoff::Union{DateTime,Nothing}=nothing)
new(name, transitions, cutoff)
end
end
function VariableTimeZone(name::AbstractString, transitions::Vector{Transition})
VariableTimeZone(name, transitions, nothing)
end

name(tz::VariableTimeZone) = tz.name
Expand Down
26 changes: 12 additions & 14 deletions src/types/zoneddatetime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,20 @@ using Dates: AbstractDateTime, argerror, validargs
# A `DateTime` that includes `TimeZone` information.
# """

struct ZonedDateTime <: AbstractDateTime
struct ZonedDateTime{T<:TimeZone} <: AbstractDateTime
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I learned from Intervals.jl changing the type parameters like this should be considered a breaking change. I'd probably punt this as part of this PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't achieve the goal of making a ZonedDateTime with a FixedTimeZone isbits without this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I could remove it from this PR,
though we would lose many of the benefits.
(We would get most of those benefits from just #327.)

Though it would make it easier to make a PR that pushed the timezone as a value in the type parameter,
which might be the preferred breaking change.

utc_datetime::DateTime
timezone::TimeZone
timezone::T
zone::FixedTimeZone # The current zone for the utc_datetime.
end

function ZonedDateTime(utc_datetime::DateTime, timezone::TimeZone, zone::FixedTimeZone)
return new(utc_datetime, timezone, zone)
function ZonedDateTime(
utc_datetime::DateTime, timezone::VariableTimeZone, zone::FixedTimeZone
)
if timezone.cutoff !== nothing && utc_datetime >= timezone.cutoff
throw(UnhandledTimeError(timezone))
end

function ZonedDateTime(utc_datetime::DateTime, timezone::VariableTimeZone, zone::FixedTimeZone)
if timezone.cutoff !== nothing && utc_datetime >= timezone.cutoff
throw(UnhandledTimeError(timezone))
end

return new(utc_datetime, timezone, zone)
end
return ZonedDateTime{VariableTimeZone}(utc_datetime, timezone, zone)
end

"""
Expand Down Expand Up @@ -181,11 +179,11 @@ function Base.hash(zdt::ZonedDateTime, h::UInt)
return h
end

Base.typemin(::Type{ZonedDateTime}) = ZonedDateTime(typemin(DateTime), utc_tz; from_utc=true)
Base.typemax(::Type{ZonedDateTime}) = ZonedDateTime(typemax(DateTime), utc_tz; from_utc=true)
Base.typemin(::Type{<:ZonedDateTime}) = ZonedDateTime(typemin(DateTime), utc_tz; from_utc=true)
Base.typemax(::Type{<:ZonedDateTime}) = ZonedDateTime(typemax(DateTime), utc_tz; from_utc=true)

# Note: The `validargs` function is as part of the Dates parsing interface.
function Dates.validargs(::Type{ZonedDateTime}, y::Int64, m::Union{Int64, Int32}, d::Int64, h::Int64, mi::Int64, s::Int64, ms::Int64, tz::AbstractString)
function Dates.validargs(::Type{<:ZonedDateTime}, y::Int64, m::Union{Int64, Int32}, d::Int64, h::Int64, mi::Int64, s::Int64, ms::Int64, tz::AbstractString)
err = validargs(DateTime, y, Int64(m), d, h, mi, s, ms)
err === nothing || return err
istimezone(tz) || return argerror("TimeZone: \"$tz\" is not a recognized time zone")
Expand Down
4 changes: 2 additions & 2 deletions src/tzdata/compile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using Dates: parse_components

using ...TimeZones: TIME_ZONE_CACHE
using ...TimeZones: TimeZones, TimeZone, FixedTimeZone, VariableTimeZone, Transition, Class
using ...TimeZones: rename
using ...TimeZones: name_parts, rename, try_convert
using ..TZData: TimeOffset, ZERO, MIN_GMT_OFFSET, MAX_GMT_OFFSET, MIN_SAVE, MAX_SAVE,
ABS_DIFF_OFFSET

Expand Down Expand Up @@ -697,7 +697,7 @@ function compile(tz_source::TZSource, dest_dir::AbstractString; kwargs...)
empty!(TIME_ZONE_CACHE)

for (tz, class) in results
parts = split(TimeZones.name(tz), '/')
parts = name_parts(TimeZones.name(tz))
tz_path = joinpath(dest_dir, parts...)
tz_dir = dirname(tz_path)

Expand Down
6 changes: 3 additions & 3 deletions test/arithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ spring_zdt = ZonedDateTime(spring, warsaw)

# Arithmetic with a StepRange should always work even when the start/stop lands on
# ambiguous or non-existent DateTimes.
@testset "StepRange{ZonedDateTime}" begin
@testset "StepRange{<:ZonedDateTime}" begin
@testset "time-period" begin
dt = DateTime(2015, 6, 1)

Expand All @@ -71,7 +71,7 @@ spring_zdt = ZonedDateTime(spring, warsaw)
)
@test results == expected
@test length(results) == 2
@test results isa StepRange{ZonedDateTime}
@test results isa StepRange{<:ZonedDateTime}
end

@testset "date-period" begin
Expand All @@ -89,7 +89,7 @@ spring_zdt = ZonedDateTime(spring, warsaw)
)
@test results == expected
@test length(results) == 2
@test results isa StepRange{ZonedDateTime}
@test results isa StepRange{<:ZonedDateTime}
end

@testset "ambiguous" begin
Expand Down
2 changes: 1 addition & 1 deletion test/interpret.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ long = VariableTimeZone("Test/LongGap", [
])

# A time zone with an unnecessary transition that typically is hidden to the user
hidden = VariableTimeZone("Test/HiddenTransition", [
hidden = VariableTimeZone("Test/Hidden", [
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
Transition(DateTime(1800,1,1,0), zone["T+1"])
Transition(DateTime(1900,1,1,0), zone["T+0"])
Transition(DateTime(1935,4,1,2), zone["T+1"]) # The hidden transition
Expand Down
2 changes: 1 addition & 1 deletion test/parse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ end
consistent_years = t -> year(t.utc_datetime) >= 2007

tz, i = _parsesub_tz("CST+6CDT+5,M3.2.0/2,M11.1.0/2")
@test tz.name == "CST/CDT"
@test string(tz.name) == "CST/CDT"
@test tz.name != wpg.name
@test filter(consistent_years, tz.transitions) == filter(consistent_years, wpg.transitions)
@test tz.cutoff == wpg.cutoff
Expand Down
10 changes: 10 additions & 0 deletions test/types/fixedtimezone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,14 @@
fixed_tz = FixedTimeZone("UTC")
@test size(fixed_tz .== fixed_tz) == ()
end

@testset "isbits" begin
# We are not using ShortStrings on 32-bit due to hash being broken on 32-bit.
# See https://github.com/JuliaString/MurmurHash3.jl/issues/12
if Int === Int64
@test isbits(FixedTimeZone("0123"))
else
@test_broken isbits(FixedTimeZone("0123"))
end
end
end
Loading