From 9125776bb38ea2f1b80fad6b49865dc0507a6dea Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Fri, 9 Apr 2021 20:06:17 +0100 Subject: [PATCH 01/10] Introduce a Shortstring based Name type --- Project.toml | 2 + src/TimeZones.jl | 2 + src/types/fixedtimezone.jl | 2 +- src/types/name.jl | 46 ++++++++++++++++++++ src/types/timezone.jl | 21 +++++---- src/types/variabletimezone.jl | 9 ++-- src/types/zoneddatetime.jl | 20 ++++----- src/tzdata/compile.jl | 4 +- test/arithmetic.jl | 4 +- test/interpret.jl | 2 +- test/types/zoneddatetime.jl | 80 +++++++++++++++++++++-------------- test/tzdata/compile.jl | 8 ++-- 12 files changed, 134 insertions(+), 66 deletions(-) create mode 100644 src/types/name.jl diff --git a/Project.toml b/Project.toml index aabb70883..ca20361d5 100644 --- a/Project.toml +++ b/Project.toml @@ -11,12 +11,14 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +ShortStrings = "63221d1c-8677-4ff0-9126-0ff0817b4975" Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [compat] EzXML = "0.9.1, 1" Mocking = "0.7" RecipesBase = "0.7, 0.8, 1" +ShortStrings = 0.3.5 julia = "1" [extras] diff --git a/src/TimeZones.jl b/src/TimeZones.jl index 754cecf58..07eb77ab7 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -2,6 +2,7 @@ module TimeZones using Dates using Printf +using ShortStrings using Serialization using RecipesBase: RecipesBase, @recipe using Unicode @@ -55,6 +56,7 @@ include("indexable_generator.jl") include("class.jl") include("utcoffset.jl") +include(joinpath("types", "name.jl")) include(joinpath("types", "timezone.jl")) include(joinpath("types", "fixedtimezone.jl")) include(joinpath("types", "variabletimezone.jl")) diff --git a/src/types/fixedtimezone.jl b/src/types/fixedtimezone.jl index 1d349ca07..71ab635cb 100644 --- a/src/types/fixedtimezone.jl +++ b/src/types/fixedtimezone.jl @@ -30,7 +30,7 @@ const FIXED_TIME_ZONE_REGEX = r""" A `TimeZone` with a constant offset for all of time. """ struct FixedTimeZone <: TimeZone - name::String + name::Name offset::UTCOffset end diff --git a/src/types/name.jl b/src/types/name.jl new file mode 100644 index 000000000..cb4506c5b --- /dev/null +++ b/src/types/name.jl @@ -0,0 +1,46 @@ +struct SName + region::ShortString15 + locality1::ShortString15 + locality2::ShortString15 +end + +function Base.print(io::IO, name::SName) + print(io, name.region) + if !isempty(name.locality1) + print(io,"/", name.locality1) + if !isempty(name.locality2) + print(io,"/", name.locality2) + end + end +end + +Base.convert(::Type{String}, name::SName) = string(name) +function Base.convert(::Type{SName}, str::AbstractString) + parts = split(str, "/"; limit=3) + return if length(parts) == 3 + SName(parts[1], parts[2], parts[3]) + elseif length(parts) == 2 + SName(parts[1], parts[2], ss15"") + else + SName(parts[1], ss15"", ss15"") + end +end + +Base.isempty(name::SName) = isempty(name.region) # region being empty implies all empty + +name_parts(str::AbstractString) = split(str, "/") +function name_parts(name::SName) + # TODO this could be faster by returning an iterator but not really performance critial + parts = [name.region] + if !isempty(name.locality1) + push!(parts, name.locality1) + if !isempty(name.locality2) + push!(parts, name.locality2) + end + end + return parts +end + +# Short strings are broken on 32bit: +# TODO: https://github.com/JuliaString/MurmurHash3.jl/issues/12 +const Name = Int === Int32 ? String : SName diff --git a/src/types/timezone.jl b/src/types/timezone.jl index 7e8289b1f..ef78aa8b8 100644 --- a/src/types/timezone.jl +++ b/src/types/timezone.jl @@ -1,4 +1,4 @@ -const TIME_ZONE_CACHE = Dict{String,Tuple{TimeZone,Class}}() +const TIME_ZONE_CACHE = Dict{Name,Tuple{TimeZone,Class}}() """ TimeZone(str::AbstractString) -> TimeZone @@ -41,11 +41,15 @@ US/Pacific (UTC-8/UTC-7) TimeZone(::AbstractString, ::Class) function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT)) + return TimeZone(convert(Name, str), mask) +end + +function TimeZone(name::Name, mask::Class=Class(:DEFAULT)) + str = string(name) # Note: If the class `mask` does not match the time zone we'll still load the # information into the cache to ensure the result is consistent. - tz, class = get!(TIME_ZONE_CACHE, str) do - tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...) - + tz, class = get!(TIME_ZONE_CACHE, name) do + tz_path = joinpath(TZData.COMPILED_DIR, name_parts(name)...) if isfile(tz_path) open(deserialize, tz_path, "r") elseif occursin(FIXED_TIME_ZONE_REGEX, str) @@ -91,19 +95,20 @@ end Check whether a string is a valid for constructing a `TimeZone` with the provided `mask`. """ -function istimezone(str::AbstractString, mask::Class=Class(:DEFAULT)) +function istimezone(str::Union{AbstractString, Name}, mask::Class=Class(:DEFAULT)) # Start by performing quick FIXED class test - if mask & Class(:FIXED) != Class(:NONE) && occursin(FIXED_TIME_ZONE_REGEX, str) + if mask & Class(:FIXED) != Class(:NONE) && occursin(FIXED_TIME_ZONE_REGEX, string(str)) return true end + name = convert(Name, str) # Perform more expensive checks against pre-compiled time zones tz, class = get(TIME_ZONE_CACHE, str) do - tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...) + tz_path = joinpath(TZData.COMPILED_DIR, name_parts(name)...) if isfile(tz_path) # Cache the data since we're already performing the deserialization - TIME_ZONE_CACHE[str] = open(deserialize, tz_path, "r") + TIME_ZONE_CACHE[name] = open(deserialize, tz_path, "r") else nothing, Class(:NONE) end diff --git a/src/types/variabletimezone.jl b/src/types/variabletimezone.jl index 35b2b89b6..842d615b0 100644 --- a/src/types/variabletimezone.jl +++ b/src/types/variabletimezone.jl @@ -11,13 +11,12 @@ Base.isless(a::Transition, b::Transition) = isless(a.utc_datetime, b.utc_datetim A `TimeZone` with an offset that changes over time. """ struct VariableTimeZone <: TimeZone - name::String + name::Name transitions::Vector{Transition} cutoff::Union{DateTime,Nothing} - - function VariableTimeZone(name::AbstractString, transitions::Vector{Transition}, cutoff::Union{DateTime,Nothing}=nothing) - new(name, transitions, cutoff) - end +end +function VariableTimeZone(name::AbstractString, transitions::Vector{Transition}) + VariableTimeZone(name, transitions, nothing) end name(tz::VariableTimeZone) = tz.name diff --git a/src/types/zoneddatetime.jl b/src/types/zoneddatetime.jl index 2c832e8d9..fd489cc0b 100644 --- a/src/types/zoneddatetime.jl +++ b/src/types/zoneddatetime.jl @@ -6,22 +6,20 @@ using Dates: AbstractDateTime, argerror, validargs # A `DateTime` that includes `TimeZone` information. # """ -struct ZonedDateTime <: AbstractDateTime +struct ZonedDateTime{T<:TimeZone} <: AbstractDateTime utc_datetime::DateTime - timezone::TimeZone + timezone::T zone::FixedTimeZone # The current zone for the utc_datetime. +end - function ZonedDateTime(utc_datetime::DateTime, timezone::TimeZone, zone::FixedTimeZone) - return new(utc_datetime, timezone, zone) +function ZonedDateTime( + utc_datetime::DateTime, timezone::VariableTimeZone, zone::FixedTimeZone +) + if timezone.cutoff !== nothing && utc_datetime >= timezone.cutoff + throw(UnhandledTimeError(timezone)) end - function ZonedDateTime(utc_datetime::DateTime, timezone::VariableTimeZone, zone::FixedTimeZone) - if timezone.cutoff !== nothing && utc_datetime >= timezone.cutoff - throw(UnhandledTimeError(timezone)) - end - - return new(utc_datetime, timezone, zone) - end + return ZonedDateTime{VariableTimeZone}(utc_datetime, timezone, zone) end """ diff --git a/src/tzdata/compile.jl b/src/tzdata/compile.jl index ec4a29f70..c5954233e 100644 --- a/src/tzdata/compile.jl +++ b/src/tzdata/compile.jl @@ -4,7 +4,7 @@ using Dates: parse_components using ...TimeZones: TIME_ZONE_CACHE using ...TimeZones: TimeZones, TimeZone, FixedTimeZone, VariableTimeZone, Transition, Class -using ...TimeZones: rename +using ...TimeZones: name_parts, rename using ..TZData: TimeOffset, ZERO, MIN_GMT_OFFSET, MAX_GMT_OFFSET, MIN_SAVE, MAX_SAVE, ABS_DIFF_OFFSET @@ -697,7 +697,7 @@ function compile(tz_source::TZSource, dest_dir::AbstractString; kwargs...) empty!(TIME_ZONE_CACHE) for (tz, class) in results - parts = split(TimeZones.name(tz), '/') + parts = name_parts(TimeZones.name(tz)) tz_path = joinpath(dest_dir, parts...) tz_dir = dirname(tz_path) diff --git a/test/arithmetic.jl b/test/arithmetic.jl index 2c8f66aa7..6a272dae3 100644 --- a/test/arithmetic.jl +++ b/test/arithmetic.jl @@ -71,7 +71,7 @@ spring_zdt = ZonedDateTime(spring, warsaw) ) @test results == expected @test length(results) == 2 - @test results isa StepRange{ZonedDateTime} + @test results isa StepRange{<:ZonedDateTime} end @testset "date-period" begin @@ -89,7 +89,7 @@ spring_zdt = ZonedDateTime(spring, warsaw) ) @test results == expected @test length(results) == 2 - @test results isa StepRange{ZonedDateTime} + @test results isa StepRange{<:ZonedDateTime} end @testset "ambiguous" begin diff --git a/test/interpret.jl b/test/interpret.jl index c946b18d7..144571390 100644 --- a/test/interpret.jl +++ b/test/interpret.jl @@ -125,7 +125,7 @@ long = VariableTimeZone("Test/LongGap", [ ]) # A time zone with an unnecessary transition that typically is hidden to the user -hidden = VariableTimeZone("Test/HiddenTransition", [ +hidden = VariableTimeZone("Test/Hidden", [ Transition(DateTime(1800,1,1,0), zone["T+1"]) Transition(DateTime(1900,1,1,0), zone["T+0"]) Transition(DateTime(1935,4,1,2), zone["T+1"]) # The hidden transition diff --git a/test/types/zoneddatetime.jl b/test/types/zoneddatetime.jl index e776e5bb9..748d99e1a 100644 --- a/test/types/zoneddatetime.jl +++ b/test/types/zoneddatetime.jl @@ -77,13 +77,13 @@ using Dates: Hour, Second, UTM, @dateformat_str utc_dt = DateTime(1916, 1, 31, 23) # Disambiguating parameters ignored when there is no ambiguity. - @test ZonedDateTime(local_dt, warsaw).zone.name == "CET" - @test ZonedDateTime(local_dt, warsaw, 0).zone.name == "CET" - @test ZonedDateTime(local_dt, warsaw, 1).zone.name == "CET" - @test ZonedDateTime(local_dt, warsaw, 2).zone.name == "CET" - @test ZonedDateTime(local_dt, warsaw, true).zone.name == "CET" - @test ZonedDateTime(local_dt, warsaw, false).zone.name == "CET" - @test ZonedDateTime(utc_dt, warsaw, from_utc=true).zone.name == "CET" + @test string(ZonedDateTime(local_dt, warsaw).zone.name) == "CET" + @test string(ZonedDateTime(local_dt, warsaw, 0).zone.name) == "CET" + @test string(ZonedDateTime(local_dt, warsaw, 1).zone.name) == "CET" + @test string(ZonedDateTime(local_dt, warsaw, 2).zone.name) == "CET" + @test string(ZonedDateTime(local_dt, warsaw, true).zone.name) == "CET" + @test string(ZonedDateTime(local_dt, warsaw, false).zone.name) == "CET" + @test string(ZonedDateTime(utc_dt, warsaw, from_utc=true).zone.name) == "CET" @test ZonedDateTime(local_dt, warsaw).utc_datetime == utc_dt @test ZonedDateTime(local_dt, warsaw, 0).utc_datetime == utc_dt @@ -99,13 +99,13 @@ using Dates: Hour, Second, UTM, @dateformat_str utc_dt = DateTime(1916, 5, 31, 22) # Disambiguating parameters ignored when there is no ambiguity. - @test ZonedDateTime(local_dt, warsaw).zone.name == "CEST" - @test ZonedDateTime(local_dt, warsaw, 0).zone.name == "CEST" - @test ZonedDateTime(local_dt, warsaw, 1).zone.name == "CEST" - @test ZonedDateTime(local_dt, warsaw, 2).zone.name == "CEST" - @test ZonedDateTime(local_dt, warsaw, true).zone.name == "CEST" - @test ZonedDateTime(local_dt, warsaw, false).zone.name == "CEST" - @test ZonedDateTime(utc_dt, warsaw, from_utc=true).zone.name == "CEST" + @test string(ZonedDateTime(local_dt, warsaw).zone.name) == "CEST" + @test string(ZonedDateTime(local_dt, warsaw, 0).zone.name) == "CEST" + @test string(ZonedDateTime(local_dt, warsaw, 1).zone.name) == "CEST" + @test string(ZonedDateTime(local_dt, warsaw, 2).zone.name) == "CEST" + @test string(ZonedDateTime(local_dt, warsaw, true).zone.name) == "CEST" + @test string(ZonedDateTime(local_dt, warsaw, false).zone.name) == "CEST" + @test string(ZonedDateTime(utc_dt, warsaw, from_utc=true).zone.name) == "CEST" @test ZonedDateTime(local_dt, warsaw).utc_datetime == utc_dt @test ZonedDateTime(local_dt, warsaw, 0).utc_datetime == utc_dt @@ -133,10 +133,10 @@ using Dates: Hour, Second, UTM, @dateformat_str @test_throws NonExistentTimeError ZonedDateTime(local_dts[2], warsaw, true) @test_throws NonExistentTimeError ZonedDateTime(local_dts[2], warsaw, false) - @test ZonedDateTime(local_dts[1], warsaw).zone.name == "CET" - @test ZonedDateTime(local_dts[3], warsaw).zone.name == "CEST" - @test ZonedDateTime(utc_dts[1], warsaw, from_utc=true).zone.name == "CET" - @test ZonedDateTime(utc_dts[2], warsaw, from_utc=true).zone.name == "CEST" + @test string(ZonedDateTime(local_dts[1], warsaw).zone.name) == "CET" + @test string(ZonedDateTime(local_dts[3], warsaw).zone.name) == "CEST" + @test string(ZonedDateTime(utc_dts[1], warsaw, from_utc=true).zone.name) == "CET" + @test string(ZonedDateTime(utc_dts[2], warsaw, from_utc=true).zone.name) == "CEST" @test ZonedDateTime(local_dts[1], warsaw).utc_datetime == utc_dts[1] @test ZonedDateTime(local_dts[3], warsaw).utc_datetime == utc_dts[2] @@ -151,12 +151,12 @@ using Dates: Hour, Second, UTM, @dateformat_str @test_throws AmbiguousTimeError ZonedDateTime(local_dt, warsaw) @test_throws AmbiguousTimeError ZonedDateTime(local_dt, warsaw, 0) - @test ZonedDateTime(local_dt, warsaw, 1).zone.name == "CEST" - @test ZonedDateTime(local_dt, warsaw, 2).zone.name == "CET" - @test ZonedDateTime(local_dt, warsaw, true).zone.name == "CEST" - @test ZonedDateTime(local_dt, warsaw, false).zone.name == "CET" - @test ZonedDateTime(utc_dts[1], warsaw, from_utc=true).zone.name == "CEST" - @test ZonedDateTime(utc_dts[2], warsaw, from_utc=true).zone.name == "CET" + @test string(ZonedDateTime(local_dt, warsaw, 1).zone.name) == "CEST" + @test string(ZonedDateTime(local_dt, warsaw, 2).zone.name) == "CET" + @test string(ZonedDateTime(local_dt, warsaw, true).zone.name) == "CEST" + @test string(ZonedDateTime(local_dt, warsaw, false).zone.name) == "CET" + @test string(ZonedDateTime(utc_dts[1], warsaw, from_utc=true).zone.name) == "CEST" + @test string(ZonedDateTime(utc_dts[2], warsaw, from_utc=true).zone.name) == "CET" @test ZonedDateTime(local_dt, warsaw, 1).utc_datetime == utc_dts[1] @test ZonedDateTime(local_dt, warsaw, 2).utc_datetime == utc_dts[2] @@ -172,12 +172,12 @@ using Dates: Hour, Second, UTM, @dateformat_str utc_dts = (DateTime(1922, 5, 31, 21), DateTime(1922, 5, 31, 22)) @test_throws AmbiguousTimeError ZonedDateTime(local_dt, warsaw) - @test ZonedDateTime(local_dt, warsaw, 1).zone.name == "EET" - @test ZonedDateTime(local_dt, warsaw, 2).zone.name == "CET" + @test string(ZonedDateTime(local_dt, warsaw, 1).zone.name) == "EET" + @test string(ZonedDateTime(local_dt, warsaw, 2).zone.name) == "CET" @test_throws AmbiguousTimeError ZonedDateTime(local_dt, warsaw, true) @test_throws AmbiguousTimeError ZonedDateTime(local_dt, warsaw, false) - @test ZonedDateTime(utc_dts[1], warsaw, from_utc=true).zone.name == "EET" - @test ZonedDateTime(utc_dts[2], warsaw, from_utc=true).zone.name == "CET" + @test string(ZonedDateTime(utc_dts[1], warsaw, from_utc=true).zone.name) == "EET" + @test string(ZonedDateTime(utc_dts[2], warsaw, from_utc=true).zone.name) == "CET" @test ZonedDateTime(local_dt, warsaw, 1).utc_datetime == utc_dts[1] @test ZonedDateTime(local_dt, warsaw, 2).utc_datetime == utc_dts[2] @@ -283,14 +283,14 @@ using Dates: Hour, Second, UTM, @dateformat_str # Make sure that the duplicated hour only doesn't contain an additional entry. @test_throws AmbiguousTimeError ZonedDateTime(DateTime(1935,9,1), dup) - @test ZonedDateTime(DateTime(1935,9,1), dup, 1).zone.name == "DTDT-2" - @test ZonedDateTime(DateTime(1935,9,1), dup, 2).zone.name == "DTST" + @test string(ZonedDateTime(DateTime(1935,9,1), dup, 1).zone.name) == "DTDT-2" + @test string(ZonedDateTime(DateTime(1935,9,1), dup, 2).zone.name) == "DTST" @test_throws BoundsError ZonedDateTime(DateTime(1935,9,1), dup, 3) # Ensure that DTDT-1 is completely ignored. @test_throws NonExistentTimeError ZonedDateTime(DateTime(1935,4,1), dup) - @test ZonedDateTime(DateTime(1935,4,1,1), dup).zone.name == "DTDT-2" - @test ZonedDateTime(DateTime(1935,8,31,23), dup).zone.name == "DTDT-2" + @test string(ZonedDateTime(DateTime(1935,4,1,1), dup).zone.name) == "DTDT-2" + @test string(ZonedDateTime(DateTime(1935,8,31,23), dup).zone.name) == "DTDT-2" end @testset "equality" begin @@ -430,4 +430,20 @@ using Dates: Hour, Second, UTM, @dateformat_str @test typemin(ZonedDateTime) <= ZonedDateTime(typemin(DateTime), utc) @test typemax(ZonedDateTime) >= ZonedDateTime(typemax(DateTime), utc) end + + # TODO: isbits is not working on 32 bit because of not using SName type, because of + # https://github.com/JuliaString/MurmurHash3.jl/issues/12 + Int==Int64 && @testset "isbits" begin + utc_zdt = ZonedDateTime(1, 2, 3, 4, 5, 6, 7, utc) + @test isbits(utc) + + var_zdt = ZonedDateTime(Date(2011, 6, 1), tz"America/Winnipeg") + @test !isbits(var_zdt) # we might like this, but we don't have it. + @test isbits(var_zdt.utc_datetime) + @test isbits(var_zdt.zone) + @test isbits(var_zdt.utc_datetime) + @test isbits(var_zdt.timezone.cutoff) + @test isbits(var_zdt.timezone.name) + @test isbitstype(eltype(var_zdt.timezone.transitions)) + end end diff --git a/test/tzdata/compile.jl b/test/tzdata/compile.jl index b2e80d9fc..c0cae1943 100644 --- a/test/tzdata/compile.jl +++ b/test/tzdata/compile.jl @@ -132,10 +132,10 @@ dates, ordered = order_rules([rule_post, rule_endless, rule_overlap, rule_pre], # Europe/Warsaw time zone has a combination of factors that requires computing # the abbreviation to be done in a specific way. - @test tz.transitions[1].zone.name == "LMT" - @test tz.transitions[2].zone.name == "WMT" - @test tz.transitions[3].zone.name == "CET" # Standard time - @test tz.transitions[4].zone.name == "CEST" # Daylight saving time + @test string(tz.transitions[1].zone.name) == "LMT" + @test string(tz.transitions[2].zone.name) == "WMT" + @test string(tz.transitions[3].zone.name) == "CET" # Standard time + @test string(tz.transitions[4].zone.name) == "CEST" # Daylight saving time @test issorted(tz.transitions) zone = Dict{AbstractString,FixedTimeZone}() From f07d2469b76b4a335f89f2ee8f118f5f9eb78c64 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Fri, 9 Apr 2021 20:18:49 +0100 Subject: [PATCH 02/10] add try_convert --- Project.toml | 2 +- src/types/name.jl | 10 ++++++++++ src/types/timezone.jl | 3 ++- src/tzdata/compile.jl | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index ca20361d5..048fefb6f 100644 --- a/Project.toml +++ b/Project.toml @@ -18,7 +18,7 @@ Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" EzXML = "0.9.1, 1" Mocking = "0.7" RecipesBase = "0.7, 0.8, 1" -ShortStrings = 0.3.5 +ShortStrings = "0.3.5" julia = "1" [extras] diff --git a/src/types/name.jl b/src/types/name.jl index cb4506c5b..98da06c57 100644 --- a/src/types/name.jl +++ b/src/types/name.jl @@ -16,7 +16,16 @@ end Base.convert(::Type{String}, name::SName) = string(name) function Base.convert(::Type{SName}, str::AbstractString) + name = try_convert(SName, str) + name isa Nothing && DomainError(str, "All timezone name parts must have length < 16") + return name +end + +try_convert(::Type{SName}, name::SName) = name +try_convert(::Type{String}, name::String) = name +function try_convert(::Type{SName}, str::AbstractString) parts = split(str, "/"; limit=3) + all(length(parts) < 16) ||return nothing return if length(parts) == 3 SName(parts[1], parts[2], parts[3]) elseif length(parts) == 2 @@ -26,6 +35,7 @@ function Base.convert(::Type{SName}, str::AbstractString) end end + Base.isempty(name::SName) = isempty(name.region) # region being empty implies all empty name_parts(str::AbstractString) = split(str, "/") diff --git a/src/types/timezone.jl b/src/types/timezone.jl index ef78aa8b8..60a0ea2c7 100644 --- a/src/types/timezone.jl +++ b/src/types/timezone.jl @@ -100,7 +100,8 @@ function istimezone(str::Union{AbstractString, Name}, mask::Class=Class(:DEFAULT if mask & Class(:FIXED) != Class(:NONE) && occursin(FIXED_TIME_ZONE_REGEX, string(str)) return true end - name = convert(Name, str) + name = try_convert(Name, str) + name isa Nothing && return false # Perform more expensive checks against pre-compiled time zones tz, class = get(TIME_ZONE_CACHE, str) do diff --git a/src/tzdata/compile.jl b/src/tzdata/compile.jl index c5954233e..03428f605 100644 --- a/src/tzdata/compile.jl +++ b/src/tzdata/compile.jl @@ -4,7 +4,7 @@ using Dates: parse_components using ...TimeZones: TIME_ZONE_CACHE using ...TimeZones: TimeZones, TimeZone, FixedTimeZone, VariableTimeZone, Transition, Class -using ...TimeZones: name_parts, rename +using ...TimeZones: name_parts, rename, try_convert using ..TZData: TimeOffset, ZERO, MIN_GMT_OFFSET, MAX_GMT_OFFSET, MIN_SAVE, MAX_SAVE, ABS_DIFF_OFFSET From d5dd9bccfe2c432825e9c99a31a680606a9b04ca Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Mon, 12 Apr 2021 19:02:11 +0100 Subject: [PATCH 03/10] make FixedTimeZones be isbits by using ShortStrings --- Project.toml | 2 ++ src/TimeZones.jl | 1 + src/types/fixedtimezone.jl | 8 ++++++-- test/types/fixedtimezone.jl | 10 ++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index aabb70883..5cd54a2d0 100644 --- a/Project.toml +++ b/Project.toml @@ -11,12 +11,14 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +ShortStrings = "63221d1c-8677-4ff0-9126-0ff0817b4975" Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [compat] EzXML = "0.9.1, 1" Mocking = "0.7" RecipesBase = "0.7, 0.8, 1" +ShortStrings = "0.3.6" julia = "1" [extras] diff --git a/src/TimeZones.jl b/src/TimeZones.jl index 754cecf58..17c1d9ece 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -4,6 +4,7 @@ using Dates using Printf using Serialization using RecipesBase: RecipesBase, @recipe +using ShortStrings: ShortString15 using Unicode import Dates: TimeZone, UTC diff --git a/src/types/fixedtimezone.jl b/src/types/fixedtimezone.jl index 1d349ca07..a6bd2a181 100644 --- a/src/types/fixedtimezone.jl +++ b/src/types/fixedtimezone.jl @@ -1,3 +1,7 @@ +# Ideally would always use ShortString15, but it's `hash` is broken on 32-bit systems. +# https://github.com/JuliaString/MurmurHash3.jl/issues/12 +const FixedTimeZoneName = Int === Int64 ? ShortString15 : String + const FIXED_TIME_ZONE_REGEX = r""" ^(?| Z @@ -30,7 +34,7 @@ const FIXED_TIME_ZONE_REGEX = r""" A `TimeZone` with a constant offset for all of time. """ struct FixedTimeZone <: TimeZone - name::String + name::FixedTimeZoneName offset::UTCOffset end @@ -72,7 +76,7 @@ UTC+15:45:21 function FixedTimeZone(s::AbstractString) s == "Z" && return UTC_ZERO - m = match(FIXED_TIME_ZONE_REGEX, s) + m = match(FIXED_TIME_ZONE_REGEX, String(s)) m === nothing && throw(ArgumentError("Unrecognized time zone: $s")) coefficient = m[:sign] == "-" ? -1 : 1 diff --git a/test/types/fixedtimezone.jl b/test/types/fixedtimezone.jl index 94cae0ab4..d4c37a83c 100644 --- a/test/types/fixedtimezone.jl +++ b/test/types/fixedtimezone.jl @@ -41,4 +41,14 @@ fixed_tz = FixedTimeZone("UTC") @test size(fixed_tz .== fixed_tz) == () end + + @testset "isbits" begin + # We are not using ShortStrings on 32 bit due to hash being broken on it. + # see https://github.com/JuliaString/MurmurHash3.jl/issues/12 + if Int === Int64 + @test isbits FixedTimeZone("0123") + else + @test_broken isbits FixedTimeZone("0123") + end + end end From b12c7db20daf8f599acd76eb4f5f743112af097a Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 13 Apr 2021 11:05:13 +0100 Subject: [PATCH 04/10] Tweak comment --- src/tzdata/compile.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tzdata/compile.jl b/src/tzdata/compile.jl index ec4a29f70..b42f450c8 100644 --- a/src/tzdata/compile.jl +++ b/src/tzdata/compile.jl @@ -697,7 +697,9 @@ function compile(tz_source::TZSource, dest_dir::AbstractString; kwargs...) empty!(TIME_ZONE_CACHE) for (tz, class) in results - parts = split(TimeZones.name(tz), '/') + # Need to convert all abstract strings to `String`s because ShortString's don't + # support split. + parts = split(String(TimeZones.name(tz)), '/') tz_path = joinpath(dest_dir, parts...) tz_dir = dirname(tz_path) From 9e7b0732d5d82003fab1e11b1769e45561f954be Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 13 Apr 2021 11:05:22 +0100 Subject: [PATCH 05/10] tweak comment --- dev/ShortStrings | 1 + test/types/fixedtimezone.jl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 160000 dev/ShortStrings diff --git a/dev/ShortStrings b/dev/ShortStrings new file mode 160000 index 000000000..65ca2364a --- /dev/null +++ b/dev/ShortStrings @@ -0,0 +1 @@ +Subproject commit 65ca2364af105fc2647befe0b73ca3ecdd8c9f7a diff --git a/test/types/fixedtimezone.jl b/test/types/fixedtimezone.jl index d4c37a83c..5285b1961 100644 --- a/test/types/fixedtimezone.jl +++ b/test/types/fixedtimezone.jl @@ -43,8 +43,8 @@ end @testset "isbits" begin - # We are not using ShortStrings on 32 bit due to hash being broken on it. - # see https://github.com/JuliaString/MurmurHash3.jl/issues/12 + # We are not using ShortStrings on 32-bit due to hash being broken on 32-bit. + # See https://github.com/JuliaString/MurmurHash3.jl/issues/12 if Int === Int64 @test isbits FixedTimeZone("0123") else From 2ec09bf291a27014029841279ce4c7709b5f844e Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 13 Apr 2021 18:17:02 +0100 Subject: [PATCH 06/10] fix try_convert --- src/types/name.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/types/name.jl b/src/types/name.jl index 98da06c57..58ccfc758 100644 --- a/src/types/name.jl +++ b/src/types/name.jl @@ -17,15 +17,18 @@ end Base.convert(::Type{String}, name::SName) = string(name) function Base.convert(::Type{SName}, str::AbstractString) name = try_convert(SName, str) - name isa Nothing && DomainError(str, "All timezone name parts must have length < 16") + if name isa Nothing + throw(DomainError(str, "Timezone must have 3 or fewer parts, all with length < 16")) + end return name end try_convert(::Type{SName}, name::SName) = name try_convert(::Type{String}, name::String) = name function try_convert(::Type{SName}, str::AbstractString) - parts = split(str, "/"; limit=3) - all(length(parts) < 16) ||return nothing + parts = split(str, "/") + (length(parts) <= 3) || return nothing + all(length(part)<16 for part in parts) || return nothing return if length(parts) == 3 SName(parts[1], parts[2], parts[3]) elseif length(parts) == 2 From 49bbd30b05f54d90dd23e325c1136f40b96c7e27 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 13 Apr 2021 19:06:19 +0100 Subject: [PATCH 07/10] fix io to never show the SName type --- src/io.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/io.jl b/src/io.jl index 7df072d8a..8dbc650aa 100644 --- a/src/io.jl +++ b/src/io.jl @@ -73,17 +73,17 @@ function Base.show(io::IO, tz::VariableTimeZone) # Compact printing of a custom time zone which is non-constructable elseif get(io, :compact, false) - print(io, VariableTimeZone, "(") - show(io, tz.name) - print(io, ", ...)") + print(io, VariableTimeZone, "(\"") + print(io, tz.name) + print(io, "\", ...)") # Verbose printing which should print a fully constructable `VariableTimeZone`. else # Force `:compact => false` to make the force the transition vector printing into # long form. - print(io, VariableTimeZone, "(") - show(io, tz.name) - print(io, ", ") + print(io, VariableTimeZone, "(\"") + print(io, tz.name) + print(io, "\", ") show(IOContext(io, :compact => false), tz.transitions) print(io, ", ") show(io, tz.cutoff) @@ -128,7 +128,7 @@ Base.show(io::IO, ::MIME"text/plain", zdt::ZonedDateTime) = print(io, zdt) # https://github.com/JuliaLang/julia/pull/33290 if VERSION >= v"1.5.0-DEV.224" - Base.typeinfo_implicit(::Type{ZonedDateTime}) = true + Base.typeinfo_implicit(::Type{<:ZonedDateTime}) = true end # Use compact printing on certain element types for Julia versions before: From e3d8d0dfcbcd2ea82160c7df175e6b3b215a433f Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 13 Apr 2021 19:06:44 +0100 Subject: [PATCH 08/10] fixed missed stringing in a test --- test/parse.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parse.jl b/test/parse.jl index 100aaebd8..7b8294442 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -327,7 +327,7 @@ end consistent_years = t -> year(t.utc_datetime) >= 2007 tz, i = _parsesub_tz("CST+6CDT+5,M3.2.0/2,M11.1.0/2") - @test tz.name == "CST/CDT" + @test string(tz.name) == "CST/CDT" @test tz.name != wpg.name @test filter(consistent_years, tz.transitions) == filter(consistent_years, wpg.transitions) @test tz.cutoff == wpg.cutoff From faf0a299507d8ceae25bd4ba1fbe4c1bfa455f88 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 13 Apr 2021 19:07:38 +0100 Subject: [PATCH 09/10] make explict that Type and StepRange are for any <:ZonedDataTime --- src/arithmetic.jl | 10 +++++----- src/types/zoneddatetime.jl | 6 +++--- test/arithmetic.jl | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/arithmetic.jl b/src/arithmetic.jl index e1991b711..9438ca19d 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -17,7 +17,7 @@ function Base.:(-)(zdt::ZonedDateTime, p::TimePeriod) return ZonedDateTime(DateTime(zdt, UTC) - p, timezone(zdt); from_utc=true) end -function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::DatePeriod) +function broadcasted(::typeof(+), r::StepRange{<:ZonedDateTime}, p::DatePeriod) start, step, stop = first(r), Base.step(r), last(r) # Since the local time + period can result in an invalid local datetime when working with @@ -41,10 +41,10 @@ function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::DatePeriod) return StepRange(start, step, stop) end -function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::TimePeriod) +function broadcasted(::typeof(+), r::StepRange{<:ZonedDateTime}, p::TimePeriod) return StepRange(r.start + p, r.step, r.stop + p) end -broadcasted(::typeof(+), p::Period, r::StepRange{ZonedDateTime}) = broadcasted(+, r, p) -broadcasted(::typeof(-), r::StepRange{ZonedDateTime}, p::Period) = broadcasted(+, r, -p) -broadcasted(::typeof(-), p::Period, r::StepRange{ZonedDateTime}) = broadcasted(-, r, p) +broadcasted(::typeof(+), p::Period, r::StepRange{<:ZonedDateTime}) = broadcasted(+, r, p) +broadcasted(::typeof(-), r::StepRange{<:ZonedDateTime}, p::Period) = broadcasted(+, r, -p) +broadcasted(::typeof(-), p::Period, r::StepRange{<:ZonedDateTime}) = broadcasted(-, r, p) diff --git a/src/types/zoneddatetime.jl b/src/types/zoneddatetime.jl index fd489cc0b..378dd8cd6 100644 --- a/src/types/zoneddatetime.jl +++ b/src/types/zoneddatetime.jl @@ -179,11 +179,11 @@ function Base.hash(zdt::ZonedDateTime, h::UInt) return h end -Base.typemin(::Type{ZonedDateTime}) = ZonedDateTime(typemin(DateTime), utc_tz; from_utc=true) -Base.typemax(::Type{ZonedDateTime}) = ZonedDateTime(typemax(DateTime), utc_tz; from_utc=true) +Base.typemin(::Type{<:ZonedDateTime}) = ZonedDateTime(typemin(DateTime), utc_tz; from_utc=true) +Base.typemax(::Type{<:ZonedDateTime}) = ZonedDateTime(typemax(DateTime), utc_tz; from_utc=true) # Note: The `validargs` function is as part of the Dates parsing interface. -function Dates.validargs(::Type{ZonedDateTime}, y::Int64, m::Union{Int64, Int32}, d::Int64, h::Int64, mi::Int64, s::Int64, ms::Int64, tz::AbstractString) +function Dates.validargs(::Type{<:ZonedDateTime}, y::Int64, m::Union{Int64, Int32}, d::Int64, h::Int64, mi::Int64, s::Int64, ms::Int64, tz::AbstractString) err = validargs(DateTime, y, Int64(m), d, h, mi, s, ms) err === nothing || return err istimezone(tz) || return argerror("TimeZone: \"$tz\" is not a recognized time zone") diff --git a/test/arithmetic.jl b/test/arithmetic.jl index 6a272dae3..13de497ae 100644 --- a/test/arithmetic.jl +++ b/test/arithmetic.jl @@ -55,7 +55,7 @@ spring_zdt = ZonedDateTime(spring, warsaw) # Arithmetic with a StepRange should always work even when the start/stop lands on # ambiguous or non-existent DateTimes. -@testset "StepRange{ZonedDateTime}" begin +@testset "StepRange{<:ZonedDateTime}" begin @testset "time-period" begin dt = DateTime(2015, 6, 1) From 68a4fb49108a303faec02e4a4366047a00c574e0 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 13 Apr 2021 19:09:07 +0100 Subject: [PATCH 10/10] require ShortStrings 0.3.7 for AbstractString handling --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5cd54a2d0..715d978b9 100644 --- a/Project.toml +++ b/Project.toml @@ -18,7 +18,7 @@ Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" EzXML = "0.9.1, 1" Mocking = "0.7" RecipesBase = "0.7, 0.8, 1" -ShortStrings = "0.3.6" +ShortStrings = "0.3.7" julia = "1" [extras]