Skip to content

Commit eb7bfc7

Browse files
committed
2 parents 1719a8c + a846172 commit eb7bfc7

File tree

3 files changed

+34
-16
lines changed

3 files changed

+34
-16
lines changed

.travis.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,8 @@ git:
2626
# - if [ $TRAVIS_OS_NAME = osx ]; then brew install gcc; fi
2727

2828
## uncomment the following lines to override the default test script
29-
#script:
30-
# - julia -e 'Pkg.clone(pwd()); Pkg.build("Strs"); Pkg.test("Strs"; coverage=true)'
31-
- julia -e 'Pkg.clone("https://github.com/JuliaString/StrTables.jl.git"); Pkg.clone("https://github.com/JuliaString/LaTeX_Entities.jl.git"); Pkg.clone("https://github.com/JuliaString/Emoji_Entities.jl.git"); Pkg.clone("https://github.com/JuliaString/HTML_Entities.jl.git"); Pkg.clone("https://github.com/JuliaString/Unicode_Entities.jl.git"); Pkg.clone("https://github.com/JuliaString/Format.jl.git"); Pkg.clone(pwd()); Pkg.add("LightXML"); Pkg.add("JSON"); Pkg.build("LaTeX_Entities"); Pkg.build("Emoji_Entities"); Pkg.build("HTML_Entities"); Pkg.build("Unicode_Entities"); Pkg.test("Strs"; coverage=true)'
29+
script:
30+
- julia -e 'Pkg.clone(pwd()); Pkg.test("Strs"; coverage=true)'
3231
after_success:
3332
# push coverage results to Coveralls
3433
- julia -e 'cd(Pkg.dir("Strs")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'

src/latin.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ convert(::Type{<:Str{_LatinCSE}}, s::ASCIISubStr) = _cpyconvert(LatinCSE, s)
6969
convert(::Type{<:Str{_LatinCSE}}, s::_LatinSubStr) = Str(_LatinCSE, _copysub(s))
7070

7171
# Assumes that the input has already been checked for validity
72+
73+
# These should be sped up to process a chunk at a time when the chunk has no bytes > 0x7f
7274
function _utf8_to_latin(pnt::Ptr{UInt8}, len)
7375
buf, out = _allocate(UInt8, len)
7476
fin = out + len

src/support.jl

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -645,19 +645,31 @@ end
645645

646646
count_chars(T, dat, len) = count_chars(T, codeunit(T), dat, 1, len)
647647

648-
"""
649-
Calculate the total number of bytes > 0x7f
650-
"""
651-
function count_latin(len, pnt::Ptr{UInt8})
652-
# Todo: optimize this to work on chunks when pnt is aligned
648+
@inline function _count_mask_al(pnt, siz, msk, v)
653649
cnt = 0
654-
fin = pnt + len
655-
while pnt < fin
656-
cnt += (get_codeunit(pnt) > 0x7f)
657-
pnt += 1
650+
fin = pnt + siz
651+
while (pnt += CHUNKSZ) < fin
652+
cnt += count_ones(v & msk)
653+
v = unsafe_load(pnt)
654+
end
655+
cnt + count_ones((siz & CHUNKMSK == 0 ? v : (v & _mask_bytes(siz))) & msk)
656+
end
657+
@inline _count_mask_al(pnt, siz, msk) = _count_mask_al(pnt, siz, msk, unsafe_load(pnt))
658+
659+
@inline function _count_mask_ul(beg, siz, msk)
660+
align = reinterpret(UInt, beg)
661+
pnt = reinterpret(Ptr{UInt64}, align & ~CHUNKMSK)
662+
v = unsafe_load(pnt)
663+
if (align &= CHUNKMSK) != 0
664+
v &= ~_mask_bytes(align)
665+
siz += align
658666
end
659-
cnt
667+
_count_mask_al(pnt, siz, msk, v)
660668
end
669+
"""
670+
Calculate the total number of bytes > 0x7f
671+
"""
672+
count_latin(len, pnt::Ptr{UInt8}) = _count_mask_ul(pnt, len, hi_mask)
661673

662674
"""
663675
Validates and calculates the number of characters in a UTF-8, UTF-16 or UTF-32 encoded vector/string
@@ -777,6 +789,8 @@ end
777789
first(str::Str, n::Integer) = str[1:min(end, nextind(str, 0, n))]
778790
last(str::Str, n::Integer) = str[max(1, prevind(str, ncodeunits(str)+1, n)):end]
779791

792+
const HAS_WMEMCPY = !(@static V6_COMPAT ? is_windows() : Sys.iswindows())
793+
780794
const Chrs = @static V6_COMPAT ? Union{Char,AbstractChar} : Chr
781795

782796
function repeat(ch::CP, cnt::Integer) where {CP <: Chrs}
@@ -867,7 +881,9 @@ _memcmp(a::SubString{<:Str{<:Quad_CSEs}}, b::SubString{<:Str{Quad_CSEs}}, siz) =
867881
_memcpy(dst::Ptr{UInt8}, src::Ptr, siz) =
868882
ccall(:memcpy, Ptr{UInt8}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt), dst, src, siz)
869883
_memcpy(a::Ptr{WidChr}, b::Ptr{WidChr}, len) =
870-
ccall(:wmemcpy, Ptr{WidChr}, (Ptr{WidChr}, Ptr{WidChr}, UInt), a, b, len)
884+
(HAS_WMEMCPY
885+
? ccall(:wmemcpy, Ptr{WidChr}, (Ptr{WidChr}, Ptr{WidChr}, UInt), a, b, len)
886+
: ccall(:memcpy, Ptr{WidChr}, (Ptr{WidChr}, Ptr{WidChr}, UInt), a, b, bytoff(WidChr, len)))
871887
_memcpy(a::Ptr{OthChr}, b::Ptr{OthChr}, len) =
872888
ccall(:memcpy, Ptr{OthChr}, (Ptr{OthChr}, Ptr{OthChr}, UInt), a, b, bytoff(OthChr, len))
873889

@@ -936,11 +952,12 @@ function repeat(str::T, cnt::Integer) where {C<:CSE,T<:Str{C}}
936952
CU = codeunit(T)
937953
@preserve str begin
938954
len = ncodeunits(str)
939-
totlen = len * cnt
940-
buf, out = _allocate(CU, totlen)
941955
if len == 1 # common case: repeating a single codeunit string
956+
buf, out = _allocate(CU, cnt)
942957
_memset(out, get_codeunit(pointer(str)), cnt)
943958
else
959+
totlen = len * cnt
960+
buf, out = _allocate(CU, totlen)
944961
pnt = pointer(str)
945962
fin = bytoff(out, totlen)
946963
siz = bytoff(CU, len)

0 commit comments

Comments
 (0)