From 4e53204ef12e363030789e8bf1745f132e62b56b Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Wed, 18 Nov 2020 10:33:05 -0500
Subject: [PATCH 1/5] Optimization for some string operations, such as isascii

---
 .drone.yml    |  27 --------------
 Project.toml  |  10 ++---
 src/latin.jl  |   2 +-
 src/search.jl |   4 ++
 src/types.jl  |  28 +++++++-------
 src/utf8.jl   | 101 ++++++++++++++++++++++++++++++++++++++++----------
 6 files changed, 106 insertions(+), 66 deletions(-)

diff --git a/.drone.yml b/.drone.yml
index 6ac7118..2607e25 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -11,30 +11,3 @@ steps:
   image: julia:1.5
   commands:
   - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
----
-kind: pipeline
-name: linux - arm64 - Julia 1.0
-
-platform:
-  os: linux
-  arch: arm64
-
-steps:
-- name: build
-  image: julia:1.0
-  commands:
-  - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
-
----
-kind: pipeline
-name: linux - arm - Julia 1.0
-
-platform:
-  os: linux
-  arch: arm
-
-steps:
-- name: build
-  image: julia:1.0
-  commands:
-  - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
diff --git a/Project.toml b/Project.toml
index d17deff..2e93cb3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,7 +4,7 @@ authors  = ["ScottPJones <scottjones@alum.mit.edu>"]
 keywords = ["Strings"]
 license  = "MIT"
 uuid     = "e79e7a6a-7bb1-5a4d-9d64-da657b06f53a"
-version = "1.0.4"
+version = "1.0.5"
 
 [deps]
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
@@ -24,9 +24,9 @@ Random  = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 test = ["Test", "Random"]
 
 [compat]
-julia = "^1.0.0"
-ModuleInterfaceTools = "^1.0.0"
+julia = "1"
+ModuleInterfaceTools = "1"
 MurmurHash3 = "^1.0.3"
-StrAPI = "^1.0.0"
+StrAPI = "1"
 ChrBase = "^1.0.1"
-CharSetEncodings = "^1.0.0"
+CharSetEncodings = "1"
diff --git a/src/latin.jl b/src/latin.jl
index 2b4a512..14c59bd 100644
--- a/src/latin.jl
+++ b/src/latin.jl
@@ -167,7 +167,7 @@ end
 
 function convert(::Type{<:Str{C}}, vec::Vector{CU}) where {C<:Latin_CSEs,CU<:CodeUnitTypes}
     # handle zero length string quickly
-    (len = length(vec)) == 0 && return _empty_str(C)
+    (len = length(vec)) == 0 && return empty_str(C)
     @preserve vec begin
         pnt = pointer(vec)
         # get number of bytes to allocate
diff --git a/src/search.jl b/src/search.jl
index a5ac2e8..1db425e 100644
--- a/src/search.jl
+++ b/src/search.jl
@@ -121,6 +121,10 @@ found(::Type{<:AbstractString}, v) = v != 0
 find_result(::Type{<:AbstractString}, v) = v
 
 nothing_sentinel(i) = first(i) == 0 ? nothing : i
+Base.findfirst(a::AbstractChar, b::Str)   = nothing_sentinel(find(First, a, b))
+Base.findlast(a::AbstractChar, b::Str)    = nothing_sentinel(find(Last, a, b))
+Base.findnext(a::AbstractChar, b::Str, i) = nothing_sentinel(find(Fwd, a, b, i))
+Base.findprev(a::AbstractChar, b::Str, i) = nothing_sentinel(find(Rev, a, b, i))
 Base.findfirst(a, b::Str)   = nothing_sentinel(find(First, a, b))
 Base.findlast(a, b::Str)    = nothing_sentinel(find(Last, a, b))
 Base.findnext(a, b::Str, i) = nothing_sentinel(find(Fwd, a, b, i))
diff --git a/src/types.jl b/src/types.jl
index 3a36f99..1693771 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -30,18 +30,6 @@ _mskdn32(v, m, s) = _msk32(v, m) >>> s
 (::Type{Str})(::Type{C}, v::String) where {C<:CSE} = Str(C, v, nothing, nothing, nothing)
 (::Type{Str})(::Type{C}, v::Str) where {C<:CSE} = Str(C, v.data, nothing, nothing, nothing)
 
-# Handle change from endof -> lastindex
-@static if !isdefined(Base, :lastindex)
-    lastindex(str::AbstractString) = Base.endof(str)
-    lastindex(arr::AbstractArray) = Base.endof(arr)
-    Base.endof(str::Str) = lastindex(str)
-end
-@static if !isdefined(Base, :firstindex)
-    firstindex(str::AbstractString) = 1
-    # AbstractVector might be an OffsetArray
-    firstindex(str::Vector) = 1
-end
-
 # Definition of built-in Str types
 
 const empty_string = ""
@@ -131,8 +119,18 @@ pointer(s::Str{<:Quad_CSEs}) = reinterpret(Ptr{UInt32}, pointer(s.data))
 const CHUNKSZ = sizeof(UInt) # used for fast processing of strings
 const CHUNKMSK = (CHUNKSZ-1)%UInt
 
-_pntchunk(s::Union{String,Vector{UInt8}}) = reinterpret(Ptr{UInt}, pointer(s))
-_pntchunk(s::Str) = reinterpret(Ptr{UInt}, pointer(s.data))
+_pntchunk(p::Union{UInt,Ptr}) = reinterpret(Ptr{UInt}, p)
+_pntchunk(s::Union{String,Vector{UInt8}}) = _pntchunk(pointer(s))
+_pntchunk(s::Str) = _pntchunk(pointer(s.data))
+
+# Type and mask for even faster string handling
+const BigChunk = UInt === UInt32 ? UInt64 : UInt128
+const BIGCHUNKSZ = sizeof(BigChunk)
+const BIGCHUNKMSK = (BIGCHUNKSZ-1)%UInt
+
+_pntbigchunk(p::Union{UInt,Ptr}) = reinterpret(Ptr{BigChunk}, p)
+_pntbigchunk(s::Union{String,Vector{UInt8}}) = _pntbigchunk(pointer(s))
+_pntbigchunk(s::Str) = _pntbigchunk(pointer(s.data))
 
 """Length of string in codeunits"""
 ncodeunits(s::Str)              = sizeof(s)
@@ -144,6 +142,8 @@ ncodeunits(s::Str{<:Quad_CSEs}) = sizeof(s) >>> 2
 
 @inline _mask_bytes(n) = ((1%UInt) << ((n & CHUNKMSK) << 3)) - 0x1
 
+@inline _big_mask_bytes(n) = ((1%BigChunk) << ((n & BIGCHUNKMSK) << 3)) - 0x1
+
 # Support for SubString of Str
 
 Base.SubString(str::Str{C}) where {C<:SubSet_CSEs} =
diff --git a/src/utf8.jl b/src/utf8.jl
index ea7dcca..3ac7eed 100644
--- a/src/utf8.jl
+++ b/src/utf8.jl
@@ -1,7 +1,7 @@
 #=
 UTF8Str type
 
-Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones,
+Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones,
 and other contributors to the Julia language
 
 Licensed under MIT License, see LICENSE.md
@@ -89,10 +89,19 @@ xor 80 then << 1 then |
 11 -> 01 -> 1
 =#
 
+@inline _widen_mask(msk::UInt) = ((msk%BigChunk) << (8*sizeof(UInt))) | msk
+
 const hi_mask = CHUNKSZ == 4 ? 0x8080_8080 : 0x8080_8080_8080_8080
+const big_hi_mask = _widen_mask(hi_mask)
+
+@inline _count_cont(v, msk) = (v = xor(v, msk); count_ones(xor(((v << 1) | v), msk) & msk))
+@inline msk_lead(v, msk) = (v = xor(v, msk); xor(xor(((v << 1) | v), msk) & msk, msk))
+
+@inline _count_cont(v::UInt) = _count_cont(v, hi_mask)
+@inline msk_lead(v::UInt) = msk_lead(v, hi_mask)
 
-@inline _count_cont(v) = (v = xor(v, hi_mask); count_ones(xor(((v << 1) | v), hi_mask) & hi_mask))
-@inline msk_lead(v) = (v = xor(v, hi_mask); xor(xor(((v << 1) | v), hi_mask) & hi_mask, hi_mask))
+@inline _count_cont(v::BigChunk) = _count_cont(v, big_hi_mask)
+@inline _msk_lead(v::BigChunk) = _msk_lead(v, big_hi_mask)
 
 @inline function _align_len_utf8(pnt, cnt, v)
     len = 0
@@ -141,29 +150,83 @@ end
     _check_mask_al(pnt, cnt, msk, v)
 end
 
+@inline _mask_bytes(v::T, cnt) where {T} =
+    ifelse((cnt & (sizeof(T)-1)%UInt) == 0,
+           v, T(v & (one(T) << ((cnt & (sizeof(T)-1)%UInt) << 3)) - 1))
+
+@inline chk_chunk(ptr, msk::T, cnt) where {T} =
+    iszero(_mask_bytes(unsafe_load(reinterpret(Ptr{T}, ptr)) & msk, cnt))
+
+@inline function _check_block_al(ptr, cnt, msk)
+    # First check very frequent cases of short strings
+    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
+    # taking advantage of the knowledge of how String types are stored in Julia,
+    # i.e. UInt length, immediate followed by the string data, aligned on sizeof(UInt)*2
+    cnt <= CHUNKSZ && return chk_chunk(ptr, msk, cnt)
+    bigmsk = _widen_mask(msk)
+    cnt <= BIGCHUNKSZ && return chk_chunk(ptr, bigmsk, cnt)
+    (unsafe_load(_pntchunk(ptr)) & msk) == 0 || return false
+    cnt -= CHUNKSZ
+    cnt <= BIGCHUNKSZ && return chk_chunk(ptr, bigmsk, cnt)
+    pnt = _pntbigchunk(ptr + CHUNKSZ)
+    fin = _pntbigchunk(ptr + CHUNKSZ + cnt)
+    v = unsafe_load(pnt) & bigmsk
+    while (pnt += BIGCHUNKSZ) < fin
+        v == 0 || return false
+        v = unsafe_load(pnt) & bigmsk
+    end
+    iszero(_mask_bytes(v, cnt))
+end
+
+@inline function _check_block_ul(beg, cnt, msk)
+    align = reinterpret(UInt, beg)
+    pnt = _pntbigchunk(align & ~BIGCHUNKMSK)
+    v = unsafe_load(pnt)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
+        cnt += align
+    end
+    fin = _pntbigchunk(pnt + cnt)
+    bigmsk = _widen_mask(msk)
+    while (pnt += BIGCHUNKSZ) < fin
+        (v & bigmsk) == 0 || return false
+        v = unsafe_load(pnt)
+    end
+    ((cnt & BIGCHUNKMSK) == 0 ? v : (v & _big_mask_bytes(cnt))) & bigmsk == 0
+end
+
 _ascii_mask(::Type{UInt8})  = hi_mask
-_ascii_mask(::Type{UInt16}) = 0xff80_ff80_ff80_ff80
-_ascii_mask(::Type{UInt32}) = 0xffffff80_ffffff80
+@static if CHUNKSZ == 4 # 32-bit platforms: masks must fit in a UInt
+    _ascii_mask(::Type{UInt16}) = 0xff80_ff80
+    _ascii_mask(::Type{UInt32}) = 0xffffff80
 
-_latin_mask(::Type{UInt16}) = 0xff00_ff00_ff00_ff00
-_latin_mask(::Type{UInt32}) = 0xffffff00_ffffff00
+    _latin_mask(::Type{UInt16}) = 0xff00_ff00
+    _latin_mask(::Type{UInt32}) = 0xffffff00
 
-const _bmp_mask_32   = 0xffff0000_ffff0000
+    const _bmp_mask_32   = 0xffff0000
+else
+    _ascii_mask(::Type{UInt16}) = 0xff80_ff80_ff80_ff80
+    _ascii_mask(::Type{UInt32}) = 0xffffff80_ffffff80
 
-is_ascii(str::SubString{<:Str{C}}) where {C<:Union{UTF8CSE,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,
-                                                   Text2CSE,Text4CSE,UTF32CSE}} =
-    (cnt = sizeof(str)) == 0 ? true :
-    @preserve str _check_mask_ul(pointer(str), cnt, _ascii_mask(codeunit(C)))
+    _latin_mask(::Type{UInt16}) = 0xff00_ff00_ff00_ff00
+    _latin_mask(::Type{UInt32}) = 0xffffff00_ffffff00
+
+    const _bmp_mask_32   = 0xffff0000_ffff0000
+end
+
+const ASCII_Union = Union{UTF8CSE,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,Text2CSE,Text4CSE,UTF32CSE}
+
+is_ascii(str::SubString{<:Str{C}}) where {C<:ASCII_Union} =
+    (cnt = sizeof(str)) == 0 ||
+        (@preserve str _check_block_ul(pointer(str), cnt, _ascii_mask(codeunit(C))))
 
 is_ascii(vec::Vector{T}) where {T<:CodeUnitTypes} =
-    (cnt = sizeof(vec)) == 0 ? true :
-    @preserve str _check_mask_ul(pointer(vec), cnt, _ascii_mask(T))
+    (cnt = sizeof(vec)) == 0 ||
+         (@preserve vec _check_block_ul(pointer(vec), cnt, _ascii_mask(T)))
 
-is_ascii(str::Str{C}) where {C<:Union{UTF8_CSEs,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,
-                                      Text2CSE,Text4CSE,UTF32CSE}} =
-    (cnt = sizeof(str)) == 0 ? true :
-    @preserve str _check_mask_al(reinterpret(Ptr{UInt}, pointer(str)), cnt,
-                                 _ascii_mask(codeunit(C)))
+is_ascii(str::Str{C}) where {C<:ASCII_Union} =
+    (cnt = sizeof(str)) == 0 ||
+         (@preserve str _check_block_al(pointer(str), cnt, _ascii_mask(codeunit(C))))
 
 # Todo! Here you need to see that 0b11yyyyxx at least 1 y must be set,
 # which indicates a non-Latin1 character

From 275d95b0cde629c5f6db74190d487ebea6993e9a Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Wed, 25 Nov 2020 20:45:43 -0500
Subject: [PATCH 2/5] Further optimizations, for length, is_latin, is_bmp

---
 src/StrBase.jl |   7 +-
 src/types.jl   |   7 ++
 src/utf16.jl   |   8 +--
 src/utf8.jl    | 178 +++++++++++++++++++++++++------------------------
 4 files changed, 105 insertions(+), 95 deletions(-)

diff --git a/src/StrBase.jl b/src/StrBase.jl
index 0ce1c45..84c3343 100644
--- a/src/StrBase.jl
+++ b/src/StrBase.jl
@@ -23,14 +23,15 @@ using ModuleInterfaceTools
 
 @api develop! check_string, unsafe_check_string, fast_check_string, skipascii, skipbmp,
               countmask, count_chars, _count_mask_al, _count_mask_ul, count_latin,
-              _copysub, _cvtsize, _repeat, empty_str, _data, _pntchunk, _str,
+              _copysub, _cvtsize, _repeat, empty_str, _data, _mask_bytes,
+              _pntchunk, _pntbigchunk, _str,
               ValidatedStyle, MutableStyle, EqualsStyle, CanContain
 
 @api develop LineCounts, CharTypes, CharStat, maxbit, calcstats, check_continuation,
              UTF_LONG, UTF_LATIN1, UTF_UNICODE2, UTF_UNICODE3, UTF_UNICODE4, UTF_SURROGATE,
-             UTF_INVALID, CHUNKSZ, CHUNKMSK,
+             UTF_INVALID, CHUNKSZ, CHUNKMSK, BIGCHUNKSZ, BIGCHUNKMSK,
              _memcmp, _memcpy, _memset, _fwd_memchr, _rev_memchr,
-             empty_string, _calcpnt, _mask_bytes, _allocate,
+             BigChunk, empty_string, _calcpnt, _allocate, SingleCU, MultiCU,
              MS_UTF8, MS_UTF16, MS_UTF32, MS_SubUTF32, MS_Latin, MS_ByteStr, MS_RawUTF8,
              _wrap_substr, _empty_sub,
              AccessType, UInt16_U, UInt32_U, UInt16_S, UInt32_S, UInt16_US, UInt32_US,
diff --git a/src/types.jl b/src/types.jl
index 1693771..85efda4 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -144,6 +144,13 @@ ncodeunits(s::Str{<:Quad_CSEs}) = sizeof(s) >>> 2
 
 @inline _big_mask_bytes(n) = ((1%BigChunk) << ((n & BIGCHUNKMSK) << 3)) - 0x1
 
+@inline function _mask_bytes(v::T, cnt) where {T}
+    shft = (cnt & (sizeof(T) - 1))%UInt << 3
+    ifelse(shft == 0, v, v & ~(typemax(T) << shft))
+end
+
+@inline _widen_mask(msk::UInt) = ((msk%BigChunk) << (8*sizeof(UInt))) | msk
+
 # Support for SubString of Str
 
 Base.SubString(str::Str{C}) where {C<:SubSet_CSEs} =
diff --git a/src/utf16.jl b/src/utf16.jl
index bfc3b6d..4f202b7 100644
--- a/src/utf16.jl
+++ b/src/utf16.jl
@@ -22,7 +22,7 @@ const _hi_bit_16  = CHUNKSZ == 4 ? 0x8000_8000 : 0x8000_8000_8000_8000
         len += count_ones(v)
         v = _get_lead(pnt)
     end
-    len + count_ones((cnt & CHUNKMSK) == 0 ? v : (v & _mask_bytes(cnt)))
+    len + count_ones(_mask_bytes(v, cnt))
 end
 
 _length_al(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int) =
@@ -100,7 +100,7 @@ end
         v == 0 || return false
         v = _get_masked(pnt)
     end
-    ((cnt & CHUNKMSK) == 0 ? v : (v & _mask_bytes(cnt))) == 0
+    _mask_bytes(v, cnt) == 0
 end
 @inline _check_bmp_utf16_al(pnt, cnt) = _check_bmp_utf16_al(pnt, cnt, unsafe_load(pnt))
 
@@ -116,11 +116,11 @@ end
 end
 
 is_bmp(str::Str{UTF16CSE}) =
-    (cnt = sizeof(str)) == 0 ? true :
+    (cnt = sizeof(str)) == 0 ||
     @preserve str _check_bmp_utf16_al(reinterpret(Ptr{UInt}, pointer(str)), cnt)
 
 is_bmp(str::SubString{<:Str{UTF16CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_bmp_utf16_ul(pointer(str), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf16_ul(pointer(str), cnt)
 
 is_bmp(str::MaybeSub{<:Str{<:UCS2_CSEs}}) = true
 
diff --git a/src/utf8.jl b/src/utf8.jl
index 3ac7eed..f997bef 100644
--- a/src/utf8.jl
+++ b/src/utf8.jl
@@ -89,96 +89,88 @@ xor 80 then << 1 then |
 11 -> 01 -> 1
 =#
 
-@inline _widen_mask(msk::UInt) = ((msk%BigChunk) << (8*sizeof(UInt))) | msk
-
 const hi_mask = CHUNKSZ == 4 ? 0x8080_8080 : 0x8080_8080_8080_8080
 const big_hi_mask = _widen_mask(hi_mask)
 
-@inline _count_cont(v, msk) = (v = xor(v, msk); count_ones(xor(((v << 1) | v), msk) & msk))
+@inline get_high_mask(::UInt) = hi_mask
+@inline get_high_mask(::BigChunk) = big_hi_mask
+
 @inline msk_lead(v, msk) = (v = xor(v, msk); xor(xor(((v << 1) | v), msk) & msk, msk))
 
-@inline _count_cont(v::UInt) = _count_cont(v, hi_mask)
-@inline msk_lead(v::UInt) = msk_lead(v, hi_mask)
+@inline msk_lead(v) = msk_lead(v, get_high_mask(v))
 
-@inline _count_cont(v::BigChunk) = _count_cont(v, big_hi_mask)
-@inline _msk_lead(v::BigChunk) = _msk_lead(v, big_hi_mask)
+@inline get_lead(T, ptr) = msk_lead(unsafe_load(reinterpret(Ptr{T}, ptr)))
 
-@inline function _align_len_utf8(pnt, cnt, v)
-    len = 0
-    fin = pnt + cnt
-    v = msk_lead(v)
-    while (pnt += CHUNKSZ) < fin
+@inline count_masked(v, cnt) = count_ones(_mask_bytes(v, cnt))
+
+function _length_al(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int)
+    # First check very frequent cases of short strings
+    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
+    # taking advantage of the knowledge of how String types are stored in Julia,
+    # i.e. UInt length, immediate followed by the string data, aligned on sizeof(UInt)*2
+    if cnt <= BIGCHUNKSZ
+        return (cnt <= CHUNKSZ
+                ? count_masked(get_lead(UInt, beg), cnt)
+                : count_masked(get_lead(BigChunk, beg), cnt))
+    end
+    len = count_ones(get_lead(UInt, beg))
+    cnt -= CHUNKSZ
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = get_lead(BigChunk, pnt)
+    cnt <= BIGCHUNKSZ && return len + count_masked(v, cnt)
+    fin = _pntbigchunk(beg + CHUNKSZ + cnt)
+    while (pnt += BIGCHUNKSZ) < fin
         len += count_ones(v)
-        v = msk_lead(unsafe_load(pnt))
+        v = get_lead(BigChunk, pnt)
     end
-    len + count_ones(cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt)))
+    len + count_masked(v, cnt)
 end
 
-_length_al(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int) =
-    (pnt = reinterpret(Ptr{UInt}, beg); _align_len_utf8(pnt, cnt, unsafe_load(pnt)))
-
-function _length(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int)
+function _length_ul(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        msk = _mask_bytes(align)
-        v = (v & ~msk) | (msk & hi_mask)
+    if (align &= BIGCHUNKMSK) != 0
+        msk = _big_mask_bytes(align)
+        v = (v & ~msk) | (msk & big_hi_mask)
         cnt += align
     end
-    _align_len_utf8(pnt, cnt, v)
-end
-
-@inline function _check_mask_al(pnt, cnt, msk, v)
+    len = 0
     fin = pnt + cnt
-    while (pnt += CHUNKSZ) < fin
-        (v & msk) == 0 || return false
-        v = unsafe_load(pnt)
-    end
-    (cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt))) & msk == 0
-end
-@inline _check_mask_al(pnt, cnt, msk) = _check_mask_al(pnt, cnt, msk, unsafe_load(pnt))
-
-@inline function _check_mask_ul(beg, cnt, msk)
-    align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
-    v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
-        cnt += align
+    v = msk_lead(v)
+    while (pnt += BIGCHUNKSZ) < fin
+        len += count_ones(v)
+        v = msk_lead(unsafe_load(pnt))
     end
-    _check_mask_al(pnt, cnt, msk, v)
+    len + count_masked(v, cnt)
 end
 
-@inline _mask_bytes(v::T, cnt) where {T} =
-    ifelse((cnt & (sizeof(T)-1)%UInt) == 0,
-           v, T(v & (one(T) << ((cnt & (sizeof(T)-1)%UInt) << 3)) - 1))
-
-@inline chk_chunk(ptr, msk::T, cnt) where {T} =
-    iszero(_mask_bytes(unsafe_load(reinterpret(Ptr{T}, ptr)) & msk, cnt))
+@inline get_chunk(ptr, msk::T, cnt) where {T} =
+    _mask_bytes(unsafe_load(reinterpret(Ptr{T}, ptr)) & msk, cnt)
 
-@inline function _check_block_al(ptr, cnt, msk)
+@inline function _check_mask_al(ptr, cnt, msk)
     # First check very frequent cases of short strings
     # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
     # taking advantage of the knowledge of how String types are stored in Julia,
     # i.e. UInt length, immediate followed by the string data, aligned on sizeof(UInt)*2
-    cnt <= CHUNKSZ && return chk_chunk(ptr, msk, cnt)
+    cnt <= CHUNKSZ && return get_chunk(ptr, msk, cnt) == 0
     bigmsk = _widen_mask(msk)
-    cnt <= BIGCHUNKSZ && return chk_chunk(ptr, bigmsk, cnt)
+    cnt <= BIGCHUNKSZ && return get_chunk(ptr, bigmsk, cnt) == 0
     (unsafe_load(_pntchunk(ptr)) & msk) == 0 || return false
     cnt -= CHUNKSZ
-    cnt <= BIGCHUNKSZ && return chk_chunk(ptr, bigmsk, cnt)
+    cnt <= BIGCHUNKSZ && return get_chunk(ptr + CHUNKSZ, bigmsk, cnt) == 0
     pnt = _pntbigchunk(ptr + CHUNKSZ)
     fin = _pntbigchunk(ptr + CHUNKSZ + cnt)
-    v = unsafe_load(pnt) & bigmsk
+    v = unsafe_load(pnt)
     while (pnt += BIGCHUNKSZ) < fin
-        v == 0 || return false
-        v = unsafe_load(pnt) & bigmsk
+        (v & bigmsk) == 0 || return false
+        v = unsafe_load(pnt)
     end
-    iszero(_mask_bytes(v, cnt))
+    _mask_bytes(v & bigmsk, cnt) == 0
 end
 
-@inline function _check_block_ul(beg, cnt, msk)
+@inline function _check_mask_ul(beg, cnt, msk)
+    bigmsk = _widen_mask(msk)
     align = reinterpret(UInt, beg)
     pnt = _pntbigchunk(align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
@@ -186,13 +178,12 @@ end
         v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    fin = _pntbigchunk(pnt + cnt)
-    bigmsk = _widen_mask(msk)
+    fin = pnt + cnt
     while (pnt += BIGCHUNKSZ) < fin
         (v & bigmsk) == 0 || return false
         v = unsafe_load(pnt)
     end
-    ((cnt & BIGCHUNKMSK) == 0 ? v : (v & _big_mask_bytes(cnt))) & bigmsk == 0
+    _mask_bytes(v & bigmsk, cnt) == 0
 end
 
 _ascii_mask(::Type{UInt8})  = hi_mask
@@ -226,88 +217,99 @@ is_ascii(vec::Vector{T}) where {T<:CodeUnitTypes} =
 
 is_ascii(str::Str{C}) where {C<:ASCII_Union} =
     (cnt = sizeof(str)) == 0 ||
-         (@preserve str _check_block_al(pointer(str), cnt, _ascii_mask(codeunit(C))))
+         (@preserve str _check_mask_al(pointer(str), cnt, _ascii_mask(codeunit(C))))
 
 # Todo! Here you need to see that 0b11yyyyxx at least 1 y must be set,
 # which indicates a non-Latin1 character
-_all_latin(val) = ((val & (val<<1) & (val<<2 | (val<<3) | (val<<4) | (val<<5))) & hi_mask) == 0
+_all_latin(val) =
+    ((val & (val<<1) & (val<<2 | (val<<3) | (val<<4) | (val<<5))) & get_high_mask(val)) == 0
 
-@inline function _check_latin_utf8_al(pnt, cnt, v)
+@inline function _check_latin_utf8_al(beg, cnt)
+    pnt = reinterpret(Ptr{UInt}, beg)
     fin = pnt + cnt
+    v = unsafe_load(pnt)
     while (pnt += CHUNKSZ) < fin
         _all_latin(v) || return false
         v = unsafe_load(pnt)
     end
-    _all_latin(cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt)))
+    _all_latin(_mask_bytes(v, cnt))
 end
-@inline _check_latin_utf8_al(pnt, cnt) = _check_latin_utf8_al(pnt, cnt, unsafe_load(pnt))
 
 @inline function _check_latin_utf8_ul(beg, cnt)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    _check_latin_utf8_al(pnt, cnt, v)
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        _all_latin(v) || return false
+        v = unsafe_load(pnt)
+    end
+    _all_latin(_mask_bytes(v, cnt))
 end
 
 is_latin(str::Str{UTF8CSE}) =
-    (siz = sizeof(str)) == 0 ? true :
-    @preserve str _check_latin_utf8_al(reinterpret(Ptr{UInt}, pointer(str)), siz)
+    (siz = sizeof(str)) == 0 || @preserve str _check_latin_utf8_al(pointer(str), siz)
 
 is_latin(str::SubString{<:Str{UTF8CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_latin_utf8_ul(pointer(str), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_latin_utf8_ul(pointer(str), cnt)
 
 is_latin(vec::Vector{T}) where {T<:Union{UInt16,UInt32}} =
-    (cnt = sizeof(vec)) == 0 ? true :
+    (cnt = sizeof(vec)) == 0 ||
     @preserve vec _check_mask_ul(pointer(vec), cnt, _latin_mask(T))
 
 is_latin(str::SubString{<:Str{C}}) where {C<:Union{Word_CSEs,Quad_CSEs}} =
-    (cnt = sizeof(str)) == 0 ? true :
+    (cnt = sizeof(str)) == 0 ||
     @preserve str _check_mask_ul(pointer(str), cnt, _latin_mask(codeunit(C)))
 
 is_latin(str::Str{C}) where {C<:Union{Word_CSEs,Quad_CSEs}} =
-    (cnt = sizeof(str)) == 0 ? true :
+    (cnt = sizeof(str)) == 0 ||
     @preserve str _check_mask_al(pointer(str), cnt, _latin_mask(codeunit(C)))
 
 # All 4 top bits must be 1 (i.e. 0xfx) for this to be non-BMP
-_all_bmp(val) = ((val | (val<<1) | (val<<2) | (val<<3)) & hi_mask) == 0
+_all_bmp(val) = ((val | (val<<1) | (val<<2) | (val<<3)) & get_high_mask(val)) == 0
 
-@inline function _check_bmp_utf8_al(pnt, cnt, v)
+@inline function _check_bmp_utf8_al(beg, cnt)
+    pnt = reinterpret(Ptr{UInt}, beg)
     fin = pnt + cnt
+    v = unsafe_load(pnt)
     while (pnt += CHUNKSZ) < fin
         _all_bmp(v) || return false
         v = unsafe_load(pnt)
     end
-    _all_bmp(cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt)))
+    _all_bmp(_mask_bytes(v, cnt))
 end
-@inline _check_bmp_utf8_al(pnt, cnt) = _check_bmp_utf8_al(pnt, cnt, unsafe_load(pnt))
 
 @inline function _check_bmp_utf8_ul(beg, cnt)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    _check_bmp_utf8_al(pnt, cnt, v)
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        _all_bmp(v) || return false
+        v = unsafe_load(pnt)
+    end
+    _all_bmp(_mask_bytes(v, cnt))
 end
 
 is_bmp(str::Str{UTF8CSE}) =
-    (cnt = sizeof(str)) == 0 ? true :
-    @preserve str _check_bmp_utf8_al(reinterpret(Ptr{UInt}, pointer(str)), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf8_al(pointer(str), cnt)
 
 is_bmp(str::SubString{<:Str{UTF8CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_bmp_utf8_ul(pointer(str), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf8_ul(pointer(str), cnt)
 
 is_bmp(str::SubString{<:Str{<:Union{Text4CSE,UTF32CSE}}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_mask_ul(pointer(str), cnt, _bmp_mask_32)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_mask_ul(pointer(str), cnt, _bmp_mask_32)
 
 is_bmp(str::Str{<:Union{Text4CSE,UTF32CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_mask_al(pointer(str), cnt, _bmp_mask_32)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_mask_al(pointer(str), cnt, _bmp_mask_32)
 
 is_unicode(str::MS_UTF8) = true
 

From 98fedf923dc70cefec053829e25ead123f35aab9 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Thu, 26 Nov 2020 17:03:47 -0500
Subject: [PATCH 3/5] Optimizations of UTF-16 length, is_bmp

---
 Project.toml   |  2 +-
 src/compare.jl |  4 +--
 src/core.jl    |  8 ++---
 src/search.jl  |  8 ++---
 src/support.jl |  4 +--
 src/types.jl   |  2 +-
 src/utf16.jl   | 89 +++++++++++++++++++++++++++++++++++---------------
 src/utf8.jl    | 32 ++++++++++++------
 test/basic.jl  | 21 +++++-------
 9 files changed, 106 insertions(+), 64 deletions(-)

diff --git a/Project.toml b/Project.toml
index 2e93cb3..0af4980 100644
--- a/Project.toml
+++ b/Project.toml
@@ -27,6 +27,6 @@ test = ["Test", "Random"]
 julia = "1"
 ModuleInterfaceTools = "1"
 MurmurHash3 = "^1.0.3"
-StrAPI = "1"
+StrAPI = "1.1"
 ChrBase = "^1.0.1"
 CharSetEncodings = "1"
diff --git a/src/compare.jl b/src/compare.jl
index 7b60f06..a7ab04a 100644
--- a/src/compare.jl
+++ b/src/compare.jl
@@ -54,7 +54,7 @@ end
     while pnt < fin
         str_done(b, pos) && return 1
         c1, pnt = _nextcp(C, pnt)
-        ch, pos = str_next(b, pos)
+        ch, pos = iterate(b, pos)
         c2 = ch%UInt32
         c1 == c2 || return ifelse(c1 < c2, -1, 1)
     end
@@ -93,7 +93,7 @@ function _cpeq(a::MaybeSub{T}, b) where {C<:CSE, T<:Str{C}}
     while pnt < fin
         str_done(b, pos) && return false
         c1, pnt = _nextcp(C, pnt)
-        ch, pos = str_next(b, pos)
+        ch, pos = iterate(b, pos)
         c1 == codepoint(ch) || return false
     end
     true
diff --git a/src/core.jl b/src/core.jl
index 8c53c91..49321a9 100644
--- a/src/core.jl
+++ b/src/core.jl
@@ -2,7 +2,7 @@
 Core functions
 
 
-Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones, and others (see Julia contributors)
+Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones, and others (see Julia contributors)
 Licensed under MIT License, see LICENSE.md
 
 Inspired by / derived from code in Julia
@@ -33,7 +33,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
 
 # Use more generic length check
 @inline _length_check(str::SubString{<:Str{C}}, cnt) where {C<:CSE} =
-    _length(MultiCU(), C, pointer(str), cnt)
+    @preserve str _length_ul(MultiCU(), C, pointer(str), cnt)
 
 # Go directly to aligned length check
 @inline _length_check(str::Str{C}, cnt) where {C<:CSE} =
@@ -42,7 +42,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
 @inline _length(::MultiCU, str::MaybeSub{T}) where {T<:Str} =
     (cnt = ncodeunits(str); cnt < 2 ? Int(cnt > 0) : @preserve str _length_check(str, cnt))
 
-@inline _length(::SingleCU, ::Type{<:CSE}, ::Ptr{<:CodeUnitTypes}, cnt::Int) = cnt
+@inline _length_ul(::SingleCU, ::Type{<:CSE}, ::Ptr{<:CodeUnitTypes}, cnt::Int) = cnt
 
 @inline _length(::MultiCU, str::Str{RawUTF8CSE}) = length(str.data)
 @inline _length(::MultiCU, str::Str{RawUTF8CSE}, i::Int, j::Int) = length(str.data, i, j)
@@ -55,7 +55,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
         0 <= j <  lim || boundserr(str, j)
     end
     (cnt = j - i + 1) <= 0 ? 0 :
-        @preserve str _length(cs, cse(str), bytoff(pointer(str), i - 1), cnt)
+        @preserve str _length_ul(cs, cse(str), bytoff(pointer(str), i - 1), cnt)
 end
 
 @inline _thisind(::SingleCU, str, len, pnt, pos) = Int(pos)
diff --git a/src/search.jl b/src/search.jl
index 1db425e..16ca775 100644
--- a/src/search.jl
+++ b/src/search.jl
@@ -193,7 +193,7 @@ function find(::Type{D}, needle::AbstractString, str::AbstractString,
     @inbounds is_valid(str, pos) || index_error(str, pos)
     (tlen = ncodeunits(needle)) == 0 && return pos:pos-1
     (cmp = CanContain(str, needle)) === NoCompare() && return _not_found
-    @inbounds ch, nxt = str_next(needle, 1)
+    @inbounds ch, nxt = iterate(needle, 1)
     is_valid(eltype(str), ch) || return _not_found
     # Check if single character
     if nxt > tlen
@@ -209,7 +209,7 @@ function find(::Type{T}, needle::AbstractString, str::AbstractString) where {T<:
     pos = T === First ? 1 : thisind(str, slen)
     (tlen = ncodeunits(needle)) == 0 && return pos:(pos-1)
     (cmp = CanContain(str, needle)) === NoCompare() && return _not_found
-    @inbounds ch, nxt = str_next(needle, 1)
+    @inbounds ch, nxt = iterate(needle, 1)
     is_valid(eltype(str), ch) || return _not_found
     # Check if single character
     if nxt > tlen
@@ -302,8 +302,8 @@ end
 """Compare two strings, starting at nxtstr and nxtsub"""
 @inline function _cmp_str(str, strpos, endpos, sub, subpos, endsub)
     while strpos <= endpos
-        c, strnxt = str_next(str, strpos)
-        d, subpos = str_next(sub, subpos)
+        c, strnxt = iterate(str, strpos)
+        d, subpos = iterate(sub, subpos)
         c == d || break
         subpos > endsub && return strpos
         strpos = strnxt
diff --git a/src/support.jl b/src/support.jl
index 73d4d1a..48c5ddd 100644
--- a/src/support.jl
+++ b/src/support.jl
@@ -264,7 +264,7 @@ function unsafe_check_string(str::T;
     totalchar = latin1byte = num2byte = num3byte = num4byte = invalids = 0
     pos = 1
     @inbounds while !str_done(str, pos)
-        chr, nxt = str_next(str, pos)
+        chr, nxt = iterate(str, pos)
         ch = chr%UInt32
         totalchar += 1
         if ch > 0x7f
@@ -288,7 +288,7 @@ function unsafe_check_string(str::T;
                     break
                 end
                 # next character *must* be a trailing surrogate character
-                chr, nxt = str_next(str, nxt)
+                chr, nxt = iterate(str, nxt)
                 if !is_surrogate_trail(chr)
                     accept_invalids || strerror(StrErrors.NOT_TRAIL, pos, chr)
                     invalids += 1
diff --git a/src/types.jl b/src/types.jl
index 85efda4..2078f8b 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -145,7 +145,7 @@ ncodeunits(s::Str{<:Quad_CSEs}) = sizeof(s) >>> 2
 @inline _big_mask_bytes(n) = ((1%BigChunk) << ((n & BIGCHUNKMSK) << 3)) - 0x1
 
 @inline function _mask_bytes(v::T, cnt) where {T}
-    shft = (cnt & (sizeof(T) - 1))%UInt << 3
+    shft = ((cnt & (sizeof(T) - 1))%UInt) << 3
     ifelse(shft == 0, v, v & ~(typemax(T) << shft))
 end
 
diff --git a/src/utf16.jl b/src/utf16.jl
index 4f202b7..e3a46e6 100644
--- a/src/utf16.jl
+++ b/src/utf16.jl
@@ -10,34 +10,59 @@ Based in (small) part on code for UTF16String that used to be in Julia
 const _trail_mask = CHUNKSZ == 4 ? 0xdc00_dc00 : 0xdc00_dc00_dc00_dc00
 const _hi_bit_16  = CHUNKSZ == 4 ? 0x8000_8000 : 0x8000_8000_8000_8000
 
-@inline _mask_surr(v)  = xor((v | v<<1 | v<<2 | v<<3 | v<<4 | v<<5) & _hi_bit_16, _hi_bit_16)
-@inline _get_masked(v::UInt) = _mask_surr(xor(v, _trail_mask))
-@inline _get_masked(qpnt::Ptr{UInt}) = _get_masked(unsafe_load(qpnt))
-@inline _get_lead(qpnt) = xor(_get_masked(qpnt), _hi_bit_16)
+const _big_trail_mask = _widen_mask(_trail_mask)
+const _big_hi_bit_16  = _widen_mask(_big_hi_bit_16)
 
-@inline function _align_len_utf16(pnt, cnt, v)
-    len = 0
+@inline _mask_surr(v, msk)  = xor((v | v<<1 | v<<2 | v<<3 | v<<4 | v<<5) & msk, msk)
+
+@inline _get_masked(v::UInt) = _mask_surr(xor(v, _trail_mask))
+@inline _get_masked(v::BigChunk) = _mask_surr(xor(v, _big_trail_mask))
+@inline _get_masked(qpnt::Ptr) = _get_masked(unsafe_load(qpnt))
+
+@inline _get_lead(qpnt::Ptr{UInt}) = xor(_get_masked(qpnt), _hi_bit_16)
+@inline _get_lead(qpnt::Ptr{BigChunk}) = xor(_get_masked(qpnt), _big_hi_bit_16)
+
+@inline function _length_al(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
+    # First check very frequent cases of short strings
+    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
+    # taking advantage of the knowledge of how String types are stored in Julia,
+    # i.e. UInt length, immediate followed by the string data, aligned on sizeof(UInt)*2
+    cnt <<= 1
+    if cnt <= BIGCHUNKSZ
+        return (cnt <= CHUNKSZ
+                ? count_ones(_mask_bytes(_get_lead(_pntchunk(beg), cnt))
+                : count_ones(_mask_bytes(_get_lead(_pntbigchunk(beg), cnt))
+    end
+    len = count_ones(_get_lead(_pntchunk(beg)))
+    cnt -= CHUNKSZ
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = _get_lead(pnt)
+    cnt <= BIGCHUNKSZ && return len + count_ones(_mask_bytes(v, cnt))
     fin = pnt + cnt
-    while (pnt += CHUNKSZ) < fin
+    while (pnt += BIGCHUNKSZ) < fin
         len += count_ones(v)
         v = _get_lead(pnt)
     end
     len + count_ones(_mask_bytes(v, cnt))
 end
 
-_length_al(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int) =
-    (pnt = reinterpret(Ptr{UInt}, beg); _align_len_utf16(pnt, cnt<<1, _get_lead(pnt)))
-
-function _length(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
+function _length_ul(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
+    cnt <<= 1
     v = _get_lead(pnt)
-    if (align &= CHUNKMSK) != 0
-        msk = _mask_bytes(align)
-        v = (v & ~msk) | (msk & _trail_mask)
-        cnt += (align>>>1)
+    if (align &= BIGCHUNKMSK) != 0
+        msk = _big_mask_bytes(align)
+        v = (v & ~msk) | (msk & _big_trail_mask)
+        cnt += align
     end
-    _align_len_utf16(pnt, cnt<<1, v)
+    len = 0
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        len += count_ones(v)
+        v = _get_lead(pnt)
+    end
+    len + count_ones(_mask_bytes(v, cnt))
 end
 
 function _nextind(::MultiCU, str::MS_UTF16, pos::Int, nchar::Int)
@@ -93,31 +118,41 @@ function is_bmp(str::MS_UTF16)
     end
 end
 
-@inline function _check_bmp_utf16_al(pnt, cnt, v)
+@inline function _check_bmp_utf16_al(beg, cnt)
+    cnt <= CHUNKSZ && return _mask_bytes(_get_masked(_pntchunk(beg)), cnt) == 0
+    cnt <= BIGCHUNKSZ && return _mask_bytes(_get_masked(_pntbigchunk(beg)), cnt) == 0
+    _get_masked(_pntchunk(beg)) == 0 || return false
+    cnt -= CHUNKSZ
+    # The rest starts one chunk past beg; if it fits in one big chunk the loop is skipped
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = _get_masked(pnt)
     fin = pnt + cnt
-    v = _get_masked(v)
-    while (pnt += CHUNKSZ) < fin
+    while (pnt += BIGCHUNKSZ) < fin
         v == 0 || return false
         v = _get_masked(pnt)
     end
     _mask_bytes(v, cnt) == 0
 end
-@inline _check_bmp_utf16_al(pnt, cnt) = _check_bmp_utf16_al(pnt, cnt, unsafe_load(pnt))
 
 @inline function _check_bmp_utf16_ul(beg, cnt)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    _check_bmp_utf16_al(pnt, cnt, v)
+    v = _get_masked(v)
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        v == 0 || return false
+        v = _get_masked(pnt)
+    end
+    _mask_bytes(v, cnt) == 0
 end
 
 is_bmp(str::Str{UTF16CSE}) =
-    (cnt = sizeof(str)) == 0 ||
-    @preserve str _check_bmp_utf16_al(reinterpret(Ptr{UInt}, pointer(str)), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf16_al(pointer(str), cnt)
 
 is_bmp(str::SubString{<:Str{UTF16CSE}}) =
     (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf16_ul(pointer(str), cnt)
diff --git a/src/utf8.jl b/src/utf8.jl
index f997bef..bd41a4f 100644
--- a/src/utf8.jl
+++ b/src/utf8.jl
@@ -209,11 +209,11 @@ const ASCII_Union = Union{UTF8CSE,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,Text2CSE
 
 is_ascii(str::SubString{<:Str{C}}) where {C<:ASCII_Union} =
     (cnt = sizeof(str)) == 0 ||
-        (@preserve str _check_block_ul(pointer(str), cnt, _ascii_mask(codeunit(C))))
+        (@preserve str _check_mask_ul(pointer(str), cnt, _ascii_mask(codeunit(C))))
 
 is_ascii(vec::Vector{T}) where {T<:CodeUnitTypes} =
     (cnt = sizeof(vec)) == 0 ||
-         (@preserve str _check_block_ul(pointer(vec), cnt, _ascii_mask(T)))
+         (@preserve str _check_mask_ul(pointer(vec), cnt, _ascii_mask(T)))
 
 is_ascii(str::Str{C}) where {C<:ASCII_Union} =
     (cnt = sizeof(str)) == 0 ||
@@ -225,10 +225,16 @@ _all_latin(val) =
     ((val & (val<<1) & (val<<2 | (val<<3) | (val<<4) | (val<<5))) & get_high_mask(val)) == 0
 
 @inline function _check_latin_utf8_al(beg, cnt)
-    pnt = reinterpret(Ptr{UInt}, beg)
-    fin = pnt + cnt
+    cnt <= CHUNKSZ && return _all_latin(_mask_bytes(unsafe_load(_pntchunk(ptr)), cnt))
+    bigmsk = _widen_mask(msk)
+    cnt <= BIGCHUNKSZ && return _all_latin(_mask_bytes(unsafe_load(_pntbigchunk(ptr)), cnt))
+    _all_latin(unsafe_load(_pntchunk(ptr))) || return false
+    cnt -= CHUNKSZ
+    cnt <= BIGCHUNKSZ && return  _all_latin(_mask_bytes(unsafe_load(_pntbigchunk(ptr)), cnt))
+    pnt = _pntbigchunk(ptr + CHUNKSZ)
     v = unsafe_load(pnt)
-    while (pnt += CHUNKSZ) < fin
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
         _all_latin(v) || return false
         v = unsafe_load(pnt)
     end
@@ -273,10 +279,16 @@ is_latin(str::Str{C}) where {C<:Union{Word_CSEs,Quad_CSEs}} =
 _all_bmp(val) = ((val | (val<<1) | (val<<2) | (val<<3)) & get_high_mask(val)) == 0
 
 @inline function _check_bmp_utf8_al(beg, cnt)
-    pnt = reinterpret(Ptr{UInt}, beg)
-    fin = pnt + cnt
+    cnt <= CHUNKSZ && return _all_bmp(_mask_bytes(unsafe_load(_pntchunk(ptr)), cnt))
+    bigmsk = _widen_mask(msk)
+    cnt <= BIGCHUNKSZ && return _all_bmp(_mask_bytes(unsafe_load(_pntbigchunk(ptr)), cnt))
+    _all_bmp(unsafe_load(_pntchunk(ptr))) || return false
+    cnt -= CHUNKSZ
+    cnt <= BIGCHUNKSZ && return  _all_bmp(_mask_bytes(unsafe_load(_pntbigchunk(ptr)), cnt))
+    pnt = _pntbigchunk(ptr + CHUNKSZ)
+    fin = _pntbigchunk(ptr + CHUNKSZ + cnt)
     v = unsafe_load(pnt)
-    while (pnt += CHUNKSZ) < fin
+    while (pnt += BIGCHUNKSZ) < fin
         _all_bmp(v) || return false
         v = unsafe_load(pnt)
     end
@@ -452,9 +464,9 @@ _iterate(::MultiCU, ::Type{T}, str::SubString{<:Str{RawUTF8CSE}}, pos::Int) wher
 end
 
 _next(::MultiCU, ::Type{T}, str::Str{RawUTF8CSE}, pos::Int) where {T} =
-    str_next(str.data, pos)
+    iterate(str.data, pos)
 _next(::MultiCU, ::Type{T}, str::SubString{<:Str{RawUTF8CSE}}, pos::Int) where {T} =
-    str_next(SubString(str.string.data, str.offset + pos, str.offset + ncodeunits(str)), 1)
+    iterate(SubString(str.string.data, str.offset + pos, str.offset + ncodeunits(str)), 1)
 
 ## overload methods for efficiency ##
 
diff --git a/test/basic.jl b/test/basic.jl
index 78b372b..61a758b 100644
--- a/test/basic.jl
+++ b/test/basic.jl
@@ -292,8 +292,8 @@ let
 
     @test lastindex(srep) == 7
 
-    @test str_next(srep, 3) == ('β',5)
-    @test str_next(srep, 7) == ('β',9)
+    @test iterate(srep, 3) == ('β',5)
+    @test iterate(srep, 7) == ('β',9)
 
     @test srep[7] == 'β'
     @test_throws StringIndexError srep[8]
@@ -327,8 +327,8 @@ end
     @test_throws MethodError codeunit(tstr, true)
     @test_throws MethodError isvalid(tstr, 1)
     @test_throws MethodError isvalid(tstr, true)
-    @test_throws MethodError str_next(tstr, 1)
-    @test_throws MethodError str_next(tstr, true)
+    @test_throws MethodError iterate(tstr, 1)
+    @test_throws MethodError iterate(tstr, true)
     @test_throws MethodError lastindex(tstr)
 
     gstr = GenericString("12")
@@ -598,7 +598,7 @@ end
     for st in ("Hello", "Σ", "こんにちは", "😊😁")
         local s
         s = ST(st)
-        @test str_next(s, lastindex(s))[2] > sizeof(s)
+        @test iterate(s, lastindex(s))[2] > sizeof(s)
         @test nextind(s, lastindex(s)) > sizeof(s)
     end
 end
@@ -902,7 +902,7 @@ function testbin(::Type{ST}) where {ST}
                  b"\xf8\x9f\x98\x84", b"\xf8\x9f\x98\x84z")),
         s in lst
         st = ST(s)
-        @test str_next(st, 1)[2] == 2
+        @test iterate(st, 1)[2] == 2
         @test nextind(st, 1) == 2
     end
 
@@ -917,7 +917,7 @@ function testbin(::Type{ST}) where {ST}
         (s, r) in lst
         st = ST(s)
         (ST === BinaryStr || ST === Text1Str) && (r = 2)
-        @test str_next(st, 1)[2] == r
+        @test iterate(st, 1)[2] == r
         @test nextind(st, 1) == r
     end
 end
@@ -937,12 +937,7 @@ end
     @test String(sym) == string(Char(0xdcdb))
     @test Meta.lower(Main, sym) === sym
     res = string(Meta.parse(string(Char(0xdcdb)," = 1"),1,raise=false)[1])
-    @static if VERSION ≥ v"1.5.0-DEV.460"
-        @test res == "\$(Expr(:error, \"invalid UTF-8 sequence\"))"
-    else
-        @test startswith(res, "\$(Expr(:error, \"invalid character \\\"\\udcdb\\\"")
-        @test endswith(res,   "\"))")
-    end
+    @test res == "\$(Expr(:error, \"invalid UTF-8 sequence\"))"
 end
 
 @testset "invalid code point" begin

From e797807952202279a2e53a9ef1380e5397fcbe7e Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Sun, 17 Jan 2021 10:24:01 -0500
Subject: [PATCH 4/5] Change to use GitHub Actions

---
 .travis.yml  |  34 -------------
 src/ascii.jl |  22 +-------
 src/latin.jl |  20 +-------
 src/utf16.jl |  58 ++++++++++------------
 src/utf8.jl  |  10 ----
 src/util.jl  | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++
 test/util.jl |  25 ++++++++++
 7 files changed, 191 insertions(+), 116 deletions(-)
 delete mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 423d9fa..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-## Documentation: http://docs.travis-ci.com/user/languages/julia/
-language: julia
-os:
-  - linux
-  - osx
-  - windows
-julia:
-  - 1.0
-  - 1
-  - nightly
-notifications:
-  email: false
-git:
-  depth: 99999999
-
-## uncomment the following lines to allow failures on nightly julia
-## (tests will run but not make your overall status red)
-matrix:
-  allow_failures:
-   - julia: nightly
-
-## uncomment and modify the following lines to manually install system packages
-#addons:
-#  apt: # apt-get for linux
-#    packages:
-#    - gfortran
-#before_script: # homebrew for mac
-#  - if [ $TRAVIS_OS_NAME = osx ]; then brew install gcc; fi
-
-## uncomment the following lines to override the default test script
-
-after_success:
-  # push coverage results to Codecov
-  - julia -e 'using Pkg; cd(Pkg.dir("StrBase")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
diff --git a/src/ascii.jl b/src/ascii.jl
index 20c7ef9..e8c9adc 100644
--- a/src/ascii.jl
+++ b/src/ascii.jl
@@ -1,32 +1,12 @@
 #=
 ASCIIStr type
 
-Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones,
+Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones,
 and other contributors to the Julia language
 Licensed under MIT License, see LICENSE.md
 Based in part on code for ASCIIString that used to be in Julia
 =#
 
-## overload methods for efficiency ##
-
-function _string(coll)
-    n = 0
-    for str in coll
-        n += ncodeunits(str)
-    end
-    buf, out = _allocate(UInt8, n)
-    for str in coll
-        @preserve str begin
-            len = ncodeunits(str)
-            unsafe_copyto!(out, pointer(str), len)
-            out += len
-        end
-    end
-    buf
-end
-
-string(c::MaybeSub{<:Str{ASCIICSE}}...) = length(c) == 1 ? c[1] : Str(ASCIICSE, _string(c))
-
 ## transcoding to ASCII ##
 
 function convert(::Type{<:Str{ASCIICSE}}, str::AbstractString)
diff --git a/src/latin.jl b/src/latin.jl
index 14c59bd..9f63fcb 100644
--- a/src/latin.jl
+++ b/src/latin.jl
@@ -1,7 +1,8 @@
 #=
 LatinStr/_LatinStr type (ISO Latin1 8-bit subset of Unicode)
 
-Copyright 2017 Gandalf Software, Inc., Scott P. Jones, and other contributors to the Julia language
+Copyright 2017, 2020 Gandalf Software, Inc., Scott P. Jones,
+and other contributors to the Julia language
 Licensed under MIT License, see LICENSE.md
 Based in part on code for ASCIIString that used to be in Julia
 =#
@@ -13,23 +14,6 @@ is_latin(str::MaybeSub{<:Str{<:LatinCSE}}) = true
 is_bmp(str::MS_Latin) = true
 is_unicode(str::MS_Latin) = true
 
-const MS_ASCIILatin = MaybeSub{<:Str{<:Union{ASCIICSE, Latin_CSEs}}}
-
-function string(collection::MS_ASCIILatin...)
-    length(collection) == 1 && return collection[1]
-    len = 0
-    @inbounds for str in collection
-        len += ncodeunits(str)
-    end
-    buf, pnt = _allocate(len)
-    @inbounds for str in collection
-        len = ncodeunits(str)
-        _memcpy(pnt, pointer(str), len)
-        pnt += len
-    end
-    Str(LatinCSE, buf)
-end
-
 ## transcoding to Latin1 ##
 
 function convert(::Type{<:Str{C}}, str::AbstractString) where {C<:Latin_CSEs}
diff --git a/src/utf16.jl b/src/utf16.jl
index e3a46e6..cbd3d7b 100644
--- a/src/utf16.jl
+++ b/src/utf16.jl
@@ -11,41 +11,47 @@ const _trail_mask = CHUNKSZ == 4 ? 0xdc00_dc00 : 0xdc00_dc00_dc00_dc00
 const _hi_bit_16  = CHUNKSZ == 4 ? 0x8000_8000 : 0x8000_8000_8000_8000
 
 const _big_trail_mask = _widen_mask(_trail_mask)
-const _big_hi_bit_16  = _widen_mask(_big_hi_bit_16)
+const _big_hi_bit_16  = _widen_mask(_hi_bit_16)
 
 @inline _mask_surr(v, msk)  = xor((v | v<<1 | v<<2 | v<<3 | v<<4 | v<<5) & msk, msk)
 
-@inline _get_masked(v::UInt) = _mask_surr(xor(v, _trail_mask))
-@inline _get_masked(v::BigChunk) = _mask_surr(xor(v, _big_trail_mask))
+@inline _get_masked(v::UInt) = _mask_surr(xor(v, _trail_mask), _hi_bit_16)
+@inline _get_masked(v::BigChunk) = _mask_surr(xor(v, _big_trail_mask), _big_hi_bit_16)
 @inline _get_masked(qpnt::Ptr) = _get_masked(unsafe_load(qpnt))
 
 @inline _get_lead(qpnt::Ptr{UInt}) = xor(_get_masked(qpnt), _hi_bit_16)
 @inline _get_lead(qpnt::Ptr{BigChunk}) = xor(_get_masked(qpnt), _big_hi_bit_16)
 
-@inline function _length_al(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
-    # First check very frequent cases of short strings
-    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
-    # taking advantage of the knowledge of how String types are stored in Julia,
-    # i.e. UInt length, immediate followed by the string data, aligned on sizeof(UInt)*2
-    cnt <<= 1
-    if cnt <= BIGCHUNKSZ
-        return (cnt <= CHUNKSZ
-                ? count_ones(_mask_bytes(_get_lead(_pntchunk(beg), cnt))
-                : count_ones(_mask_bytes(_get_lead(_pntbigchunk(beg), cnt))
-    end
+## overload methods for efficiency ##
+
+function _length_utf16_al(beg::Ptr{UInt16}, cnt::Int)
     len = count_ones(_get_lead(_pntchunk(beg)))
     cnt -= CHUNKSZ
     pnt = _pntbigchunk(beg + CHUNKSZ)
     v = _get_lead(pnt)
-    cnt <= BIGCHUNKSZ && return len + count_ones(_mask_bytes(v, cnt))
-    fin = pnt + cnt
-    while (pnt += BIGCHUNKSZ) < fin
-        len += count_ones(v)
-        v = _get_lead(pnt)
+    if cnt > BIGCHUNKSZ
+        fin = pnt + cnt
+        while (pnt += BIGCHUNKSZ) < fin
+            len += count_ones(v)
+            v = _get_lead(pnt)
+        end
     end
     len + count_ones(_mask_bytes(v, cnt))
 end
 
+function _length_al(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
+    # First check very frequent cases of short strings
+    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
+    # taking advantage of the knowledge of how String types are stored in Julia,
+    # i.e. UInt length, immediately followed by the string data, aligned on sizeof(UInt)*2
+    cnt <<= 1
+    (cnt <= BIGCHUNKSZ
+     ? (cnt <= CHUNKSZ
+        ? count_ones(_mask_bytes(_get_lead(_pntchunk(beg)), cnt))
+        : count_ones(_mask_bytes(_get_lead(_pntbigchunk(beg)), cnt)))
+     : _length_utf16_al(beg, cnt))
+end
+
 function _length_ul(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
     align = reinterpret(UInt, beg)
     pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
@@ -104,20 +110,6 @@ function _prevind(::MultiCU, str::MS_UTF16, pos::Int, nchar::Int)
 end
 
 # Check for any surrogate characters
-function is_bmp(str::MS_UTF16)
-    (siz = sizeof(str)) == 0 && return true
-    # Todo: handle unaligned for ARM32
-    @preserve str begin
-        siz < CHUNKSZ && return (_get_masked(_pntchunk(str)) & _mask_bytes(siz)) == 0
-
-        pnt, fin = _calcpnt(str, siz)
-        while (pnt += CHUNKSZ) <= fin
-            _get_masked(pnt) == 0 || return false
-        end
-        pnt - CHUNKSZ == fin || (_get_masked(pnt) & _mask_bytes(siz)) == 0
-    end
-end
-
 @inline function _check_bmp_utf16_al(beg, cnt)
     cnt <= CHUNKSZ && return _mask_bytes(_get_masked(_pntchunk(beg)), cnt) == 0
     cnt <= BIGCHUNKSZ && return _mask_bytes(_get_masked(_pntbigchunk(beg)), cnt) == 0
diff --git a/src/utf8.jl b/src/utf8.jl
index bd41a4f..71b07b1 100644
--- a/src/utf8.jl
+++ b/src/utf8.jl
@@ -226,7 +226,6 @@ _all_latin(val) =
 
 @inline function _check_latin_utf8_al(beg, cnt)
     cnt <= CHUNKSZ && return _all_latin(_mask_bytes(unsafe_load(_pntchunk(ptr)), cnt))
-    bigmsk = _widen_mask(msk)
     cnt <= BIGCHUNKSZ && return _all_latin(_mask_bytes(unsafe_load(_pntbigchunk(ptr)), cnt))
     _all_latin(unsafe_load(_pntchunk(ptr))) || return false
     cnt -= CHUNKSZ
@@ -601,15 +600,6 @@ _prevind(::MultiCU, str::Str{RawUTF8CSE}, pos::Int, nchar::Int) =
 _prevind(::MultiCU, str::Str{RawUTF8CSE}, pos::Int) =
     prevind(str.data, pos)
 
-#=
-const _ByteStr = Union{Str{ASCIICSE}, SubString{<:Str{ASCIICSE}},
-                       Str{UTF8CSE},  SubString{<:Str{UTF8CSE}}}
-
-string(s::_ByteStr) = s
-string(s::_ByteStr, c::_ByteStr...) = UTF8Str(_string(c))
-    # ^^ at least one must be UTF-8 or the ASCII-only method would get called
-=#
-
 function _reverse(::MultiCU, ::Type{UTF8CSE}, len, pnt::Ptr{T}) where {T<:CodeUnitTypes}
     buf, beg = _allocate(T, len)
     out = beg + len
diff --git a/src/util.jl b/src/util.jl
index 045b650..a4c39e4 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -7,6 +7,144 @@ Licensed under MIT License, see LICENSE.md
 Based initially on julia/test/strings/util.jl
 =#
 
+function _concat(T, a, b)
+    # Lengths are in code units of T, but Ptr arithmetic is in bytes: scale by sizeof(T)
+    la, lb = ncodeunits(a), ncodeunits(b)
+    buf, out = _allocate(T, la + lb)
+    @preserve a unsafe_copyto!(out, pointer(a), la)
+    @preserve b unsafe_copyto!(out + la * sizeof(T), pointer(b), lb)
+    buf
+end
+
+function _string(T, a, b, rest)
+    # Copy a, b and then each string in rest, in order, into the buffer from _allocate
+    la, lb = ncodeunits(a), ncodeunits(b)
+    len = la + lb
+    @inbounds for str in rest
+        len += ncodeunits(str)
+    end
+    buf, out = _allocate(T, len)
+    @preserve a unsafe_copyto!(out, pointer(a), la)
+    out += la * sizeof(T)   # Ptr arithmetic is in bytes, lengths are in code units
+    @preserve b unsafe_copyto!(out, pointer(b), lb)
+    out += lb * sizeof(T)
+    @inbounds for str in rest
+        len = ncodeunits(str)
+        @preserve str unsafe_copyto!(out, pointer(str), len)
+        out += len * sizeof(T)
+    end
+    buf
+end
+
+function _string(T, coll)
+    len = 0   # total length, in code units of T
+    @inbounds for str in coll
+        len += ncodeunits(str)
+    end
+    buf, out = _allocate(T, len)
+    @inbounds for str in coll
+        len = ncodeunits(str)
+        @preserve str unsafe_copyto!(out, pointer(str), len)
+        out += len * sizeof(T)   # Ptr arithmetic is in bytes, len is in code units
+    end
+    buf
+end
+
+# Handle concatenation where all the same CSE for strings, and character set for characters
+#=
+"""
+WIP: this is rather tricky.
+It really should handle any type of Chr / Str / CSE, not just the ones defined
+in CharSetEncodings, ChrBase and StrBase
+Ideally, it could also handle mixes with String and Char (or other AbstractString / AbstractChar
+types).
+It may need to do two or even three passes, one to determine the correct type to be output,
+another to determine the output length, and finally another to copy the strings / characters into
+the buffer.
+The result type should be based on promotion rules, i.e. outputting UCS2Str if only ASCII, Latin, UCS2 characters and strings are in the list.
+This is difficult to do in a way that will still be type stable.
+"""
+
+function _string_chr(a::Union{<:Chr{CS,T}, <:Str{C}, SubString{<:Str{C}}}...
+                     ) where {CS<:CharSet,T,C<:CSE{CS}}
+    len = 0
+    for v in a
+        if v isa Chr
+            len += 1
+        else
+            len += ncodeunits(v)
+        end
+    end
+    buf, out = _allocate(T, len)
+    for v in a
+        len = ncodeunits(str)
+        @preserve str unsafe_copyto!(out, pointer(str), len)
+        out += len
+    end
+    buf
+end
+=#
+
+string(c::MaybeSub{<:Str}) = c
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...) = Str(LatinCSE, _string(UInt8, c))
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...) = Str(UTF8CSE, _string(UInt8, c))
+string(c::MaybeSub{<:Str{<:UCS2_CSEs}}...) = Str(UCS2CSE, _string(UInt16, c))
+string(c::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...) = Str(UTF16CSE, _string(UInt16, c))
+string(c::MaybeSub{<:Str{<:UTF32_CSEs}}...) = Str(UTF32CSE, _string(UInt32, c))
+
+#=
+const MS_Str{C} = MaybeSub{<:Str{C}}
+string(a::MS_Str{C}, b::MS_Str{C}) where {C<:CSE} = Str(C, _concat(codeunit(C), a, b))
+string(a::MS_Str{C}, b::MS_Str{C}, c::MS_Str{C}...) where {C<:CSE} =
+    Str(C, _string(codeunit(C), a, b, c))
+
+string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
+string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
+string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
+
+const MS_AL = MS_Str{<:Union{ASCIICSE,Latin_CSEs}}
+string(a::MS_AL, b::MS_AL) = Str(LatinCSE, _concat(UInt8, a, b))
+string(a::MS_AL, b::MS_AL, c::MS_AL...) = Str(LatinCSE, _string(UInt8, a, b, c))
+
+const MS_AU = MS_Str{<:Union{ASCIICSE,UTF8CSE}}
+string(a::MS_AU, b::MS_AU) = Str(UTF8CSE, _concat(UInt8, a, b))
+string(a::MS_AU, b::MS_AU, c::MS_AU...) = Str(UTF8CSE, _string(UInt8, a, b, c))
+
+const MS_U2 = MS_Str{<:UCS2_CSEs}
+string(a::MS_U2, b::MS_U2) = Str(UCS2CSE, _concat(UInt16, a, b))
+string(a::MS_U2, b::MS_U2, c::MS_U2...) = Str(UCS2CSE, _string(UInt16, a, b, c))
+
+const MS_UT = MS_Str{<:Union{UCS2_CSEs,UTF16CSE}}
+string(a::MS_UT, b::MS_UT) = Str(UTF16CSE, _concat(UInt16, a, b))
+string(a::MS_UT, b::MS_UT, c::MS_UT...) = Str(UTF16CSE, _string(UInt16, a, b, c))
+
+const MS_U4 = MS_Str{<:UTF32_CSEs}
+string(a::MS_U4, b::MS_U4) = Str(UTF32CSE, _concat(UInt32, a, b))
+string(a::MS_U4, b::MS_U4, c::MS_U4...) = Str(UTF32CSE, _string(UInt32, a, b, c))
+=#
+
+#=
+# Earlier variants that special-cased a single argument, kept for reference only.
+# The active varargs `string` methods are defined once, above these comment blocks.
+# They are deliberately not repeated after this block: defining identical methods
+# a second time would only overwrite the first definitions.
+
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...) =
+    length(c) == 1 ? c[1] : Str(LatinCSE, _string(UInt8, c))
+
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...) =
+    length(c) == 1 ? c[1] : Str(UTF8CSE, _string(UInt8, c))
+
+string(c::MaybeSub{<:Str{<:UCS2_CSEs}}...) =
+    length(c) == 1 ? c[1] : Str(UCS2CSE, _string(UInt16, c))
+
+string(c::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...) =
+    length(c) == 1 ? c[1] : Str(UTF16CSE, _string(UInt16, c))
+
+string(c::MaybeSub{<:Str{<:UTF32_CSEs}}...) =
+    length(c) == 1 ? c[1] : Str(UTF32CSE, _string(UInt32, c))
+=#
+
 # starts with and ends with predicates
 
 starts_with(a::MaybeSub{<:Str{C}}, b::MaybeSub{<:Str{C}}) where {C<:CSE} =
diff --git a/test/util.jl b/test/util.jl
index d85645b..3f77471 100644
--- a/test/util.jl
+++ b/test/util.jl
@@ -307,6 +307,31 @@
             #non-hex characters
             @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
         end
+
+        @testset "Concatenation" begin  # asserts result types only, not the contents
+            asc = ASCIIStr("foo")
+            lat = LatinStr("bar")
+            ucs = UCS2Str("baz")
+            u32 = UTF32Str("silly")
+            ut8 = UTF8Str("test")
+            ut16 = UTF16Str("ugly")
+            haslat = _LatinStr("você")
+            hasucs = _UCS2Str("†")
+            hasu32 = _UTF32Str("\U1f596")
+            @test typeof(asc * asc) == ASCIIStr
+            @test typeof(asc * lat) == LatinStr
+            @test typeof(asc * ut8) == UTF8Str
+            @test typeof(asc * haslat) == LatinStr
+            @test typeof(lat * lat) == LatinStr
+            @test typeof(haslat * haslat) == _LatinStr
+            @test typeof(lat * haslat) == LatinStr
+            @test typeof(ucs * ucs) == UCS2Str
+            @test typeof(hasucs * hasucs) == _UCS2Str
+            @test typeof(ucs * hasucs) == UCS2Str
+            @test typeof(u32 * u32) == UTF32Str
+            @test typeof(hasu32 * hasu32) == _UTF32Str
+            @test typeof(u32 * hasu32) == UTF32Str
+        end
     end
 
     # b"" should be immutable

From c3d074088809f9d61dc5370d7d666a42b4d1ad0d Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Thu, 13 May 2021 10:56:54 -0400
Subject: [PATCH 5/5] Update version and CI

---
 .drone.yml               | 27 ++++++++++++++++++++++++++
 .github/workflows/ci.yml | 42 ++++++++++++++++++++++++++++++++++++++++
 Project.toml             |  2 +-
 src/utf8.jl              |  2 +-
 4 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.drone.yml b/.drone.yml
index 2607e25..be0eae9 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -11,3 +11,30 @@ steps:
   image: julia:1.5
   commands:
   - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
+
+---
+kind: pipeline
+name: linux - arm - Julia 1.6
+platform:
+  os: linux
+  arch: arm
+
+steps:
+- name: build
+  image: julia:1.6
+  commands:
+  - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
+
+---
+kind: pipeline
+name: linux - arm64 - Julia 1.6
+
+platform:
+  os: linux
+  arch: arm64
+
+steps:
+- name: build
+  image: julia:1.6
+  commands:
+  - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..65063c3
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,42 @@
+name: CI
+on:
+  - push
+  - pull_request
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        version:
+          - '1.5'
+          - 'nightly'
+        os:
+          - ubuntu-latest
+          - macOS-latest
+          - windows-latest
+        arch:
+          - x64
+          - x86
+        exclude:
+          - os: macOS-latest
+            arch: x86
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: actions/cache@v1
+        env:
+          cache-name: cache-artifacts
+        with:
+          path: ~/.julia/artifacts
+          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-test-${{ env.cache-name }}-
+            ${{ runner.os }}-test-
+            ${{ runner.os }}-
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
diff --git a/Project.toml b/Project.toml
index 0af4980..b209a69 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,7 +4,7 @@ authors  = ["ScottPJones <scottjones@alum.mit.edu>"]
 keywords = ["Strings"]
 license  = "MIT"
 uuid     = "e79e7a6a-7bb1-5a4d-9d64-da657b06f53a"
-version = "1.0.5"
+version = "1.1.0"
 
 [deps]
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
diff --git a/src/utf8.jl b/src/utf8.jl
index 71b07b1..4300d27 100644
--- a/src/utf8.jl
+++ b/src/utf8.jl
@@ -213,7 +213,7 @@ is_ascii(str::SubString{<:Str{C}}) where {C<:ASCII_Union} =
 
 is_ascii(vec::Vector{T}) where {T<:CodeUnitTypes} =
     (cnt = sizeof(vec)) == 0 ||
-         (@preserve str _check_mask_ul(pointer(vec), cnt, _ascii_mask(T)))
+         (@preserve vec _check_mask_ul(pointer(vec), cnt, _ascii_mask(T)))
 
 is_ascii(str::Str{C}) where {C<:ASCII_Union} =
     (cnt = sizeof(str)) == 0 ||