|  | 
| 1 |  | -{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} | 
|  | 1 | +{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} | 
| 2 | 2 | {-# LANGUAGE MagicHash, UnboxedTuples, PatternGuards #-} | 
| 3 | 3 | {-# OPTIONS_GHC -fno-warn-unused-imports #-} | 
| 4 | 4 | {-# LANGUAGE Trustworthy #-} | 
| @@ -469,6 +469,7 @@ import           Data.ByteString.Builder.Prim.ASCII | 
| 469 | 469 | import           Foreign | 
| 470 | 470 | import           Foreign.C.Types | 
| 471 | 471 | import           Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) | 
|  | 472 | +import           GHC.Int (Int (..)) | 
| 472 | 473 | import           GHC.Word (Word8 (..)) | 
| 473 | 474 | import           GHC.Exts | 
| 474 | 475 | import           GHC.IO | 
| @@ -672,50 +673,87 @@ primMapLazyByteStringBounded w = | 
| 672 | 673 | -- | 
| 673 | 674 | -- @since 0.11.0.0 | 
| 674 | 675 | cstring :: Addr# -> Builder | 
| 675 |  | -cstring = | 
| 676 |  | -    \addr0 -> builder $ step addr0 | 
| 677 |  | -  where | 
| 678 |  | -    step :: Addr# -> BuildStep r -> BuildStep r | 
| 679 |  | -    step !addr !k br@(BufferRange op0@(Ptr op0#) ope) | 
| 680 |  | -      | W8# ch == 0 = k br | 
| 681 |  | -      | op0 == ope = | 
| 682 |  | -          return $ bufferFull 1 op0 (step addr k) | 
| 683 |  | -      | otherwise = do | 
| 684 |  | -          IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of | 
| 685 |  | -                       s' -> (# s', () #) | 
| 686 |  | -          let br' = BufferRange (op0 `plusPtr` 1) ope | 
| 687 |  | -          step (addr `plusAddr#` 1#) k br' | 
| 688 |  | -      where | 
| 689 |  | -        !ch = indexWord8OffAddr# addr 0# | 
|  | 676 | +cstring = \addr0 -> builder $ \k br -> do | 
|  | 677 | +#if __GLASGOW_HASKELL__ >= 811 | 
|  | 678 | +    let len = cstringLength# addr0 | 
|  | 679 | +#else | 
|  | 680 | +    (I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0) | 
|  | 681 | +#endif | 
|  | 682 | +    cstring_step addr0 len k br | 
|  | 683 | +{-# INLINE cstring #-} | 
|  | 684 | + | 
|  | 685 | +cstring_step :: Addr# -> Int# -> BuildStep r -> BuildStep r | 
|  | 686 | +cstring_step !addr !len !k br@(BufferRange op0 ope) | 
|  | 687 | +    -- String is empty, process the continuation | 
|  | 688 | +  | (I# len) == 0 = k br | 
|  | 689 | +    -- Buffer is full, allocate some more...  We ask for just one more | 
|  | 690 | +    -- byte, but the builder allocation strategy will in practice give | 
|  | 691 | +    -- us more space, which we'll consume in a single step. | 
|  | 692 | +  | op0 == ope = | 
|  | 693 | +       return $ bufferFull 1 op0 (cstring_step addr len k) | 
|  | 694 | +    -- Copy as much of the string as fits into the available buffer space. | 
|  | 695 | +    -- If the string is long enough, we may have asked for less than its | 
|  | 696 | +    -- full length, filling the buffer with the rest will go into the next | 
|  | 697 | +    -- builder step. | 
|  | 698 | +  | otherwise = do | 
|  | 699 | +      let !avail@(I# avail#) = min (I# len) (ope `minusPtr` op0) | 
|  | 700 | +          br' = BufferRange (op0 `plusPtr` avail) ope | 
|  | 701 | +          addr' = addr `plusAddr#` avail# | 
|  | 702 | +          len' = len -# avail# | 
|  | 703 | +      S.memcpy op0 (Ptr addr) avail | 
|  | 704 | +      cstring_step addr' len' k br' | 
| 690 | 705 | 
 | 
| 691 | 706 | -- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'. | 
| 692 | 707 | -- Null characters can be encoded as @0xc0 0x80@. | 
| 693 | 708 | -- | 
| 694 | 709 | -- @since 0.11.0.0 | 
| 695 | 710 | cstringUtf8 :: Addr# -> Builder | 
| 696 |  | -cstringUtf8 = | 
| 697 |  | -    \addr0 -> builder $ step addr0 | 
| 698 |  | -  where | 
| 699 |  | -    step :: Addr# -> BuildStep r -> BuildStep r | 
| 700 |  | -    step !addr !k br@(BufferRange op0@(Ptr op0#) ope) | 
| 701 |  | -      | W8# ch == 0 = k br | 
| 702 |  | -      | op0 == ope = | 
| 703 |  | -          return $ bufferFull 1 op0 (step addr k) | 
| 704 |  | -        -- NULL is encoded as 0xc0 0x80 | 
| 705 |  | -      | W8# ch == 0xc0 | 
| 706 |  | -      , W8# (indexWord8OffAddr# addr 1#) == 0x80 = do | 
| 707 |  | -          let !(W8# nullByte#) = 0 | 
| 708 |  | -          IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of | 
| 709 |  | -                       s' -> (# s', () #) | 
| 710 |  | -          let br' = BufferRange (op0 `plusPtr` 1) ope | 
| 711 |  | -          step (addr `plusAddr#` 2#) k br' | 
| 712 |  | -      | otherwise = do | 
| 713 |  | -          IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of | 
| 714 |  | -                       s' -> (# s', () #) | 
| 715 |  | -          let br' = BufferRange (op0 `plusPtr` 1) ope | 
| 716 |  | -          step (addr `plusAddr#` 1#) k br' | 
| 717 |  | -      where | 
| 718 |  | -        !ch = indexWord8OffAddr# addr 0# | 
|  | 711 | +cstringUtf8 = \addr0 -> builder $ \k br -> do | 
|  | 712 | +#if __GLASGOW_HASKELL__ >= 811 | 
|  | 713 | +    let len = cstringLength# addr0 | 
|  | 714 | +#else | 
|  | 715 | +    (I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0) | 
|  | 716 | +#endif | 
|  | 717 | +    nullAt <- S.c_strstr (Ptr addr0) (Ptr "\xc0\x80"#) | 
|  | 718 | +    cstringUtf8_step addr0 len nullAt k br | 
|  | 719 | +{-# INLINE cstringUtf8 #-} | 
|  | 720 | + | 
|  | 721 | +cstringUtf8_step :: Addr# -> Int# -> Ptr Word8 -> BuildStep r -> BuildStep r | 
|  | 722 | +cstringUtf8_step !addr !len !nullAt !k br@(BufferRange op0@(Ptr op0#) ope) | 
|  | 723 | +    -- String is empty, process the continuation | 
|  | 724 | +  | (I# len) == 0 = k br | 
|  | 725 | +    -- Contains no encoded nulls, use simpler 'cstring' code | 
|  | 726 | +  | nullPtr == nullAt = | 
|  | 727 | +      cstring_step addr len k br | 
|  | 728 | +    -- Buffer is full, allocate some more...  We ask for just one more | 
|  | 729 | +    -- byte, but the builder allocation strategy will in practice give | 
|  | 730 | +    -- us more space, which we'll consume in a single step. | 
|  | 731 | +  | op0 == ope = | 
|  | 732 | +       return $ bufferFull 1 op0 (cstringUtf8_step addr len nullAt k) | 
|  | 733 | +    -- We're at the encoded null-byte, append a '\0' to the buffer and | 
|  | 734 | +    -- continue with the rest of the input string, after locating the | 
|  | 735 | +    -- next encoded null-byte, if any. | 
|  | 736 | +  | (Ptr addr) == nullAt = do | 
|  | 737 | +      let !(W8# nullByte#) = 0 | 
|  | 738 | +      IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of | 
|  | 739 | +                   s' -> (# s', () #) | 
|  | 740 | +      let br' = BufferRange (op0 `plusPtr` 1) ope | 
|  | 741 | +          addr' = addr `plusAddr#` 2# | 
|  | 742 | +          len' = len -# 2# | 
|  | 743 | +      nullAt' <- S.c_strstr (Ptr addr') (Ptr "\xc0\x80"#) | 
|  | 744 | +      cstringUtf8_step addr' len' nullAt' k br' | 
|  | 745 | +    -- Copy as much of the null-free portion of the string as fits into the | 
|  | 746 | +    -- available buffer space. If the string is long enough, we may have asked | 
|  | 747 | +    -- for less than its full length, filling the buffer with the rest will go | 
|  | 748 | +    -- into the next builder step. | 
|  | 749 | +  | otherwise = do | 
|  | 750 | +      let !nullFree = nullAt `minusPtr` (Ptr addr) | 
|  | 751 | +          !avail@(I# avail#) = min nullFree (ope `minusPtr` op0) | 
|  | 752 | +          br' = BufferRange (op0 `plusPtr` avail) ope | 
|  | 753 | +          addr' = addr `plusAddr#` avail# | 
|  | 754 | +          len' = len -# avail# | 
|  | 755 | +      S.memcpy op0 (Ptr addr) avail | 
|  | 756 | +      cstringUtf8_step addr' len' nullAt k br' | 
| 719 | 757 | 
 | 
| 720 | 758 | ------------------------------------------------------------------------------ | 
| 721 | 759 | -- Char8 encoding | 
|  | 
0 commit comments