/*
 * stream_list.c
 *
 * SRTP stream list SSE2 implementation
 *
 * Andrey Semashev
 */
/*
 *
 * Copyright (c) 2022, Cisco Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 *   Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 *   Neither the name of the Cisco Systems, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/* NOTE: This file is intended to be included only once, in stream_list.c */

#include <emmintrin.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

 | 51 | +/*  | 
 | 52 | + * Returns an index of the stream corresponding to ssrc,  | 
 | 53 | + * or >= streams->size if no stream exists for that ssrc.  | 
 | 54 | + */  | 
 | 55 | +uint32_t srtp_stream_list_find(const srtp_stream_list_t *streams, uint32_t ssrc)  | 
 | 56 | +{  | 
 | 57 | +    const uint32_t *const ssrcs = streams->ssrcs;  | 
 | 58 | +    const __m128i mm_ssrc = _mm_set1_epi32(ssrc);  | 
 | 59 | +    uint32_t pos = 0u, n = (streams->size + 7u) & ~(uint32_t)(7u);  | 
 | 60 | +    for (uint32_t m = n & ~(uint32_t)(15u); pos < m; pos += 16u) {  | 
 | 61 | +        __m128i mm1 = _mm_loadu_si128((const __m128i *)(ssrcs + pos));  | 
 | 62 | +        __m128i mm2 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 4u));  | 
 | 63 | +        __m128i mm3 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 8u));  | 
 | 64 | +        __m128i mm4 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 12u));  | 
 | 65 | +        mm1 = _mm_cmpeq_epi32(mm1, mm_ssrc);  | 
 | 66 | +        mm2 = _mm_cmpeq_epi32(mm2, mm_ssrc);  | 
 | 67 | +        mm3 = _mm_cmpeq_epi32(mm3, mm_ssrc);  | 
 | 68 | +        mm4 = _mm_cmpeq_epi32(mm4, mm_ssrc);  | 
 | 69 | +        mm1 = _mm_packs_epi32(mm1, mm2);  | 
 | 70 | +        mm3 = _mm_packs_epi32(mm3, mm4);  | 
 | 71 | +        mm1 = _mm_packs_epi16(mm1, mm3);  | 
 | 72 | +        uint32_t mask = _mm_movemask_epi8(mm1);  | 
 | 73 | +        if (mask) {  | 
 | 74 | +#if defined(_MSC_VER)  | 
 | 75 | +            unsigned long bit_pos;  | 
 | 76 | +            _BitScanForward(&bit_pos, mask);  | 
 | 77 | +            pos += bit_pos;  | 
 | 78 | +#else  | 
 | 79 | +            pos += __builtin_ctz(mask);  | 
 | 80 | +#endif  | 
 | 81 | + | 
 | 82 | +            goto done;  | 
 | 83 | +        }  | 
 | 84 | +    }  | 
 | 85 | + | 
 | 86 | +    if (pos < n) {  | 
 | 87 | +        __m128i mm1 = _mm_loadu_si128((const __m128i *)(ssrcs + pos));  | 
 | 88 | +        __m128i mm2 = _mm_loadu_si128((const __m128i *)(ssrcs + pos + 4u));  | 
 | 89 | +        mm1 = _mm_cmpeq_epi32(mm1, mm_ssrc);  | 
 | 90 | +        mm2 = _mm_cmpeq_epi32(mm2, mm_ssrc);  | 
 | 91 | +        mm1 = _mm_packs_epi32(mm1, mm2);  | 
 | 92 | + | 
 | 93 | +        uint32_t mask = _mm_movemask_epi8(mm1);  | 
 | 94 | +        if (mask) {  | 
 | 95 | +#if defined(_MSC_VER)  | 
 | 96 | +            unsigned long bit_pos;  | 
 | 97 | +            _BitScanForward(&bit_pos, mask);  | 
 | 98 | +            pos += bit_pos / 2u;  | 
 | 99 | +#else  | 
 | 100 | +            pos += __builtin_ctz(mask) / 2u;  | 
 | 101 | +#endif  | 
 | 102 | +            goto done;  | 
 | 103 | +        }  | 
 | 104 | + | 
 | 105 | +        pos += 8u;  | 
 | 106 | +    }  | 
 | 107 | + | 
 | 108 | +done:  | 
 | 109 | +    return pos;  | 
 | 110 | +}  | 