@@ -207,86 +207,105 @@ const (
207207 maxLen = 250 // guard for the StatsD UDP packet size
208208)
209209
210- // isTrim returns true if the byte is to be trimmed at the ends.
211- func isTrim (b byte ) bool { return b == '.' || b == '_' || b == '-' }
// shouldTrim is a lookup table indexed by byte value. An entry is true when
// that byte ('.', '_' or '-') must be stripped from the start and end of a
// sanitized metric name; every other entry keeps the zero value, false.
var shouldTrim = [256]bool{
	'.': true,
	'_': true,
	'-': true,
}
212215
213216// appendSanitizedMetricName converts *any* string into something that StatsD / Graphite
214217// accepts without complaints.
215218func appendSanitizedMetricName (dst []byte , raw string ) []byte {
216- nameLen := 0
217- orig := len (dst )
218219 if raw == "" {
219220 if len (dst ) == 0 {
220221 return append (dst , "_unnamed_" ... )
221222 }
222223 return dst
223224 }
224- // ── 1. accent folding (creates one temporary ↴)
225- // tmp := stripUnicodeAccents([]byte(raw))
226-
227- // ── 2. run the same ASCII sanitizer, but write into dst
228- lastWasRepl := false
229- for i := 0 ; i < len (raw ); i ++ {
230- c := byte (raw [i ])
231-
232- if c < 128 && valid [c ] {
233- // ASCII valid chars
234- dst = append (dst , c )
235- nameLen ++
236- lastWasRepl = false
237- } else if c >= 0xC2 && c <= 0xC3 && i + 1 < len (raw ) {
238- // Check for 2-byte UTF-8 sequences that are common accented letters
239- c2 := byte (raw [i + 1 ])
240- if c2 >= 0x80 && c2 <= 0xBF { // Valid second byte
241- // Decode the 2-byte sequence
242- codepoint := uint16 (c & 0x1F )<< 6 | uint16 (c2 & 0x3F )
243-
244- // Map common accented characters (U+00C0-U+00FF range)
245- if codepoint >= 0xC0 && codepoint <= 0xFF {
246- mapped := accentMap [codepoint ]
247- if valid [mapped ] {
225+ orig := len (dst )
226+
227+ // Pre-grow
228+ need := len (raw )
229+ if need > maxLen {
230+ need = maxLen
231+ }
232+ if cap (dst )- len (dst ) < need {
233+ nd := make ([]byte , len (dst ), len (dst )+ need )
234+ copy (nd , dst )
235+ dst = nd
236+ }
237+
238+ n := len (raw )
239+ i := 0
240+ lastWasReplacement := false
241+
242+ // Skip leading trim while building
243+ for i < n {
244+ c := raw [i ]
245+ if ! shouldTrim [c ] {
246+ break
247+ }
248+ i ++
249+ }
250+
251+ for i < n && (len (dst )- orig ) < maxLen {
252+ // Batch ASCII-valid run
253+ remaining := maxLen - (len (dst ) - orig )
254+ j := i
255+ limit := i + remaining
256+ if limit > n {
257+ limit = n
258+ }
259+ for j < limit {
260+ c := raw [j ]
261+ if c >= 128 || ! valid [c ] {
262+ break
263+ }
264+ j ++
265+ }
266+ if j > i {
267+ dst = append (dst , raw [i :j ]... )
268+ lastWasReplacement = false
269+ i = j
270+ continue
271+ }
272+
273+ // 2-byte common accent folding
274+ c0 := raw [i ]
275+ if c0 >= 0xC2 && c0 <= 0xC3 && i + 1 < n {
276+ c1 := raw [i + 1 ]
277+ if c1 >= 0x80 && c1 <= 0xBF {
278+ code := uint16 (c0 & 0x1F )<< 6 | uint16 (c1 & 0x3F )
279+ if code >= 0xC0 && code <= 0xFF {
280+ mapped := accentMap [code ]
281+ if valid [mapped ] && (len (dst )- orig ) < maxLen {
248282 dst = append (dst , mapped )
249- nameLen ++
250- lastWasRepl = false
251- i ++ // Skip the second byte
283+ lastWasReplacement = false
284+ i += 2
252285 continue
253286 }
254287 }
255288 }
256- // If we get here, treat as invalid
257- if ! lastWasRepl {
258- dst = append (dst , replacement )
259- nameLen ++
260- lastWasRepl = true
261- }
262- } else if ! lastWasRepl {
263- // Everything else (3-byte, 4-byte sequences, invalid chars)
264- dst = append (dst , replacement )
265- nameLen ++
266- lastWasRepl = true
267289 }
268290
269- if nameLen >= maxLen {
270- break
291+ // Replacement for everything else
292+ if ! lastWasReplacement && len (dst ) > orig && (len (dst )- orig ) < maxLen {
293+ dst = append (dst , replacement )
294+ lastWasReplacement = true
271295 }
296+ i ++
272297 }
273298
274- // 3. trim leading / trailing '.', '_' or '-'
275- start , end := orig , len (dst )
276- for start < end && isTrim (dst [start ]) {
277- start ++
278- }
279- for end > start && isTrim (dst [end - 1 ]) {
280- end --
281- }
282-
283- // 4. compact if we trimmed something
284- if start > orig || end < len (dst ) {
285- copy (dst [orig :], dst [start :end ])
286- dst = dst [:orig + (end - start )]
299+ // Trim trailing '.' '_' '-'
300+ for l := len (dst ); l > orig ; {
301+ c := dst [l - 1 ]
302+ if ! shouldTrim [c ] {
303+ break
304+ }
305+ l --
306+ dst = dst [:l ]
287307 }
288308
289- // 5. fallback if everything vanished
290309 if len (dst ) == orig {
291310 return append (dst , "_truncated_" ... )
292311 }
0 commit comments