Skip to content

Commit e67886c

Browse files
committed
Consolidate all hashing to the common/hashing package
And remove now unused hashing funcs.
1 parent d5eda13 commit e67886c

File tree

125 files changed

+177
-368
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

125 files changed

+177
-368
lines changed

common/hashing/hashing.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@
1515
package hashing
1616

1717
import (
18+
"crypto/md5"
1819
"encoding/hex"
1920
"io"
21+
"strconv"
2022
"sync"
2123

2224
"github.com/cespare/xxhash/v2"
25+
"github.com/gohugoio/hashstructure"
26+
"github.com/gohugoio/hugo/identity"
2327
)
2428

2529
// XXHashFromReader calculates the xxHash for the given reader.
@@ -50,6 +54,82 @@ func XxHashFromStringHexEncoded(f string) string {
5054
return hex.EncodeToString(hash)
5155
}
5256

57+
// MD5FromStringHexEncoded returns the MD5 hash of the given string,
// encoded as a lowercase hexadecimal string (32 characters).
//
// MD5 is not collision resistant; do not use the result for anything
// security sensitive.
func MD5FromStringHexEncoded(f string) string {
	// md5.Sum hashes in a single call and returns a fixed-size array, so no
	// hash.Hash has to be allocated and no Write result is silently dropped.
	sum := md5.Sum([]byte(f))
	return hex.EncodeToString(sum[:])
}
63+
64+
// HashString returns a hash from the given elements.
65+
// It will panic if the hash cannot be calculated.
66+
// Note that this hash should be used primarily for identity, not for change detection as
67+
// it in the more complex values (e.g. Page) will not hash the full content.
68+
func HashString(vs ...any) string {
69+
hash := HashUint64(vs...)
70+
return strconv.FormatUint(hash, 10)
71+
}
72+
73+
var hashOptsPool = sync.Pool{
74+
New: func() any {
75+
return &hashstructure.HashOptions{
76+
Hasher: xxhash.New(),
77+
}
78+
},
79+
}
80+
81+
func getHashOpts() *hashstructure.HashOptions {
82+
return hashOptsPool.Get().(*hashstructure.HashOptions)
83+
}
84+
85+
// putHashOpts resets the hasher held by opts and then hands opts back to
// hashOptsPool, so the next user of the pooled value starts from a clean
// hasher state.
func putHashOpts(opts *hashstructure.HashOptions) {
86+
opts.Hasher.Reset()
87+
hashOptsPool.Put(opts)
88+
}
89+
90+
// HashUint64 returns a hash from the given elements.
91+
// It will panic if the hash cannot be calculated.
92+
// Note that this hash should be used primarily for identity, not for change detection as
93+
// it in the more complex values (e.g. Page) will not hash the full content.
94+
func HashUint64(vs ...any) uint64 {
95+
var o any
96+
if len(vs) == 1 {
97+
o = toHashable(vs[0])
98+
} else {
99+
elements := make([]any, len(vs))
100+
for i, e := range vs {
101+
elements[i] = toHashable(e)
102+
}
103+
o = elements
104+
}
105+
106+
hashOpts := getHashOpts()
107+
defer putHashOpts(hashOpts)
108+
109+
hash, err := hashstructure.Hash(o, hashOpts)
110+
if err != nil {
111+
panic(err)
112+
}
113+
return hash
114+
}
115+
116+
// keyer is implemented by values that can supply a string key.
// toHashable hashes that key in place of the value itself.
type keyer interface {
117+
Key() string
118+
}
119+
120+
// For structs, hashstructure.Hash only works on the exported fields,
121+
// so rewrite the input slice for known identity types.
122+
func toHashable(v any) any {
123+
switch t := v.(type) {
124+
case keyer:
125+
return t.Key()
126+
case identity.IdentityProvider:
127+
return t.GetIdentity()
128+
default:
129+
return v
130+
}
131+
}
132+
53133
type xxhashReadFrom struct {
54134
buff []byte
55135
*xxhash.Digest

common/hashing/hashing_test.go

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
package hashing
1515

1616
import (
17+
"fmt"
18+
"math"
1719
"strings"
1820
"testing"
1921

20-
"github.com/cespare/xxhash/v2"
2122
qt "github.com/frankban/quicktest"
2223
)
2324

@@ -72,8 +73,47 @@ func BenchmarkXXHashFromStringHexEncoded(b *testing.B) {
7273
}
7374
}
7475

75-
func xxHashFromString(f string) uint64 {
76-
h := xxhash.New()
77-
h.WriteString(f)
78-
return h.Sum64()
76+
func TestHashString(t *testing.T) {
77+
c := qt.New(t)
78+
79+
c.Assert(HashString("a", "b"), qt.Equals, "3176555414984061461")
80+
c.Assert(HashString("ab"), qt.Equals, "7347350983217793633")
81+
82+
var vals []any = []any{"a", "b", tstKeyer{"c"}}
83+
84+
c.Assert(HashString(vals...), qt.Equals, "4438730547989914315")
85+
c.Assert(vals[2], qt.Equals, tstKeyer{"c"})
86+
}
87+
88+
// tstKeyer is a test helper type with a Key method, used to exercise the
// keyer handling of the hashing functions.
type tstKeyer struct {
89+
key string
90+
}
91+
92+
// Key returns the key the value was created with.
func (t tstKeyer) Key() string {
93+
return t.key
94+
}
95+
96+
// String returns the key prefixed with "key: ".
func (t tstKeyer) String() string {
97+
return "key: " + t.key
98+
}
99+
100+
func BenchmarkHashString(b *testing.B) {
101+
word := " hello "
102+
103+
var tests []string
104+
105+
for i := 1; i <= 5; i++ {
106+
sentence := strings.Repeat(word, int(math.Pow(4, float64(i))))
107+
tests = append(tests, sentence)
108+
}
109+
110+
b.ResetTimer()
111+
112+
for _, test := range tests {
113+
b.Run(fmt.Sprintf("n%d", len(test)), func(b *testing.B) {
114+
for i := 0; i < b.N; i++ {
115+
HashString(test)
116+
}
117+
})
118+
}
79119
}

common/loggers/handlersmisc.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121
"sync"
2222

2323
"github.com/bep/logg"
24-
"github.com/gohugoio/hugo/identity"
24+
"github.com/gohugoio/hugo/common/hashing"
2525
)
2626

2727
// PanicOnWarningHook panics on warnings.
@@ -85,7 +85,7 @@ func (h *logOnceHandler) HandleLog(e *logg.Entry) error {
8585
}
8686
h.mu.Lock()
8787
defer h.mu.Unlock()
88-
hash := identity.HashUint64(e.Level, e.Message, e.Fields)
88+
hash := hashing.HashUint64(e.Level, e.Message, e.Fields)
8989
if h.seen[hash] {
9090
return errStop
9191
}

config/namespace.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ package config
1616
import (
1717
"encoding/json"
1818

19-
"github.com/gohugoio/hugo/identity"
19+
"github.com/gohugoio/hugo/common/hashing"
2020
)
2121

2222
func DecodeNamespace[S, C any](configSource any, buildConfig func(any) (C, any, error)) (*ConfigNamespace[S, C], error) {
2323
// Calculate the hash of the input (not including any defaults applied later).
2424
// This allows us to introduce new config options without breaking the hash.
25-
h := identity.HashString(configSource)
25+
h := hashing.HashString(configSource)
2626

2727
// Build the config
2828
c, ext, err := buildConfig(configSource)

helpers/general.go

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ package helpers
1515

1616
import (
1717
"bytes"
18-
"crypto/md5"
19-
"encoding/hex"
2018
"fmt"
2119
"io"
2220
"net"
@@ -257,66 +255,6 @@ func SliceToLower(s []string) []string {
257255
return l
258256
}
259257

260-
// XXHashFromReader creates a xxHash hash from the given reader.
261-
262-
// MD5String takes a string and returns its MD5 hash as a lowercase
// hexadecimal string.
func MD5String(f string) string {
	// md5.Sum avoids allocating a hash.Hash and the extra empty slice that
	// was previously passed to Sum.
	sum := md5.Sum([]byte(f))
	return hex.EncodeToString(sum[:])
}
268-
269-
// MD5FromReaderFast creates a MD5 hash from the given file. It only reads parts of
// the file for speed, so don't use it if the files are very subtly different.
// It will not close the file.
// It will return the MD5 hash and the size of r in bytes.
//
// At most maxChunks samples of peekSize bytes are hashed. A short or empty
// read still feeds the whole sample buffer to the hash and ends the sampling.
// The reader is left positioned at its end.
//
// NOTE(review): the seek before every sample after the first is relative to
// the start of r, so those samples all come from the same absolute offset.
// A relative seek may have been intended, but changing it would change the
// resulting hashes.
func MD5FromReaderFast(r io.ReadSeeker) (string, int64, error) {
	const (
		// Do not change once set in stone!
		maxChunks = 8
		peekSize  = 64
		seek      = 2048
	)

	digest := md5.New()
	sample := make([]byte, peekSize)

sampling:
	for n := 0; n < maxChunks; n++ {
		if n > 0 {
			if _, err := r.Seek(seek, io.SeekStart); err != nil {
				if err == io.EOF {
					break sampling
				}
				return "", 0, err
			}
		}

		_, err := io.ReadAtLeast(r, sample, peekSize)
		switch err {
		case nil:
			digest.Write(sample)
		case io.EOF, io.ErrUnexpectedEOF:
			// Out of data: hash the buffer as it stands and stop sampling.
			digest.Write(sample)
			break sampling
		default:
			return "", 0, err
		}
	}

	// Seeking to the end yields the total size; a failure here is ignored.
	size, _ := r.Seek(0, io.SeekEnd)

	return hex.EncodeToString(digest.Sum(nil)), size, nil
}
310-
311-
// MD5FromReader creates a MD5 hash from the given reader.
// It reads r until EOF and returns the digest as a lowercase hexadecimal
// string. If reading fails, it returns an empty string and the read error.
func MD5FromReader(r io.Reader) (string, error) {
	h := md5.New()
	if _, err := io.Copy(h, r); err != nil {
		// Propagate the failure: returning a nil error here would make a
		// failed read indistinguishable from a successful one.
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}
319-
320258
// IsWhitespace determines if the given rune is whitespace.
321259
func IsWhitespace(r rune) bool {
322260
return r == ' ' || r == '\t' || r == '\n' || r == '\r'

helpers/general_test.go

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,13 @@
1414
package helpers_test
1515

1616
import (
17-
"fmt"
1817
"reflect"
1918
"strings"
2019
"testing"
2120

2221
"github.com/gohugoio/hugo/helpers"
2322

2423
qt "github.com/frankban/quicktest"
25-
"github.com/spf13/afero"
2624
)
2725

2826
func TestResolveMarkup(t *testing.T) {
@@ -256,93 +254,6 @@ func TestUniqueStringsSorted(t *testing.T) {
256254
c.Assert(helpers.UniqueStringsSorted(nil), qt.IsNil)
257255
}
258256

259-
func TestFastMD5FromFile(t *testing.T) {
260-
fs := afero.NewMemMapFs()
261-
262-
if err := afero.WriteFile(fs, "small.txt", []byte("abc"), 0o777); err != nil {
263-
t.Fatal(err)
264-
}
265-
266-
if err := afero.WriteFile(fs, "small2.txt", []byte("abd"), 0o777); err != nil {
267-
t.Fatal(err)
268-
}
269-
270-
if err := afero.WriteFile(fs, "bigger.txt", []byte(strings.Repeat("a bc d e", 100)), 0o777); err != nil {
271-
t.Fatal(err)
272-
}
273-
274-
if err := afero.WriteFile(fs, "bigger2.txt", []byte(strings.Repeat("c d e f g", 100)), 0o777); err != nil {
275-
t.Fatal(err)
276-
}
277-
278-
c := qt.New(t)
279-
280-
sf1, err := fs.Open("small.txt")
281-
c.Assert(err, qt.IsNil)
282-
sf2, err := fs.Open("small2.txt")
283-
c.Assert(err, qt.IsNil)
284-
285-
bf1, err := fs.Open("bigger.txt")
286-
c.Assert(err, qt.IsNil)
287-
bf2, err := fs.Open("bigger2.txt")
288-
c.Assert(err, qt.IsNil)
289-
290-
defer sf1.Close()
291-
defer sf2.Close()
292-
defer bf1.Close()
293-
defer bf2.Close()
294-
295-
m1, _, err := helpers.MD5FromReaderFast(sf1)
296-
c.Assert(err, qt.IsNil)
297-
c.Assert(m1, qt.Equals, "e9c8989b64b71a88b4efb66ad05eea96")
298-
299-
m2, _, err := helpers.MD5FromReaderFast(sf2)
300-
c.Assert(err, qt.IsNil)
301-
c.Assert(m2, qt.Not(qt.Equals), m1)
302-
303-
m3, _, err := helpers.MD5FromReaderFast(bf1)
304-
c.Assert(err, qt.IsNil)
305-
c.Assert(m3, qt.Not(qt.Equals), m2)
306-
307-
m4, _, err := helpers.MD5FromReaderFast(bf2)
308-
c.Assert(err, qt.IsNil)
309-
c.Assert(m4, qt.Not(qt.Equals), m3)
310-
311-
m5, err := helpers.MD5FromReader(bf2)
312-
c.Assert(err, qt.IsNil)
313-
c.Assert(m5, qt.Not(qt.Equals), m4)
314-
}
315-
316-
func BenchmarkMD5FromFileFast(b *testing.B) {
317-
fs := afero.NewMemMapFs()
318-
319-
for _, full := range []bool{false, true} {
320-
b.Run(fmt.Sprintf("full=%t", full), func(b *testing.B) {
321-
for i := 0; i < b.N; i++ {
322-
b.StopTimer()
323-
if err := afero.WriteFile(fs, "file.txt", []byte(strings.Repeat("1234567890", 2000)), 0o777); err != nil {
324-
b.Fatal(err)
325-
}
326-
f, err := fs.Open("file.txt")
327-
if err != nil {
328-
b.Fatal(err)
329-
}
330-
b.StartTimer()
331-
if full {
332-
if _, err := helpers.MD5FromReader(f); err != nil {
333-
b.Fatal(err)
334-
}
335-
} else {
336-
if _, _, err := helpers.MD5FromReaderFast(f); err != nil {
337-
b.Fatal(err)
338-
}
339-
}
340-
f.Close()
341-
}
342-
})
343-
}
344-
}
345-
346257
func BenchmarkUniqueStrings(b *testing.B) {
347258
input := []string{"a", "b", "d", "e", "d", "h", "a", "i"}
348259

0 commit comments

Comments
 (0)