Skip to content

Commit 1490591

Browse files
authored
Supports charset other than UTF-8 (#9)
1 parent 7894f0d commit 1490591

File tree

5 files changed

+52
-11
lines changed

5 files changed

+52
-11
lines changed

data/6.html

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<html xmlns:og="http://ogp.me/ns#">
2+
<head>
3+
<meta charset="shift_jis" />
4+
<title>タイトル</title>
5+
<meta property="og:title" content="ShiftJISタイトル" />
6+
<meta property="og:type" content="website" />
7+
<meta property="og:url" content="http://example.com" />
8+
<meta property="og:image" content="http://example.com/image.png" />
9+
</head>
10+
<body>
11+
</body>
12+
</html>

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ module github.com/soranoba/googp
22

33
go 1.15
44

5-
require golang.org/x/net v0.0.0-20200904194848-62affa334b73
5+
require golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e

go.sum

+8-9
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
2-
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
3-
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
4-
golang.org/x/net v0.0.0-20200904194848-62affa334b73 h1:MXfv8rhZWmFeqX3GNZRsd6vOLoaCHjYEX3qkRo3YBUA=
5-
golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
6-
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
7-
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
8-
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
9-
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
1+
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ=
2+
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
3+
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
4+
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
5+
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
6+
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
7+
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
8+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

googp.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111
package googp
1212

1313
import (
14+
"bufio"
1415
"fmt"
16+
"io"
1517
"mime"
1618
"net/http"
19+
20+
"golang.org/x/net/html/charset"
1721
)
1822

1923
// Fetch the content from the URL and parse OGP information.
@@ -43,5 +47,11 @@ func Parse(res *http.Response, i interface{}, opts ...ParserOpts) error {
4347
}
4448
}
4549

46-
return NewParser(opts...).Parse(res.Body, i)
50+
br := bufio.NewReader(res.Body)
51+
var reader io.Reader = br
52+
data, _ := br.Peek(1024)
53+
enc, _, _ := charset.DetermineEncoding(data, ct)
54+
reader = enc.NewDecoder().Reader(reader)
55+
56+
return NewParser(opts...).Parse(reader, i)
4757
}

googp_test.go

+20
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,26 @@ func TestParse(t *testing.T) {
4747
assertEqual(t, ogp.Images[0].URL, "http://example.com/image.png")
4848
}
4949

50+
func TestParseWithEncoding(t *testing.T) {
51+
client := &http.Client{}
52+
req, err := http.NewRequest("GET", endpoint()+"/6.html", nil)
53+
if err != nil {
54+
t.Error(err)
55+
}
56+
57+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
58+
defer cancel()
59+
60+
res, err := client.Do(req.WithContext(ctx))
61+
var ogp OGP
62+
assertNoError(t, Parse(res, &ogp))
63+
64+
assertEqual(t, ogp.Title, "ShiftJISタイトル")
65+
assertEqual(t, ogp.Type, "website")
66+
assertEqual(t, ogp.URL, "http://example.com")
67+
assertEqual(t, ogp.Images[0].URL, "http://example.com/image.png")
68+
}
69+
5070
func ExampleFetch() {
5171
var ogp OGP
5272
if err := Fetch(endpoint()+"/5.html", &ogp); err != nil {

0 commit comments

Comments
 (0)