Skip to content

Commit 976ab65

Browse files
committedJun 16, 2020
update package
1 parent 7ad44fb commit 976ab65

File tree

4 files changed

+573
-203
lines changed

4 files changed

+573
-203
lines changed
 

‎README.md

Lines changed: 142 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -5,130 +5,159 @@
55
[![Go Report Card](https://goreportcard.com/badge/github.com/gitgitcode/alink)](https://goreportcard.com/report/github.com/gitgitcode/alink)
66
- [ZH](#简介)
77

8-
Golang package to read href,video,title ... tags from an HTML page。
8+
Golang package to read href, video, title, img ... tags from an HTML page.
99

1010

1111
## 简介
1212

13-
一个简单的Golang package 主要用来读取HTML页面中的 ``` <title> ,<video>,<a>``` 等元素.
14-
通过 ```alink.NewRespBody``` 方法处理可以读取 ```http.Get``` 返回的```response.Body```内容。
15-
注意如果要多次读取使用io.Reader 要通过 ```body, err := ioutil.ReadAll(b.Body)```读取后再次新建 ``` readerHref := bytes.NewReader(body)``` 的方式来进行。
16-
内部使用html.Parse 解析后返回一个字符串数组指针。
13+
一个简单的Golang package 主要用来读取HTML页面中的 ``` <title> ,<video>的src,<a>的href,<img>的src``` 等元素的内容.
14+
在库里提供了两种方式处理 ```http.Get``` 返回的```response.Body```内容,一是通过 ```alink.GetBytesReaderWithIoReader```方法处理可以读取 ```http.Get``` 返回的```response.Body```内容。
15+
但是如果要***多次***读取使用io.Reader 要通过 ```body, err := ioutil.ReadAll(b.Body)```读取后再次新建 ``` readerHref := bytes.NewReader(body)``` 的方式来进行。
16+
第二种就是使用 ```alink.GetByteWithIoReader``` 方法读取 ```http.Get``` 返回的 ```response.Body```,再使用带 ``WithByte`` 后缀的方法进行多次读取.
17+
内部方法使用 html.Parse 解析页面内容。
18+
1719

1820
### 例子 Example
1921

20-
- 一个读取google/baidu主页的例子。获取页面的title和全部a连接并打印出来
22+
- 一个读取google/baidu主页的例子。获取页面的img和全部a链接并打印出来
2123

22-
- Use http client Get google/baidu Index Page and collect tags title ,href
24+
- Use an HTTP client to GET the google/baidu index page and collect the img and href tags
2325

2426
```go
2527
package main
2628

2729
import (
28-
"github.com/gitgitcode/alink"
29-
"fmt"
30-
"log"
31-
"math/rand"
32-
"net/http"
33-
"time"
34-
)
35-
var userAgentList = []string{"Mozilla/5.0 (compatible, MSIE 10.0, Windows NT, DigExt)",
36-
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, 360SE)",
37-
"Mozilla/4.0 (compatible, MSIE 8.0, Windows NT 6.0, Trident/4.0)",
38-
"Mozilla/5.0 (compatible, MSIE 9.0, Windows NT 6.1, Trident/5.0,",
39-
"Opera/9.80 (Windows NT 6.1, U, en) Presto/2.8.131 Version/11.11",
40-
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, TencentTraveler 4.0)",
41-
"Mozilla/5.0 (Windows, U, Windows NT 6.1, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
42-
"Mozilla/5.0 (Macintosh, Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
43-
"Mozilla/5.0 (Macintosh, U, Intel Mac OS X 10_6_8, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
44-
"Mozilla/5.0 (Linux, U, Android 3.0, en-us, Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
45-
"Mozilla/5.0 (iPad, U, CPU OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
46-
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, Trident/4.0, SE 2.X MetaSr 1.0, SE 2.X MetaSr 1.0, .NET CLR 2.0.50727, SE 2.X MetaSr 1.0)",
47-
"Mozilla/5.0 (iPhone, U, CPU iPhone OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
48-
"MQQBrowser/26 Mozilla/5.0 (Linux, U, Android 2.3.7, zh-cn, MB200 Build/GRJ22, CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"}
49-
func GetRandomUserAgent() string{
50-
r := rand.New(rand.NewSource(time.Now().UnixNano()))
51-
return userAgentList[r.Intn(len(userAgentList))]
52-
}
53-
var accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
54-
55-
func main() {
56-
57-
str:="https://google.co.jp"
58-
str1:="https://www.baidu.com"
59-
60-
//fmt.Print(alink.IsValidUrl(str1))
61-
client:= http.Client{Timeout: 2 * time.Second}
62-
req,err := http.NewRequest("GET",str,nil)
63-
req1,err1 := http.NewRequest("GET",str1,nil)
64-
65-
if err != nil{
66-
log.Printf("google is err:%s",err.Error())
67-
}
68-
69-
if err1 != nil{
70-
log.Printf("baidu is err:%s",err1.Error())
71-
}
72-
73-
ReqAdd(req)
74-
ReqAdd(req1)
75-
b,err := client.Do(req)
76-
defer client.CloseIdleConnections()
77-
78-
if err != nil{
79-
log.Printf("request google err %s",err.Error())
80-
b1,err1 := client.Do(req1)
81-
if err1 !=nil{
82-
log.Printf("request baidu err %s",err.Error())
83-
return
84-
}
85-
b = b1
86-
}
87-
88-
body, err := ioutil.ReadAll(b.Body)
89-
if err !=nil{
90-
panic(err)
91-
}
92-
//for read twice create new reader
93-
readerHref := bytes.NewReader(body)
94-
//创建两个新 reader
95-
readerTitle := bytes.NewReader(body)
96-
97-
t,f := alink.Title(readerTitle)
98-
99-
if f !=nil {
100-
log.Print(f)
101-
}
102-
fmt.Printf("title:%s \n",t)
103-
104-
a,bl := alink.Alink(readerHref)
105-
106-
107-
if bl {
108-
for i,v := range *a{
109-
fmt.Printf("index:%d=href:%s\n",i,v)
110-
}
111-
}
112-
113-
114-
115-
//title:百度一下,你就知道
116-
//index:0=href:/
117-
// index:1=href:javascript:;
118-
// index:2=href:https://passport.baidu.com/v2
119-
//or
120-
//title:Google
121-
//index:0=href:/
122-
// index:1=href:javascript:;
123-
// index:2=href:https://wwww.google.com/
124-
125-
}
126-
127-
func ReqAdd(req *http.Request) {
128-
req.Header.Set("Cookie","sug=3; a=1; ORIGIN=0; bdime=21110")
129-
req.Header.Add("User-Agent",GetRandomUserAgent() )
130-
req.Header.Add("Accept",accept)
131-
req.Header.Add("Upgrade-Insecure-Requests","1")
132-
}
133-
30+
"github.com/gitgitcode/alink"
31+
"bytes"
32+
"fmt"
33+
"io/ioutil"
34+
"log"
35+
"math/rand"
36+
"net/http"
37+
"time"
38+
)
39+
var userAgentList = []string{"Mozilla/5.0 (compatible, MSIE 10.0, Windows NT, DigExt)",
40+
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, 360SE)",
41+
"Mozilla/4.0 (compatible, MSIE 8.0, Windows NT 6.0, Trident/4.0)",
42+
"Mozilla/5.0 (compatible, MSIE 9.0, Windows NT 6.1, Trident/5.0,",
43+
"Opera/9.80 (Windows NT 6.1, U, en) Presto/2.8.131 Version/11.11",
44+
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, TencentTraveler 4.0)",
45+
"Mozilla/5.0 (Windows, U, Windows NT 6.1, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
46+
"Mozilla/5.0 (Macintosh, Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
47+
"Mozilla/5.0 (Macintosh, U, Intel Mac OS X 10_6_8, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
48+
"Mozilla/5.0 (Linux, U, Android 3.0, en-us, Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
49+
"Mozilla/5.0 (iPad, U, CPU OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
50+
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, Trident/4.0, SE 2.X MetaSr 1.0, SE 2.X MetaSr 1.0, .NET CLR 2.0.50727, SE 2.X MetaSr 1.0)",
51+
"Mozilla/5.0 (iPhone, U, CPU iPhone OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
52+
"MQQBrowser/26 Mozilla/5.0 (Linux, U, Android 2.3.7, zh-cn, MB200 Build/GRJ22, CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"}
53+
54+
func GetRandomUserAgent() string{
55+
r := rand.New(rand.NewSource(time.Now().UnixNano()))
56+
return userAgentList[r.Intn(len(userAgentList))]
57+
}
58+
59+
var accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
60+
61+
func ReqAdd(req *http.Request) {
62+
req.Header.Set("Cookie","sug=3; a=1; ORIGIN=0; bdime=21110")
63+
req.Header.Add("User-Agent",GetRandomUserAgent() )
64+
req.Header.Add("Accept",accept)
65+
req.Header.Add("Upgrade-Insecure-Requests","1")
66+
}
67+
68+
func main() {
69+
70+
Response ,_:= GetHttpResponseP()
71+
body, err := ioutil.ReadAll(Response.Body)
72+
if err !=nil{
73+
panic(err)
74+
}
75+
GetWithByte(body)
76+
GetWithBytesReaderCreateTwiceNewReader(body)
77+
78+
}
79+
80+
func GetHttpResponseP() (*http.Response,error){
81+
str:="https://google.co.jp"
82+
str1:="https://www.baidu.com"
83+
84+
//fmt.Print(alink.IsValidUrl(str1))
85+
client:= http.Client{Timeout: 2 * time.Second}
86+
req,err := http.NewRequest("GET",str,nil)
87+
req1,err1 := http.NewRequest("GET",str1,nil)
88+
89+
if err != nil{
90+
log.Printf("google is err:%s",err.Error())
91+
}
92+
93+
if err1 != nil{
94+
log.Printf("baidu is err:%s",err1.Error())
95+
}
96+
97+
ReqAdd(req)
98+
ReqAdd(req1)
99+
b,err := client.Do(req)
100+
defer client.CloseIdleConnections()
101+
if err != nil{
102+
log.Printf("request google err %s",err.Error())
103+
b1,err1 := client.Do(req1)
104+
if err1 !=nil{
105+
log.Printf("request baidu err %s",err.Error())
106+
panic(err1)
107+
}
108+
b = b1
109+
}
110+
return b ,nil
111+
}
112+
func GetWithByte(body []byte) {
113+
114+
title, err:= alink.GetTitleWithByte(body)
115+
if err == nil{
116+
fmt.Println(title)
117+
}else{
118+
fmt.Println("GetWithByte GetTitleWithByte err")
119+
}
120+
src,err := alink.GetImgSrcWithByte(body)
121+
if err == nil{
122+
for _,s :=range *src{
123+
fmt.Println(s)
124+
}
125+
}else{
126+
fmt.Println("GetWithByte GetImgSrcWithByte err")
127+
}
128+
129+
}
130+
131+
func GetWithBytesReaderCreateTwiceNewReader(body []byte){
132+
fmt.Println("<=================>")
133+
// create a separate reader for each read, because a reader can only be consumed once
134+
readerHref := bytes.NewReader(body)
135+
//创建两个新 reader
136+
readerImg := bytes.NewReader(body)
137+
138+
t,f := alink.GetHrefWithBytesReader (readerImg)
139+
140+
if f !=nil {
141+
log.Print(f)
142+
}
143+
fmt.Printf("Href:%s \n",t)
144+
145+
a,bl := alink.GetImgSrcWithBytesReader(readerHref)
146+
147+
if bl ==nil{
148+
for i,v := range *a{
149+
fmt.Printf("index:%d=href:%s\n",i,v)
150+
}
151+
}
152+
153+
//title:百度一下,你就知道
154+
//index:0=href:/
155+
// index:1=href:javascript:;
156+
// index:2=href:https://passport.baidu.com/v2
157+
//or
158+
//title:Google
159+
//index:0=href:/
160+
// index:1=href:javascript:;
161+
// index:2=href:https://wwww.google.com/
162+
}
134163
```

‎alink.go

Lines changed: 117 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,30 @@ import (
1717
"strings"
1818
)
1919

20-
// NewRespBody is the func create a new io.reader body
21-
// It returns a point of bytes.Reader
22-
func NewRespBody(respBody io.Reader) (*bytes.Reader, error) {
20+
// GetBytesReaderWithIoReader create a new bytes reader
21+
func GetBytesReaderWithIoReader(respBody io.Reader)(reader *bytes.Reader ,err error){
2322

24-
b, err := ioutil.ReadAll(respBody)
25-
reader := bytes.NewReader(b)
23+
c, err := ioutil.ReadAll(respBody)
24+
if err == nil{
25+
reader = bytes.NewReader(c)
26+
}
2627
return reader, err
2728
}
2829

30+
// GetByteWithIoReader is the func use ioutil.ReadAll() change to byte
31+
// It returns []byte
32+
func GetByteWithIoReader(respBody io.Reader) ([]byte, error) {
33+
b, err := ioutil.ReadAll(respBody)
34+
return b, err
35+
}
36+
37+
38+
// GetByteReader uses bytes.NewReader to create a new reader over respBody.
39+
func GetByteReader(respBody []byte) *bytes.Reader {
40+
reader := bytes.NewReader(respBody)
41+
return reader
42+
}
43+
2944
// 检查是否是url
3045
// check string is url
3146
// It return bool
@@ -63,13 +78,12 @@ func isImgElement(n *html.Node) bool {
6378
}
6479

6580
// Get page title
66-
func titleText(n *html.Node) (string, bool) {
81+
func getTitleText(n *html.Node) (string, bool) {
6782
if isTitleElement(n) {
68-
//log.Print(n)
6983
return n.FirstChild.Data, true
7084
}
7185
for c := n.FirstChild; c != nil; c = c.NextSibling {
72-
result, ok := titleText(c)
86+
result, ok := getTitleText(c)
7387
if ok {
7488
return result, ok
7589
}
@@ -78,7 +92,7 @@ func titleText(n *html.Node) (string, bool) {
7892
}
7993

8094
// videoSrc get video src
81-
func videoSrc(node *html.Node) (string, bool) {
95+
func getVideoSrc(node *html.Node) (string, bool) {
8296
if isVideoElement(node) {
8397
for _, attr := range node.Attr {
8498
if attr.Key == "src" {
@@ -88,7 +102,7 @@ func videoSrc(node *html.Node) (string, bool) {
88102
return "", true
89103
}
90104
for c := node.FirstChild; c != nil; c = c.NextSibling {
91-
mark, ok := videoSrc(c)
105+
mark, ok := getVideoSrc(c)
92106
if ok {
93107
return mark, ok
94108
}
@@ -98,48 +112,126 @@ func videoSrc(node *html.Node) (string, bool) {
98112

99113
// GetVideoSrcWithBytesReader gets the video tag's src
100114
// It returns []string
101-
func VideoSrc(httpBody *bytes.Reader) (s []string, err error) {
115+
func GetVideoSrcWithBytesReader(httpBody *bytes.Reader) (s []string, err error) {
102116
var src []string
103117
node, err := html.Parse(httpBody)
104118
if err != nil {
105119
return src, err
106120
}
107-
link, flag := videoSrc(node)
121+
link, flag := getVideoSrc(node)
108122
if flag {
109123
src = append(src, link)
110124
}
111125
return src, nil
112126
}
113127

114-
// Title to get pages title return a string
115-
func Title(httpBody *bytes.Reader) (t string, err error) {
128+
// TitleBytes gets the page's title and returns it as a string
129+
func TitleBytes(httpBody *bytes.Reader) (t string, err error) {
116130
title := ""
117131
node, err := html.Parse(httpBody)
118132
if err != nil {
119133
return title, err
120134
}
121135

122-
title, _ = titleText(node)
136+
title, _ = getTitleText(node)
137+
138+
return title, nil
139+
}
140+
141+
// GetTitleWithByte
142+
func GetTitleWithByte(httpBody []byte) (t string, err error) {
143+
title := ""
144+
body:= GetByteReader(httpBody)
145+
146+
node, err := html.Parse(body)
147+
if err != nil {
148+
return title, err
149+
}
150+
151+
title, _ = getTitleText(node)
123152

124153
return title, nil
125154
}
126155

127-
// Alink get all links
128-
// It returns point []string and a bool value to check the page has a tags
129-
func Alink(httpBody *bytes.Reader) (l *[]string, b bool) {
156+
// GetImgSrcWithBytesReader get all img urls
157+
func GetImgSrcWithBytesReader(httpBody *bytes.Reader )(i *[]string, err error){
158+
ul:= []string{}
159+
page,err := html.Parse(httpBody)
160+
if err != nil{
161+
return &ul,err
162+
}
163+
ll , _ := getImgUrl(page,&ul)
164+
return ll,nil
165+
166+
}
167+
168+
// GetImgSrcWithByte
169+
func GetImgSrcWithByte(httpBody []byte )(i *[]string, err error){
170+
var ul []string
171+
mm := GetByteReader(httpBody)
172+
173+
page,err := html.Parse(mm)
174+
if err != nil{
175+
return &ul,err
176+
}
177+
ll , _ := getImgUrl(page,&ul)
178+
return ll,nil
179+
180+
}
181+
182+
// getImgUrl
183+
func getImgUrl(node *html.Node, ad *[]string) (l *[]string, b bool) {
184+
flag := false
185+
if isImgElement(node){
186+
for _, v := range node.Attr{
187+
if v.Key == "src" {
188+
if check(ad, v.Val) == false {
189+
*ad = append(*ad, v.Val)
190+
}
191+
}
192+
}
193+
return ad ,true
194+
}
195+
196+
for p:= node.FirstChild;p!=nil;p= p.NextSibling{
197+
ul,f := getImgUrl(p,ad)
198+
if f {
199+
flag = f
200+
ad = ul
201+
}
202+
203+
}
204+
return ad, flag
205+
}
206+
207+
// GetHrefWithBytesReader get all links
208+
// It returns a pointer to []string
209+
func GetHrefWithBytesReader(httpBody *bytes.Reader) (l *[]string, err error) {
130210
var links []string
131211
node, err := html.Parse(httpBody)
132212
if err != nil {
133-
return &links, false
213+
return &links, err
134214
}
135-
ff, _ := alLink(node, &links)
136-
return ff, true
215+
ff, _ := getHref(node, &links)
216+
return ff, nil
137217
}
138218

139-
// alLink Get href url
140-
func alLink(node *html.Node, h *[]string) (f *[]string, n bool) {
141-
b := false
219+
// GetHrefWithByte
220+
func GetHrefWithByte(httpBody []byte) (l *[]string, err error) {
221+
var links []string
222+
mm := GetByteReader(httpBody)
223+
node, err := html.Parse(mm)
224+
if err != nil {
225+
return &links, err
226+
}
227+
ff, _ := getHref(node, &links)
228+
return ff, nil
229+
}
142230

231+
232+
// getHref get url
233+
func getHref(node *html.Node, h *[]string) (f *[]string, n bool) {
234+
b := false
143235
if isAHrefElement(node) {
144236
for _, a := range node.Attr {
145237
if a.Key == "href" {
@@ -155,7 +247,7 @@ func alLink(node *html.Node, h *[]string) (f *[]string, n bool) {
155247
}
156248
}
157249
for c := node.FirstChild; c != nil; c = c.NextSibling {
158-
all, flag := alLink(c, h)
250+
all, flag := getHref(c, h)
159251
h = all
160252
b = flag
161253
}

‎alink_test.go

Lines changed: 233 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package alink
33
import (
44
"bytes"
55
"golang.org/x/net/html"
6+
"io"
67
"log"
78
"reflect"
89
"testing"
@@ -48,7 +49,7 @@ func TestNotIsValidUrl(t *testing.T) {
4849
}
4950
}
5051

51-
func TestAlink(t *testing.T) {
52+
func TestGetHrefWithBytesReader(t *testing.T) {
5253
var reader = `<a href="http://jjjj.com">1</a>
5354
<a href='http://news.google.com'>2</a>
5455
<a style=\"\" href=http://imgur.com>3</a>
@@ -59,7 +60,7 @@ func TestAlink(t *testing.T) {
5960
//string to byte.reader
6061
c := []byte(reader)
6162
b := bytes.NewReader(c)
62-
mm, _ := Alink(b)
63+
mm, _ := GetHrefWithBytesReader(b)
6364
//log.Print(mm)
6465
for i, k := range *mm {
6566
links = append(links, k)
@@ -80,12 +81,58 @@ func TestAlink(t *testing.T) {
8081
}
8182
}
8283

83-
func TestNewRespBody(t *testing.T) {
84+
func TestGetImgSrcWithBytesReader(t *testing.T) {
85+
type args struct {
86+
httpBody *bytes.Reader
87+
}
88+
89+
90+
var html = `<a href="http://jjjj.com">1</a> <video src="http://abc.com/ab.mp4">
91+
<a style=\"\" href=http://imgur.com>3</a> <img src="abc.com/img.jpg">http://alink.com</p>`
92+
93+
c := []byte(html)
94+
i := args{
95+
bytes.NewReader(c),
96+
}
97+
98+
var html1 = `<a href="http://jjjj.com">1</a> <video src="http://abc.com/ab.mp4">
99+
<a style=\"\" href=http://imgur.com>3</a> <img lin="abc.com/img.jpg">http://alink.com</p>`
100+
101+
c1 := []byte(html1)
102+
i1 := args{
103+
bytes.NewReader(c1),
104+
}
105+
106+
var tests =[]struct {
107+
name string
108+
args args
109+
wantS *[]string
110+
wantErr bool
111+
}{
112+
{"img",i,&[]string{"abc.com/img.jpg"},false },
113+
{"imgNoSrc",i1,&[]string{},false },
114+
}
115+
for _, tt :=range tests{
116+
t.Run(tt.name,func(t *testing.T){
117+
gotS ,err:= GetImgSrcWithBytesReader(tt.args.httpBody)
118+
if (err != nil) != tt.wantErr {
119+
t.Errorf("GetImgSrcWithBytesReader() error = %v, wantErr %v", err, tt.wantErr)
120+
return
121+
}
122+
if !reflect.DeepEqual(gotS, tt.wantS) {
123+
t.Errorf("GetImgSrcWithBytesReader() gotS = %v, want %v", gotS, tt.wantS)
124+
}
125+
})
126+
}
127+
128+
}
129+
130+
func TestGetBytesReaderWithIoReader(t *testing.T) {
84131
s := "<div></div>"
85132
str := []byte(s)
86133
reader := bytes.NewReader(str)
87-
//str := strings.NewReader(s)
88-
abc, err := NewRespBody(reader)
134+
135+
abc, err := GetBytesReaderWithIoReader(reader)
89136
if err != nil {
90137
t.Error(err)
91138
}
@@ -100,7 +147,7 @@ func TestNewRespBody(t *testing.T) {
100147
}
101148
}
102149

103-
func TestVideo(t *testing.T) {
150+
func TestGetVideoSrcWithBytesReader(t *testing.T) {
104151
var reader = `<a href="http://jjjj.com">1</a>
105152
<video src="http://abc.com/ab.mp4">
106153
<a style=\"\" href=http://imgur.com>3</a>
@@ -130,7 +177,7 @@ func TestVideo(t *testing.T) {
130177
//log.Print(tests[0].wantS)
131178
for _, tt := range tests {
132179
t.Run(tt.name, func(t *testing.T) {
133-
gotS, err := VideoSrc(tt.args.httpBody)
180+
gotS, err := GetVideoSrcWithBytesReader(tt.args.httpBody)
134181

135182
if (err != nil) != tt.wantErr {
136183
t.Errorf("Video() error = %v, wantErr %v", err, tt.wantErr)
@@ -144,7 +191,7 @@ func TestVideo(t *testing.T) {
144191
}
145192
}
146193

147-
func TestTitle(t *testing.T) {
194+
func TestTitleBytes(t *testing.T) {
148195

149196
type args struct {
150197
httpBody *bytes.Reader
@@ -170,7 +217,7 @@ func TestTitle(t *testing.T) {
170217
}
171218
for _, tt := range tests {
172219
t.Run(tt.name, func(t *testing.T) {
173-
gotT, err := Title(tt.args.httpBody)
220+
gotT, err := TitleBytes(tt.args.httpBody)
174221
if (err != nil) != tt.wantErr {
175222
t.Errorf("Title() error = %v, wantErr %v", err, tt.wantErr)
176223
return
@@ -187,8 +234,184 @@ func BenchmarkAlink(b *testing.B) {
187234
p := []byte(page)
188235

189236
for i := 0; i < b.N; i++ {
190-
Alink(bytes.NewReader(p))
237+
GetHrefWithBytesReader(bytes.NewReader(p))
238+
}
239+
}
240+
241+
242+
243+
func TestGetImgSrcWithByte(t *testing.T) {
244+
type args struct {
245+
httpBody []byte
246+
}
247+
var reader = `<img src="http://jjjj.com">
248+
<video src="http://abc.com/ab.mp4">
249+
<a style=\"\" href=http://imgur.com>3</a>
250+
http://alink.com
251+
</p>`
252+
253+
//string to byte.reader
254+
httpBody :=args{[]byte(reader) }
255+
256+
tests := []struct {
257+
name string
258+
args args
259+
wantI *[]string
260+
wantErr bool
261+
}{
262+
{"img",httpBody, &[]string{"http://jjjj.com"},false},
263+
}
264+
265+
for _, tt := range tests {
266+
t.Run(tt.name, func(t *testing.T) {
267+
gotI, err := GetImgSrcWithByte(tt.args.httpBody)
268+
if (err != nil) != tt.wantErr {
269+
t.Errorf("GetImgSrcWithByte() error = %v, wantErr %v", err, tt.wantErr)
270+
return
271+
}
272+
if !reflect.DeepEqual(gotI, tt.wantI) {
273+
t.Errorf("GetImgSrcWithByte() gotI = %v, want %v", gotI, tt.wantI)
274+
}
275+
})
191276
}
192277
}
193278

194279
//go test -cover -v -coverprofile=c.out
280+
281+
func TestGetTitleWithByte(t *testing.T) {
282+
type args struct {
283+
httpBody []byte
284+
}
285+
286+
page1 := "<html><header><title>test1</title></header><body></body></html>"
287+
p1 := []byte(page1)
288+
a1 := args{p1}
289+
290+
tests := []struct {
291+
name string
292+
args args
293+
wantT string
294+
wantErr bool
295+
}{
296+
// TODO: Add test cases.
297+
{"title",a1,"test1",false},
298+
}
299+
for _, tt := range tests {
300+
t.Run(tt.name, func(t *testing.T) {
301+
gotT, err := GetTitleWithByte(tt.args.httpBody)
302+
if (err != nil) != tt.wantErr {
303+
t.Errorf("GetTitleWithByte() error = %v, wantErr %v", err, tt.wantErr)
304+
return
305+
}
306+
if gotT != tt.wantT {
307+
t.Errorf("GetTitleWithByte() gotT = %v, want %v", gotT, tt.wantT)
308+
}
309+
})
310+
}
311+
}
312+
313+
func TestGetByteWithIoReader(t *testing.T) {
314+
type args struct {
315+
respBody io.Reader
316+
}
317+
318+
af :=[]byte("abc")
319+
320+
body := args{
321+
bytes.NewReader(af),
322+
}
323+
tests := []struct {
324+
name string
325+
args args
326+
want []byte
327+
wantErr bool
328+
}{
329+
// TODO: Add test cases.
330+
{"readToByte",body,[]byte{97,98,99},false},
331+
}
332+
for _, tt := range tests {
333+
t.Run(tt.name, func(t *testing.T) {
334+
got, err := GetByteWithIoReader(tt.args.respBody)
335+
if (err != nil) != tt.wantErr {
336+
t.Errorf("GetByteWithIoReader() error = %v, wantErr %v", err, tt.wantErr)
337+
return
338+
}
339+
if !reflect.DeepEqual(got, tt.want) {
340+
t.Errorf("GetByteWithIoReader() got = %v, want %v", got, tt.want)
341+
}
342+
})
343+
}
344+
}
345+
346+
//func TestGetHrefWithByte(t *testing.T) {
347+
// type args struct {
348+
// httpBody []byte
349+
// }
350+
// html :=`<p>test<a href="test.com">one</a></p>`
351+
// html1 :=`<p>test <a href="#">one</a></p>`
352+
//
353+
// h := []byte(html)
354+
// h1 := []byte(html1)
355+
// arr := args{
356+
// h,
357+
// }
358+
// arr1 := args{h1}
359+
// tests := []struct {
360+
// name string
361+
// args args
362+
// wantL *[]string
363+
// wantB bool
364+
// }{
365+
//
366+
// {"one",arr,&[]string{"test.com"},true},
367+
// {"two",arr1,&[]string{""},true},
368+
// }
369+
// for _, tt := range tests {
370+
// t.Run(tt.name, func(t *testing.T) {
371+
// gotL, gotB := GetHrefWithByte(tt.args.httpBody)
372+
// if !reflect.DeepEqual(gotL, tt.wantL) {
373+
// t.Errorf("GetHrefWithByte() gotL = %v, want %v", gotL, tt.wantL)
374+
// }
375+
// if gotB != tt.wantB {
376+
// t.Errorf("GetHrefWithByte() gotB = %v, want %v", gotB, tt.wantB)
377+
// }
378+
// })
379+
// }
380+
//}
381+
382+
func TestGetHrefWithByte1(t *testing.T) {
383+
type args struct {
384+
httpBody []byte
385+
}
386+
html :=`<p>test<a href="test.com">one</a></p>`
387+
html1 :=`<p>test <a href="#">one</a></p>`
388+
389+
h := []byte(html)
390+
h1 := []byte(html1)
391+
arr := args{
392+
h,
393+
}
394+
arr1 := args{h1}
395+
tests := []struct {
396+
name string
397+
args args
398+
wantL *[]string
399+
wantErr bool
400+
}{
401+
// TODO: Add test cases.
402+
{"one",arr,&[]string{"test.com"},false},
403+
{"two",arr1,&[]string{""},false},
404+
}
405+
for _, tt := range tests {
406+
t.Run(tt.name, func(t *testing.T) {
407+
gotL, err := GetHrefWithByte(tt.args.httpBody)
408+
if (err != nil) != tt.wantErr {
409+
t.Errorf("GetHrefWithByte() error = %v, wantErr %v", err, tt.wantErr)
410+
return
411+
}
412+
if !reflect.DeepEqual(gotL, tt.wantL) {
413+
t.Errorf("GetHrefWithByte() gotL = %v, want %v", gotL, tt.wantL)
414+
}
415+
})
416+
}
417+
}

‎coverage.out

Lines changed: 81 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,82 @@
11
mode: count
2-
github.com/gitgitcode/alink/alink.go:14.61,19.2 3 1
3-
github.com/gitgitcode/alink/alink.go:23.32,25.16 2 7
4-
github.com/gitgitcode/alink/alink.go:28.2,29.50 2 4
5-
github.com/gitgitcode/alink/alink.go:33.2,33.13 1 2
6-
github.com/gitgitcode/alink/alink.go:25.16,27.3 1 3
7-
github.com/gitgitcode/alink/alink.go:29.50,31.3 1 2
8-
github.com/gitgitcode/alink/alink.go:37.40,39.2 1 12
9-
github.com/gitgitcode/alink/alink.go:42.40,44.2 1 8
10-
github.com/gitgitcode/alink/alink.go:47.40,49.2 1 11
11-
github.com/gitgitcode/alink/alink.go:52.38,54.2 1 0
12-
github.com/gitgitcode/alink/alink.go:57.45,58.23 1 12
13-
github.com/gitgitcode/alink/alink.go:62.2,62.53 1 10
14-
github.com/gitgitcode/alink/alink.go:68.2,68.18 1 2
15-
github.com/gitgitcode/alink/alink.go:58.23,61.3 1 2
16-
github.com/gitgitcode/alink/alink.go:62.53,64.9 2 10
17-
github.com/gitgitcode/alink/alink.go:64.9,66.4 1 8
18-
github.com/gitgitcode/alink/alink.go:71.47,72.26 1 8
19-
github.com/gitgitcode/alink/alink.go:80.2,80.56 1 7
20-
github.com/gitgitcode/alink/alink.go:86.2,86.18 1 4
21-
github.com/gitgitcode/alink/alink.go:72.26,73.34 1 1
22-
github.com/gitgitcode/alink/alink.go:78.3,78.18 1 0
23-
github.com/gitgitcode/alink/alink.go:73.34,74.25 1 1
24-
github.com/gitgitcode/alink/alink.go:74.25,76.5 1 1
25-
github.com/gitgitcode/alink/alink.go:80.56,82.9 2 7
26-
github.com/gitgitcode/alink/alink.go:82.9,84.4 1 3
27-
github.com/gitgitcode/alink/alink.go:90.64,93.16 3 1
28-
github.com/gitgitcode/alink/alink.go:96.2,97.10 2 1
29-
github.com/gitgitcode/alink/alink.go:100.2,100.17 1 1
30-
github.com/gitgitcode/alink/alink.go:93.16,95.3 1 0
31-
github.com/gitgitcode/alink/alink.go:97.10,99.3 1 1
32-
github.com/gitgitcode/alink/alink.go:104.58,107.16 3 2
33-
github.com/gitgitcode/alink/alink.go:111.2,113.19 2 2
34-
github.com/gitgitcode/alink/alink.go:107.16,109.3 1 0
35-
github.com/gitgitcode/alink/alink.go:117.58,120.16 3 1
36-
github.com/gitgitcode/alink/alink.go:123.2,124.17 2 1
37-
github.com/gitgitcode/alink/alink.go:120.16,122.3 1 0
38-
github.com/gitgitcode/alink/alink.go:128.65,131.26 2 11
39-
github.com/gitgitcode/alink/alink.go:145.2,145.56 1 8
40-
github.com/gitgitcode/alink/alink.go:150.2,150.13 1 8
41-
github.com/gitgitcode/alink/alink.go:131.26,132.31 1 3
42-
github.com/gitgitcode/alink/alink.go:132.31,133.23 1 4
43-
github.com/gitgitcode/alink/alink.go:133.23,135.25 2 3
44-
github.com/gitgitcode/alink/alink.go:141.5,141.19 1 3
45-
github.com/gitgitcode/alink/alink.go:135.25,139.6 1 3
46-
github.com/gitgitcode/alink/alink.go:145.56,149.3 3 10
47-
github.com/gitgitcode/alink/alink.go:154.32,155.30 1 3
48-
github.com/gitgitcode/alink/alink.go:165.2,165.10 1 3
49-
github.com/gitgitcode/alink/alink.go:155.30,157.25 2 0
50-
github.com/gitgitcode/alink/alink.go:163.3,163.19 1 0
51-
github.com/gitgitcode/alink/alink.go:157.25,158.39 1 0
52-
github.com/gitgitcode/alink/alink.go:158.39,160.10 2 0
53-
github.com/gitgitcode/alink/alink.go:170.41,172.26 2 3
54-
github.com/gitgitcode/alink/alink.go:178.2,178.14 1 3
55-
github.com/gitgitcode/alink/alink.go:172.26,173.15 1 3
56-
github.com/gitgitcode/alink/alink.go:173.15,175.9 2 0
2+
download/alink/alink.go:21.85,24.15 2 1
3+
download/alink/alink.go:27.2,27.20 1 1
4+
download/alink/alink.go:24.15,26.3 1 1
5+
download/alink/alink.go:32.62,35.2 2 1
6+
download/alink/alink.go:39.51,42.2 2 4
7+
download/alink/alink.go:47.32,49.16 2 7
8+
download/alink/alink.go:52.2,53.50 2 4
9+
download/alink/alink.go:57.2,57.13 1 2
10+
download/alink/alink.go:49.16,51.3 1 3
11+
download/alink/alink.go:53.50,55.3 1 2
12+
download/alink/alink.go:61.40,63.2 1 18
13+
download/alink/alink.go:66.40,68.2 1 8
14+
download/alink/alink.go:71.40,73.2 1 25
15+
download/alink/alink.go:76.38,78.2 1 42
16+
download/alink/alink.go:81.48,82.23 1 18
17+
download/alink/alink.go:85.2,85.53 1 15
18+
download/alink/alink.go:91.2,91.18 1 3
19+
download/alink/alink.go:82.23,84.3 1 3
20+
download/alink/alink.go:85.53,87.9 2 15
21+
download/alink/alink.go:87.9,89.4 1 12
22+
download/alink/alink.go:95.50,96.26 1 8
23+
download/alink/alink.go:104.2,104.56 1 7
24+
download/alink/alink.go:110.2,110.18 1 4
25+
download/alink/alink.go:96.26,97.34 1 1
26+
download/alink/alink.go:102.3,102.18 1 0
27+
download/alink/alink.go:97.34,98.25 1 1
28+
download/alink/alink.go:98.25,100.5 1 1
29+
download/alink/alink.go:104.56,106.9 2 7
30+
download/alink/alink.go:106.9,108.4 1 3
31+
download/alink/alink.go:115.81,118.16 3 1
32+
download/alink/alink.go:121.2,122.10 2 1
33+
download/alink/alink.go:125.2,125.17 1 1
34+
download/alink/alink.go:118.16,120.3 1 0
35+
download/alink/alink.go:122.10,124.3 1 1
36+
download/alink/alink.go:129.63,132.16 3 2
37+
download/alink/alink.go:136.2,138.19 2 2
38+
download/alink/alink.go:132.16,134.3 1 0
39+
download/alink/alink.go:142.62,147.16 4 1
40+
download/alink/alink.go:151.2,153.19 2 1
41+
download/alink/alink.go:147.16,149.3 1 0
42+
download/alink/alink.go:157.79,160.15 3 2
43+
download/alink/alink.go:163.2,164.15 2 2
44+
download/alink/alink.go:160.15,162.3 1 0
45+
download/alink/alink.go:169.65,174.15 4 1
46+
download/alink/alink.go:177.2,178.15 2 1
47+
download/alink/alink.go:174.15,176.3 1 0
48+
download/alink/alink.go:183.69,185.23 2 42
49+
download/alink/alink.go:196.2,196.49 1 39
50+
download/alink/alink.go:204.2,204.17 1 39
51+
download/alink/alink.go:185.23,186.30 1 3
52+
download/alink/alink.go:193.3,193.18 1 3
53+
download/alink/alink.go:186.30,187.22 1 3
54+
download/alink/alink.go:187.22,188.34 1 2
55+
download/alink/alink.go:188.34,190.6 1 2
56+
download/alink/alink.go:196.49,198.8 2 39
57+
download/alink/alink.go:198.8,201.4 2 11
58+
download/alink/alink.go:209.78,212.16 3 1
59+
download/alink/alink.go:215.2,216.16 2 1
60+
download/alink/alink.go:212.16,214.3 1 0
61+
download/alink/alink.go:220.64,224.16 4 2
62+
download/alink/alink.go:227.2,228.16 2 2
63+
download/alink/alink.go:224.16,226.3 1 0
64+
download/alink/alink.go:233.66,235.26 2 25
65+
download/alink/alink.go:249.2,249.56 1 20
66+
download/alink/alink.go:254.2,254.13 1 20
67+
download/alink/alink.go:235.26,236.31 1 5
68+
download/alink/alink.go:236.31,237.23 1 6
69+
download/alink/alink.go:237.23,239.29 2 5
70+
download/alink/alink.go:245.5,245.19 1 5
71+
download/alink/alink.go:239.29,243.6 1 5
72+
download/alink/alink.go:249.56,253.3 3 22
73+
download/alink/alink.go:258.32,259.30 1 5
74+
download/alink/alink.go:269.2,269.10 1 4
75+
download/alink/alink.go:259.30,261.25 2 1
76+
download/alink/alink.go:267.3,267.19 1 1
77+
download/alink/alink.go:261.25,262.39 1 1
78+
download/alink/alink.go:262.39,264.10 2 1
79+
download/alink/alink.go:273.41,275.26 2 7
80+
download/alink/alink.go:281.2,281.14 1 7
81+
download/alink/alink.go:275.26,276.15 1 3
82+
download/alink/alink.go:276.15,278.9 2 0

0 commit comments

Comments
 (0)
Please sign in to comment.