Skip to content

Commit 7166636

Browse files
committed
day-066 BeautifulSoup code
1 parent fa71660 commit 7166636

File tree

2 files changed

+121
-0
lines changed

2 files changed

+121
-0
lines changed

.DS_Store

-78 KB
Binary file not shown.

day-066/BeautifulSoup-demo-02.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
from bs4 import BeautifulSoup
2+
3+
# Sample HTML document shared by every demo below: a title paragraph, a
# "main" paragraph holding three links (each with class "website" and a
# unique id), a div containing only a comment, and two content paragraphs.
html_doc = """
<html><head><title>index</title></head>
<body>
<p class="title"><b>首页</b></p>
<p class="main">我常用的网站
<a href="https://www.google.com" class="website" id="google">Google</a>
<a href="https://www.baidu.com" class="website" id="baidu">Baidu</a>
<a href="https://cn.bing.com" class="website" id="bing">Bing</a>
</p>
<div><!--这是注释内容--></div>
<p class="content1">...</p>
<p class="content2">...</p>
</body>
"""
17+
18+
# demo 1: parse html_doc with the lxml parser and search by tag name ('b').
soup = BeautifulSoup(html_doc, "lxml")
tags = soup.find_all('b')
print(tags)
22+
23+
24+
# demo 2: pass a compiled regular expression ("^b") to find_all and print
# the name of each tag it returns.
import re
for tag in soup.find_all(re.compile("^b")):
    print(tag.name)
28+
29+
30+
# demo 3: pass a list of tag names ('a' and 'b') to find_all and print
# each returned tag.
for tag in soup.find_all(['a', 'b']):
    print(tag)
33+
34+
35+
# demo 4: pass True to find_all and print each returned tag's name.
# The names are written on one line separated by ', ' (no trailing newline).
for tag in soup.find_all(True):
    print(tag.name, end=', ')
38+
39+
40+
# demo 5
41+
def has_id_class(tag):
    """Return True when *tag* carries both an ``id`` and a ``class`` attribute.

    Used as a filter function for ``soup.find_all``; *tag* only needs to
    provide a ``has_attr(name)`` method.
    """
    return tag.has_attr('id') and tag.has_attr('class')
43+
44+
# Use the has_id_class predicate as the find_all filter and print each
# tag that is returned.
tags = soup.find_all(has_id_class)
for tag in tags:
    print(tag)
47+
48+
49+
# demo 6: filter on the id attribute via a keyword argument.
# First an exact id value, printing the href of the first result ...
tags = soup.find_all(id='google')
print(tags[0]['href'])

# ... then id=True, printing the href of every returned tag. In html_doc
# the only elements with an id are the three <a> links, which all have href.
for tag in soup.find_all(id=True):
    print(tag['href'])
55+
56+
57+
# demo 7: filter <a> tags by CSS class. The keyword is spelled class_
# because `class` is a reserved word in Python.
tags = soup.find_all("a", class_="website")
for tag in tags:
    print(tag['href'])
61+
62+
def has_seven_characters(css_class):
    """Return True when *css_class* is not None and is exactly 7 long.

    Used as a ``class_`` filter for ``soup.find_all``; the None check guards
    the ``len`` call.
    """
    return css_class is not None and len(css_class) == 7
64+
65+
# Filter on class with a predicate instead of a literal value and print the
# id of each match. In html_doc the only 7-character class is "website",
# and every element carrying it also has an id.
for tag in soup.find_all(class_=has_seven_characters):
    print(tag['id'])
67+
68+
69+
# demo 8: search a tag whose class attribute lists two values
# ("body strikeout") using just one of them.
css_soup = BeautifulSoup('<p class="body strikeout"></p>', 'lxml')
tags = css_soup.find_all("p", class_="strikeout")
print(tags)
73+
74+
75+
# demo 9: search by text content. `string=` is the current spelling of the
# argument formerly named `text=`; the old name is kept by Beautiful Soup
# only for backward compatibility.
tags = soup.find_all(string="Google")
print("google : ", tags)

# A list of candidate strings.
tags = soup.find_all(string=["Baidu", "Bing"])
print("baidu & bing : ", tags)

# Combine a tag name with a string filter.
tags = soup.find_all('a', string="Google")
print("a[text=google] : ", tags)
84+
85+
86+
# demo 10: the limit and recursive arguments of find_all.
# NOTE(review): `tag` holds the whole find_all result (limited to one
# match), not a single element; the name is kept as in the original script.
tag = soup.find_all("a", limit=1)
print(tag)

# NOTE(review): recursive=False presumably restricts the search to direct
# children of `soup`; the <p> elements in html_doc are nested under <body>,
# so confirm that the result printed here is the intended demonstration.
tags = soup.find_all("p", recursive=False)
print(tags)
92+
93+
94+
# demo 11: CSS selector "body a" (descendant combinator); print the href of
# each selected tag.
tags = soup.select("body a")
for tag in tags:
    print(tag['href'])
98+
99+
100+
# demo 12: CSS child combinator (">"), first with a tag name ...
tags = soup.select("p > a")
print(tags)

# ... then with an id selector on the right-hand side.
tags = soup.select("p > #google")
print(tags)
106+
107+
108+
# demo 13: CSS class selector ".website"; print the .string of each
# selected tag.
tags = soup.select(".website")
for tag in tags:
    print(tag.string)
112+
113+
114+
# demo 14: CSS id selector "#google".
tags = soup.select("#google")
print(tags)
117+
118+
119+
# demo 15: CSS attribute selector matching an exact href value.
tags = soup.select('a[href="https://cn.bing.com"]')
print(tags)

0 commit comments

Comments
 (0)