Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6fd30ed

Browse files
committedJan 6, 2021
add youtube api tutorial
1 parent 67b84d8 commit 6fd30ed

File tree

9 files changed

+736
-0
lines changed

9 files changed

+736
-0
lines changed
 

‎README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
133133
- [How to Get Google Page Ranking in Python](https://www.thepythoncode.com/article/get-google-page-ranking-by-keyword-in-python). ([code](general/getting-google-page-ranking))
134134
- [How to Make a Telegram Bot in Python](https://www.thepythoncode.com/article/make-a-telegram-bot-in-python). ([code](general/telegram-bot))
135135
- [How to Use Gmail API in Python](https://www.thepythoncode.com/article/use-gmail-api-in-python). ([code](general/gmail-api))
136+
- [How to Use YouTube API in Python](https://www.thepythoncode.com/article/using-youtube-api-in-python). ([code](general/youtube-api))
136137

137138
- ### [Database](https://www.thepythoncode.com/topic/using-databases-in-python)
138139
- [How to Use MySQL Database in Python](https://www.thepythoncode.com/article/using-mysql-database-in-python). ([code](database/mysql-connector))

‎general/youtube-api/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# [How to Use YouTube API in Python](https://www.thepythoncode.com/article/using-youtube-api-in-python)
2+
To run this:
3+
- `pip3 install -r requirements.txt`
4+
- For complete code, use `youtube-api.ipynb`
5+
- To get video details: `video_details.py`
6+
- To get channel details: `channel_details.py`
7+
- To search by keyword: `search_by_keyword.py`
8+
- To extract comments: `comments.py`
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from utils import (
2+
youtube_authenticate,
3+
get_channel_id_by_url,
4+
get_channel_details,
5+
get_video_details,
6+
print_video_infos
7+
)
8+
9+
10+
def get_channel_videos(youtube, **kwargs):
    """Run a YouTube Data API search with caller-supplied parameters.

    Unlike `search()` in utils, this sets no default `part`, so callers
    must include it (and any filters such as `channelId`) in `kwargs`.
    """
    request = youtube.search().list(**kwargs)
    return request.execute()
14+
15+
16+
if __name__ == "__main__":
    # authenticate to YouTube API
    youtube = youtube_authenticate()
    channel_url = "https://www.youtube.com/channel/UC8butISFwT-Wl7EV0hUK0BQ"
    # get the channel ID from the URL
    channel_id = get_channel_id_by_url(youtube, channel_url)
    # get the channel details
    response = get_channel_details(youtube, id=channel_id)
    # extract channel infos
    snippet = response["items"][0]["snippet"]
    statistics = response["items"][0]["statistics"]
    # `country` is optional in the API response — not every channel sets it
    channel_country = snippet.get("country", "N/A")
    channel_description = snippet["description"]
    channel_creation_date = snippet["publishedAt"]
    channel_title = snippet["title"]
    # `subscriberCount` is omitted when the channel hides its subscriber count
    channel_subscriber_count = statistics.get("subscriberCount", "hidden")
    channel_video_count = statistics["videoCount"]
    channel_view_count = statistics["viewCount"]
    print(f"""
Title: {channel_title}
Published At: {channel_creation_date}
Description: {channel_description}
Country: {channel_country}
Number of videos: {channel_video_count}
Number of subscribers: {channel_subscriber_count}
Total views: {channel_view_count}
""")
    # the following is grabbing channel videos
    # number of pages you want to get
    n_pages = 2
    # counting number of videos grabbed
    n_videos = 0
    next_page_token = None
    for i in range(n_pages):
        params = {
            'part': 'snippet',
            'q': '',
            'channelId': channel_id,
            'type': 'video',
        }
        if next_page_token:
            params['pageToken'] = next_page_token
        res = get_channel_videos(youtube, **params)
        channel_videos = res.get("items")
        for video in channel_videos:
            n_videos += 1
            video_id = video["id"]["videoId"]
            # easily construct video URL by its ID
            video_url = f"https://www.youtube.com/watch?v={video_id}"
            video_response = get_video_details(youtube, id=video_id)
            print(f"================Video #{n_videos}================")
            # print the video details
            print_video_infos(video_response)
            print(f"Video URL: {video_url}")
            print("="*40)
        if "nextPageToken" in res:
            # proceed to the next page on the next iteration
            next_page_token = res["nextPageToken"]
        else:
            # FIX: stop when there is no further page — the original kept
            # looping and re-requested the same page
            break

‎general/youtube-api/comments.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from utils import youtube_authenticate, get_video_id_by_url, get_channel_id_by_url
2+
3+
4+
def get_comments(youtube, **kwargs):
    """Fetch one page of comment threads.

    Filtering/paging parameters (e.g. `videoId`, `pageToken`,
    `maxResults`) are forwarded verbatim to commentThreads().list.
    """
    request = youtube.commentThreads().list(part="snippet", **kwargs)
    return request.execute()
9+
10+
11+
12+
if __name__ == "__main__":
    # authenticate to YouTube API
    youtube = youtube_authenticate()
    # URL can be a channel or a video, to extract comments
    url = "https://www.youtube.com/watch?v=jNQXAC9IVRw&ab_channel=jawed"
    if "watch" in url:
        # that's a video
        video_id = get_video_id_by_url(url)
        params = {
            'videoId': video_id,
            'maxResults': 2,
            'order': 'relevance',  # default is 'time' (newest)
        }
    else:
        # should be a channel
        # FIX: get_channel_id_by_url() takes the authenticated service
        # object first; the original omitted it and crashed on this path
        channel_id = get_channel_id_by_url(youtube, url)
        params = {
            'allThreadsRelatedToChannelId': channel_id,
            'maxResults': 2,
            'order': 'relevance',  # default is 'time' (newest)
        }
    # get the first 2 pages (2 API requests)
    n_pages = 2
    for i in range(n_pages):
        # make API call to get all comments from the channel (including posts & videos)
        response = get_comments(youtube, **params)
        items = response.get("items")
        # if items is empty, break out of the loop
        if not items:
            break
        for item in items:
            top_comment = item["snippet"]["topLevelComment"]
            comment = top_comment["snippet"]["textDisplay"]
            updated_at = top_comment["snippet"]["updatedAt"]
            like_count = top_comment["snippet"]["likeCount"]
            comment_id = top_comment["id"]
            print(f"""\
Comment: {comment}
Likes: {like_count}
Updated At: {updated_at}
==================================\
""")
        if "nextPageToken" in response:
            # if there is a next page
            # add next page token to the params we pass to the function
            params["pageToken"] = response["nextPageToken"]
        else:
            # must be the end of the comments
            break
        print("*"*70)

‎general/youtube-api/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
google-api-python-client
2+
google-auth-httplib2
3+
google-auth-oauthlib
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from utils import (
2+
youtube_authenticate,
3+
get_video_details,
4+
print_video_infos,
5+
search
6+
)
7+
8+
9+
if __name__ == "__main__":
    # authenticate to YouTube API
    youtube = youtube_authenticate()
    # search for the query 'python', limited to 2 results
    response = search(youtube, q="python", maxResults=2)
    for item in response.get("items"):
        # every search result carries the video ID under its `id` object
        video_id = item["id"]["videoId"]
        # fetch the full details of that video and pretty-print them
        video_response = get_video_details(youtube, id=video_id)
        print_video_infos(video_response)
        print("=" * 50)

‎general/youtube-api/utils.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
from googleapiclient.discovery import build
2+
from google_auth_oauthlib.flow import InstalledAppFlow
3+
from google.auth.transport.requests import Request
4+
5+
import urllib.parse as p
6+
import re
7+
import os
8+
import pickle
9+
10+
SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]
11+
12+
def youtube_authenticate():
    """Build an authorized YouTube Data API v3 service object.

    Credentials cached in `token.pickle` are reused when present and
    valid; expired-but-refreshable tokens are refreshed silently, and
    otherwise the installed-app OAuth flow is run against
    `credentials.json`. New/refreshed credentials are cached for the
    next run.
    """
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    token_path = "token.pickle"
    credentials = None
    # token.pickle holds the user's access/refresh tokens; it is created
    # automatically the first time the authorization flow completes
    if os.path.exists(token_path):
        with open(token_path, "rb") as token_file:
            credentials = pickle.load(token_file)
    # no (valid) credentials available: refresh or let the user log in
    if not credentials or not credentials.valid:
        if credentials and credentials.expired and credentials.refresh_token:
            credentials.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
            credentials = flow.run_local_server(port=0)
        # save the credentials for the next run
        with open(token_path, "wb") as token_file:
            pickle.dump(credentials, token_file)
    return build("youtube", "v3", credentials=credentials)
35+
36+
37+
def get_channel_details(youtube, **kwargs):
    """Return a channels().list response with statistics, snippet and
    contentDetails parts; selectors (`id`, `forUsername`, ...) go in
    `kwargs`."""
    request = youtube.channels().list(
        part="statistics,snippet,contentDetails", **kwargs
    )
    return request.execute()
42+
43+
44+
def search(youtube, **kwargs):
    """Run a snippet-only search().list call; query parameters such as
    `q` and `maxResults` are forwarded via `kwargs`."""
    request = youtube.search().list(part="snippet", **kwargs)
    return request.execute()
49+
50+
51+
def get_video_details(youtube, **kwargs):
    """Return a videos().list response with snippet, contentDetails and
    statistics parts; selectors (typically `id`) go in `kwargs`."""
    request = youtube.videos().list(
        part="snippet,contentDetails,statistics", **kwargs
    )
    return request.execute()
56+
57+
58+
def print_video_infos(video_response):
    """Pretty-print the title, stats and duration of the first item in a
    videos().list API response.

    Statistics keys that the API omits (e.g. when comments or likes are
    disabled on the video) are printed as "N/A" instead of raising.
    """
    items = video_response.get("items")[0]
    # get the snippet, statistics & content details from the video response
    snippet = items["snippet"]
    statistics = items["statistics"]
    content_details = items["contentDetails"]
    # get infos from the snippet
    channel_title = snippet["channelTitle"]
    title = snippet["title"]
    description = snippet["description"]
    publish_time = snippet["publishedAt"]
    # get stats infos; these keys are absent when the feature is disabled
    comment_count = statistics.get("commentCount", "N/A")
    like_count = statistics.get("likeCount", "N/A")
    dislike_count = statistics.get("dislikeCount", "N/A")
    view_count = statistics.get("viewCount", "N/A")
    # get duration from content details
    duration = content_details["duration"]
    # duration is ISO-8601, e.g. 'PT5H50M15S'; parse it into '5:50:15'.
    # FIX: every component must be optional — e.g. 'PT4M' has no seconds
    # part and made the original pattern fail to match (AttributeError on
    # .groups()). Also use a raw string, not an f-string, for the regex.
    parsed_duration = re.search(r"PT(\d+H)?(\d+M)?(\d+S)?", duration).groups()
    duration_str = ""
    for d in parsed_duration:
        if d:
            # strip the trailing unit letter (H/M/S), keep the number
            duration_str += f"{d[:-1]}:"
    duration_str = duration_str.strip(":")
    print(f"""
Title: {title}
Description: {description}
Channel Title: {channel_title}
Publish time: {publish_time}
Duration: {duration_str}
Number of comments: {comment_count}
Number of likes: {like_count}
Number of dislikes: {dislike_count}
Number of views: {view_count}
""")
95+
96+
97+
def parse_channel_url(url):
    """Classify a channel `url` and extract its identifier.

    Returns a `(method, id)` tuple where `method` is one of:
    - "c":       `id` is a channel display name  (youtube.com/c/<name>)
    - "channel": `id` is a channel ID            (youtube.com/channel/<id>)
    - "user":    `id` is a legacy username       (youtube.com/user/<name>)

    Raises:
        ValueError: if the URL matches none of these forms. (The original
            silently returned None, which surfaced later as an opaque
            TypeError at the tuple-unpack site in the caller.)
    """
    path = p.urlparse(url).path
    # the identifier is always the last path segment
    id = path.split("/")[-1]
    if "/c/" in path:
        return "c", id
    elif "/channel/" in path:
        return "channel", id
    elif "/user/" in path:
        return "user", id
    raise ValueError(f"Unrecognized channel URL: {url}")
110+
111+
112+
def get_channel_id_by_url(youtube, url):
    """Resolve a channel `url` to a channel ID.

    The URL may embed the channel ID directly (/channel/), a legacy
    username (/user/) resolved via the API, or a display name (/c/)
    resolved via a search — the last one may be inaccurate.

    Raises:
        Exception: when the API lookup yields no matching channel.
    """
    # classify the URL and pull out the identifier part
    method, id = parse_channel_url(url)
    if method == "channel":
        # already a channel ID — nothing to resolve
        return id
    if method == "user":
        # legacy username: ask the API for the matching channel
        response = get_channel_details(youtube, forUsername=id)
        items = response.get("items")
        if items:
            return items[0].get("id")
    elif method == "c":
        # display name: fall back to a search (may be inaccurate)
        response = search(youtube, q=id, maxResults=1)
        items = response.get("items")
        if items:
            return items[0]["snippet"]["channelId"]
    raise Exception(f"Cannot find ID:{id} with {method} method")
141+
142+
143+
def get_video_id_by_url(url):
    """Extract the video ID from a YouTube watch URL.

    Raises:
        Exception: when the URL carries no `v` query parameter.
    """
    # the video ID lives in the `v` query-string parameter
    query = p.urlparse(url).query
    ids = p.parse_qs(query).get("v")
    if not ids:
        raise Exception(f"Wasn't able to parse video URL: {url}")
    return ids[0]

‎general/youtube-api/video_details.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from utils import (
2+
youtube_authenticate,
3+
get_video_id_by_url,
4+
get_video_details,
5+
print_video_infos
6+
)
7+
8+
9+
if __name__ == "__main__":
    # authenticate to YouTube API
    youtube = youtube_authenticate()
    video_url = "https://www.youtube.com/watch?v=jNQXAC9IVRw&ab_channel=jawed"
    # pull the video ID out of the URL, query the API with it,
    # then pretty-print the returned details
    video_id = get_video_id_by_url(video_url)
    response = get_video_details(youtube, id=video_id)
    print_video_infos(response)

‎general/youtube-api/youtube-api.ipynb

Lines changed: 396 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,396 @@
1+
{
2+
"metadata": {
3+
"language_info": {
4+
"codemirror_mode": {
5+
"name": "ipython",
6+
"version": 3
7+
},
8+
"file_extension": ".py",
9+
"mimetype": "text/x-python",
10+
"name": "python",
11+
"nbconvert_exporter": "python",
12+
"pygments_lexer": "ipython3",
13+
"version": "3.8.7-final"
14+
},
15+
"orig_nbformat": 2,
16+
"kernelspec": {
17+
"name": "python36664bitea6884f10f474b21a2a2f022451e0d09",
18+
"display_name": "Python 3.6.6 64-bit",
19+
"language": "python"
20+
}
21+
},
22+
"nbformat": 4,
23+
"nbformat_minor": 2,
24+
"cells": [
25+
{
26+
"cell_type": "code",
27+
"execution_count": null,
28+
"metadata": {},
29+
"outputs": [],
30+
"source": [
31+
"from googleapiclient.discovery import build\n",
32+
"from google_auth_oauthlib.flow import InstalledAppFlow\n",
33+
"from google.auth.transport.requests import Request\n",
34+
"\n",
35+
"import urllib.parse as p\n",
36+
"import re\n",
37+
"import os\n",
38+
"import pickle\n",
39+
"\n",
40+
"SCOPES = [\"https://www.googleapis.com/auth/youtube.force-ssl\"]"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": null,
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"def youtube_authenticate():\n",
50+
" os.environ[\"OAUTHLIB_INSECURE_TRANSPORT\"] = \"1\"\n",
51+
" api_service_name = \"youtube\"\n",
52+
" api_version = \"v3\"\n",
53+
" client_secrets_file = \"credentials.json\"\n",
54+
" creds = None\n",
55+
" # the file token.pickle stores the user's access and refresh tokens, and is\n",
56+
" # created automatically when the authorization flow completes for the first time\n",
57+
" if os.path.exists(\"token.pickle\"):\n",
58+
" with open(\"token.pickle\", \"rb\") as token:\n",
59+
" creds = pickle.load(token)\n",
60+
" # if there are no (valid) credentials available, let the user log in.\n",
61+
" if not creds or not creds.valid:\n",
62+
" if creds and creds.expired and creds.refresh_token:\n",
63+
" creds.refresh(Request())\n",
64+
" else:\n",
65+
" flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, SCOPES)\n",
66+
" creds = flow.run_local_server(port=0)\n",
67+
" # save the credentials for the next run\n",
68+
" with open(\"token.pickle\", \"wb\") as token:\n",
69+
" pickle.dump(creds, token)\n",
70+
"\n",
71+
" return build(api_service_name, api_version, credentials=creds)\n",
72+
"\n",
73+
"# authenticate to YouTube API\n",
74+
"youtube = youtube_authenticate()"
75+
]
76+
},
77+
{
78+
"cell_type": "code",
79+
"execution_count": null,
80+
"metadata": {},
81+
"outputs": [],
82+
"source": [
83+
"def get_video_id_by_url(url):\n",
84+
" \"\"\"\n",
85+
" Return the Video ID from the video `url`\n",
86+
" \"\"\"\n",
87+
" # split URL parts\n",
88+
" parsed_url = p.urlparse(url)\n",
89+
" # get the video ID by parsing the query of the URL\n",
90+
" video_id = p.parse_qs(parsed_url.query).get(\"v\")\n",
91+
" if video_id:\n",
92+
" return video_id[0]\n",
93+
" else:\n",
94+
" raise Exception(f\"Wasn't able to parse video URL: {url}\")\n",
95+
"\n",
96+
"def get_video_details(youtube, **kwargs):\n",
97+
" return youtube.videos().list(\n",
98+
" part=\"snippet,contentDetails,statistics\",\n",
99+
" **kwargs\n",
100+
" ).execute()"
101+
]
102+
},
103+
{
104+
"cell_type": "code",
105+
"execution_count": null,
106+
"metadata": {},
107+
"outputs": [],
108+
"source": [
109+
"def print_video_infos(video_response):\n",
110+
" items = video_response.get(\"items\")[0]\n",
111+
" # get the snippet, statistics & content details from the video response\n",
112+
" snippet = items[\"snippet\"]\n",
113+
" statistics = items[\"statistics\"]\n",
114+
" content_details = items[\"contentDetails\"]\n",
115+
" # get infos from the snippet\n",
116+
" channel_title = snippet[\"channelTitle\"]\n",
117+
" title = snippet[\"title\"]\n",
118+
" description = snippet[\"description\"]\n",
119+
" publish_time = snippet[\"publishedAt\"]\n",
120+
" # get stats infos\n",
121+
" comment_count = statistics[\"commentCount\"]\n",
122+
" like_count = statistics[\"likeCount\"]\n",
123+
" dislike_count = statistics[\"dislikeCount\"]\n",
124+
" view_count = statistics[\"viewCount\"]\n",
125+
" # get duration from content details\n",
126+
" duration = content_details[\"duration\"]\n",
127+
" # duration in the form of something like 'PT5H50M15S'\n",
128+
" # parsing it to be something like '5:50:15'\n",
129+
" parsed_duration = re.search(f\"PT(\\d+H)?(\\d+M)?(\\d+S)\", duration).groups()\n",
130+
" duration_str = \"\"\n",
131+
" for d in parsed_duration:\n",
132+
" if d:\n",
133+
" duration_str += f\"{d[:-1]}:\"\n",
134+
" duration_str = duration_str.strip(\":\")\n",
135+
" print(f\"\"\"\\\n",
136+
" Title: {title}\n",
137+
" Description: {description}\n",
138+
" Channel Title: {channel_title}\n",
139+
" Publish time: {publish_time}\n",
140+
" Duration: {duration_str}\n",
141+
" Number of comments: {comment_count}\n",
142+
" Number of likes: {like_count}\n",
143+
" Number of dislikes: {dislike_count}\n",
144+
" Number of views: {view_count}\n",
145+
" \"\"\")"
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": null,
151+
"metadata": {},
152+
"outputs": [],
153+
"source": [
154+
"video_url = \"https://www.youtube.com/watch?v=jNQXAC9IVRw&ab_channel=jawed\"\n",
155+
"# parse video ID from URL\n",
156+
"video_id = get_video_id_by_url(video_url)\n",
157+
"# make API call to get video info\n",
158+
"response = get_video_details(youtube, id=video_id)\n",
159+
"# print extracted video infos\n",
160+
"print_video_infos(response)"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": null,
166+
"metadata": {},
167+
"outputs": [],
168+
"source": [
169+
"def search(youtube, **kwargs):\n",
170+
" return youtube.search().list(\n",
171+
" part=\"snippet\",\n",
172+
" **kwargs\n",
173+
" ).execute()\n",
174+
"\n",
175+
"# search for the query 'python' and retrieve 2 items only\n",
176+
"response = search(youtube, q=\"python\", maxResults=2)\n",
177+
"items = response.get(\"items\")\n",
178+
"for item in items:\n",
179+
" # get the video ID\n",
180+
" video_id = item[\"id\"][\"videoId\"]\n",
181+
" # get the video details\n",
182+
" video_response = get_video_details(youtube, id=video_id)\n",
183+
" # print the video details\n",
184+
" print_video_infos(video_response)\n",
185+
" print(\"=\"*50)"
186+
]
187+
},
188+
{
189+
"cell_type": "code",
190+
"execution_count": null,
191+
"metadata": {},
192+
"outputs": [],
193+
"source": [
194+
"def parse_channel_url(url):\n",
195+
" \"\"\"\n",
196+
" This function takes channel `url` to check whether it includes a\n",
197+
" channel ID, user ID or channel name\n",
198+
" \"\"\"\n",
199+
" path = p.urlparse(url).path\n",
200+
" id = path.split(\"/\")[-1]\n",
201+
" if \"/c/\" in path:\n",
202+
" return \"c\", id\n",
203+
" elif \"/channel/\" in path:\n",
204+
" return \"channel\", id\n",
205+
" elif \"/user/\" in path:\n",
206+
" return \"user\", id\n",
207+
"\n",
208+
"\n",
209+
"def get_channel_id_by_url(youtube, url):\n",
210+
" \"\"\"\n",
211+
" Returns channel ID of a given `id` and `method`\n",
212+
" - `method` (str): can be 'c', 'channel', 'user'\n",
213+
" - `id` (str): if method is 'c', then `id` is display name\n",
214+
" if method is 'channel', then it's channel id\n",
215+
" if method is 'user', then it's username\n",
216+
" \"\"\"\n",
217+
" # parse the channel URL\n",
218+
" method, id = parse_channel_url(url)\n",
219+
" if method == \"channel\":\n",
220+
" # if it's a channel ID, then just return it\n",
221+
" return id\n",
222+
" elif method == \"user\":\n",
223+
" # if it's a user ID, make a request to get the channel ID\n",
224+
" response = get_channel_details(youtube, forUsername=id)\n",
225+
" items = response.get(\"items\")\n",
226+
" if items:\n",
227+
" channel_id = items[0].get(\"id\")\n",
228+
" return channel_id\n",
229+
" elif method == \"c\":\n",
230+
" # if it's a channel name, search for the channel using the name\n",
231+
" # may be inaccurate\n",
232+
" response = search(youtube, q=id, maxResults=1)\n",
233+
" items = response.get(\"items\")\n",
234+
" if items:\n",
235+
" channel_id = items[0][\"snippet\"][\"channelId\"]\n",
236+
" return channel_id\n",
237+
" raise Exception(f\"Cannot find ID:{id} with {method} method\")"
238+
]
239+
},
240+
{
241+
"cell_type": "code",
242+
"execution_count": null,
243+
"metadata": {},
244+
"outputs": [],
245+
"source": [
246+
"def get_channel_videos(youtube, **kwargs):\n",
247+
" return youtube.search().list(\n",
248+
" **kwargs\n",
249+
" ).execute()\n",
250+
"\n",
251+
"\n",
252+
"def get_channel_details(youtube, **kwargs):\n",
253+
" return youtube.channels().list(\n",
254+
" part=\"statistics,snippet,contentDetails\",\n",
255+
" **kwargs\n",
256+
" ).execute()"
257+
]
258+
},
259+
{
260+
"cell_type": "code",
261+
"execution_count": null,
262+
"metadata": {},
263+
"outputs": [],
264+
"source": [
265+
"channel_url = \"https://www.youtube.com/channel/UC8butISFwT-Wl7EV0hUK0BQ\"\n",
266+
"# get the channel ID from the URL\n",
267+
"channel_id = get_channel_id_by_url(youtube, channel_url)\n",
268+
"# get the channel details\n",
269+
"response = get_channel_details(youtube, id=channel_id)\n",
270+
"# extract channel infos\n",
271+
"snippet = response[\"items\"][0][\"snippet\"]\n",
272+
"statistics = response[\"items\"][0][\"statistics\"]\n",
273+
"channel_country = snippet[\"country\"]\n",
274+
"channel_description = snippet[\"description\"]\n",
275+
"channel_creation_date = snippet[\"publishedAt\"]\n",
276+
"channel_title = snippet[\"title\"]\n",
277+
"channel_subscriber_count = statistics[\"subscriberCount\"]\n",
278+
"channel_video_count = statistics[\"videoCount\"]\n",
279+
"channel_view_count = statistics[\"viewCount\"]\n",
280+
"print(f\"\"\"\n",
281+
"Title: {channel_title}\n",
282+
"Published At: {channel_creation_date}\n",
283+
"Description: {channel_description}\n",
284+
"Country: {channel_country}\n",
285+
"Number of videos: {channel_video_count}\n",
286+
"Number of subscribers: {channel_subscriber_count}\n",
287+
"Total views: {channel_view_count}\n",
288+
"\"\"\")\n",
289+
"# the following is grabbing channel videos\n",
290+
"# number of pages you want to get\n",
291+
"n_pages = 2\n",
292+
"# counting number of videos grabbed\n",
293+
"n_videos = 0\n",
294+
"next_page_token = None\n",
295+
"for i in range(n_pages):\n",
296+
" params = {\n",
297+
" 'part': 'snippet',\n",
298+
" 'q': '',\n",
299+
" 'channelId': channel_id,\n",
300+
" 'type': 'video',\n",
301+
" }\n",
302+
" if next_page_token:\n",
303+
" params['pageToken'] = next_page_token\n",
304+
" res = get_channel_videos(youtube, **params)\n",
305+
" channel_videos = res.get(\"items\")\n",
306+
" for video in channel_videos:\n",
307+
" n_videos += 1\n",
308+
" video_id = video[\"id\"][\"videoId\"]\n",
309+
" # easily construct video URL by its ID\n",
310+
" video_url = f\"https://www.youtube.com/watch?v={video_id}\"\n",
311+
" video_response = get_video_details(youtube, id=video_id)\n",
312+
" print(f\"================Video #{n_videos}================\")\n",
313+
" # print the video details\n",
314+
" print_video_infos(video_response)\n",
315+
" print(f\"Video URL: {video_url}\")\n",
316+
" print(\"=\"*40)\n",
317+
" print(\"*\"*100)\n",
318+
" # if there is a next page, then add it to our parameters\n",
319+
" # to proceed to the next page\n",
320+
" if \"nextPageToken\" in res:\n",
321+
" next_page_token = res[\"nextPageToken\"]"
322+
]
323+
},
324+
{
325+
"cell_type": "code",
326+
"execution_count": null,
327+
"metadata": {},
328+
"outputs": [],
329+
"source": [
330+
"def get_comments(youtube, **kwargs):\n",
331+
" return youtube.commentThreads().list(\n",
332+
" part=\"snippet\",\n",
333+
" **kwargs\n",
334+
" ).execute()"
335+
]
336+
},
337+
{
338+
"cell_type": "code",
339+
"execution_count": null,
340+
"metadata": {},
341+
"outputs": [],
342+
"source": [
343+
"# URL can be a channel or a video, to extract comments\n",
344+
"url = \"https://www.youtube.com/watch?v=jNQXAC9IVRw&ab_channel=jawed\"\n",
345+
"# parameters to send to commentThreads API endpoint\n",
346+
"params = {\n",
347+
" 'maxResults': 2,\n",
348+
" 'order': 'relevance', # default is 'time' (newest)\n",
349+
"}\n",
350+
"if \"watch\" in url:\n",
351+
" # that's a video\n",
352+
" video_id = get_video_id_by_url(url)\n",
353+
" params['videoId'] = video_id\n",
354+
"else:\n",
355+
" # should be a channel\n",
356+
" channel_id = get_channel_id_by_url(url)\n",
357+
" params['allThreadsRelatedToChannelId'] = channel_id\n",
358+
"# get the first 2 pages (2 API requests)\n",
359+
"n_pages = 2\n",
360+
"for i in range(n_pages):\n",
361+
" # make API call to get all comments from the channel (including posts & videos)\n",
362+
" response = get_comments(youtube, **params)\n",
363+
" items = response.get(\"items\")\n",
364+
" # if items is empty, breakout of the loop\n",
365+
" if not items:\n",
366+
" break\n",
367+
" for item in items:\n",
368+
" comment = item[\"snippet\"][\"topLevelComment\"][\"snippet\"][\"textDisplay\"]\n",
369+
" updated_at = item[\"snippet\"][\"topLevelComment\"][\"snippet\"][\"updatedAt\"]\n",
370+
" like_count = item[\"snippet\"][\"topLevelComment\"][\"snippet\"][\"likeCount\"]\n",
371+
" comment_id = item[\"snippet\"][\"topLevelComment\"][\"id\"]\n",
372+
" print(f\"\"\"\\\n",
373+
" Comment: {comment}\n",
374+
" Likes: {like_count}\n",
375+
" Updated At: {updated_at}\n",
376+
" ==================================\\\n",
377+
" \"\"\")\n",
378+
" if \"nextPageToken\" in response:\n",
379+
" # if there is a next page\n",
380+
" # add next page token to the params we pass to the function\n",
381+
" params[\"pageToken\"] = response[\"nextPageToken\"]\n",
382+
" else:\n",
383+
" # must be end of comments!!!!\n",
384+
" break\n",
385+
" print(\"*\"*70)"
386+
]
387+
},
388+
{
389+
"cell_type": "code",
390+
"execution_count": null,
391+
"metadata": {},
392+
"outputs": [],
393+
"source": []
394+
}
395+
]
396+
}

0 commit comments

Comments
 (0)
Please sign in to comment.