Skip to content

Commit 302abcd

Browse files
authored
Merge branch 'practical-nlp:master' into master
2 parents f28a883 + 83bed9c commit 302abcd

6 files changed

+383
-5394
lines changed

Ch3/06_Training_embeddings_using_gensim.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@
425425
"\n",
426426
"# load model\n",
427427
"new_model_skipgram = Word2Vec.load('model_skipgram.bin')\n",
428-
"print(model_skipgram)"
428+
"print(new_model_skipgram)"
429429
]
430430
},
431431
{
@@ -838,7 +838,7 @@
838838
"outputs": [],
839839
"source": [
840840
"# save model\n",
841-
"word2vec_cbow.wv.save_word2vec_format('word2vec_sg.bin', binary=True)\n",
841+
"word2vec_skipgram.wv.save_word2vec_format('word2vec_sg.bin', binary=True)\n",
842842
"\n",
843843
"# load model\n",
844844
"# new_model_skipgram = Word2Vec.load('model_skipgram.bin')\n",

Ch8/01_WordCloud.ipynb

Lines changed: 116 additions & 147 deletions
Large diffs are not rendered by default.

Ch8/02_DifferentTokenizers.ipynb

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 0,
5+
"execution_count": 1,
66
"metadata": {
77
"colab": {
88
"base_uri": "https://localhost:8080/",
@@ -19,23 +19,36 @@
1919
"text": [
2020
"Collecting twikenizer\n",
2121
" Downloading https://files.pythonhosted.org/packages/d2/51/7aee33630b948f0716efae7a96c4fd8f859b348694058c380fd899a4227e/twikenizer-1.0.tar.gz\n",
22-
"Building wheels for collected packages: twikenizer\n",
23-
" Building wheel for twikenizer (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
24-
" Created wheel for twikenizer: filename=twikenizer-1.0-cp36-none-any.whl size=4853 sha256=69c74c1ea248414aa18ccbf0d8f0b704cb204a0af7fd1a75d3821a783b301b83\n",
25-
" Stored in directory: /root/.cache/pip/wheels/34/5d/df/2bf827d7e3c6bda2b2dcdd978f975afd43ef400784c9507675\n",
26-
"Successfully built twikenizer\n",
2722
"Installing collected packages: twikenizer\n",
28-
"Successfully installed twikenizer-1.0\n",
23+
" Running setup.py install for twikenizer: started\n",
24+
" Running setup.py install for twikenizer: finished with status 'done'\n",
25+
"Successfully installed twikenizer-1.0\n"
26+
]
27+
},
28+
{
29+
"name": "stderr",
30+
"output_type": "stream",
31+
"text": [
32+
"You are using pip version 19.0.3, however version 21.1.2 is available.\n",
33+
"You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n"
34+
]
35+
},
36+
{
37+
"name": "stdout",
38+
"output_type": "stream",
39+
"text": [
2940
"Collecting emoji\n",
30-
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/8d/521be7f0091fe0f2ae690cc044faf43e3445e0ff33c574eae752dd7e39fa/emoji-0.5.4.tar.gz (43kB)\n",
31-
"\u001b[K |████████████████████████████████| 51kB 1.7MB/s \n",
32-
"\u001b[?25hBuilding wheels for collected packages: emoji\n",
33-
" Building wheel for emoji (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
34-
" Created wheel for emoji: filename=emoji-0.5.4-cp36-none-any.whl size=42176 sha256=13fd92618dda624b5bf58512c35020d306717a1ffbb07b067cea5ef79ca2ba2e\n",
35-
" Stored in directory: /root/.cache/pip/wheels/2a/a9/0a/4f8e8cce8074232aba240caca3fade315bb49fac68808d1a9c\n",
36-
"Successfully built emoji\n",
41+
" Downloading https://files.pythonhosted.org/packages/24/fa/b3368f41b95a286f8d300e323449ab4e86b85334c2e0b477e94422b8ed0f/emoji-1.2.0-py3-none-any.whl (131kB)\n",
3742
"Installing collected packages: emoji\n",
38-
"Successfully installed emoji-0.5.4\n"
43+
"Successfully installed emoji-1.2.0\n"
44+
]
45+
},
46+
{
47+
"name": "stderr",
48+
"output_type": "stream",
49+
"text": [
50+
"You are using pip version 19.0.3, however version 21.1.2 is available.\n",
51+
"You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n"
3952
]
4053
}
4154
],
@@ -46,7 +59,7 @@
4659
},
4760
{
4861
"cell_type": "code",
49-
"execution_count": 0,
62+
"execution_count": 2,
5063
"metadata": {
5164
"colab": {},
5265
"colab_type": "code",
@@ -59,7 +72,7 @@
5972
},
6073
{
6174
"cell_type": "code",
62-
"execution_count": 0,
75+
"execution_count": 3,
6376
"metadata": {
6477
"colab": {
6578
"base_uri": "https://localhost:8080/",
@@ -86,7 +99,7 @@
8699
},
87100
{
88101
"cell_type": "code",
89-
"execution_count": 0,
102+
"execution_count": 5,
90103
"metadata": {
91104
"colab": {
92105
"base_uri": "https://localhost:8080/",
@@ -132,7 +145,7 @@
132145
"name": "python",
133146
"nbconvert_exporter": "python",
134147
"pygments_lexer": "ipython3",
135-
"version": "3.6.10"
148+
"version": "3.7.4"
136149
}
137150
},
138151
"nbformat": 4,

Ch8/03_TrendingTopics.ipynb

Lines changed: 131 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -1,115 +1,108 @@
11
{
2-
"nbformat": 4,
3-
"nbformat_minor": 0,
4-
"metadata": {
5-
"colab": {
6-
"name": "TrendingTopics.ipynb",
7-
"provenance": [],
8-
"collapsed_sections": []
9-
},
10-
"kernelspec": {
11-
"name": "python3",
12-
"display_name": "Python 3"
13-
}
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {
6+
"colab_type": "text",
7+
"id": "6IM1xuTnGlPs"
8+
},
9+
"source": [
10+
"# Trending Topics\n",
11+
"\n",
12+
"## In this notebook we show you how to access the most trending topics in a particular location. \n",
13+
"### pre-requisites: Twitter Developer Account. If you dont have one, dont worry it is free to obtain all you need to do is visit this link and follow the setup process explained [here](https://cran.r-project.org/web/packages/rtweet/vignettes/auth.html)."
14+
]
1415
},
15-
"cells": [
16-
{
17-
"cell_type": "markdown",
18-
"metadata": {
19-
"id": "6IM1xuTnGlPs",
20-
"colab_type": "text"
21-
},
22-
"source": [
23-
"#Trending Topics\n",
24-
"\n",
25-
"##In this notebook we show you how to access the most trending topics in a particular locatoin. \n",
26-
"###pre-requisites: Twitter Developer Account. If you dont have one, dont worry it is free to obtain all you need to do is visit this link and follow the setup process explained [here](https://cran.r-project.org/web/packages/rtweet/vignettes/auth.html)."
27-
]
28-
},
29-
{
30-
"cell_type": "code",
31-
"metadata": {
32-
"id": "SSKP1W_7p7vL",
33-
"colab_type": "code",
34-
"colab": {}
35-
},
36-
"source": [
37-
"import tweepy, json"
38-
],
39-
"execution_count": 0,
40-
"outputs": []
41-
},
42-
{
43-
"cell_type": "code",
44-
"metadata": {
45-
"id": "toSpm2I4BeTH",
46-
"colab_type": "code",
47-
"colab": {}
48-
},
49-
"source": [
50-
"#setting all the credentials of your twitter developer account\n",
51-
"CONSUMER_KEY= 'insert your customer key'\n",
52-
"CONSUMER_SECRET= 'insert your customer secrect key'\n",
53-
"ACCESS_KEY= 'insert your access key here'\n",
54-
"ACCESS_SECRET= 'insert your access secret key here'"
55-
],
56-
"execution_count": 0,
57-
"outputs": []
58-
},
59-
{
60-
"cell_type": "code",
61-
"metadata": {
62-
"id": "oiNcCJPiBfDW",
63-
"colab_type": "code",
64-
"colab": {}
65-
},
66-
"source": [
67-
"#granting access to your twitter account\n",
68-
"auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)\n",
69-
"auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)\n",
70-
"api = tweepy.API(auth)"
71-
],
72-
"execution_count": 0,
73-
"outputs": []
16+
{
17+
"cell_type": "code",
18+
"execution_count": 2,
19+
"metadata": {
20+
"colab": {},
21+
"colab_type": "code",
22+
"id": "SSKP1W_7p7vL"
23+
},
24+
"outputs": [],
25+
"source": [
26+
"import tweepy, json"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"execution_count": 3,
32+
"metadata": {
33+
"colab": {},
34+
"colab_type": "code",
35+
"id": "toSpm2I4BeTH"
36+
},
37+
"outputs": [],
38+
"source": [
39+
"# Setting all the credentials of your twitter developer account.\n",
40+
"\n",
41+
"CONSUMER_KEY= 'insert your customer key'\n",
42+
"CONSUMER_SECRET= 'insert your customer secrect key'\n",
43+
"ACCESS_KEY= 'insert your access key here'\n",
44+
"ACCESS_SECRET= 'insert your access secret key here'"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": 4,
50+
"metadata": {
51+
"colab": {},
52+
"colab_type": "code",
53+
"id": "oiNcCJPiBfDW"
54+
},
55+
"outputs": [],
56+
"source": [
57+
"# Granting access to your twitter account.\n",
58+
"\n",
59+
"auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)\n",
60+
"auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)\n",
61+
"api = tweepy.API(auth)"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": null,
67+
"metadata": {
68+
"colab": {
69+
"base_uri": "https://localhost:8080/",
70+
"height": 35
7471
},
75-
{
76-
"cell_type": "code",
77-
"metadata": {
78-
"id": "wAM5FWYPBix2",
79-
"colab_type": "code",
80-
"colab": {
81-
"base_uri": "https://localhost:8080/",
82-
"height": 35
83-
},
84-
"outputId": "d474a149-e0dc-440f-a723-c273f0a112ae"
85-
},
86-
"source": [
87-
"#A WOEID (Where On Earth IDentifier) is a unique 32-bit reference identifier\n",
88-
"\n",
89-
"WORLD_WOE_ID =1\n",
90-
"BRAZIL_WOE_ID = 23424768\n",
91-
"\n",
92-
"#fetching trending topics in brazil \n",
93-
"brazil_trends = api.trends_place(BRAZIL_WOE_ID)\n",
94-
"trends = json.loads(json.dumps(brazil_trends, indent=1))\n",
95-
" \n",
96-
"brazil_trend_list=[] \n",
97-
"for trend in trends[0][\"trends\"]:\n",
98-
"\tbrazil_trend_list.append(trend[\"name\"].strip(\"#\"))\n",
99-
"\n",
100-
"#fetching trending topics in the world\n",
101-
"world_trends = api.trends_place(WORLD_WOE_ID)\n",
102-
"world_trends_json = json.loads(json.dumps(world_trends, indent=1))\n",
103-
"\n",
104-
"world_trend_list=[] \n",
105-
"for trend in world_trends_json[0][\"trends\"]:\n",
106-
"\tworld_trend_list.append(trend[\"name\"].strip(\"#\"))\n",
107-
" \n",
108-
"#printing the topics which are trending in both WORLD and BRAZIL \n",
109-
"print(set(world_trend_list).intersection(set(brazil_trend_list)))"
110-
],
111-
"execution_count": 29,
112-
"outputs": [
72+
"colab_type": "code",
73+
"id": "wAM5FWYPBix2",
74+
"outputId": "d474a149-e0dc-440f-a723-c273f0a112ae"
75+
},
76+
"outputs": [],
77+
"source": [
78+
"# A WOEID (Where On Earth IDentifier) is a unique 32-bit reference identifier.\n",
79+
"\n",
80+
"WORLD_WOE_ID =1\n",
81+
"BRAZIL_WOE_ID = 23424768\n",
82+
"\n",
83+
"# Fetching trending topics in BraziL.\n",
84+
"\n",
85+
"brazil_trends = api.trends_place(BRAZIL_WOE_ID)\n",
86+
"trends = json.loads(json.dumps(brazil_trends, indent=1))\n",
87+
" \n",
88+
"brazil_trend_list=[] \n",
89+
"for trend in trends[0][\"trends\"]:\n",
90+
"\tbrazil_trend_list.append(trend[\"name\"].strip(\"#\"))\n",
91+
"\n",
92+
"# Fetching trending topics in the world.\n",
93+
"world_trends = api.trends_place(WORLD_WOE_ID)\n",
94+
"world_trends_json = json.loads(json.dumps(world_trends, indent=1))\n",
95+
"\n",
96+
"world_trend_list=[] \n",
97+
"for trend in world_trends_json[0][\"trends\"]:\n",
98+
"\tworld_trend_list.append(trend[\"name\"].strip(\"#\"))\n",
99+
" \n",
100+
"# Printing the topics which are trending in both WORLD and BRAZIL.\n",
101+
"\n",
102+
"print(set(world_trend_list).intersection(set(brazil_trend_list)))"
103+
],
104+
"execution_count": 29,
105+
"outputs": [
113106
{
114107
"output_type": "stream",
115108
"text": [
@@ -118,32 +111,32 @@
118111
"name": "stdout"
119112
}
120113
]
121-
},
122-
{
123-
"cell_type": "code",
124-
"metadata": {
125-
"id": "CvD89_eKCF7d",
126-
"colab_type": "code",
127-
"colab": {}
128-
},
129-
"source": [
130-
""
131-
],
132-
"execution_count": 0,
133-
"outputs": []
134-
},
135-
{
136-
"cell_type": "code",
137-
"metadata": {
138-
"id": "PPpiCPOwDGLO",
139-
"colab_type": "code",
140-
"colab": {}
141-
},
142-
"source": [
143-
""
144-
],
145-
"execution_count": 0,
146-
"outputs": []
147-
}
148-
]
149-
}
114+
}
115+
],
116+
"metadata": {
117+
"colab": {
118+
"collapsed_sections": [],
119+
"name": "TrendingTopics.ipynb",
120+
"provenance": []
121+
},
122+
"kernelspec": {
123+
"display_name": "Python 3",
124+
"language": "python",
125+
"name": "python3"
126+
},
127+
"language_info": {
128+
"codemirror_mode": {
129+
"name": "ipython",
130+
"version": 3
131+
},
132+
"file_extension": ".py",
133+
"mimetype": "text/x-python",
134+
"name": "python",
135+
"nbconvert_exporter": "python",
136+
"pygments_lexer": "ipython3",
137+
"version": "3.7.4"
138+
}
139+
},
140+
"nbformat": 4,
141+
"nbformat_minor": 1
142+
}

0 commit comments

Comments (0)