Skip to content

Commit 013ec72

Browse files
committed
add recommender notebook
1 parent ef1d2b6 commit 013ec72

File tree

3 files changed

+1899
-0
lines changed

3 files changed

+1899
-0
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,7 @@ gradlew*
3838

3939
# Mac
4040
.DS_Store
41+
42+
# Data
43+
*.zip
44+
notebook/ml-100k/

notebook/26_keras_embeddings.ipynb

+302
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 3,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [
10+
{
11+
"name": "stdout",
12+
"output_type": "stream",
13+
"text": [
14+
"(40,)\n",
15+
"[ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14.\n",
16+
" 15. 16. 17. 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29.\n",
17+
" 30. 31. 32. 33. 34. 35. 36. 37. 38. 39.]\n",
18+
"[[ 0. 1. 2. 3.]\n",
19+
" [ 4. 5. 6. 7.]\n",
20+
" [ 8. 9. 10. 11.]\n",
21+
" [ 12. 13. 14. 15.]\n",
22+
" [ 16. 17. 18. 19.]\n",
23+
" [ 20. 21. 22. 23.]\n",
24+
" [ 24. 25. 26. 27.]\n",
25+
" [ 28. 29. 30. 31.]\n",
26+
" [ 32. 33. 34. 35.]\n",
27+
" [ 36. 37. 38. 39.]]\n"
28+
]
29+
}
30+
],
31+
"source": [
32+
"import numpy as np\n",
33+
"\n",
34+
"embedding_size = 4\n",
35+
"vocab_size = 10\n",
36+
"\n",
37+
"embedding = np.arange(embedding_size * vocab_size, dtype='float')\n",
38+
"print(embedding.shape)\n",
39+
"print(embedding)\n",
40+
"\n",
41+
"embedding = embedding.reshape(vocab_size, embedding_size)\n",
42+
"print(embedding)"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": 5,
48+
"metadata": {
49+
"collapsed": false
50+
},
51+
"outputs": [
52+
{
53+
"data": {
54+
"text/plain": [
55+
"array([ 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])"
56+
]
57+
},
58+
"execution_count": 5,
59+
"metadata": {},
60+
"output_type": "execute_result"
61+
}
62+
],
63+
"source": [
64+
"i = 3\n",
65+
"onehot = np.zeros(vocab_size)\n",
66+
"onehot[i] = 1.\n",
67+
"onehot"
68+
]
69+
},
70+
{
71+
"cell_type": "code",
72+
"execution_count": 6,
73+
"metadata": {
74+
"collapsed": false
75+
},
76+
"outputs": [
77+
{
78+
"name": "stdout",
79+
"output_type": "stream",
80+
"text": [
81+
"[ 12. 13. 14. 15.]\n"
82+
]
83+
}
84+
],
85+
"source": [
86+
"embedding_vector = np.dot(onehot, embedding)\n",
87+
"print(embedding_vector)"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": 7,
93+
"metadata": {
94+
"collapsed": false
95+
},
96+
"outputs": [
97+
{
98+
"name": "stdout",
99+
"output_type": "stream",
100+
"text": [
101+
"[ 12. 13. 14. 15.]\n"
102+
]
103+
}
104+
],
105+
"source": [
106+
"print(embedding[i])"
107+
]
108+
},
109+
{
110+
"cell_type": "code",
111+
"execution_count": 8,
112+
"metadata": {
113+
"collapsed": false
114+
},
115+
"outputs": [
116+
{
117+
"name": "stderr",
118+
"output_type": "stream",
119+
"text": [
120+
"Using TensorFlow backend.\n"
121+
]
122+
}
123+
],
124+
"source": [
125+
"from tensorflow.contrib import keras\n",
126+
"from keras.layers import Embedding\n",
127+
"\n",
128+
"embedding_layer = Embedding(\n",
129+
" output_dim=embedding_size, input_dim=vocab_size,\n",
130+
" input_length=1, name='my_embedding')"
131+
]
132+
},
133+
{
134+
"cell_type": "code",
135+
"execution_count": 10,
136+
"metadata": {
137+
"collapsed": false
138+
},
139+
"outputs": [
140+
{
141+
"data": {
142+
"text/plain": [
143+
"(None, 1, 4)"
144+
]
145+
},
146+
"execution_count": 10,
147+
"metadata": {},
148+
"output_type": "execute_result"
149+
}
150+
],
151+
"source": [
152+
"from keras.layers import Input\n",
153+
"from keras.models import Model\n",
154+
"\n",
155+
"x = Input(shape=[1], name='input')\n",
156+
"embedding = embedding_layer(x)\n",
157+
"model = Model(inputs=x, outputs=embedding)\n",
158+
"model.output_shape"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": 11,
164+
"metadata": {
165+
"collapsed": false
166+
},
167+
"outputs": [
168+
{
169+
"data": {
170+
"text/plain": [
171+
"[array([[ 0.01890775, 0.00499418, -0.03474957, 0.02684459],\n",
172+
" [ 0.0318494 , -0.04652676, -0.02924601, 0.04009086],\n",
173+
" [-0.03589082, 0.0474348 , -0.04485966, 0.00298793],\n",
174+
" [-0.02304914, 0.01285596, -0.03610522, -0.00133644],\n",
175+
" [-0.04690611, -0.0206648 , 0.0260491 , -0.01262562],\n",
176+
" [ 0.01401315, 0.03188027, -0.02592033, -0.01135837],\n",
177+
" [-0.00707678, -0.01920606, 0.01314666, 0.04426006],\n",
178+
" [-0.02399683, 0.04837314, -0.03009446, -0.00333629],\n",
179+
" [ 0.02805784, -0.01677012, -0.0288386 , -0.00996032],\n",
180+
" [ 0.01646114, -0.03790113, -0.01738508, -0.04946321]], dtype=float32)]"
181+
]
182+
},
183+
"execution_count": 11,
184+
"metadata": {},
185+
"output_type": "execute_result"
186+
}
187+
],
188+
"source": [
189+
"model.get_weights()"
190+
]
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": 12,
195+
"metadata": {
196+
"collapsed": false
197+
},
198+
"outputs": [
199+
{
200+
"data": {
201+
"text/plain": [
202+
"array([[[ 0.01890775, 0.00499418, -0.03474957, 0.02684459]],\n",
203+
"\n",
204+
" [[-0.02304914, 0.01285596, -0.03610522, -0.00133644]]], dtype=float32)"
205+
]
206+
},
207+
"execution_count": 12,
208+
"metadata": {},
209+
"output_type": "execute_result"
210+
}
211+
],
212+
"source": [
213+
"model.predict([[0],\n",
214+
" [3]])"
215+
]
216+
},
217+
{
218+
"cell_type": "code",
219+
"execution_count": 14,
220+
"metadata": {
221+
"collapsed": false
222+
},
223+
"outputs": [
224+
{
225+
"data": {
226+
"text/plain": [
227+
"(None, 4)"
228+
]
229+
},
230+
"execution_count": 14,
231+
"metadata": {},
232+
"output_type": "execute_result"
233+
}
234+
],
235+
"source": [
236+
"from keras.layers import Flatten\n",
237+
"\n",
238+
"x = Input(shape=[1], name='input')\n",
239+
"\n",
240+
"# Add a Flatten layer to remove the useless length-1 \"sequence\" dimension: (None, 1, 4) -> (None, 4)\n",
241+
"y = Flatten()(embedding_layer(x))\n",
242+
"\n",
243+
"model2 = Model(inputs=x, outputs=y)\n",
244+
"model2.output_shape"
245+
]
246+
},
247+
{
248+
"cell_type": "code",
249+
"execution_count": 15,
250+
"metadata": {
251+
"collapsed": false
252+
},
253+
"outputs": [
254+
{
255+
"data": {
256+
"text/plain": [
257+
"array([[ 0.01890775, 0.00499418, -0.03474957, 0.02684459],\n",
258+
" [-0.02304914, 0.01285596, -0.03610522, -0.00133644]], dtype=float32)"
259+
]
260+
},
261+
"execution_count": 15,
262+
"metadata": {},
263+
"output_type": "execute_result"
264+
}
265+
],
266+
"source": [
267+
"model2.predict([[0],\n",
268+
" [3]])"
269+
]
270+
},
271+
{
272+
"cell_type": "code",
273+
"execution_count": null,
274+
"metadata": {
275+
"collapsed": true
276+
},
277+
"outputs": [],
278+
"source": []
279+
}
280+
],
281+
"metadata": {
282+
"kernelspec": {
283+
"display_name": "Python 3",
284+
"language": "python",
285+
"name": "python3"
286+
},
287+
"language_info": {
288+
"codemirror_mode": {
289+
"name": "ipython",
290+
"version": 3
291+
},
292+
"file_extension": ".py",
293+
"mimetype": "text/x-python",
294+
"name": "python",
295+
"nbconvert_exporter": "python",
296+
"pygments_lexer": "ipython3",
297+
"version": "3.5.3"
298+
}
299+
},
300+
"nbformat": 4,
301+
"nbformat_minor": 2
302+
}

0 commit comments

Comments
 (0)