Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DataCollection.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "name": "DataCollection.ipynb",
   "provenance": [],
   "authorship_tag": "ABX9TyNL1hnFLfjp1n6UlUm8JB3q"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Spotify Top Tracks data collection\n",
    "\n",
    "Downloads Spotify's *Top Tracks of 2018-2021* playlists, looks up the audio features of every track and writes two CSV files to Google Drive: the raw rows (`data_file.csv`) and a numerically encoded copy (`clean_data_file.csv`).\n",
    "\n",
    "Run the notebook top to bottom on a fresh kernel; every cell only depends on the cells above it."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0D_azwu7-F-T"
   },
   "outputs": [],
   "source": [
    "# Pinned so that a re-run installs the version this notebook was written against.\n",
    "%pip install -q spotipy==2.19.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import json\n",
    "import os\n",
    "from getpass import getpass\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import spotipy\n",
    "from google.colab import drive\n",
    "from spotipy.oauth2 import SpotifyClientCredentials\n",
    "\n",
    "# Every file is read from / written to this Google Drive folder.\n",
    "DRIVE_MOUNT_POINT = '/drive'\n",
    "DATA_DIR = Path(DRIVE_MOUNT_POINT) / 'My Drive' / 'Data Vis Final'\n",
    "RAW_CSV = DATA_DIR / 'data_file.csv'\n",
    "CLEAN_CSV = DATA_DIR / 'clean_data_file.csv'\n",
    "\n",
    "# Page size used for the playlist-items and audio-features requests.\n",
    "PAGE_SIZE = 100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Spotify client\n",
    "\n",
    "Credentials are read from the `SPOTIPY_CLIENT_ID` / `SPOTIPY_CLIENT_SECRET` environment variables, or prompted for when those are unset. Never paste them into a cell: an earlier revision of this notebook committed a client secret, so that key pair is public and has to be rotated in the Spotify developer dashboard."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "uHKox4RQ-IXP"
   },
   "outputs": [],
   "source": [
    "def read_credential(env_var, prompt):\n",
    "    \"\"\"Return the credential stored in `env_var`, prompting for it when the variable is unset or empty.\"\"\"\n",
    "    return os.environ.get(env_var) or getpass(prompt)\n",
    "\n",
    "\n",
    "cid = read_credential('SPOTIPY_CLIENT_ID', 'Spotify client ID: ')\n",
    "secret = read_credential('SPOTIPY_CLIENT_SECRET', 'Spotify client secret: ')\n",
    "\n",
    "client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)\n",
    "spotify_api = spotipy.Spotify(client_credentials_manager=client_credentials_manager)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Collect the playlists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3MzNTiiq-Jqp"
   },
   "outputs": [],
   "source": [
    "# Columns taken from the playlist item itself ('release_year' is supplied by the caller).\n",
    "track_fields = ['artist', 'album', 'track_name', 'track_id', 'release_year', 'explicit', 'popularity']\n",
    "# Columns copied from the audio-features record of the track.\n",
    "audio_feature_fields = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',\n",
    "                        'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']\n",
    "# Column order of the CSV files.\n",
    "playlist_features_list = track_fields + audio_feature_fields\n",
    "\n",
    "\n",
    "def _rows_from_page(items, playlist_year):\n",
    "    \"\"\"Build the rows for one page of playlist items, skipping items that cannot be described.\"\"\"\n",
    "    # Items whose track is missing or has no ID cannot be sent to the audio-features endpoint.\n",
    "    tracks = [item['track'] for item in items if item.get('track') and item['track'].get('id')]\n",
    "    skipped = len(items) - len(tracks)\n",
    "    if skipped:\n",
    "        print('skipped {} playlist item(s) without a track ID'.format(skipped))\n",
    "    if not tracks:\n",
    "        return []\n",
    "\n",
    "    # One request for the whole page instead of one request per track.\n",
    "    features = spotify_api.audio_features([track['id'] for track in tracks])\n",
    "\n",
    "    rows = []\n",
    "    for track, audio_data in zip(tracks, features):\n",
    "        if audio_data is None:\n",
    "            print('skipped {!r}: no audio features available'.format(track['name']))\n",
    "            continue\n",
    "        try:\n",
    "            row = {\n",
    "                'artist': track['album']['artists'][0]['name'],\n",
    "                'album': track['album']['name'],\n",
    "                'track_name': track['name'],\n",
    "                'track_id': track['id'],\n",
    "                'release_year': playlist_year,\n",
    "                'explicit': track['explicit'],\n",
    "                'popularity': track['popularity'],\n",
    "            }\n",
    "            row.update({field: audio_data[field] for field in audio_feature_fields})\n",
    "        except (KeyError, IndexError, TypeError) as error:\n",
    "            # Report which track was malformed instead of silently swallowing every error.\n",
    "            print('skipped track {}: unexpected item layout ({!r})'.format(track['id'], error))\n",
    "            continue\n",
    "        rows.append(row)\n",
    "    return rows\n",
    "\n",
    "\n",
    "def analyze_playlist(username, playlist_id, playlist_year):\n",
    "    \"\"\"Collect metadata and audio features for every track of a playlist.\n",
    "\n",
    "    Args:\n",
    "        username: Owner of the playlist. Not needed for the lookup; kept so that\n",
    "            existing callers and the tuples in `playlist_dict` keep working.\n",
    "        playlist_id: Spotify ID of the playlist.\n",
    "        playlist_year: Value stored in the 'release_year' column of every row.\n",
    "\n",
    "    Returns:\n",
    "        A list with one dict per track, keyed by `playlist_features_list`.\n",
    "        Unusable items are reported on stdout and left out.\n",
    "    \"\"\"\n",
    "    rows = []\n",
    "    page = spotify_api.playlist_items(playlist_id, limit=PAGE_SIZE, additional_types=('track',))\n",
    "    while page:\n",
    "        rows.extend(_rows_from_page(page['items'], playlist_year))\n",
    "        # next() returns None once the last page has been read, which ends the loop.\n",
    "        page = spotify_api.next(page)\n",
    "    return rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nIn_7L9a-N4S"
   },
   "outputs": [],
   "source": [
    "def analyze_playlist_dict(playlist_dict):\n",
    "    \"\"\"Run analyze_playlist on every entry of `playlist_dict`.\n",
    "\n",
    "    Args:\n",
    "        playlist_dict: Maps a playlist label to the (username, playlist_id,\n",
    "            playlist_year) tuple that is passed to analyze_playlist.\n",
    "\n",
    "    Returns:\n",
    "        A dict mapping each label to the list of track rows of that playlist.\n",
    "    \"\"\"\n",
    "    playlists = {}\n",
    "    for label, playlist_args in playlist_dict.items():\n",
    "        playlist = analyze_playlist(*playlist_args)\n",
    "        print('{}: {} tracks'.format(label, len(playlist)))\n",
    "        playlists[label] = playlist\n",
    "    return playlists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "mxIR7nM8-Pnk"
   },
   "outputs": [],
   "source": [
    "# label -> (username, playlist_id, playlist_year)\n",
    "playlist_dict = {\n",
    "    'Top Tracks of 2018' : ('Spotify' , '37i9dQZF1DX1HUbZS4LEyL', 2018),\n",
    "    'Top Tracks of 2019' : ('Spotify' , '37i9dQZF1DX7fxmJCMXN72', 2019),\n",
    "    'Top Tracks of 2020' : ('Spotify' , '37i9dQZF1DX7Jl5KP2eZaS', 2020),\n",
    "    'Top Tracks of 2021' : ('Spotify' , '37i9dQZF1DX18jTM2l2fJY', 2021),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ltzN2Ob8-S3D"
   },
   "outputs": [],
   "source": [
    "drive.mount(DRIVE_MOUNT_POINT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "FsFBcuFL-TeM"
   },
   "outputs": [],
   "source": [
    "data = analyze_playlist_dict(playlist_dict)\n",
    "\n",
    "# newline='' is what the csv module expects; utf-8 keeps non-ASCII artist and track names intact.\n",
    "with open(RAW_CSV, 'w', newline='', encoding='utf-8') as data_file:\n",
    "    csv_writer = csv.DictWriter(data_file, fieldnames=playlist_features_list)\n",
    "    csv_writer.writeheader()\n",
    "    for playlist in playlist_dict:\n",
    "        csv_writer.writerows(data[playlist])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Dtd08Ww9_DVD"
   },
   "outputs": [],
   "source": [
    "spotify_df = pd.read_csv(RAW_CSV)\n",
    "assert list(spotify_df.columns) == playlist_features_list, 'unexpected columns in the raw CSV'\n",
    "spotify_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Encode categorical columns\n",
    "\n",
    "Key names, `Minor`/`Major` and the `explicit` flag are turned into numbers. Values that are already numeric are left as they are."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "NPds3zbQDTtz"
   },
   "outputs": [],
   "source": [
    "# Encoding adapted from code provided by Alvin Lee\n",
    "# (See https://github.com/MFaria27/DS3010-Spotify-Project/blob/main/DS3010_Project.ipynb for details)\n",
    "\n",
    "# Number assigned to each key name.\n",
    "KEY_TO_NUMBER = {'C': 0, 'C#/Db': 1, 'D': 2, 'D#/Eb': 3, 'E': 4, 'F': 5,\n",
    "                 'F#/Gb': 6, 'G': 7, 'G#/Ab': 8, 'A': 9, 'A#/Bb': 10, 'B': 11}\n",
    "# Number assigned to each mode name.\n",
    "MODE_TO_NUMBER = {'Minor': 0, 'Major': 1}\n",
    "\n",
    "\n",
    "def encode_labels(column, mapping):\n",
    "    \"\"\"Replace the labels listed in `mapping` with their number and leave every other value untouched.\"\"\"\n",
    "    return column.map(lambda value: mapping.get(value, value))\n",
    "\n",
    "\n",
    "# A new frame is built so that re-running this cell never changes spotify_df.\n",
    "spotify_clean_df = spotify_df.assign(\n",
    "    key=encode_labels(spotify_df['key'], KEY_TO_NUMBER),\n",
    "    mode=encode_labels(spotify_df['mode'], MODE_TO_NUMBER),\n",
    "    explicit=spotify_df['explicit'].astype(int),  # True -> 1, False -> 0\n",
    ")\n",
    "\n",
    "#check correct transformation\n",
    "assert spotify_clean_df['explicit'].isin([0, 1]).all()\n",
    "\n",
    "# index=False: the row index carries no information and would be read back as an 'Unnamed: 0' column.\n",
    "spotify_clean_df.to_csv(CLEAN_CSV, index=False)\n",
    "spotify_clean_df[['key', 'mode', 'explicit']].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Ovb9k_SLEAay"
   },
   "outputs": [],
   "source": [
    "clean_df = pd.read_csv(CLEAN_CSV)\n",
    "assert list(clean_df.columns) == playlist_features_list, 'unexpected columns in the clean CSV'\n",
    "clean_df.head()"
   ]
  }
 ]
}
Binary file added Gotham_Bold_Regular.ttf
Binary file not shown.
Binary file added Progress Book.pdf
Binary file not shown.
Loading