-
Notifications
You must be signed in to change notification settings - Fork 8
/
addMetadata.py
376 lines (278 loc) · 11.6 KB
/
addMetadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
#!/usr/bin/env python
# ASCII-art banner used as the argparse description in main().
# It is a raw string so that every backslash of the art stays literal: no
# invalid-escape warnings, and a row that ends in a backslash keeps its
# newline instead of being joined to the next row as a line continuation.
DESC = r"""
____ _ _ __ __ __ ____ _____
| __ ) ___ __ _ _ _| |_(_)/ _|_ _| \/ | _ \___ /
| _ \ / _ \/ _` | | | | __| | |_| | | | |\/| | |_) ||_ \
| |_) | __/ (_| | |_| | |_| | _| |_| | | | | __/___) |
|____/ \___|\__,_|\__,_|\__|_|_| \__, |_| |_|_| |____/
|___/
______________________________________________________________
| |
| Edit Metadata of MP3 files based on file name |
|____________________________________________________________|
"""
import sys
import shutil
import os
from os import chdir, listdir, rename, walk, path, environ
from os.path import basename, dirname, realpath
import spotipy
import argparse
import configparser
import spotipy.oauth2 as oauth2
import re
from titlecase import titlecase
import requests
from bs4 import BeautifulSoup
import eyed3
import argparse
def setup_config():
    '''
    read api keys from config.ini file

    config.ini is looked up in the directory that contains this script
    (symlinks resolved). The function publishes these module globals:
    CONFIG (the parsed ConfigParser), config_path (path of config.ini) and
    GENIUS_KEY, SP_SECRET, SP_ID (the three options of the [keys] section).

    A missing file, section or option is filled with the same placeholder
    text an untouched config.ini carries, so the warnings below are printed
    (instead of a KeyError being raised) and --config can still be used.
    '''
    global CONFIG, GENIUS_KEY, SP_SECRET, SP_ID, config_path

    placeholders = {
        'genius_key': '<insert genius key here>',
        'spotify_client_secret': '<insert spotify client secret here>',
        'spotify_client_id': '<insert spotify client id here>',
    }

    CONFIG = configparser.ConfigParser()
    # dirname() of the resolved path is used rather than blanking
    # basename(__file__) out of it: the latter fails when the script is run
    # through a symlink with another name, or when the name also occurs in
    # one of the parent directories.
    config_path = os.path.join(dirname(realpath(__file__)), 'config.ini')
    CONFIG.read(config_path)

    if not CONFIG.has_section('keys'):
        CONFIG.add_section('keys')
    for option, placeholder in placeholders.items():
        if not CONFIG.has_option('keys', option):
            CONFIG.set('keys', option, placeholder)

    GENIUS_KEY = CONFIG['keys']['genius_key']
    SP_SECRET = CONFIG['keys']['spotify_client_secret']
    SP_ID = CONFIG['keys']['spotify_client_id']

    if GENIUS_KEY == placeholders['genius_key']:
        print('Warning, you are missing Genius key. Add it using --config\n\n')
    if SP_SECRET == placeholders['spotify_client_secret']:
        print('Warning, you are missing Spotify Client Secret. Add it using --config\n\n')
    if SP_ID == placeholders['spotify_client_id']:
        print('Warning, you are missing Spotify Client ID. Add it using --config\n\n')
def add_config_keys():
    '''
    Adds configuration keys in the config.ini file

    Every option of the [keys] section that still holds its placeholder
    text is asked for on the console. The configuration is then written
    back to config_path and the module globals GENIUS_KEY, SP_SECRET and
    SP_ID are refreshed, so the rest of the run sees the new values.

    Relies on the globals CONFIG and config_path set by setup_config().
    '''
    global GENIUS_KEY, SP_SECRET, SP_ID

    # (option, placeholder that marks it as unset, prompt shown to the user)
    prompts = (
        ('genius_key', '<insert genius key here>',
         'Enter Genius Client Access token : '),
        ('spotify_client_secret', '<insert spotify client secret here>',
         'Enter Spotify Secret token : '),
        ('spotify_client_id', '<insert spotify client id here>',
         'Enter Spotify Client ID : '),
    )

    keys = CONFIG['keys']
    for option, placeholder, prompt in prompts:
        if keys[option] == placeholder:
            # strip() drops whitespace that tends to come along when a
            # token is pasted into the terminal
            keys[option] = input(prompt).strip()

    with open(config_path, 'w') as configfile:
        CONFIG.write(configfile)

    GENIUS_KEY = keys['genius_key']
    SP_SECRET = keys['spotify_client_secret']
    SP_ID = keys['spotify_client_id']
def improve_song_name(song):
    '''
    removes all unwanted words and numbers from file name so that the spotify search results can be improved

    Steps, in order: drop any directory part, strip leading digits / dots /
    dashes / spaces (track numbers), drop a trailing ".mp3" extension,
    turn the punctuation in char_filters into spaces, remove the words in
    word_filters and collapse the remaining whitespace.

    Filter words are removed only as whole words, so "Baby" keeps its "by"
    and "Come" keeps its "com".
    '''
    char_filters = "()[]{}-:_/=!+\"\'"
    word_filters = ('lyrics', 'lyric', 'by', 'video', 'official', 'hd', 'dirty', 'with', 'lyrics', 'feat', 'original', 'mix',
                    'www', 'com', 'mp3', 'audio', 'remixed', 'remix', 'full', 'version', 'music', 'hq', 'uploaded', 'explicit')

    song = basename(song.strip())
    song = song.lstrip("0123456789.- ")

    # Only cut a real ".mp3" suffix; blindly dropping the last four
    # characters would eat part of a name that has no extension.
    stem, extension = os.path.splitext(song)
    if extension.lower() == '.mp3':
        song = stem

    song = song.translate(str.maketrans(char_filters, ' ' * len(char_filters)))

    # \b anchors make the filter match whole words instead of substrings
    noise_words = r'\b(?:' + '|'.join(re.escape(word) for word in word_filters) + r')\b'
    song = re.sub(noise_words, '', song, flags=re.IGNORECASE)

    return ' '.join(song.split())
def get_song_name(title, artist):
    '''
    Build the "<title> - <artist>" query string used to look a song up.
    '''
    return ' - '.join((title, artist))
def get_lyrics_genius(song_name):
    '''
    calls genius.com api for getting the url of the song lyrics page then scrapes that page to fetch the lyrics

    The access token is the GENIUS_KEY global that setup_config() reads
    from config.ini; no token is embedded in the source.

    Returns the lyrics text, or None when the key is not configured, the
    search has no hit, or the song page has no <div class="lyrics">.
    '''
    # setup_config() publishes the key as a module global; if it has not run
    # (or the key is still the placeholder) lyrics are skipped.
    genius_key = globals().get('GENIUS_KEY')
    if not genius_key or genius_key == '<insert genius key here>':
        print("Skipping lyrics for " + song_name + ", Genius key is not configured")
        return None

    base_url = "https://api.genius.com"
    headers = {'Authorization': 'Bearer %s' % (genius_key)}

    # The search term belongs in the query string (params=), not in the body
    # of the GET request (data=).
    search = requests.get(base_url + "/search",
                          params={'q': song_name}, headers=headers)
    try:
        api_path = search.json()['response']['hits'][0]['result']['api_path']
    except (KeyError, IndexError, ValueError):
        # no hit, an error payload without 'response', or a non-JSON reply
        print("Could not find lyrics for " + song_name)
        return None

    song = requests.get(base_url + api_path, headers=headers)
    song_path = song.json()['response']['song']['path']

    page = requests.get("http://genius.com" + song_path)
    html = BeautifulSoup(page.text, "html.parser")

    # remove script tags that they put in the middle of the lyrics
    for script in html('script'):
        script.extract()

    lyrics_block = html.find("div", class_="lyrics")
    if lyrics_block is None:
        print("Could not find lyrics for " + song_name)
        return None

    # Line breaks are kept. The original called lyrics.replace('\n', ' ') but
    # discarded the result, so the text was never flattened.
    return lyrics_block.get_text()
def get_metadata_spotify(spotify, song_name):
    '''
    Ask the Spotify API for as many tags of the song as it can provide.

    spotify is the client object whose search(), album() and artist()
    methods are used; song_name is the search query.

    Returns a dict with the keys title, artist, album, album_artist,
    release_date, track_num, disc_num and albumart, plus genre and lyrics
    when they could be found. Returns an empty list when the search has no
    result.
    '''
    print("trying to find data on Spotify...")

    try:
        track = spotify.search(song_name, limit=1)['tracks']['items'][0]
    except IndexError:
        print("Could not find the song on Spotify")
        return []

    metadata = {
        'title': track['name'],
        'artist': track['artists'][0]['name'],
        'album': track['album']['name'],
        'album_artist': track['album']['artists'][0]['name'],
    }

    album_details = spotify.album(track['album']['id'])
    metadata['release_date'] = album_details['release_date']

    # Genre: first genre of the album, else first genre of the lead artist.
    # The artist is only requested when the album lists no genre.
    try:
        metadata['genre'] = titlecase(album_details['genres'][0])
    except IndexError:
        try:
            artist_details = spotify.artist(track['artists'][0]['id'])
            metadata['genre'] = titlecase(artist_details['genres'][0])
        except IndexError:
            print("song genre could not be found.")

    metadata.update(
        track_num=track['track_number'],
        disc_num=track['disc_number'],
        albumart=track['album']['images'][0]['url'],
    )

    query = get_song_name(metadata['title'], metadata['artist'])
    lyrics = get_lyrics_genius(query)
    if lyrics is not None:
        metadata['lyrics'] = lyrics

    print()
    return metadata
def list_files():
    '''
    list all files in current directory with extension .mp3

    The extension is matched case-insensitively (".MP3" counts), entries
    that are not regular files are skipped, and the names are returned in
    sorted order so runs are reproducible.
    '''
    return [name for name in sorted(listdir('.'))
            if name.lower().endswith('.mp3') and path.isfile(name)]
def set_metadata(file_name, metadata):
    '''
    call eyed3 module to set mp3 song metadata as received from spotify

    Writes the optional entries 'genre', 'lyrics' and 'albumart' (a cover
    image URL) of metadata into the file's ID3 tag and saves it as ID3
    v2.3. Entries that are absent are skipped.

    A file eyed3 cannot load is reported and left untouched; a file
    without any tag gets a new one.
    '''
    print("setting metadata for " + file_name)
    print()

    audiofile = eyed3.load(file_name)
    if audiofile is None:
        # eyed3.load() returns None for files it does not recognise
        print("Could not read " + file_name + " as an audio file, tags not written")
        print()
        return

    if audiofile.tag is None:
        # untagged file: create an empty tag to write into
        audiofile.initTag()
    tag = audiofile.tag

    if 'genre' in metadata:
        tag.genre = metadata['genre']
    if 'lyrics' in metadata:
        tag.lyrics.set(metadata['lyrics'])
    if 'albumart' in metadata:
        # .content is the decoded body; the raw stream used before is not
        # decoded when the server compresses the response.
        cover = requests.get(metadata['albumart'])
        if cover.ok:
            # 3 is the ID3 picture type for the front cover
            tag.images.set(3, cover.content, 'image/jpeg')
        else:
            print("Could not download album art for " + file_name)

    tag.save(version=(2, 3, 0))
    print()
def fix_music_file(spotify, file_name, norename, rename_format):
    '''
    Tag one MP3 file, rename it and move it into the "Music" folder.

    The file name is cleaned up and searched on Spotify. When a match is
    found, the tags are written, the file is renamed according to
    norename / rename_format and moved into ./Music.

    Returns True when no metadata was found (the file is left where it is
    and still needs attention), False when the file was processed.
    '''
    print("------------------------------------------------------------------------")
    print()
    print()
    print("Currently processing " + file_name)

    metadata = get_metadata_spotify(spotify, improve_song_name(file_name))
    if not metadata:
        return True

    set_metadata(file_name, metadata)
    final_path = rename_to_format(file_name, norename, rename_format, metadata)

    # Without the folder, shutil.move() would rename the song to a plain
    # file called "Music" instead of moving it into a directory.
    os.makedirs('Music', exist_ok=True)
    shutil.move(final_path, 'Music')
    return False
def rename_to_format(file_name, norename, rename_format, metadata):
    '''
    Rename the file according to rename_format and return its new path.

    rename_format is a str.format template that may use {title}, {artist}
    and {album}; the values come from metadata. Fields are joined with
    " - " and the result gets the ".mp3" extension, in the directory the
    file already lives in.

    When norename is set nothing is renamed and file_name is returned
    unchanged.
    '''
    if norename:
        return file_name

    # Every field carries a trailing " -" so consecutive fields end up
    # separated; the dash left over after the last field is cut off again.
    song_title = rename_format.format(
        title=metadata['title'] + ' -',
        artist=metadata['artist'] + ' -',
        album=metadata['album'] + ' -')
    song_title = song_title[:-1] if song_title.endswith('-') else song_title
    song_title = ' '.join(song_title.split())

    # A path separator inside a tag (e.g. "AC/DC") would be taken for a
    # directory boundary by rename().
    song_title = song_title.replace('/', '-').replace(path.sep, '-')

    print("renaming " + file_name + " to " + song_title)
    # path.join() puts the separator between directory and name that plain
    # string concatenation left out.
    new_path = path.join(path.dirname(file_name), '{}.mp3'.format(song_title))
    rename(file_name, new_path)
    return new_path
def fix_music_files(spotify, files, norename, rename_format):
    '''
    Run fix_music_file() on every file and collect the ones it could not fix.

    Returns the list of file names for which fix_music_file() reported
    that no metadata was found.
    '''
    need_to_improve = []
    for file_name in files:
        if fix_music_file(spotify, file_name, norename, rename_format):
            need_to_improve.append(file_name)

    # Closing separator. This line used to be a bare string expression (the
    # print call was missing), so it never produced any output.
    print("------------------------------------------------------------------------")
    print()
    print()
    return need_to_improve
def main():
    '''
    Deals with arguments and calls other functions

    Loads config.ini, parses the command line, optionally asks for missing
    API keys (--config), then fixes either the single file given with
    --song or every MP3 in the directory given with --dir (default: the
    current directory). Fixed files end up in a "Music" sub-folder.

    The Spotify credentials are taken from config.ini; when they are still
    placeholders the program stops with a usage error.
    '''
    setup_config()

    parser = argparse.ArgumentParser(
        description="{}".format(DESC), formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('-d', '--dir', action="store", dest='repair_directory',
                        help='give path of music files\' directory', default=os.getcwd())
    parser.add_argument('-s', '--song', action='store', dest='song_name',
                        help='Only fix metadata of the file specified', default=None)
    parser.add_argument('-c', '--config', action='store_true', dest='config',
                        help="Add API Keys to config\n\n")
    parser.add_argument('-n', '--norename', action='store_true',
                        help='Does not rename files to song title\n\n')
    parser.add_argument('-f', '--format', action='store', dest='rename_format',
                        help='Specify the Name format used in renaming,\n'
                             'Valid Keywords are:\n'
                             '{title}{artist}{album}\n\n')

    args = parser.parse_args()

    repair_directory = args.repair_directory or '.'
    song_name = args.song_name or None
    norename = args.norename or False
    rename_format = args.rename_format or '{title}'

    if args.config:
        add_config_keys()

    # Read the credentials from CONFIG only now, so keys that were just
    # entered through --config are the ones used.
    keys = CONFIG['keys']
    client_id = keys['spotify_client_id']
    client_secret = keys['spotify_client_secret']
    if (client_id == '<insert spotify client id here>'
            or client_secret == '<insert spotify client secret here>'):
        parser.error('Spotify credentials are missing. Add them using --config')

    auth = oauth2.SpotifyClientCredentials(
        client_id=client_id, client_secret=client_secret)
    # Handing over the credentials manager (instead of a one-off token)
    # lets the client request and renew access tokens itself.
    spotify = spotipy.Spotify(client_credentials_manager=auth)

    if song_name is not None:
        # fix_music_file() moves the result into ./Music
        os.makedirs('Music', exist_ok=True)
        if fix_music_file(spotify, song_name, norename, rename_format):
            print(song_name)
    else:
        chdir(repair_directory)
        os.makedirs('Music', exist_ok=True)
        need_to_improve = fix_music_files(
            spotify, list_files(), norename, rename_format)
        print(need_to_improve)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()