-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsplit-album
executable file
·278 lines (235 loc) · 9.81 KB
/
split-album
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#! /usr/bin/env nix-shell
#! nix-shell -i oil -p coreutils moreutils ffmpeg gnused ripgrep file
# Turn on every option in the shell's `strict:all` option group for the whole script.
shopt --set strict:all
# Short usage text; `main` prints it for `-h`/`--help` and then exits.
const show_help = "A small script for splitting files into tracks, perfect for full album releases and audiobooks.
Based from Luke Smith's booksplit script
(https://raw.githubusercontent.com/LukeSmithxyz/voidrice/091d7e54c5c1ed77201ce1254aa2623a2801c9f2/.local/bin/booksplit).
split-album [options...] [\$ALBUM_FILE \$TIMESTAMP_FILE]
Options:
-h, --help Show the help section.
--tutorial Show a helpful tutorial-esque description of the program.
--audio-file [file] Set the audio file to be split.
--timestamp-file [file] Set the timestamp file to be used for splitting.
-t, --title [title] Set the title of the album.
-d, --date [date] Set the publication date of the album.
-a, --author [author] Set the author of the album.
--json Print the JSON data for use with other applications.
--skip Skip the extraction part, useful for printing data with '--json' or testing the timestamp file.
--strict Set to be strict, exiting when an invalid timestamp is encountered.
Environment variables:
EXTENSION The extension to be used. This is used in conjunction with FFmpeg
where it can detect the codec to be converted automatically with it.
When any of the required metadata is missing (i.e., title, date, author), it will be prompted.
"
# Longer, tutorial-style description of the timestamp file formats (plain text and JSON);
# `main` prints it for `--tutorial` and then exits.
const show_descriptive_help = "This script splits an album with a timestamp file.
You're always going to see using this script like the following:
split-album \$AUDIO_FILE \$TIMESTAMP_FILE
The timestamp file contains a starting timestamp (in HH:MM:SS[.MS]) and the title of the chapter/track.
For more information, see https://trac.ffmpeg.org/wiki/Seeking.
Lines starting with '#' and empty lines will be ignored.
The following is an example of the content of a timestamp file.
\`\`\`
00:00:00 Introduction
00:03:54 It's the R-rated scene
00:25:43 Boring exposition at the cafe
00:36:54 Expedition time
00:41:51 Fighting time
00:42:22 Shower scene
\`\`\`
You can also create a timestamp file in JSON format.
It is the equivalent of the 'chapters' key from the JSON output (i.e., '--json').
The equivalent JSON of the previous example would be this:
\`\`\`
[
{
\"timestamp\": \"00:00:00\",
\"title\": \"Introduction\"
},
{
\"timestamp\": \"00:03:54\",
\"title\": \"It's the R-rated scene\"
},
{
\"timestamp\": \"00:25:43\",
\"title\": \"Boring exposition at the cafe\"
},
{
\"timestamp\": \"00:36:54\",
\"title\": \"Expedition time\"
},
{
\"timestamp\": \"00:41:51\",
\"title\": \"Fighting time\"
},
{
\"timestamp\": \"00:42:22\",
\"title\": \"Shower scene\"
}
]
\`\`\`
There will be a folder created with the safe name of the album (in kebab-case) containing the split tracks.
The original file will be kept, do what you want with it.
"
# Print a printf-style message to standard error, terminated by a newline.
#   msg  - the printf format string
#   args - the values substituted into the format string
proc warnf(msg, @args) {
    printf "${msg}\\n" @args >&2
}
# Report a printf-style message on standard error, then abort the script with exit status 1.
#   msg  - the printf format string
#   args - the values substituted into the format string
proc errorf(msg, @args) {
    # Same output as `warnf`; the only difference is that the script stops afterwards.
    warnf $msg @args
    exit 1
}
# Ask a question on standard error and hand the answer back to the caller.
#   msg    - the question, printed on a line of its own
#   out    - out-parameter receiving the line read from standard input
#   prefix - text printed right after the question, before the answer is read (defaults to ">> ")
proc prompt(msg, :out, prefix = ">> ") {
    # The question goes to stderr so that stdout stays free for the script's own output.
    printf '%s\n%s' $msg $prefix >&2
    read --line
    setref out = $_line
}
# Print the kebab-case form of `word` on standard output.
# The sed script works in this order:
#   1. lower-case every character,
#   2. turn each run of whitespace into a hyphen,
#   3. drop everything that is not a lowercase letter, a digit or a hyphen,
#   4. strip leading and trailing hyphens,
#   5. squeeze repeated hyphens into one.
proc kebab-case(word) {
    write -- $word | sed -E 's/./\L&/g; s/\s+/-/g; s/[^a-z0-9-]//g; s/^-+|-+$//g; s/-+/-/g'
}
# Entry point of the script.
#
# Steps:
#   1. Parse the command-line arguments (see `show_help` for the accepted options).
#   2. Check that the audio and timestamp files exist and prompt for missing metadata.
#   3. Read the chapters from the timestamp file (JSON or plain text).
#   4. Build one ffmpeg command per chapter and run the commands through `parallel`,
#      unless `--skip` is given.
#   5. Print the collected data as JSON when `--json` is given.
#
# Exit status is 1 when:
#   - an option is missing its value or a file is missing/unsupported,
#   - `--strict` is set and an invalid timestamp was found,
#   - the timestamps are not in ascending order.
# Without `--strict`, chapters with an invalid timestamp are reported and left out.
proc main {
    # This could be configured by setting the 'EXTENSION' environment variable.
    const EXTENSION = ${EXTENSION:-"opus"}

    # Set up the variables.
    var audio_file = ''
    var timestamp_file = ''
    var album = ''
    var author = ''
    var pub_date = ''
    var prints_json = false
    var strict_mode = false
    var skip = false

    # Parse the arguments.
    # Options that take a value are checked for that value first, so a forgotten
    # argument gives a readable message instead of an out-of-range index error.
    while test $len(ARGV) -gt 0 {
        case $[ARGV[0]] {
            -h|--help)
                write -- $show_help
                exit
                ;;
            --tutorial)
                write -- $show_descriptive_help
                exit
                ;;
            --audio-file)
                test $len(ARGV) -ge 2 || errorf '%s requires a value' $[ARGV[0]]
                setvar audio_file = ARGV[1]
                shift 2
                ;;
            --timestamp-file)
                test $len(ARGV) -ge 2 || errorf '%s requires a value' $[ARGV[0]]
                setvar timestamp_file = ARGV[1]
                shift 2
                ;;
            -a|--author)
                test $len(ARGV) -ge 2 || errorf '%s requires a value' $[ARGV[0]]
                setvar author = ARGV[1]
                shift 2
                ;;
            -d|--date)
                test $len(ARGV) -ge 2 || errorf '%s requires a value' $[ARGV[0]]
                setvar pub_date = ARGV[1]
                shift 2
                ;;
            -t|--title)
                test $len(ARGV) -ge 2 || errorf '%s requires a value' $[ARGV[0]]
                setvar album = ARGV[1]
                shift 2
                ;;
            --strict)
                setvar strict_mode = true
                shift
                ;;
            --skip)
                setvar skip = true
                shift
                ;;
            --json)
                setvar prints_json = true
                shift
                ;;
            *)
                # Anything else is taken as the positional pair: audio file, then timestamp file.
                test $len(ARGV) -ge 2 || errorf 'Expected an audio file and a timestamp file, got only %s' $[ARGV[0]]
                setvar audio_file = ARGV[0]
                setvar timestamp_file = ARGV[1]
                shift 2
                ;;
        }
    }

    # Check the files if it is valid.
    test -f $audio_file || errorf '%s is not a regular file' $audio_file
    test -f $timestamp_file || errorf '%s is not a regular file' $timestamp_file

    # Prompt for the missing values if not passed from the command line.
    test $album || prompt "What is the title of the album?" :album
    test $author || prompt "Who is the author of the album?" :author
    test $pub_date || prompt "When is the album published?" :pub_date

    # Populate the output data.
    # This is going to be used throughout the processing.
    # Additionally, the object will be printed when `--json` flag is passed.
    const output_data = {}
    setvar output_data['file'] = $audio_file
    setvar output_data['chapters'] = []
    setvar output_data['album'] = $album
    setvar output_data['author'] = $author
    setvar output_data['date'] = $pub_date
    setvar output_data['extension'] = $EXTENSION

    # The following variable stores an eggex, a simplified notation for regular expressions.
    # Pretty nice to use especially that literals are quoted and classes are not.
    const timestamp_regex = / %start digit{2,} ':' digit{2} ':' digit{2} <'.' digit+>? %end /

    # Two kinds of problems are tracked separately:
    # - `has_invalid_timestamp`: a plain-text line whose timestamp is malformed.
    #   The line is skipped; this is only fatal in strict mode.
    # - `has_error`: the timestamps are out of order, which is always fatal
    #   because splitting would produce broken output.
    var has_invalid_timestamp = false
    var has_error = false

    # Deserialize the given input into the chapters data.
    # This script accept several formats from a JSON file to a plain-text file derived from Luke Smith's 'booksplit' script.
    const timestamp_format = $(file --mime-type --brief $timestamp_file)
    case $timestamp_format {
        "application/json")
            json read :chapters < $timestamp_file
            setvar output_data['chapters'] = chapters
            ;;
        # The text file is formatted quite similarly to the required format from the booksplit script.
        # I improved some things in the format such as allowing comments (i.e., lines starting with '#') and empty lines allowing for cleaner input.
        "text/plain")
            sed --regexp-extended --expression '/^\s*$/d' --expression '/^#/d' $timestamp_file | while read --line {
                # We'll build the chapter data to be added later to the output data.
                var chapter = {}
                setvar chapter['title'] = $(write -- $_line | cut -d' ' -f2-)
                setvar chapter['timestamp'] = $(write -- $_line | cut -d' ' -f1)

                # Report the chapter and leave it out if the timestamp format is not valid.
                # We won't be exiting immediately to give all possible errors.
                write -- ${chapter['timestamp']} | rg --quiet $timestamp_regex || {
                    warnf "'%s' %s is not a valid timestamp" ${chapter['timestamp']} ${chapter['title']}
                    setvar has_invalid_timestamp = true
                    continue
                }
                _ output_data['chapters'].append(chapter)
            }
            ;;
        # Any other detected type used to be ignored silently, leaving an empty chapter list.
        *)
            errorf '%s has an unsupported format (%s), expected a JSON or plain-text file' $timestamp_file $timestamp_format
            ;;
    }

    # Exit if the script is set as strict and has erroneous input.
    # If the user cares about the input, they have to set it to strict mode.
    # Otherwise the invalid chapters have already been dropped and we carry on.
    if (strict_mode and has_invalid_timestamp) { exit 1 }

    # Set parts of the output data and prepare for the splitting process.
    const title_slug = $(kebab-case $album)
    setvar output_data['directory'] = $(realpath --canonicalize-missing $title_slug)

    # Rather than sequentially segmenting the audio, we'll extract the starting and ending timestamps of each segment then feed it to a job queue that can execute jobs in parallel.
    # Take note we don't have the ending timestamp of each segment so we need a way to look back into items.
    const chapter_len = len(output_data['chapters'])
    var job_queue = %()

    # Iterate through the chapters and populate the job queue.
    # We'll also fill up the rest of the chapter-related data into the output data.
    # `index` is 1-based: `index - 1` is the current chapter and `index` is the next one.
    for index in @(seq $[chapter_len]) {
        var index = Int(index)
        setvar chapter = output_data['chapters'][index - 1]
        var start = chapter['timestamp']
        # The last chapter has no successor, so it has no ending timestamp.
        var end = output_data['chapters'][index]['timestamp'] if index !== chapter_len else null
        var filename = $(printf "%.2d-%s.%s" $index $(kebab-case ${chapter['title']}) $EXTENSION)
        setvar output_data['chapters'][index - 1]['file'] = filename

        # Check for incorrect timestamp order and set the pipeline as erroneous if it is.
        # We can't let the splitting process proceed since it will surely make problematic output.
        # The null check comes first so the comparison only ever sees two timestamps.
        if (end is not null and start > end) {
            warnf '%s (start) is ahead compared to %s (end)' $start $end
            setvar has_error = true
        }

        # NOTE(review): the audio file path and the chapter title are interpolated into
        # the command text handed to `parallel`; quotes or other shell metacharacters in
        # them may break or alter the command. Consider escaping them before use.
        append :job_queue ">&2 printf '[%d/%d] %s\\n' $[index] $[chapter_len] \"$[output_data['chapters'][index - 1]['title']]\"; ffmpeg -loglevel quiet -nostdin -i '${audio_file}' -ss ${start} $['-to ' + end if index !== chapter_len else ''] ${title_slug}/${filename}"
    }

    # Exit the process if an error detected.
    if (has_error) { exit 1 }

    # Start the splitting process if the `--skip` is absent.
    # The output directory is only created here, so `--skip` and failed runs leave nothing behind.
    if (not skip) {
        mkdir -p $title_slug
        parallel -- @job_queue
    }

    # Print the output data as JSON if the `--json` flag is passed.
    if (prints_json) { json write :output_data }
}
# Run the script, forwarding the command-line arguments to `main`.
main @ARGV