-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathds.js
56 lines (49 loc) · 2.05 KB
/
ds.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
const Sox = require('sox-stream')
const DeepSpeech = require('deepspeech')
const MemoryStream = require('memory-stream')
module.exports = emitter => {
// Beam width used in the CTC decoder when building candidate transcriptions
const BEAM_WIDTH = 500
// The alpha hyperparameter of the CTC decoder. Language Model weight
const LM_WEIGHT = 1.75
// The beta hyperparameter of the CTC decoder. Word insertion weight (penalty)
const WORD_COUNT_WEIGHT = 1.00
// Valid word insertion weight. This is used to lessen the word insertion penalty
// when the inserted word is part of the vocabulary
const VALID_WORD_COUNT_WEIGHT = 1.00
// These constants are tied to the shape of the graph used (changing them changes
// the geometry of the first layer), so make sure you use the same constants that
// were used during training
// Number of MFCC features to use
const N_FEATURES = 26
// Size of the context window used for producing timesteps in the input vector
const N_CONTEXT = 9
const MODEL = './models/output_graph.pb'
const ALPHABET = './models/alphabet.txt'
const LM = './models/lm.binary'
const TRIE = './models/trie'
console.log('Loading model from file %s', MODEL)
let model = new DeepSpeech.Model(MODEL, N_FEATURES, N_CONTEXT, ALPHABET, BEAM_WIDTH)
console.log('Finished loading model')
console.log('Loading language model from file(s) %s %s', LM, TRIE)
model.enableDecoderWithLM(ALPHABET, LM, TRIE, LM_WEIGHT, WORD_COUNT_WEIGHT, VALID_WORD_COUNT_WEIGHT)
console.log('Finished loading langauge model')
return function (stream) {
let audioStream = new MemoryStream()
stream.pipe(Sox({
output: {
bits: 16,
rate: 16000,
channels: 1,
type: 'raw'
}
})).pipe(audioStream)
audioStream.on('finish', () => {
let audioBuffer = audioStream.toBuffer()
console.log('Running inference...')
let text = model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000)
console.log('Inference finished: %s', String(text))
emitter.emit('text', {text})
})
}
}