- #!/usr/bin/env python
- # coding: utf-8
-
# # Comparison of Inverse-STFT implementations
# - Seungwon Park's implementation: IFFT + deconvolution for stacking `ytmp`
# - Keunwoo Choi's implementation: based on IRFFT
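
# As a rough illustration of the IRFFT-based approach in the bullets above, here is a
# minimal, loop-based sketch: one IRFFT per frame, followed by overlap-add and
# window-sum normalization. This is an illustrative assumption only; the actual
# `istft_irfft` compared in this file is vectorized and its exact signature may differ.
import torch

def istft_overlap_add_sketch(spec, hop_length, window):
    # spec: complex spectrogram of shape (n_fft // 2 + 1, n_frames)
    # window: real window of length n_fft, assumed used for both analysis and synthesis
    n_fft = (spec.shape[0] - 1) * 2
    n_frames = spec.shape[1]
    out_len = n_fft + hop_length * (n_frames - 1)
    y = torch.zeros(out_len)
    win_sum = torch.zeros(out_len)
    for t in range(n_frames):
        frame = torch.fft.irfft(spec[:, t], n=n_fft)   # one IRFFT per frame
        start = t * hop_length
        y[start:start + n_fft] += frame * window       # overlap-add the windowed frame
        win_sum[start:start + n_fft] += window ** 2    # accumulate squared window
    nonzero = win_sum > 1e-8
    y[nonzero] = y[nonzero] / win_sum[nonzero]         # normalize by the window sum
    return y
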
@@ -26,8 +23,10 @@ def inner(*args,**kwargs):
        time = start.elapsed_time(end)
        return output, time
    return inner
+
istft_irfft = timing(istft_irfft)
istft_deconv = timing(istft_deconv)
+
def test_stft():
    import traceback
    audio, sr = librosa.load(librosa.util.example_audio_file(), duration=2, sr=None)
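
# The hunk above shows only the tail of the `timing` wrapper. Assuming CUDA-event-based
# timing (suggested by `start.elapsed_time(end)`), a complete decorator might look like
# the sketch below; this is a guess for illustration and requires a CUDA device, not
# necessarily the file's actual code.
import torch

def timing(func):
    def inner(*args, **kwargs):
        start = torch.cuda.Event(enable_timing=True)  # GPU-side timestamp events
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        output = func(*args, **kwargs)
        end.record()
        torch.cuda.synchronize()                      # wait for the recorded events to complete
        time = start.elapsed_time(end)                # elapsed time in milliseconds
        return output, time
    return inner
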
@@ -60,55 +59,6 @@ def to_np(tensor):
# print(traceback.print_exc())
-
-
-
-
-
if __name__ == "__main__":
    test_stft()
- =======
- # In[3]:
-
-
- y, sr = librosa.load(librosa.util.example_audio_file(), duration=2.0)
- n_fft = 2048
- hop_length = n_fft // 4
- y = torch.tensor(y)
- stft = torch.stft(y, n_fft, hop_length)
-
-
- # In[4]:
-
-
- stft_single = stft
- stft_batch = stft.unsqueeze(0)
-
-
- # In[5]:
-
-
- result_deconv = istft_deconv(stft_single, hop_length)
- result_irfft = istft_irfft(stft_batch, hop_length)[0]
-
- diff = torch.max(torch.abs(result_deconv - result_irfft)).item()
-
- if diff < 1e-4:
-     print(f'Results are consistent. Maximum difference: {diff}')
-
-
- # In[6]:
-
-
- get_ipython().run_line_magic('timeit', 'result_deconv = istft_deconv(stft_single, hop_length)')
-
-
- # In[7]:
-
-
- get_ipython().run_line_magic('timeit', 'result_irfft = istft_irfft(stft_batch, hop_length)[0]')
-
-
- # # Conclusion
- # - The IRFFT-based implementation is faster, showing that better parallelization alone does not beat algorithmic optimization.