1 | import wave, array, math, time, argparse, sys
|
2 | import numpy, pywt
|
3 | from scipy import signal
|
4 | import pdb
|
5 | import matplotlib.pyplot as plt
|
6 |
|
7 | def read_wav(filename):
|
8 |
|
9 | #open file, get metadata for audio
|
10 | try:
|
11 | wf = wave.open(filename,'rb')
|
12 | except IOError, e:
|
13 | print e
|
14 | return
|
15 |
|
16 | # typ = choose_type( wf.getsampwidth() ) #TODO: implement choose_type
|
17 | nsamps = wf.getnframes();
|
18 | assert(nsamps > 0);
|
19 |
|
20 | fs = wf.getframerate()
|
21 | assert(fs > 0)
|
22 |
|
23 | # read entire file and make into an array
|
24 | samps = list(array.array('i',wf.readframes(nsamps)))
|
25 | #print 'Read', nsamps,'samples from', filename
|
26 | try:
|
27 | assert(nsamps == len(samps))
|
28 | except AssertionError, e:
|
29 | print nsamps, "not equal to", len(samps)
|
30 |
|
31 | return samps, fs
|
32 |
|
33 | # print an error when no data can be found
|
34 | def no_audio_data():
|
35 | print "No audio data for sample, skipping..."
|
36 | return None, None
|
37 |
|
38 | # simple peak detection
|
39 | def peak_detect(data):
|
40 | max_val = numpy.amax(abs(data))
|
41 | peak_ndx = numpy.where(data==max_val)
|
42 | if len(peak_ndx[0]) == 0: #if nothing found then the max must be negative
|
43 | peak_ndx = numpy.where(data==-max_val)
|
44 | return peak_ndx
|
45 |
|
46 | def bpm_detector(data,fs):
|
47 | cA = []
|
48 | cD = []
|
49 | correl = []
|
50 | cD_sum = []
|
51 | levels = 4
|
52 | max_decimation = 2**(levels-1);
|
53 | min_ndx = 60./ 220 * (fs/max_decimation)
|
54 | max_ndx = 60./ 40 * (fs/max_decimation)
|
55 |
|
56 | for loop in range(0,levels):
|
57 | cD = []
|
58 | # 1) DWT
|
59 | if loop == 0:
|
60 | [cA,cD] = pywt.dwt(data,'db4');
|
61 | cD_minlen = len(cD)/max_decimation+1;
|
62 | cD_sum = numpy.zeros(cD_minlen);
|
63 | else:
|
64 | [cA,cD] = pywt.dwt(cA,'db4');
|
65 | # 2) Filter
|
66 | cD = signal.lfilter([0.01],[1 -0.99],cD);
|
67 |
|
68 | # 4) Subtractargs.filename out the mean.
|
69 |
|
70 | # 5) Decimate for reconstruction later.
|
71 | cD = abs(cD[::(2**(levels-loop-1))]);
|
72 | cD = cD - numpy.mean(cD);
|
73 | # 6) Recombine the signal before ACF
|
74 | # essentially, each level I concatenate
|
75 | # the detail coefs (i.e. the HPF values)
|
76 | # to the beginning of the array
|
77 | cD_sum = cD[0:cD_minlen] + cD_sum;
|
78 |
|
79 | if [b for b in cA if b != 0.0] == []:
|
80 | return no_audio_data()
|
81 | # adding in the approximate data as well...
|
82 | cA = signal.lfilter([0.01],[1 -0.99],cA);
|
83 | cA = abs(cA);
|
84 | cA = cA - numpy.mean(cA);
|
85 | cD_sum = cA[0:cD_minlen] + cD_sum;
|
86 |
|
87 | # ACF
|
88 | correl = numpy.correlate(cD_sum,cD_sum,'full')
|
89 |
|
90 | midpoint = len(correl) / 2
|
91 | correl_midpoint_tmp = correl[midpoint:]
|
92 | peak_ndx = peak_detect(correl_midpoint_tmp[min_ndx:max_ndx]);
|
93 | if len(peak_ndx) > 1:
|
94 | return no_audio_data()
|
95 |
|
96 | peak_ndx_adjusted = peak_ndx[0]+min_ndx;
|
97 | bpm = 60./ peak_ndx_adjusted * (fs/max_decimation)
|
98 | print bpm
|
99 | return bpm,correl
|
100 |
|
101 |
|
102 | if __name__ == '__main__':
|
103 | parser = argparse.ArgumentParser(description='Process .wav file to determine the Beats Per Minute.')
|
104 | parser.add_argument('--filename', required=True,
|
105 | help='.wav file for processing')
|
106 | parser.add_argument('--window', type=float, default=3,
|
107 | help='size of the the window (seconds) that will be scanned to determine the bpm. Typically less than 10 seconds. [3]')
|
108 |
|
109 | args = parser.parse_args()
|
110 | samps,fs = read_wav(args.filename)
|
111 |
|
112 | data = []
|
113 | correl=[]
|
114 | bpm = 0
|
115 | n=0;
|
116 | nsamps = len(samps)
|
117 | window_samps = int(args.window*fs)
|
118 | samps_ndx = 0; #first sample in window_ndx
|
119 | max_window_ndx = nsamps / window_samps;
|
120 | bpms = numpy.zeros(max_window_ndx)
|
121 |
|
122 | #iterate through all windows
|
123 | for window_ndx in xrange(0,max_window_ndx):
|
124 |
|
125 | #get a new set of samples
|
126 | #print n,":",len(bpms),":",max_window_ndx,":",fs,":",nsamps,":",samps_ndx
|
127 | data = samps[samps_ndx:samps_ndx+window_samps]
|
128 | if not ((len(data) % window_samps) == 0):
|
129 | raise AssertionError( str(len(data) ) )
|
130 |
|
131 | bpm, correl_temp = bpm_detector(data,fs)
|
132 | if bpm == None:
|
133 | continue
|
134 | bpms[window_ndx] = bpm
|
135 | correl = correl_temp
|
136 |
|
137 | #iterate at the end of the loop
|
138 | samps_ndx = samps_ndx+window_samps;
|
139 | n=n+1; #counter for debug...
|
140 |
|
141 | bpm = numpy.median(bpms)
|
142 | print 'Completed. Estimated Beats Per Minute:', bpm
|
143 |
|
144 | n = range(0,len(correl))
|
145 | plt.plot(n,abs(correl));
|
146 | plt.show(False); #plot non-blocking
|
147 | time.sleep(10);
|
148 | plt.close();
|