""" Record a few seconds of audio and save to a WAVE file.
Based on http://stackoverflow.com/questions/892199/detect-record-audio-in-python/6743593#6743593
"""
| |
|
| import pyaudio |
import pyaudio |
| import wave |
import wave |
| import sys |
import sys |
| import audioop # http://docs.python.org/library/audioop |
import audioop # http://docs.python.org/library/audioop |
| from os.path import exists |
from os.path import exists |
| from array import array |
from array import array |
| from struct import unpack, pack |
from struct import unpack, pack |
| import threading |
import threading |
| |
from pydispatch import dispatcher |
| |
|
# Channel count (mono).
CHANNELS = 1
# Amplitude cutoff used by is_silent() and trim() to tell sound from silence.
THRESHOLD = 500
# Number of frames requested per stream read and used as frames_per_buffer.
CHUNK_SIZE = 1024
# PyAudio sample format; record() unpacks the data as signed shorts.
FORMAT = pyaudio.paInt16
# Sampling rate in frames per second, for both the stream and the WAVE file.
RATE = 44100

# record() stops once more than this many silent chunks followed the first sound.
MAX_SILENT = 30
| |
|
def is_silent(L, threshold=None):
    """Return True if every sample in `L` is below the 'silent' threshold.

    L         -- iterable of signed sample values (one chunk of audio).
    threshold -- amplitude cutoff; defaults to the module-level THRESHOLD.

    Sample magnitudes are compared, so a loud negative swing is not
    mistaken for silence (the same rule `trim` applies).  An empty chunk
    holds no sound and is therefore reported as silent.
    """
    if threshold is None:
        threshold = THRESHOLD
    return all(abs(sample) < threshold for sample in L)
| |
|
def normalize(L):
    """Scale the samples so that the loudest one has magnitude 16384.

    L -- sequence of signed sample values.

    Returns a new array('h'); `L` itself is not modified.  Input that is
    empty or entirely zero has no peak to scale against, so it is returned
    as an unchanged copy instead of dividing by zero.
    """
    MAXIMUM = 16384
    # `or [0]` keeps max() from raising on an empty sequence.
    peak = max([abs(i) for i in L] or [0])
    if peak == 0:
        return array('h', L)

    times = float(MAXIMUM) / peak
    return array('h', [int(i * times) for i in L])
| |
|
def trim(L, threshold=None):
    """Trim the blank spots at the start and end of `L`.

    L         -- sequence of signed sample values.
    threshold -- amplitude cutoff; defaults to the module-level THRESHOLD.

    Returns a new array('h') running from the first sample whose magnitude
    exceeds the threshold to the last such sample, inclusive.  If no sample
    exceeds the threshold the result is empty.  `L` is not modified.
    """
    if threshold is None:
        threshold = THRESHOLD
    samples = L if isinstance(L, array) else array('h', L)

    # Index of the first loud sample, or None when everything is quiet.
    start = next((idx for idx, value in enumerate(samples)
                  if abs(value) > threshold), None)
    if start is None:
        return array('h')

    # Scan from the end; a loud sample is known to exist at this point.
    from_end = next(idx for idx, value in enumerate(reversed(samples))
                    if abs(value) > threshold)
    return array('h', samples[start:len(samples) - from_end])
| |
|
def add_silence(L, seconds, rate=None):
    """Add silence to the start and end of `L` of length `seconds` (float).

    L       -- sequence of signed sample values.
    seconds -- duration of the padding added on each side.
    rate    -- samples per second; defaults to the module-level RATE.

    Returns a new array('h'); `L` is not modified.
    """
    if rate is None:
        rate = RATE
    # Array repetition builds the zero padding without xrange, which does
    # not exist on Python 3; a non-positive count yields no padding.
    padding = array('h', [0]) * int(seconds * rate)

    LRtn = array('h', padding)
    LRtn.extend(L)
    LRtn.extend(padding)
    return LRtn
| |
|
def record():
    """
    Record a word or words from the microphone and
    return the data as an array of signed shorts.

    Recording runs until sound has been detected and more than
    MAX_SILENT silent chunks have been read after that.  A
    'SND_STARTED' signal is sent through the dispatcher when the
    first non-silent chunk arrives.

    Normalizes the audio, trims silence from the
    start and end, and pads with 0.5 seconds of
    blank sound to make sure VLC et al can play
    it without getting chopped off.

    Returns a (sample_width, samples) tuple, where sample_width is
    the size in bytes of one sample in FORMAT.
    """
    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        # NOTE(review): output=True is kept from the original; it does not
        # look necessary for capturing audio -- confirm before removing.
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, output=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False
            LRtn = array('h')

            while True:
                data = stream.read(CHUNK_SIZE)
                # little endian, signed short; `//` keeps the repeat count
                # an integer on Python 3 as well.
                L = array('h', unpack('<' + ('h' * (len(data) // 2)), data))
                LRtn.extend(L)

                silent = is_silent(L)

                if silent and snd_started:
                    # NOTE(review): the counter is never reset when sound
                    # resumes, so it counts all silent chunks since the
                    # start, not consecutive ones.
                    num_silent += 1
                    print(num_silent)
                elif not silent and not snd_started:
                    dispatcher.send(signal='SND_STARTED')
                    snd_started = True
                    print(snd_started)

                if snd_started and num_silent > MAX_SILENT:
                    break
        finally:
            # Release the audio device even if reading or decoding failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    LRtn = normalize(LRtn)
    LRtn = trim(LRtn)
    LRtn = add_silence(LRtn, 0.5)
    return sample_width, LRtn
| |
|
def record_to_file(path):
    """Records from the microphone and outputs the resulting data to `path`.

    path -- file name of the WAVE file to write.

    The samples returned by record() are packed as little-endian signed
    shorts and written with the sample width reported by record() and the
    module-level RATE.  A confirmation line is printed when writing is done.
    """
    sample_width, data = record()
    data = pack('<' + ('h' * len(data)), *data)

    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(data)
    finally:
        # Close the writer even when a write fails so the handle is released.
        wf.close()
    print("done - result written to "+path)
| |
|
| |
|
| |
|
| |
|
if __name__ == '__main__':
    # record_to_file() prints the "done" message itself, so it is not
    # repeated here.
    filename = 'demo.wav'
    record_to_file(filename)
| |
|