""" Record a few seconds of audio and save to a WAVE file. |
""" Record a few seconds of audio and save to a WAVE file. |
Based on http://stackoverflow.com/questions/892199/detect-record-audio-in-python/6743593#6743593 |
Based on http://stackoverflow.com/questions/892199/detect-record-audio-in-python/6743593#6743593 |
""" |
""" |
|
|
import pyaudio |
import pyaudio |
import wave |
import wave |
import sys |
import sys |
import audioop # http://docs.python.org/library/audioop |
import audioop # http://docs.python.org/library/audioop |
from os.path import exists |
from os.path import exists |
from array import array |
from array import array |
from struct import unpack, pack |
from struct import unpack, pack |
import threading |
import threading |
from pydispatch import dispatcher |
from pydispatch import dispatcher |
|
|
# Number of audio channels (mono).
CHANNELS = 1
# Sample amplitude separating "sound" from "silence".
THRESHOLD = 500
# Number of frames read from the input stream per read() call.
CHUNK_SIZE = 1024
# PyAudio sample format: signed 16-bit integers.
FORMAT = pyaudio.paInt16
# Sampling rate in Hz.
RATE = 44100
# record() stops once more than this many silent chunks have been counted
# after sound was first detected.
MAX_SILENT = 30
|
|
def is_silent(L, threshold=None):
    """Return True when the chunk `L` stays below the silence threshold.

    L         -- sequence of signed sample values.
    threshold -- amplitude limit; defaults to the module-level THRESHOLD.

    The peak is measured on absolute values so that negative-going samples
    count as sound too, which is the same criterion trim() applies.  An
    empty chunk is reported as silent instead of raising ValueError.
    """
    if threshold is None:
        threshold = THRESHOLD
    if not len(L):
        return True
    return max(abs(sample) for sample in L) < threshold
|
|
def normalize(L):
    """Scale the samples so that the loudest one has magnitude 16384.

    L -- sequence of signed 16-bit sample values.

    Returns a new array('h'); the input is left untouched.  Empty or
    all-zero input has no peak to scale against, so it is returned as an
    unchanged copy rather than failing with a division by zero.
    """
    MAXIMUM = 16384
    peak = max(abs(sample) for sample in L) if len(L) else 0
    if peak == 0:
        return array('h', L)

    scale = float(MAXIMUM) / peak
    # int() truncates toward zero, so results stay within +/-MAXIMUM.
    return array('h', [int(sample * scale) for sample in L])
|
|
def trim(L, threshold=None):
    """Strip the quiet samples from the start and end of `L`.

    L         -- sequence of signed 16-bit sample values.
    threshold -- a sample is "loud" when its absolute value is strictly
                 greater than this; defaults to the module-level THRESHOLD.

    Returns a new array('h') spanning the first through the last loud
    sample (quiet samples in between are kept).  If no sample is loud the
    result is empty.  The input is not modified.
    """
    if threshold is None:
        threshold = THRESHOLD

    samples = array('h', L)

    # Index of the first loud sample, scanning from the left.
    first = next((pos for pos, value in enumerate(samples)
                  if abs(value) > threshold), None)
    if first is None:
        return array('h')

    # Index of the last loud sample, scanning from the right.  The scan is
    # guaranteed to stop at `first` at the latest.
    last = next(pos for pos in range(len(samples) - 1, first - 1, -1)
                if abs(samples[pos]) > threshold)

    return samples[first:last + 1]
|
|
def add_silence(L, seconds, rate=None):
    """Pad `L` with `seconds` of zero samples at both the start and the end.

    L       -- sequence of signed 16-bit sample values.
    seconds -- length of each padding run in seconds (float allowed).
    rate    -- samples per second; defaults to the module-level RATE.

    Returns a new array('h'); the input is not modified.
    """
    if rate is None:
        rate = RATE

    # Array repetition builds the zero run without a Python-level loop; a
    # non-positive count simply yields an empty array.
    padding = array('h', [0]) * int(seconds * rate)

    padded = padding[:]
    padded.extend(L)
    padded.extend(padding)
    return padded
|
|
def record():
    """
    Record a word or words from the microphone and
    return the data as an array of signed shorts.

    Reading starts immediately, but samples are only kept from the first
    non-silent chunk onwards; a 'SND_STARTED' signal is sent through the
    dispatcher at that moment.  Recording stops once more than MAX_SILENT
    silent chunks have been counted after sound started.

    Normalizes the audio, trims silence from the
    start and end, and pads with 0.5 seconds of
    blank sound to make sure VLC et al can play
    it without getting chopped off.

    Returns a tuple (sample_width, samples): the sample width in bytes as
    reported by PyAudio for FORMAT, and the processed array('h').
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, output=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False
            LRtn = array('h')

            while True:
                data = stream.read(CHUNK_SIZE)
                # Little endian, signed short.  Floor division keeps the
                # repeat count an integer on Python 3 as well.
                chunk = array('h',
                              unpack('<' + 'h' * (len(data) // 2), data))

                silent = is_silent(chunk)

                if silent and snd_started:
                    num_silent += 1
                    print(num_silent)
                elif not silent and not snd_started:
                    dispatcher.send(signal='SND_STARTED')
                    snd_started = True
                    print(snd_started)

                if snd_started:
                    # Store each chunk exactly once, and only after sound
                    # has begun, so waiting in silence does not grow the
                    # buffer.
                    LRtn.extend(chunk)
                    if num_silent > MAX_SILENT:
                        break

            sample_width = p.get_sample_size(FORMAT)
        finally:
            # Release the audio device even if reading failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    LRtn = normalize(LRtn)
    LRtn = trim(LRtn)
    LRtn = add_silence(LRtn, 0.5)
    return sample_width, LRtn
|
|
def record_to_file(path):
    """Record from the microphone and write the result to `path` as WAVE.

    path -- destination file name; opened for binary writing.

    The recording comes from record(); it is written with CHANNELS
    channels at RATE Hz and the sample width record() reports.  A
    confirmation line is printed after the file has been closed.
    """
    sample_width, samples = record()
    # Serialise as little endian signed shorts, mirroring how record()
    # decoded the input stream.
    frames = pack('<' + ('h' * len(samples)), *samples)

    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(frames)
    finally:
        # Always close so the file handle is not leaked on a write error.
        wf.close()
    print("done - result written to "+path)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: record one utterance into demo.wav in the
    # current working directory and report where it was written.
    filename = 'demo.wav'
    record_to_file(filename)
    print("done - result written to "+filename)
|
|