localisation_audio/localisation_v1.cpp

198 lines
5.6 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <climits>
#include <cstdio>
#include <stdio.h>
#include <stdlib.h>
#include "/usr/include/alsa/asoundlib.h"
#define SAMPLE_TYPE short
class RunningAverage;
/**
* This class computes the direction of the source of the sound it hears.
*
* It uses 2 microphones, and compute the time of arrival difference of sound
* between them to estimate the sound source localization.
*/
class SoundSourceLoc
/**
* Max time shift between right and left mic in number of samples.
* This typically depends on the sample rate and the distance between
* microphones.
* You can either compute this with clever formulas involving sound speed
* and microphones distance, or just try and put the max value you get with
* extreme loc of sound. Guess what I did :-)
*/
static const int _nbSamplesMaxDiff = 13 ;
/**
* Buffer size on which we will try to locate sound.
* This is a number of samples, and depends on sample rate, and speed of
* sound loc change we want to detect. Lower values mean compute sound loc
* often, but accuracy is quite low as we compute on a very small slice of
* sound.
* Empirically, I found that computing on long sounds is better, here 4096
* samples at 44 KHz sampling rate means about one second of sound => we
/* reevaluate sound loc every second.
* Notice that the larger the value, the most computation we do, as we time
* shift on the whole buffer.
*/
static const int _bufferSize = 4096 ;
/**
* Take a point for sound loc is level > 105% of mean level.
* This allows to compute sound loc only for "meaningful" sounds, not
* background noise.
*/
static const float _minLevelFactorForValidLoc = 1.05f ;
/**
* sound speed in meters per seconds
*/
static const float _soundSpeed = 344 ;
/**
* sound sampling rate in Hz
*/
unsigned int _soundSamplingRate ;
/**
* Distance between microphones in meters#include <climits>
*/
static const float _distanceBetweenMicrophones = 0.1f ;
/** An utility to compute the running average of sound power */
RunningAverage* _averageSoundLevel ;
/** ALSA sound input handle */
snd_pcm_t* _capture_handle ;
/** sound samples input buffer */
SAMPLE_TYPE _rightBuffer[_bufferSize] ;
SAMPLE_TYPE _leftBuffer[_bufferSize] ;
public :
SoundSourceLoc(){
_averageSoundLevel = new RunningAverage(50) ;
_soundSamplingRate = 44100 ;
}
// sampling : 2 chanels, 44 KHz, 16 bits.
/** Clean exit */
SoundSourceLoc(){
snd_pcm_close(_capture_handle) ;
delete _averageSoundLevel ;
}
//**
/* Main loop : read a buffer, compute sound source localization, iterate.
*/
void run(){
while (true)
processNextSoundBlock() ;
}
/**
* This is the core of the sound source localization : it takes the
* right/left sampled sounds, and compute their differences while delaying
* one channel more and more.
* => the delay for which the difference is minimal is the real delay
* between the right/left sounds, from which we can deduce the sound source
* localization
*/
void processNextSoundBlock(){
SAMPLE_TYPE* bufs[2] ;
bufs[0] = _rightBuffer ;
bufs[1] = _leftBuffer ;
int err ;
if ((err = snd_pcm_readn(_capture_handle, (void**) bufs, _bufferSize)) != _bufferSize) {
fprintf(stderr, "read from audio interface failed (%s)\n", snd_strerror(err)) ;
exit(1) ;
}
}
// compute the sound level (i.e. "loudness" of the sound) :
SAMPLE_TYPE level = computeLevel(_rightBuffer, _leftBuffer) ;
// update the average sound level with this new measure :
_averageSoundLevel->newValue(level) ;
// relative sound level of this sample compared to average :
float relativeLevel = (float) level / (float) _averageSoundLevel->getMean() ;
int minDiff = INT_MAX ;
int minDiffTime = -1 ;
// slide time to find minimum of right/left sound differences
for (int t = -_nbSamplesMaxDiff ; t < _nbSamplesMaxDiff ; t++){
// compute sum of differences as the cross-correlation-like measure :
int diff = 0 ;
for (int i = _nbSamplesMaxDiff ; i < _bufferSize - _nbSamplesMaxDiff - 1 ; i++) diff += abs(_leftBuffer[i] - _rightBuffer[i + t]) ;
if (diff < minDiff){
minDiff = diff ;
minDiffTime = t ;
}
/// if sound is loud enough, and not an extreme (=usually false
// measure), then output it :
if ((relativeLevel > _minLevelFactorForValidLoc) && (minDiffTime > -_nbSamplesMaxDiff) && (minDiffTime < _nbSamplesMaxDiff)){
// computation of angle depending on diff time, sampling rates,
// and geometry (thanks Mathieu from Pobot :-) ) :
float angle = -(float) asin((minDiffTime * _soundSpeed) / (_soundSamplingRate * _distanceBetweenMicrophones)) ;
cout << angle << " ;" << relativeLevel << endl ;
}
}
/*
* Compute average sound level (i.e. power) for left/right channels.
*
* Notice we could probably do the computation on some samples only (for
* example one over 4 samples) without loosing much accuracy here. This
* would reduce computation time.
* Also, as we are only interested in relative evolution, we could
* simplify and avoid the multiplications by just taking the mean of
* absolute values ?
*/
SAMPLE_TYPE computeLevel(SAMPLE_TYPE right[], SAMPLE_TYPE left[]){
float level = 0 ;
for (int i = 0 ; i < _bufferSize ; i++){
float s = (left[i] + right[i]) / 2 ;
level += (s * s) ;
}
level /= _bufferSize ;
level = sqrt(level) ;
return (SAMPLE_TYPE) level ;
}
class RunningAverage
int _nbValuesForAverage ;
int _nbValues ;
float _mean ;
public :
RunningAverage(int nbValuesForAverage){
_nbValuesForAverage = nbValuesForAverage ;
_mean = 0 ;
_nbValues = 0 ;
}
void newValue(SAMPLE_TYPE v){
if (_nbValues < _nbValuesForAverage){
_nbValues++ ;
_mean = ((_mean * (_nbValues - 1)) + v) / (float)_nbValues ;
}
SAMPLE_TYPE getMean(){
return (SAMPLE_TYPE) _mean ;
}
}
int main(int argc, char *argv[]){
SoundSourceLoc soundLoc ;
soundLoc.run() ;
}