// localisation_audio/localisation_v1.cpp

#include <climits>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include "/usr/include/alsa/asoundlib.h"


/* Type of one PCM sample, as stored in the right/left capture buffers. */
#define SAMPLE_TYPE short

class RunningAverage;
/**
* This class computes the direction of the source of the sound it hears.
*
* It uses 2 microphones, and compute the time of arrival difference of sound
* between them to estimate the sound source localization.
*/
class SoundSourceLoc
/**
* Max time shift between right and left mic in number of samples.
* This typically depends on the sample rate and the distance between
* microphones.
* You can either compute this with clever formulas involving sound speed
* and microphones distance, or just try and put the max value you get with
* extreme loc of sound. Guess what I did :-)
*/
static const int _nbSamplesMaxDiff = 13 ;
/**
* Buffer size on which we will try to locate sound.
* This is a number of samples, and depends on sample rate, and speed of
* sound loc change we want to detect. Lower values mean compute sound loc
* often, but accuracy is quite low as we compute on a very small slice of
* sound.
* Empirically, I found that computing on long sounds is better, here 4096
* samples at 44 KHz sampling rate means about one second of sound => we
/* reevaluate sound loc every second.
* Notice that the larger the value, the most computation we do, as we time
* shift on the whole buffer.
*/
static const int _bufferSize = 4096 ;
/**
* Take a point for sound loc is level > 105% of mean level.
* This allows to compute sound loc only for "meaningful" sounds, not
* background noise.
*/
static const float _minLevelFactorForValidLoc = 1.05f ;
/**
* sound speed in meters per seconds
*/
static const float _soundSpeed = 344 ;
/**
* sound sampling rate in Hz
*/
unsigned int _soundSamplingRate ;
/**
* Distance between microphones in meters#include <climits>

*/
static const float _distanceBetweenMicrophones = 0.1f ;

/** An utility to compute the running average of sound power */
RunningAverage* _averageSoundLevel ;

/** ALSA sound input handle */
snd_pcm_t* _capture_handle ;

/** sound samples input buffer */
SAMPLE_TYPE _rightBuffer[_bufferSize] ;
SAMPLE_TYPE _leftBuffer[_bufferSize] ;

public :
SoundSourceLoc(){
_averageSoundLevel = new RunningAverage(50) ;
_soundSamplingRate = 44100 ;
}

// sampling : 2 chanels, 44 KHz, 16 bits.

/** Clean exit */
SoundSourceLoc(){
snd_pcm_close(_capture_handle) ;
delete _averageSoundLevel ;
}

//**
/* Main loop : read a buffer, compute sound source localization, iterate.
*/
void run(){
while (true)
processNextSoundBlock() ;
}


/**
* This is the core of the sound source localization : it takes the
* right/left sampled sounds, and compute their differences while delaying
* one channel more and more.
* => the delay for which the difference is minimal is the real delay
* between the right/left sounds, from which we can deduce the sound source
* localization
*/
void processNextSoundBlock(){
SAMPLE_TYPE* bufs[2] ;
bufs[0] = _rightBuffer ;
bufs[1] = _leftBuffer ;
int err ;
if ((err = snd_pcm_readn(_capture_handle, (void**) bufs, _bufferSize)) != _bufferSize) {
	fprintf(stderr, "read from audio interface failed (%s)\n", snd_strerror(err)) ;
	exit(1) ;
	}
}


// compute the sound level (i.e. "loudness" of the sound) :
SAMPLE_TYPE level = computeLevel(_rightBuffer, _leftBuffer) ;
// update the average sound level with this new measure :
_averageSoundLevel->newValue(level) ;
// relative sound level of this sample compared to average :
float relativeLevel = (float) level / (float) _averageSoundLevel->getMean() ;


int minDiff = INT_MAX ;
int minDiffTime = -1 ;
// ’slide’ time to find minimum of right/left sound differences
for (int t = -_nbSamplesMaxDiff ; t < _nbSamplesMaxDiff ; t++){
	// compute sum of differences as the cross-correlation-like measure :
	int diff = 0 ;
	for (int i = _nbSamplesMaxDiff ; i < _bufferSize - _nbSamplesMaxDiff - 1 ; i++) diff += abs(_leftBuffer[i] - _rightBuffer[i + t]) ;
	if (diff < minDiff){
	minDiff = diff ;
	minDiffTime = t ;
	}
/// if sound is loud enough, and not an extreme (=usually false
// measure), then output it :
	if ((relativeLevel > _minLevelFactorForValidLoc) && (minDiffTime > -_nbSamplesMaxDiff) && (minDiffTime < _nbSamplesMaxDiff)){
// computation of angle depending on diff time, sampling rates,
// and geometry (thanks Mathieu from Pobot :-) ) :
		float angle = -(float) asin((minDiffTime * _soundSpeed) / (_soundSamplingRate * _distanceBetweenMicrophones)) ;
		cout << angle << " ;" << relativeLevel << endl ;
		}
	}


/*
* Compute average sound level (i.e. power) for left/right channels.
*
* Notice we could probably do the computation on some samples only (for
* example one over 4 samples) without loosing much accuracy here. This
* would reduce computation time.
* Also, as we are only interested in relative evolution, we could
* simplify and avoid the multiplications by just taking the mean of
* absolute values ?
*/
SAMPLE_TYPE computeLevel(SAMPLE_TYPE right[], SAMPLE_TYPE left[]){
float level = 0 ;
for (int i = 0 ; i < _bufferSize ; i++){
	float s = (left[i] + right[i]) / 2 ;
	level += (s * s) ;
	}
level /= _bufferSize ;
level = sqrt(level) ;
return (SAMPLE_TYPE) level ;


}

class RunningAverage
int _nbValuesForAverage ;
int _nbValues ;
float _mean ;

public :
RunningAverage(int nbValuesForAverage){
	_nbValuesForAverage = nbValuesForAverage ;
	_mean = 0 ;
	_nbValues = 0 ;
}

void newValue(SAMPLE_TYPE v){
	if (_nbValues < _nbValuesForAverage){
		_nbValues++ ;
		_mean = ((_mean * (_nbValues - 1)) + v) / (float)_nbValues ;
	}
	SAMPLE_TYPE getMean(){
	return (SAMPLE_TYPE) _mean ;
	}
}

int main(int argc, char *argv[]){
SoundSourceLoc soundLoc ;
soundLoc.run() ;
}