// localisation_audio/localisation_v1.cpp
//
// Header recovered from a web file-viewer export of this file
// (reported there as: 198 lines, 5.6 KiB, C++; revision dated
// 2020-05-22 18:03:11 +02:00).

#include <stdio.h>
#include <stdlib.h>

#include <climits>
#include <cmath>
#include <cstdio>
#include <iostream>

#include "/usr/include/alsa/asoundlib.h"
#define SAMPLE_TYPE short
class RunningAverage;
/**
* This class computes the direction of the source of the sound it hears.
*
* It uses 2 microphones, and compute the time of arrival difference of sound
* between them to estimate the sound source localization.
*/
class SoundSourceLoc
/**
* Max time shift between right and left mic in number of samples.
* This typically depends on the sample rate and the distance between
* microphones.
* You can either compute this with clever formulas involving sound speed
* and microphones distance, or just try and put the max value you get with
* extreme loc of sound. Guess what I did :-)
*/
static const int _nbSamplesMaxDiff = 13 ;
/**
* Buffer size on which we will try to locate sound.
* This is a number of samples, and depends on sample rate, and speed of
* sound loc change we want to detect. Lower values mean compute sound loc
* often, but accuracy is quite low as we compute on a very small slice of
* sound.
* Empirically, I found that computing on long sounds is better, here 4096
* samples at 44 KHz sampling rate means about one second of sound => we
/* reevaluate sound loc every second.
* Notice that the larger the value, the most computation we do, as we time
* shift on the whole buffer.
*/
static const int _bufferSize = 4096 ;
/**
* Take a point for sound loc is level > 105% of mean level.
* This allows to compute sound loc only for "meaningful" sounds, not
* background noise.
*/
static const float _minLevelFactorForValidLoc = 1.05f ;
/**
* sound speed in meters per seconds
*/
static const float _soundSpeed = 344 ;
/**
* sound sampling rate in Hz
*/
unsigned int _soundSamplingRate ;
/**
* Distance between microphones in meters#include <climits>
*/
static const float _distanceBetweenMicrophones = 0.1f ;
/** An utility to compute the running average of sound power */
RunningAverage* _averageSoundLevel ;
/** ALSA sound input handle */
snd_pcm_t* _capture_handle ;
/** sound samples input buffer */
SAMPLE_TYPE _rightBuffer[_bufferSize] ;
SAMPLE_TYPE _leftBuffer[_bufferSize] ;
public :
SoundSourceLoc(){
_averageSoundLevel = new RunningAverage(50) ;
_soundSamplingRate = 44100 ;
}
// sampling : 2 chanels, 44 KHz, 16 bits.
/** Clean exit */
SoundSourceLoc(){
snd_pcm_close(_capture_handle) ;
delete _averageSoundLevel ;
}
//**
/* Main loop : read a buffer, compute sound source localization, iterate.
*/
void run(){
while (true)
processNextSoundBlock() ;
}
/**
* This is the core of the sound source localization : it takes the
* right/left sampled sounds, and compute their differences while delaying
* one channel more and more.
* => the delay for which the difference is minimal is the real delay
* between the right/left sounds, from which we can deduce the sound source
* localization
*/
void processNextSoundBlock(){
SAMPLE_TYPE* bufs[2] ;
bufs[0] = _rightBuffer ;
bufs[1] = _leftBuffer ;
int err ;
if ((err = snd_pcm_readn(_capture_handle, (void**) bufs, _bufferSize)) != _bufferSize) {
fprintf(stderr, "read from audio interface failed (%s)\n", snd_strerror(err)) ;
exit(1) ;
}
}
// compute the sound level (i.e. "loudness" of the sound) :
SAMPLE_TYPE level = computeLevel(_rightBuffer, _leftBuffer) ;
// update the average sound level with this new measure :
_averageSoundLevel->newValue(level) ;
// relative sound level of this sample compared to average :
float relativeLevel = (float) level / (float) _averageSoundLevel->getMean() ;
int minDiff = INT_MAX ;
int minDiffTime = -1 ;
// slide time to find minimum of right/left sound differences
for (int t = -_nbSamplesMaxDiff ; t < _nbSamplesMaxDiff ; t++){
// compute sum of differences as the cross-correlation-like measure :
int diff = 0 ;
for (int i = _nbSamplesMaxDiff ; i < _bufferSize - _nbSamplesMaxDiff - 1 ; i++) diff += abs(_leftBuffer[i] - _rightBuffer[i + t]) ;
if (diff < minDiff){
minDiff = diff ;
minDiffTime = t ;
}
/// if sound is loud enough, and not an extreme (=usually false
// measure), then output it :
if ((relativeLevel > _minLevelFactorForValidLoc) && (minDiffTime > -_nbSamplesMaxDiff) && (minDiffTime < _nbSamplesMaxDiff)){
// computation of angle depending on diff time, sampling rates,
// and geometry (thanks Mathieu from Pobot :-) ) :
float angle = -(float) asin((minDiffTime * _soundSpeed) / (_soundSamplingRate * _distanceBetweenMicrophones)) ;
cout << angle << " ;" << relativeLevel << endl ;
}
}
/*
* Compute average sound level (i.e. power) for left/right channels.
*
* Notice we could probably do the computation on some samples only (for
* example one over 4 samples) without loosing much accuracy here. This
* would reduce computation time.
* Also, as we are only interested in relative evolution, we could
* simplify and avoid the multiplications by just taking the mean of
* absolute values ?
*/
SAMPLE_TYPE computeLevel(SAMPLE_TYPE right[], SAMPLE_TYPE left[]){
float level = 0 ;
for (int i = 0 ; i < _bufferSize ; i++){
float s = (left[i] + right[i]) / 2 ;
level += (s * s) ;
}
level /= _bufferSize ;
level = sqrt(level) ;
return (SAMPLE_TYPE) level ;
}
class RunningAverage
int _nbValuesForAverage ;
int _nbValues ;
float _mean ;
public :
RunningAverage(int nbValuesForAverage){
_nbValuesForAverage = nbValuesForAverage ;
_mean = 0 ;
_nbValues = 0 ;
}
void newValue(SAMPLE_TYPE v){
if (_nbValues < _nbValuesForAverage){
_nbValues++ ;
_mean = ((_mean * (_nbValues - 1)) + v) / (float)_nbValues ;
}
SAMPLE_TYPE getMean(){
return (SAMPLE_TYPE) _mean ;
}
}
int main(int argc, char *argv[]){
SoundSourceLoc soundLoc ;
soundLoc.run() ;
}