#include #include #include #include #include "/usr/include/alsa/asoundlib.h" #define SAMPLE_TYPE short class RunningAverage; /** * This class computes the direction of the source of the sound it hears. * * It uses 2 microphones, and compute the time of arrival difference of sound * between them to estimate the sound source localization. */ class SoundSourceLoc /** * Max time shift between right and left mic in number of samples. * This typically depends on the sample rate and the distance between * microphones. * You can either compute this with clever formulas involving sound speed * and microphones distance, or just try and put the max value you get with * extreme loc of sound. Guess what I did :-) */ static const int _nbSamplesMaxDiff = 13 ; /** * Buffer size on which we will try to locate sound. * This is a number of samples, and depends on sample rate, and speed of * sound loc change we want to detect. Lower values mean compute sound loc * often, but accuracy is quite low as we compute on a very small slice of * sound. * Empirically, I found that computing on long sounds is better, here 4096 * samples at 44 KHz sampling rate means about one second of sound => we /* reevaluate sound loc every second. * Notice that the larger the value, the most computation we do, as we time * shift on the whole buffer. */ static const int _bufferSize = 4096 ; /** * Take a point for sound loc is level > 105% of mean level. * This allows to compute sound loc only for "meaningful" sounds, not * background noise. */ static const float _minLevelFactorForValidLoc = 1.05f ; /** * sound speed in meters per seconds */ static const float _soundSpeed = 344 ; /** * sound sampling rate in Hz */ unsigned int _soundSamplingRate ; /** * Distance between microphones in meters#include */ static const float _distanceBetweenMicrophones = 0.1f ; /** An utility to compute the running average of sound power */ RunningAverage* _averageSoundLevel ; /** ALSA sound input handle */ snd_pcm_t* _capture_handle ; /** sound samples input buffer */ SAMPLE_TYPE _rightBuffer[_bufferSize] ; SAMPLE_TYPE _leftBuffer[_bufferSize] ; public : SoundSourceLoc(){ _averageSoundLevel = new RunningAverage(50) ; _soundSamplingRate = 44100 ; } // sampling : 2 chanels, 44 KHz, 16 bits. /** Clean exit */ SoundSourceLoc(){ snd_pcm_close(_capture_handle) ; delete _averageSoundLevel ; } //** /* Main loop : read a buffer, compute sound source localization, iterate. */ void run(){ while (true) processNextSoundBlock() ; } /** * This is the core of the sound source localization : it takes the * right/left sampled sounds, and compute their differences while delaying * one channel more and more. * => the delay for which the difference is minimal is the real delay * between the right/left sounds, from which we can deduce the sound source * localization */ void processNextSoundBlock(){ SAMPLE_TYPE* bufs[2] ; bufs[0] = _rightBuffer ; bufs[1] = _leftBuffer ; int err ; if ((err = snd_pcm_readn(_capture_handle, (void**) bufs, _bufferSize)) != _bufferSize) { fprintf(stderr, "read from audio interface failed (%s)\n", snd_strerror(err)) ; exit(1) ; } } // compute the sound level (i.e. "loudness" of the sound) : SAMPLE_TYPE level = computeLevel(_rightBuffer, _leftBuffer) ; // update the average sound level with this new measure : _averageSoundLevel->newValue(level) ; // relative sound level of this sample compared to average : float relativeLevel = (float) level / (float) _averageSoundLevel->getMean() ; int minDiff = INT_MAX ; int minDiffTime = -1 ; // ’slide’ time to find minimum of right/left sound differences for (int t = -_nbSamplesMaxDiff ; t < _nbSamplesMaxDiff ; t++){ // compute sum of differences as the cross-correlation-like measure : int diff = 0 ; for (int i = _nbSamplesMaxDiff ; i < _bufferSize - _nbSamplesMaxDiff - 1 ; i++) diff += abs(_leftBuffer[i] - _rightBuffer[i + t]) ; if (diff < minDiff){ minDiff = diff ; minDiffTime = t ; } /// if sound is loud enough, and not an extreme (=usually false // measure), then output it : if ((relativeLevel > _minLevelFactorForValidLoc) && (minDiffTime > -_nbSamplesMaxDiff) && (minDiffTime < _nbSamplesMaxDiff)){ // computation of angle depending on diff time, sampling rates, // and geometry (thanks Mathieu from Pobot :-) ) : float angle = -(float) asin((minDiffTime * _soundSpeed) / (_soundSamplingRate * _distanceBetweenMicrophones)) ; cout << angle << " ;" << relativeLevel << endl ; } } /* * Compute average sound level (i.e. power) for left/right channels. * * Notice we could probably do the computation on some samples only (for * example one over 4 samples) without loosing much accuracy here. This * would reduce computation time. * Also, as we are only interested in relative evolution, we could * simplify and avoid the multiplications by just taking the mean of * absolute values ? */ SAMPLE_TYPE computeLevel(SAMPLE_TYPE right[], SAMPLE_TYPE left[]){ float level = 0 ; for (int i = 0 ; i < _bufferSize ; i++){ float s = (left[i] + right[i]) / 2 ; level += (s * s) ; } level /= _bufferSize ; level = sqrt(level) ; return (SAMPLE_TYPE) level ; } class RunningAverage int _nbValuesForAverage ; int _nbValues ; float _mean ; public : RunningAverage(int nbValuesForAverage){ _nbValuesForAverage = nbValuesForAverage ; _mean = 0 ; _nbValues = 0 ; } void newValue(SAMPLE_TYPE v){ if (_nbValues < _nbValuesForAverage){ _nbValues++ ; _mean = ((_mean * (_nbValues - 1)) + v) / (float)_nbValues ; } SAMPLE_TYPE getMean(){ return (SAMPLE_TYPE) _mean ; } } int main(int argc, char *argv[]){ SoundSourceLoc soundLoc ; soundLoc.run() ; }