WHITE PAPER: August 2012

Wavelets are mathematical functions that cut up data into different frequency components, and then study each component with a resolution matched to its scale. They have advantages over traditional Fourier methods in analyzing physical situations where the image contains discontinuities and sharp spikes.

Wavelets works in the following way. When you decompose a data set using wavelets, you use filters that act as averaging filters and others that produce details. Some of the resulting wavelet coefficients correspond to details in the data set. If the details are small, they might be omitted without substantially affecting the main features of the data set. The idea of thresholding, then, is to set to zero all coefficients that are less than a particular threshold. These coefficients are used in an inverse wavelet transformation to reconstruct the data set.

Sample code:

Denoise :

void wavelet_denoise(float **noisy,float **denoised,int Ni,int Nj,float threshold)

{

int i,j,k;

int oc_max,oc_r,oc_c;

float **buffer_t,**buffer;

int **normalization;

int shift_arr_r[MAX_ARR_SIZE],shift_arr_c[MAX_ARR_SIZE];

// 4 level wavelet transform.

const int levs=4;

int Nl,Nh;

float *lp,*hp;

// Choose wavelet filter. See wav_filters.h and wav_trf.c.

choose_filter('D',9);

// Main buffer for operations.

buffer_t=allocate_2d_float(Ni,Nj,0);

// Buffer on which results are accumulated.

buffer=allocate_2d_float(Ni,Nj,1);

// To keep track of the number of overcomplete results

// on each pixel. Legacy stuff, this is really not necessary

// when doing things image-wide.

normalization=allocate_2d_int(Ni,Nj,1);

// Variable that controls the number of overcomplete computations.

// oc_max=1<

// oc_max=2 overcomplete at the first level, then regular (4 x redundant)...

// see comments below.

oc_max=2;

for(oc_r=0;oc_r

// At every level of a 1-D wavelet transform there are two transforms possible

// obtained by the original bank or its one pixel shifted version.

// In L levels there are 2^L possibilities with each of these possibilities

// forming a complete transform (i.e., invertible, etc.). If we evaluate

// more than one then what we end up with is an overcomplete transform.

// In 2-D there are 4 possibilities at each level and hence 4^L total

// possibilities.

// At level l, this code selects either the "regular" (shift_arr_r[l]=0)

// or the one shifted bank (shift_arr_r[l]=1) over rows and likewise

// over columns using shift_arr_c[l].

// In order to generate the fully overcomplete transform we effectively

// put the binary representation of numbers from 0 to 2^L-1 (represented

// by oc_max) in shift_arr_r and shift_arr_c to traverse all 4^L possibilities.

// In fully overcomplete denoising example, we evaluate each of the 4^L transforms,

// hard threshold coefficients, inverse transform, and finally average the results.

// Convert oc_r into binary and put each digit (0-1) into shift_arr_r.

// Each digit causes the filters at the resulting wavelet level

// to be shifted by one (1) or remain unshifted (0).

// For filter shifts over rows.

k=oc_r;

for(i=levs-1;i>=0;i--) {

shift_arr_r[i]=k>>i;

k-=shift_arr_r[i]<

}

for(oc_c=0;oc_c

// For filter shifts over columns.

k=oc_c;

for(i=levs-1;i>=0;i--) {

shift_arr_c[i]=k>>i;

k-=shift_arr_c[i]<

}

// Copy the "noisy" region into the buffer.

for(i=0;i

for(j=0;j

buffer_t[i][j]=noisy[i][j];

}

// Select the forward bank of filters.

lp=MFLP;Nl=Nflp;

hp=MFHP;Nh=Nfhp;

if(PACKET) {

// Packet transform.

wavpack2d_inpl(buffer_t,Ni,Nj,levs,lp,Nl,hp,Nh,1,shift_arr_r,shift_arr_c);

// Threshold.

hard_threshold(buffer_t,Ni,Nj,threshold);

}

else {

// Regular wavelets.

wav2d_inpl(buffer_t,Ni,Nj,levs,lp,Nl,hp,Nh,1,shift_arr_r,shift_arr_c);

// Threshold.

hard_threshold(buffer_t,Ni,Nj,threshold);

}

// Select the inverse bank of filters.

lp=MILP;Nl=Nilp;

hp=MIHP;Nh=Nihp;

// Inverse transform.

if(PACKET)

wavpack2d_inpl(buffer_t,Ni,Nj,levs,lp,Nl,hp,Nh,0,shift_arr_r,shift_arr_c);

else

wav2d_inpl(buffer_t,Ni,Nj,levs,lp,Nl,hp,Nh,0,shift_arr_r,shift_arr_c);

// Accumulate results.

for(i=0;i

for(j=0;j

buffer[i][j]+=buffer_t[i][j];

normalization[i][j]++;

}

for(i=0;i

for(j=0;j

// Copy final results into place.

if(normalization[i][j])

denoised[i][j]=buffer[i][j]/normalization[i][j];

else

denoised[i][j]=noisy[i][j];

}

// Free buffers.

free_2d_float(buffer_t,Ni);

free_2d_float(buffer,Ni);

free_2d_int(normalization,Ni);

}

Add Noise

/***********************************************************/

void add_uniform_noise(float **orig,int Ni,int Nj,float **noisy,float mean,float stdev)

{

int i,j;

static int kilroy=0;

// The way uniform noise is generated has variance 1/12.

// Multiply with sqrt(12)*stdev to generate noise with variance=stdev^2.

float scaler=(float)(sqrt(12)*stdev);

if(!kilroy) {

kilroy=1;

srand(0);

}

for(i=0;i

for(j=0;j

noisy[i][j]=orig[i][j]+(float)((rand()/(RAND_MAX+1.0)-.5)*scaler+mean);

}

Gaussian Mixture Models

Mixture Models are a type of density model which comprise a number of component functions, usually Gaussian. These component functions are combined to provide a multimodal density. They can be employed to model the colors of an object in order to perform tasks such as real-time color-based tracking and segmentation.

These tasks may be made more robust by generating a mixture model corresponding to background colors in addition to a foreground model, and employing Bayes' theorem to perform pixel classification. Mixture models are also amenable to effective methods for on-line adaptation of models to cope with slowly-varying lighting conditions.

Mixture models are a semi-parametric alternative to non-parametric histograms (which can also be used as densities) and provide greater flexibility and precision in modeling the underlying statistics of sample data. They are able to smooth over gaps resulting from sparse sample data and provide tighter constraints in assigning object membership to color-space regions. Such precision is necessary to obtain the best results possible from color-based pixel classification for qualitative segmentation requirements.

Once a model is generated, conditional probabilities can be computed for color pixels. Gaussian mixture models can also be viewed as a form of generalized radial basis function network in which each Gaussian component is a basis function or `hidden' unit. The component priors can be viewed as weights in an output layer. Finite mixture models have also been discussed at length elsewhere although most of this work has concentrated on the general studies of the properties of mixture models rather than developing vision models for use with real data from dynamic scenes.

Let the conditional density for a pixel

belonging to multi-colored object

be a mixture with M component densities:

Where a mixing parameter P (j) corresponds to the prior probability that pixel

was generated by component j and where. Each mixture component is a Gaussian with mean

and covariance matrix

, i.e. in the case of a 2D color space:

The Image 1 and Image 2 shows the data used to build the foreground and the background (laboratory) models.

This row illustrates the probability density estimated from mixture models for the object foreground and scene background. The rightmost image is the combined posterior density in the HS colour space. Here the ``bright'' regions represent foreground whilst the ``dark'' regions give the background. The ``grey'' areas are regions of uncertainty.

Sample Code:

Matrix GaussianMixture::HermitteSplineFit(Matrix& inData, int nbSteps, Matrix& outData)

{

/* Spline fitting to rescale trajectories. */

if(nbSteps<=0)

return outData;

const int dataSize = inData.ColumnSize();

const int inSize = inData.RowSize();

const int outSize = nbSteps;

outData.Resize(outSize,dataSize);

for(int i=0;i

// Find the nearest data pair

const float cTime = float(i)/float(outSize-1)*float(inSize-1);

int prev, next;

float prevTime, nextTime;

prev = int(floor(cTime));

next = prev+1;

prevTime = float(prev);

nextTime = float(next);

const float nphase = (cTime-prevTime)/(nextTime-prevTime);

const float s1 = nphase;

const float s2 = s1*nphase;

const float s3 = s2*nphase;

const float h1 = 2.0f*s3 - 3.0f*s2 + 1.0f;

const float h2 = -2.0f*s3 + 3.0f*s2;

const float h3 = s3 - 2.0f*s2 + s1;

const float h4 = s3 - s2;

// The first column is considered as a temporal value

outData(i,0) = (float)i;

for(int j=1;j

const float p0 = (prev>0?inData(prev-1,j):inData(prev,j));

const float p1 = inData(prev,j);

const float p2 = inData(next,j);

const float p3 = (next

const float t1 = 0.5f*(p2-p0);

const float t2 = 0.5f*(p3-p1);

outData(i,j) = p1*h1+p2*h2+t1*h3+t2*h4;

}

return outData;

}

void GaussianMixture::initEM_TimeSplit(int nState,Matrix DataSet)

{

/* init the GaussianMixture by splitting the dataset into

time (first dimension) slices and computing variances

and means for each slices.

once initialisation has been performed, the nb of state is set */

Vector * mean = new Vector[nState];

int nData = DataSet.RowSize();

this->nState = nState;

this->dim = DataSet.ColumnSize();

float tmax = 0;

Matrix index(nState,nData);

int * pop = new int[nState];

priors = new float[nState];

mu.Resize(nState,dim);

sigma = new Matrix[nState];

Matrix unity(dim,dim); /* defining unity matrix */

for(int k = 0;k

for(int n = 0;n/* getting the max value for time */

{

if(DataSet(n,0) > tmax) tmax = DataSet(n,0);

}

for(int s=0;s/* clearing values */

{

mean[s].Resize(dim,true);

pop[s]=0;

}

/* divide the dataset into slices of equal time

(tmax/nState) and compute the mean for each slice

the pop table index to wich slice belongs each sample */

for(int n = 0;n

{

int s = (int)((DataSet(n,0)/(tmax+1))*nState);

//std::cout << s << std::endl;

mean[s] += DataSet.GetRow(n);

index(s,pop[s]) = (float)n;

pop[s] +=1;

}

for(int s =0;s

{

mu.SetRow(mean[s]/(float)pop[s],s); /* initiate the means computed before */

sigma[s]=Matrix(dim,dim);

priors[s]=1.0f/nState; /* set equi-probables states */

for(int ind=0;ind

{

for(int i=0;i/* Computing covariance matrices */

{

for(int j=0;j

{

sigma[s](i,j) += (DataSet((int)index(s,ind),i) - mu(s,i)) \

*(DataSet((int)index(s,ind),j)-mu(s,j));

}

sigma[s] *= 1.0f/pop[s];

sigma[s] += unity * 1e-5f; /* prevents this matrix from being non-inversible */

}

int GaussianMixture::doEM(Matrix DataSet)

{

/* perform Expectation/Maximization on the given Dataset :

Matrix DataSet(nSamples,Dimensions).

The GaussianMixture Object must be initialised before

(see initEM_TimeSplit method ) */

int nData = DataSet.RowSize();

int iter = 0;

float log_lik;

float log_lik_threshold = 1e-8f;

float log_lik_old = -1e10f;

Matrix unity(dim,dim);

for(int k = 0;k

//EM loop

while(true)

{

float * sum_p = new float[nData];

Matrix pxi(nData,nState);

Matrix pix(nData,nState);

Vector E;

//char strtmp[64];

// sprintf(strtmp, "gmms/%03d.gmm",iter);

//std::cout << strtmp << std::endl;

//saveParams(strtmp);

iter++;

if (iter>MAXITER){

std::cout << "EM stops here. Max number of iterations has been reached." << std::endl;

return iter;

}

float sum_log = 0;

// Expectation Computing

for(int i =0;i

{

sum_p[i]=0;

for(int j=0;j

{

float p = pdfState(DataSet.GetRow(i),j); // P(x|i)

if(p==0) {

std::cout << p << std::endl;

std::cout << "Error: Null probability. Abort.";

exit(0);

return -1;

}

pxi(i,j)= p;

sum_p[i] += p*priors[j];

}

sum_log += log(sum_p[i]);

}

for(int j=0;j

{

for(int i=0;i

{

pix(i,j) = pxi(i,j)*priors[j]/sum_p[i]; // then P(i|x)

}

// here we compute the log likehood

log_lik = sum_log/nData;

if(fabs((log_lik/log_lik_old)-1) < log_lik_threshold )

{

/* if log likehood hasn't move enough, the algorithm has

converged, exiting the loop */

//std::cout << "threshold ok" << std::endl;

return iter;;

}

//std::cout << "likelihood " << log_lik << std::endl;

log_lik_old = log_lik;

// Update Step

pix.SumRow(E);

for(int j=0;j

{

priors[j]=E(j)/nData; // new priors

Vector tmu(dim);

Matrix tmsigma(dim,dim);

for(int i=0;i// Means update loop

{

tmu += DataSet.GetRow(i)*pix(i,j);

}

mu.SetRow(tmu/E(j),j);

for(int i=0;i// Covariances updates

{

Matrix Dif(dim,1);

Dif.SetColumn((DataSet.GetRow(i)-mu.GetRow(j)),0);

tmsigma += (Dif*Dif.Transpose())*pix(i,j);

}

sigma[j] = tmsigma/E(j) + unity * 1e-5f;

}

return iter;

}

WHITE PAPER

Friday, August 24, 2012

Wavelets Filters

Gaussian Mixture Models

Blog Archive