Gaussian Mixture Models
Mixture models are a type of density model comprising a number of component functions, usually Gaussian. These component functions are combined to provide a multimodal density. They can be employed to model the colors of an object in order to perform tasks such as real-time color-based tracking and segmentation. These tasks can be made more robust by generating a mixture model for the background colors in addition to the foreground model and applying Bayes' theorem to classify each pixel. Mixture models are also amenable to effective on-line adaptation, so that the models can cope with slowly varying lighting conditions.
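Concretely, if $p(\mathbf{x}\,|\,F)$ and $p(\mathbf{x}\,|\,B)$ denote the foreground and background mixture densities (notation chosen here for illustration), a pixel with color $\mathbf{x}$ can be classified through the posterior

$$P(F\,|\,\mathbf{x}) = \frac{p(\mathbf{x}\,|\,F)\,P(F)}{p(\mathbf{x}\,|\,F)\,P(F) + p(\mathbf{x}\,|\,B)\,P(B)}$$

where $P(F)$ and $P(B)$ are the prior probabilities of foreground and background; a pixel may then be assigned to the foreground when, say, $P(F\,|\,\mathbf{x}) > 0.5$.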
Mixture models are a semi-parametric alternative to non-parametric histograms (which can also be used as densities) and provide greater flexibility and precision in modeling the underlying statistics of sample data. They are able to smooth over gaps resulting from sparse sample data and provide tighter constraints in assigning object membership to color-space regions. Such precision is necessary to obtain the best possible results from color-based pixel classification for qualitative segmentation requirements.
Once a model is generated, conditional probabilities can be computed for color pixels. Gaussian mixture models can also be viewed as a form of generalized radial basis function network in which each Gaussian component is a basis function or 'hidden' unit. The component priors can be viewed as weights in an output layer. Finite mixture models have also been discussed at length elsewhere, although most of that work has concentrated on general studies of the properties of mixture models rather than on developing vision models for use with real data from dynamic scenes.
Let the conditional density for a pixel $\mathbf{x}$ belonging to a multi-colored object $\mathcal{O}$ be a mixture with $M$ component densities:

$$p(\mathbf{x}\,|\,\mathcal{O}) = \sum_{j=1}^{M} p(\mathbf{x}\,|\,j)\,P(j)$$

where the mixing parameter $P(j)$ corresponds to the prior probability that pixel $\mathbf{x}$ was generated by component $j$ and where $\sum_{j=1}^{M} P(j) = 1$. Each mixture component is a Gaussian with mean $\boldsymbol{\mu}_j$ and covariance matrix $\boldsymbol{\Sigma}_j$, i.e. in the case of a 2D color space:

$$p(\mathbf{x}\,|\,j) = \frac{1}{2\pi\,|\boldsymbol{\Sigma}_j|^{1/2}} \exp\left(-\frac{1}{2}(\mathbf{x}-\boldsymbol{\mu}_j)^{\mathsf{T}}\boldsymbol{\Sigma}_j^{-1}(\mathbf{x}-\boldsymbol{\mu}_j)\right)$$
Images 1 and 2 show the data used to build the foreground and background (laboratory) models. The accompanying row of images illustrates the probability densities estimated from the mixture models for the object foreground and the scene background. The rightmost image is the combined posterior density in the HS color space: here the "bright" regions represent foreground, the "dark" regions give the background, and the "grey" areas are regions of uncertainty.
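The EM code below calls a pdfState method that is not part of this listing. As a self-contained illustration of the component density defined above, a minimal sketch using plain arrays (rather than the library's Matrix and Vector classes) could look as follows; the helper name and signature are assumptions, not the library's API:

#include <cmath>

// Hypothetical standalone helper: evaluates a single 2D Gaussian
// component density p(x|j) given its mean and 2x2 covariance.
float gaussianPdf2D(const float x[2], const float mean[2],
                    const float cov[2][2])
{
    const float dx = x[0] - mean[0];
    const float dy = x[1] - mean[1];
    // Determinant and inverse of the symmetric 2x2 covariance matrix
    const float det   = cov[0][0]*cov[1][1] - cov[0][1]*cov[1][0];
    const float inv00 =  cov[1][1]/det;
    const float inv01 = -cov[0][1]/det;
    const float inv11 =  cov[0][0]/det;
    // Mahalanobis distance (x-mu)^T Sigma^{-1} (x-mu)
    const float m = dx*(inv00*dx + inv01*dy) + dy*(inv01*dx + inv11*dy);
    // 2D normalisation constant: 2*pi*sqrt(|Sigma|)
    const float norm = 2.0f*3.14159265f*std::sqrt(det);
    return std::exp(-0.5f*m)/norm;
}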
Sample Code:
#include <cmath>     // floorf, log, fabs
#include <iostream>  // std::cout

Matrix GaussianMixture::HermitteSplineFit(Matrix& inData, int nbSteps, Matrix& outData)
{
    /* Hermite spline fitting, used to rescale a trajectory to nbSteps samples. */
    if(nbSteps<=0)
        return outData;

    const int dataSize = inData.ColumnSize();
    const int inSize   = inData.RowSize();
    const int outSize  = nbSteps;
    outData.Resize(outSize,dataSize);

    for(int i=0;i<outSize;i++)
    {
        // Find the nearest input data pair around the current output time
        const float cTime = float(i)/float(outSize-1)*float(inSize-1);
        int prev = int(floorf(cTime));
        if(prev>inSize-2) prev = inSize-2;  // keep `next` in range (last step only)
        const int next = prev+1;
        const float prevTime = float(prev);
        const float nextTime = float(next);
        // Normalised phase in [0,1] between the two input samples
        const float nphase = (cTime-prevTime)/(nextTime-prevTime);
        const float s1 = nphase;
        const float s2 = s1*nphase;
        const float s3 = s2*nphase;
        // Hermite basis functions
        const float h1 =  2.0f*s3 - 3.0f*s2 + 1.0f;
        const float h2 = -2.0f*s3 + 3.0f*s2;
        const float h3 =  s3 - 2.0f*s2 + s1;
        const float h4 =  s3 - s2;
        // The first column is considered as a temporal value
        outData(i,0) = (float)i;
        for(int j=1;j<dataSize;j++)
        {
            // Neighbouring control points, clamped at the trajectory ends
            const float p0 = (prev>0 ? inData(prev-1,j) : inData(prev,j));
            const float p1 = inData(prev,j);
            const float p2 = inData(next,j);
            const float p3 = (next<inSize-1 ? inData(next+1,j) : inData(next,j));
            // Catmull-Rom style tangents
            const float t1 = 0.5f*(p2-p0);
            const float t2 = 0.5f*(p3-p1);
            outData(i,j) = p1*h1 + p2*h2 + t1*h3 + t2*h4;
        }
    }
    return outData;
}
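A hypothetical usage of this routine, assuming a Matrix filled elsewhere with one sample per row and time in column 0, would resample a recorded trajectory to a fixed number of steps:

GaussianMixture gmm;
Matrix recorded;   // filled elsewhere: one row per sample, column 0 = time
Matrix resampled;
gmm.HermitteSplineFit(recorded, 100, resampled);  // rescale to 100 steps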
void GaussianMixture::initEM_TimeSplit(int nState, Matrix DataSet)
{
    /* Initialise the GaussianMixture by splitting the dataset into
       time slices (along the first dimension) and computing the mean
       and covariance of each slice. Once this initialisation has been
       performed, the number of states is set. */
    Vector * mean = new Vector[nState];
    int nData = DataSet.RowSize();
    this->nState = nState;
    this->dim = DataSet.ColumnSize();
    float tmax = 0;
    Matrix index(nState,nData);
    int * pop = new int[nState];
    priors = new float[nState];
    mu.Resize(nState,dim);
    sigma = new Matrix[nState];

    Matrix unity(dim,dim); /* identity matrix */
    for(int k=0;k<dim;k++)
        unity(k,k) = 1.0f;

    for(int n=0;n<nData;n++) /* get the max value for time */
    {
        if(DataSet(n,0) > tmax) tmax = DataSet(n,0);
    }
    for(int s=0;s<nState;s++) /* clear values */
    {
        mean[s].Resize(dim,true);
        pop[s] = 0;
    }
    /* Divide the dataset into slices of equal time (tmax/nState)
       and compute the mean of each slice; the pop table counts the
       samples in each slice and index records which they are. */
    for(int n=0;n<nData;n++)
    {
        int s = (int)((DataSet(n,0)/(tmax+1))*nState); // tmax+1 keeps s in [0,nState-1]
        mean[s] += DataSet.GetRow(n);
        index(s,pop[s]) = (float)n;
        pop[s] += 1;
    }
    for(int s=0;s<nState;s++)
    {
        mu.SetRow(mean[s]/(float)pop[s],s); /* set the means computed above */
        sigma[s] = Matrix(dim,dim);
        priors[s] = 1.0f/nState; /* set equiprobable states */
        for(int ind=0;ind<pop[s];ind++)
        {
            for(int i=0;i<dim;i++) /* compute covariance matrices */
            {
                for(int j=0;j<dim;j++)
                {
                    sigma[s](i,j) += (DataSet((int)index(s,ind),i) - mu(s,i))
                                    *(DataSet((int)index(s,ind),j) - mu(s,j));
                }
            }
        }
        sigma[s] *= 1.0f/pop[s];
        sigma[s] += unity * 1e-5f; /* keeps the matrix invertible */
    }
    delete [] mean;
    delete [] pop;
}
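Combined with the EM routine listed next, a typical (hypothetical) initialise-then-refine sequence would be:

GaussianMixture gmm;
// DataSet(nSamples, dimensions), with time in column 0
gmm.initEM_TimeSplit(5, DataSet);  // initial split into 5 equal time slices
int iters = gmm.doEM(DataSet);     // refine priors, means and covariances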
int GaussianMixture::doEM(Matrix DataSet)
{
    /* Perform Expectation-Maximization on the given dataset:
       Matrix DataSet(nSamples,Dimensions).
       The GaussianMixture object must be initialised first
       (see the initEM_TimeSplit method). */
    int nData = DataSet.RowSize();
    int iter = 0;
    float log_lik;
    float log_lik_threshold = 1e-8f;
    float log_lik_old = -1e10f;

    Matrix unity(dim,dim);
    for(int k=0;k<dim;k++)
        unity(k,k) = 1.0f;

    // EM loop
    while(true)
    {
        float * sum_p = new float[nData];
        Matrix pxi(nData,nState);
        Matrix pix(nData,nState);
        Vector E;

        // Optionally dump intermediate parameters for debugging:
        // char strtmp[64];
        // sprintf(strtmp, "gmms/%03d.gmm", iter);
        // saveParams(strtmp);

        iter++;
        if(iter>MAXITER)
        {
            std::cout << "EM stops here. Max number of iterations has been reached." << std::endl;
            delete [] sum_p;
            return iter;
        }

        float sum_log = 0;

        // Expectation step
        for(int i=0;i<nData;i++)
        {
            sum_p[i] = 0;
            for(int j=0;j<nState;j++)
            {
                float p = pdfState(DataSet.GetRow(i),j); // P(x|j)
                if(p==0)
                {
                    std::cout << "Error: null probability. Abort." << std::endl;
                    delete [] sum_p;
                    return -1;
                }
                pxi(i,j) = p;
                sum_p[i] += p*priors[j];
            }
            sum_log += log(sum_p[i]);
        }
        for(int j=0;j<nState;j++)
        {
            for(int i=0;i<nData;i++)
            {
                pix(i,j) = pxi(i,j)*priors[j]/sum_p[i]; // then P(j|x)
            }
        }

        // Compute the average log-likelihood
        log_lik = sum_log/nData;
        if(fabs((log_lik/log_lik_old)-1) < log_lik_threshold)
        {
            /* If the log-likelihood has not moved enough, the
               algorithm has converged; exit the loop. */
            delete [] sum_p;
            return iter;
        }
        log_lik_old = log_lik;

        // Maximization (update) step
        pix.SumRow(E);
        for(int j=0;j<nState;j++)
        {
            priors[j] = E(j)/nData; // new priors
            Vector tmu(dim);
            Matrix tmsigma(dim,dim);
            for(int i=0;i<nData;i++) // means update
            {
                tmu += DataSet.GetRow(i)*pix(i,j);
            }
            mu.SetRow(tmu/E(j),j);
            for(int i=0;i<nData;i++) // covariances update
            {
                Matrix Dif(dim,1);
                Dif.SetColumn((DataSet.GetRow(i)-mu.GetRow(j)),0);
                tmsigma += (Dif*Dif.Transpose())*pix(i,j);
            }
            sigma[j] = tmsigma/E(j) + unity * 1e-5f;
        }
        delete [] sum_p;
    }
    return iter;
}
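For reference, the loop above implements the standard EM re-estimation formulas. The E-step computes the responsibilities

$$P(j\,|\,\mathbf{x}_n) = \frac{p(\mathbf{x}_n\,|\,j)\,P(j)}{\sum_{k=1}^{M} p(\mathbf{x}_n\,|\,k)\,P(k)}$$

and the M-step updates the parameters:

$$P(j) = \frac{1}{N}\sum_{n=1}^{N} P(j\,|\,\mathbf{x}_n), \qquad \boldsymbol{\mu}_j = \frac{\sum_n P(j\,|\,\mathbf{x}_n)\,\mathbf{x}_n}{\sum_n P(j\,|\,\mathbf{x}_n)}, \qquad \boldsymbol{\Sigma}_j = \frac{\sum_n P(j\,|\,\mathbf{x}_n)(\mathbf{x}_n-\boldsymbol{\mu}_j)(\mathbf{x}_n-\boldsymbol{\mu}_j)^{\mathsf{T}}}{\sum_n P(j\,|\,\mathbf{x}_n)}$$

with the small diagonal term unity * 1e-5f added to each covariance for numerical stability.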