/* This routine is designed to evaluate the impact of heterogeneous covariance matrices
on computations of Mahalanobis distance. This program does NOT test for homogeneity (easily
done in other packages), but rather evaluates how violating homogeneity will affect
Mahalanobis distances between groups. This is done by computing the pooled within-groups
covariance matrix (data set Sw) for a specified subgroup of the sample (data set matrix), and
then using this matrix to compute squared Mahalanobis distances for the entire sample (data set groups).
Repeating this for various subgroups of the data, then, gives one a measure of how different
covariance matrices impact computed distances. Because distances computed from multiple
matrices cannot be directly compared (it is the relative distances between all groups that are
important), a cluster analysis is performed to illustrate these differences. Proc discrim is
provided at the end to compute the squared distances based on the covariance matrix of the
entire sample. */
/* Copy data set two and add spsex, the first four characters of cat,
   which later steps use as the grouping key. */
data three;
   set two;
   spsex = substr(cat, 1, 4);
run;
/* Principal components of x1-x60, with the COV option requesting the
   covariance matrix as the basis of the analysis. OUT=pcaout receives the
   input observations plus the component scores (prin1, prin2, ...).
   Printed output is suppressed. */
proc princomp
      data=three
      out=pcaout
      cov
      noprint;
   var x1-x60;
run;
/* Select the subgroup whose covariance structure is to be examined:
   only observations whose spsex begins with 'Ptt' are written to matrix. */
data matrix;
   set pcaout;
   if substr(spsex, 1, 3) = 'Ptt';
run;
/* Canonical discriminant analysis of the selected subgroup, classed by
   spsex, on the first 30 principal component scores. The statistics are
   saved to OUTSTAT=esses; the pooled within-class covariance rows
   (_TYPE_='PCOV') are extracted from it in the next step. */
proc candisc
      data=matrix
      outstat=esses
      pcov
      noprint;
   class spsex;
   var prin1-prin30;
run;
/* Sw: the pooled within-groups covariance matrix. Only the _TYPE_='PCOV'
   rows of the CANDISC statistics are kept, and only the prin1-prin30
   columns are written. */
data Sw (keep=prin1-prin30);
   set esses;
   if _TYPE_ ne 'PCOV' then delete;
run;
/* groups: the entire sample (every observation in pcaout), reduced to the
   identifiers cat and spsex plus the first 30 principal component scores. */
data groups (keep=cat spsex prin1-prin30);
   set pcaout;
run;
/* Sort groups in place by spsex so the following PROC MEANS can use
   BY-group processing. */
proc sort data=groups out=groups;
   by spsex;
run;
/* Summary statistics for each spsex group, saved to meanie with no printed
   output. There is no VAR statement, so every numeric variable in groups is
   analyzed. The OUTPUT statement names no statistics, so meanie holds one
   row per default statistic per group, labelled by _STAT_. */
proc means
      data=groups
      noprint;
   by spsex;
   output out=meanie;
run;
/* mns: one row of group means per spsex. Only the _STAT_='MEAN' rows are
   retained, and the PROC MEANS bookkeeping variables are dropped from the
   output. */
data mns (drop=_TYPE_ _FREQ_ _STAT_);
   set meanie;
   if _STAT_ ne 'MEAN' then delete;
run;
proc iml;
use Sw; read all into V;
use mns; read all into means;
read all var _CHAR_ into names;
N=nrow(means);
ds=j(N,N);
do i=1 to N; do j=1 to N;
if i=j then do; ds[i,j]=0; end;
if i