%MDS Trainale mapping for multidimensional scaling, a variant of Sammon mapping % % [W,J,STRESS] = MDS(DT,Y,OPTIONS) % [W,J,STRESS] = MDS(DT,K,OPTIONS) % [W,J,STRESS] = DT*MDS([],K,OPTIONS) % [W,J,STRESS] = DT*MDS(K,OPTIONS) % X = DS*W % % % INPUT % DT Square (M x M) dissimilarity matrix used for training % DS N x M dissimilarity matrix between testset and trainset % Y M x K matrix containing starting target configuration, or % K Desired output dimensionality (default 2) % OPTIONS Various parameters of the minimization procedure put into % a structure consisting of the following fields: 'q', 'optim', % 'init','etol','maxiter', 'isratio', 'itmap', 'st' and 'inspect' % (default: % % OPTIONS.q = 0 % OPTIONS.optim = 'pn' % OPTIONS.init = 'cs' % OPTIONS.etol = 1e-6 (the precise value depends on q) % OPTIONS.maxiter = 100 % OPTIONS.isratio = 0 % OPTIONS.itmap = 'yes' % OPTIONS.st = 1 % OPTIONS.inspect = 2). % % OUTPUT % W Multidimensional scaling mapping % J Index of points removed before optimization % STRESS Vector of stress values % X N x K dataset with output configuration % % DESCRIPTION % Finds a nonlinear MDS map (a variant of the Sammon map) of objects % represented by a symmetric distance matrix D with zero diagonal, given % either the dimensionality K or the initial configuration Y. This is done % in an iterative manner by minimizing the Sammon stress between the given % dissimilarities D (DT or DS) and the distances DY in the K-dimensional % target space: % % E = 1/(sum_{i 0, % ST = 1 and the mapping is onto 2D or larger, then the progress % is plotted during the minimization every INSPECT iterations. % % Important: % - It is assumed that D either is or approximates a Euclidean distance % matrix, i.e: % D_{ij} = sqrt (sum_k(x_i - x_j)^2). % - Missing values can be handled; they should be marked by 'NaN' in D. % % EXAMPLES: % opt.optim = 'scg'; % opt.init = 'cs'; % D = sqrt(distm(a)); % Compute the Euclidean distance dataset of A % w1 = mds(D,2,opt); % An MDS map onto 2D initialized by Classical Scaling, % % optimized by a Scaled Conjugate Gradients algorithm % n = size(D,1); % y = rand(n,2); % w2 = mds(D,y,opt); % An MDS map onto 2D initialized by random vectors % % z = rand(n,n); % Set around 40% of the random distances to NaN, i.e. % z = (z+z')/2; % not used in the MDS mapping % z = find(z <= 0.6); % D(z) = NaN; % D(1:n+1:n^2) = 0; % Set the diagonal to zero % opt.optim = 'pn'; % opt.init = 'randnp'; % opt.etol = 1e-8; % Should be high, as only some distances are used % w3 = mds(D,2,opt); % An MDS map onto 2D initialized by a random projection % % REFERENCES % 1. M.F. Moler, A Scaled Conjugate Gradient Algorithm for Fast Supervised % Learning', Neural Networks, vol. 6, 525-533, 1993. % 2. W.H. Press, S.A. Teukolsky, W.T. Vetterling and B.P. Flannery, % Numerical Recipes in C, Cambridge University Press, Cambridge, 1992. % 3. I. Borg and P. Groenen, Modern Multidimensional Scaling, Springer % Verlag, Berlin, 1997. % 4. T.F. Cox and M.A.A. Cox, Multidimensional Scaling, Chapman and Hall, % London, 1994. % % SEE ALSO (PRTools Guide) % DATASETS, MAPPINGS, PREX_MDS, MDS_CS, SAMMONM, TSNEM % % Copyright: E. Pekalska, R.P.W. Duin, e.pekalska@37steps.com % function [w,J,err,opt,y] = mds(varargin) mapname = 'MDS'; argin = shiftargin(varargin,{'scalar'}); argin = setdefaults(argin,[],2,[]); if mapping_task(argin,'definition') w = define_mapping(argin,'untrained',mapname); else % Train, evaluate or extend a mapping. [D,y,options] = deal(argin{:}); opt = mds_setopt(options); % YREP contains representative objects in the projected MDS space, i.e. % for which the mapping exists. YREP is empty for the original MDS, since % no projection is available yet. yrep = []; if (isdataset(D) | isa(D,'double')) [m,mm] = size(D); % Convert D to double, but retain labels in LAB. if (isdataset(D)), lab = getlab(D); D = +D; else, lab = ones(m,1); end; if (ismapping(y)) % The MDS mapping exists; this means that YREP has already been stored. pars = getdata(y); [k,c] = size(y); y = []; % Empty, should now be found. yrep = pars{1}; % There exists an MDS map, hence YREP is stored. opt = pars{2}; % Options used for the mapping. II = pars{3}; % Index of non-repeated points in YREP. winit = pars{4}; % The Classical Scaling map, if INIT = 'cs'. v = pars{5}; % Weights used for adding new points if ITMAP = 'no'. n = c; % Number of dimensions of the projected space. % Initialization by 'cs' is not possible when there is no winit % (i.e. the CS map) and new points should be added. if (strcmp(opt.init,'cs')) & (isempty(winit)) prwarning(2,'OPTIONS.init = cs is not possible when adding points; using kl.'); opt.init = 'kl'; end % If YREP is a scalar, we have an empty mapping. if (max(size(yrep)) == 1) y = yrep; yrep = []; end % Check whether D is a matrix with the zero diagonal for the existing map. if (m == mm) & (length(intersect(find(D(:)= m) w = yrep; % D is the same matrix as in the projection process; return % YREP is then the solution end if (length(pars) < 6) | (isempty(pars{6})) yinit = []; else yinit = pars{6}; % Possible initial configuration for points to % be added to an existing map if (size(yinit,1) ~= size(D,1)) prwarning(2,'the size of the initial configuration does not match that of the dissimilarity matrix, using random initialization.') yinit =[]; end end else % No MDS mapping available yet; perform the checks. if (~issym(D,1e-12)) prwarning(2,'D is not a symmetric matrix; will average.'); D = (D+D')/2; end % Check the number of zeros on the diagonal if (any(abs(diag(D)) > 0)) error('D should have a zero diagonal'); end end else % D is neither a dataset nor a matrix of doubles error('D should be a dataset or a matrix of doubles.'); end if (~isempty(y)) % Y is the initial configuration or N, no MDS map exists yet; % D is a square matrix. % Remove identical points, i.e. points for which D(i,j) = 0 for i ~= j. % I contains the indices of points left for the MDS mapping, J those % of the removed points and P those of the points left in I which were % identical to those removed. [I,J,P] = mds_reppoints(D); D = D(I,I); [ni,nc] = size(D); % NANID is an extra field in the OPTIONS structure, containing the indices % of NaN values (= missing values) in distance matrix D. opt.nanid = find(isnan(D(:)) == 1); % Initialise Y. [m2,n] = size(y); if (max(m2,n) == 1) % Y is a scalar, hence really is a dimensionality N. n = y; [y,winit] = mds_init(D,n,opt.init); else if (mm ~= m2) error('The matrix D and the starting configuration Y should have the same number of columns.'); end winit = []; y = +y(I,:); end % The final number of true distances is: no_dist = (ni*(ni-1) - length(opt.nanid))/2; else % This happens only if we add extra points to an existing MDS map. % Remove identical points, i.e. points for which D(i,j) = 0 for i ~= j. % I contains the indices of points left for the MDS mapping, J those % of the removed points and P those of the points left in I which were % identical to those removed. [I,J,P] = mds_reppoints(D(:,II)); D = D(I,II); [ni,nc] = size(D); yrep = yrep(II,:); n = size(yrep,2); % NANID is an extra field in the OPTIONS structure, containing the indices % of NaN values (= missing values) in distance matrix D. opt.nanid = find(isnan(D(:))); % Initialise Y. if the new points should be added in an iterative manner. [m2,n] = size(yrep); if (~isempty(yinit)) % An initial configuration exists. y = yinit; elseif (strcmp(opt.init, 'cs')) & (~isempty(winit)) y = D*winit; else y = mds_init(D,n,opt.init); end if (~isempty(opt.nanid)) % Rescale. scale = (max(yrep)-min(yrep))./(max(y)-min(y)); y = y .* repmat(scale,ni,1); end % The final number of true distances is: no_dist = (ni*nc - length(opt.nanid)); end % Check whether there is enough data left. if (~isempty(opt.nanid)) if (n*ni+2 > no_dist), error('There are too many missing distances: it is not possible to determine the MDS map.'); end if (strcmp (opt.itmap,'no')) opt.itmap = 'yes'; prwarning(1,'Due to the missing values, the projection can only be iterative. OPTIONS are changed appropriately.') end end if (opt.inspect > 0) opt.plotlab = lab(I,:); % Labels to be used for plotting in MDS_SAMMAP. else opt.plotlab = []; end if (isempty(yrep)) | (~isempty(yrep) & strcmp(opt.itmap, 'yes')) % Either no MDS exists yet OR new points should be mapped in an iterative manner. printinfo(opt); [yy,err] = mds_sammap(D,y,yrep,opt); % Define the linear projection of distances. v = []; if (isempty(yrep)) & (isempty(opt.nanid)) if (rank(D) < m) v = prpinv(D)*yy; else v = D \ yy; end end else % New points should be added by a linear projection of dissimilarity data. yy = D*v; end % Establish the projected configuration including the removed points. y = zeros(m,n); y(I,:) = +yy; if (~isempty(J)) if (~isempty(yrep)) y(J,:) = +yrep(II(P),:); else for k=length(J):-1:1 % J: indices of removed points. y(J(k),:) = y(P(k),:); % P: indices of points equal to points in J. end end end % In the definition step: shift the obtained configuration such that the % mean lies at the origin. if (isempty(yrep)) y = y - ones(length(y),1)*mean(y); y = prdataset(y,lab); else w = prdataset(y,lab); return; end % In the definition step: the mapping should be stored. opt = rmfield(opt,'nanid'); % These fields are to be used internally only; opt = rmfield(opt,'plotlab'); % not to be set up from outside w = prmapping(mfilename,'trained',{y,opt,I,winit,v,[]},[],m,n); w = setname(w,mapname); end return % ********************************************************************************** % Extra functions % ********************************************************************************** % PRINTINFO(OPT) % % Prints progress information to the file handle given by OPT.ST. function printinfo(opt) if opt.st < 1 return end %fprintf(opt.st,'Sammon mapping, error function with the parameter q=%d\n',opt.q); switch (opt.optim) case 'pn', %fprintf(opt.st,'Minimization by Pseudo-Newton algorithm\n'); case 'scg', %fprintf(opt.st,'Minimization by Scaled Conjugate Gradients algorithm\n'); otherwise error(strcat('Possible initialization methods: pn (Pseudo-Newton), ',... 'or scg (Scaled Conjugate Gradients).')); end return % ********************************************************************************** % [I,J,P] = MDS_REPPOINTS(D) % % Finds the indices of repeated/left points. J contains the indices of % repeated points in D. This means that for each j in J, there is a point % k ~= j such that D(J(j),k) = 0. I contains the indices of the remaining % points, and P those of the points in I that were identical to those in J. % Directly used in MDS routine. function [I,J,P] = mds_reppoints(D) epsilon = 1e-20; % Differences smaller than this are assumed to be zero. [m,mm] = size(D); I = 1:m; J = []; P = []; if (m == mm) & (all(abs(D(1:m+1:end)) <= epsilon)) K = intersect (find (triu(ones(m),1)), find(D < epsilon)); if (~isempty(K)) P = mod(K,m); J = fix(K./m) + 1; I(J) = []; end else [J,P] = find(D<=epsilon); I(J) = []; end return; % ********************************************************************************** % MDS_SETOPT Sets parameters for the MDS mapping % % OPT = MDS_SETOPT(OPT_GIVEN) % % INPUT % OPT_GIVEN Parameters for the MDS mapping, described below (default: []) % % OUTPUT % OPT Structure of chosen options; if OPT_GIVEN is empty, then % OPT.q = 0 % OPT.optim = 'pn' % OPT.init = 'cs' % OPT.etol = 1e-6 (the precise value depends on q) % OPT.maxiter = 100 % OPT.itmap = 'yes' % OPT.isratio = 0 % OPT.st = 1 % OPT.inspect = 2 % % DESCRIPTION % Parameters for the MDS mapping can be set or changed. OPT_GIVEN consists % of the following fields: 'q', 'init', 'optim', 'etol', 'maxiter','itmap', % 'isratio', 'st' and 'inspect'. OPTIONS can include all the fields or some % of them only. The fields of OPT have some default values, which can be % changed by the OPT_GIVEN field values. If OPT_GIVEN is empty, then OPT % contains all default values. For a description of the fields, see MDS. % % Copyright: Elzbieta Pekalska, ela@ph.tn.tudelft.nl, 2000-2003 % Faculty of Applied Sciences, Delft University of Technology % function opt = mds_setopt (opt_given) opt.q = 0; opt.init = 'cs'; opt.optim = 'pn'; opt.st = 1; opt.itmap = 'yes'; opt.maxiter = 100; opt.inspect = 2; opt.etol = inf; opt.isratio = 0; opt.nanid = []; % Here are some extra values; set up in the MDS routine. opt.plotlab = []; % Not to be changed by the user. if (~isempty(opt_given)) if (~isstruct(opt_given)) error('OPTIONS should be a structure with at least one of the following fields: q, init, etol, optim, maxiter, itmap, isratio, st or inspect.'); end fn = fieldnames(opt_given); if (~all(ismember(fn,fieldnames(opt)))) error('Wrong field names; valid field names are: q, init, optim, etol, maxiter, itmap, isratio, st or inspect.') end for i = 1:length(fn) opt = setfield(opt,fn{i},getfield(opt_given,fn{i})); end end if (isempty(intersect(opt.q,-2:1:2))) error ('OPTIONS.q should be -2, -1, 0, 1 or 2.'); end if (opt.maxiter < 2) error ('OPTIONS.iter should be at least 1.'); end if (isinf(opt.etol)) switch (opt.q) % Different defaults for different stresses. case -2, opt.etol = 1e-6; case {-1,0} opt.etol = 10*sqrt(eps); case {1,2} opt.etol = sqrt(eps); end elseif (opt.etol <= 0) | (opt.etol >= 0.1) error ('OPTIONS.etol should be positive and smaller than 0.1.'); end if (~ismember(opt.optim, {'pn','scg'})) error('OPTIONS.optim should be pn or scg.'); end if (~ismember(opt.itmap, {'yes','no'})) error('OPTIONS.itmap should be yes or no.'); end return % MDS_SAMMAP Sammon iterative nonlinear mapping for MDS % % [YMAP,ERR] = MDS_SAMMAP(D,Y,YREP,OPTIONS) % % INPUT % D Square (M x M) dissimilarity matrix % Y M x N matrix containing starting configuration, or % YREP Configuration of the representation points % OPTIONS Various parameters of the minimization procedure put into % a structure consisting of the following fields: 'q', 'optim', % 'init','etol','maxiter', 'isratio', 'itmap', 'st' and 'inspect' % (default: % OPTIONS.q = 0 % OPTIONS.optim = 'pn' % OPTIONS.init = 'cs' % OPTIONS.etol = 1e-6 (the precise value depends on q) % OPTIONS.maxiter = 100 % OPTIONS.isratio = 0 % OPTIONS.itmap = 'yes' % OPTIONS.st = 1 % OPTIONS.inspect = 2). % % OUTPUT % YMAP Mapped configuration % ERR Sammon stress % % DESCRIPTION % Maps the objects given by a symmetric distance matrix D (with a zero % diagonal) onto, say, an N-dimensional configuration YMAP by an iterative % minimization of a variant of the Sammon stress. The minimization starts % from the initial configuration Y; see MDS_INIT. % % YREP is the Sammon configuration of the representation set. It is used % when new points have to be projected. In other words, if D is an M x M % symmetric distance matrix, then YREP is empty; if D is an M x N matrix, % then YMAP is sought such that D can approximate the distances between YMAP % and YREP. % % Missing values can be handled by marking them by NaN in D. % % SEE ALSO (PRTools Guide) % MAPPINGS, MDS, MDS_CS, MDS_INIT, MDS_SETOPT % % Copyright: Elzbieta Pekalska, ela@ph.tn.tudelft.nl, 2000-2003 % Faculty of Applied Sciences, Delft University of Technology % function [y,err] = mds_sammap(Ds,y,yrep,opt) if (nargin < 4) opt = []; % Will be filled by MDS_SETOPT, below. end if isempty(opt), opt = mds_setopt(opt); end if (nargin < 3) yrep = []; end % Extract labels and calculate distance matrix. [m,n] = size(y); if (~isempty(yrep)) replab = getlab(yrep); yrep = +yrep; D = sqrt(distm(y,yrep)); else D = sqrt(distm(y)); yrep = []; replab = []; end % if (isempty(opt.plotlab)) % opt.plotlab = ones(m,1); % end it = 0; % Iteration number. eold = inf; % Previous stress (error). % Calculate initial stress. [e,a]= mds_samstress(opt.q,y,yrep,Ds,D,opt.nanid,opt.isratio); err = e; %if opt.st > 0, fprintf(opt.st,'iteration: %4i stress: %3.8d\n',it,e); end tt = sprintf('Processing %i iterations: ',opt.maxiter); prwaitbar(opt.maxiter,tt) switch (opt.optim) case 'pn', % Pseudo-Newton minimization. epr = e; % Previous values of E, EOLD for line search algorithm. eepr = e; add = 1e-4; % Avoid G2 equal 0; see below. BETA = 1e-3; % Parameter for line search algorithm. lam1 = 0; lam2 = 1; % LAM (the step factor) lies in (LAM1, LAM2). LMIN = 1e-10; % Minimum acceptable value for LAM in line search. lambest = 1e-4; % LAM = LAMBEST, if LAM was not found. % Loop until the error change falls below the tolerance or until % the maximum number of iterations is reached. while (abs(e-eold) >= opt.etol*(1 + abs(e))) & (it < opt.maxiter) % Plot progress if requested. % if (opt.st > 0) & (opt.inspect > 0) & (mod(it,opt.inspect) == 0) % if (it == 0), figure(1); clf; end % mds_plot(y,yrep,opt.plotlab,replab,e); % end yold = y; eold = e; % Calculate the stress. [e,a] = mds_samstress (opt.q,yold,yrep,Ds,[],opt.nanid,opt.isratio); % Calculate gradients and pseudo-Newton update P. [g1,g2,cc] = mds_gradients (opt.q,yold,yrep,a*Ds,[],opt.nanid); p = (g1/cc)./(abs(g2/cc) + add); slope = g1(:)' * p(:); lam = 1; % Search for a suitable step LAM using a line search. while (1) % Take a step and calculate the delta error DE. y = yold + lam .* p; [e,a] = mds_samstress(opt.q,y,yrep,Ds,[],opt.nanid,opt.isratio); de = e - eold; % Stop if the condition for a suitable lam is fulfilled. if (de < BETA * lam * slope) break; end % Try to find a suitable step LAM. if (lam ~= 1) r1 = de - lam*slope; r2 = epr - eepr - lam2*slope; aa = (2*de + r1/lam^2 - r2/lam2^2)/(lam2-lam1)^3; bb = ((-lam2*r1)/lam^2 +(lam*r2)/lam2^2)/(lam2-lam1)^2; if (abs(aa) <= eps) lamtmp = -0.5 * slope/bb; else lamtmp = (-bb + sqrt(max(0,(bb^2 - 3*aa*slope))))/(3*aa); end % Prevent LAM from becoming too large. if (lamtmp > 0.5 * lam), lamtmp = 0.5 * lam; end else lamtmp = -0.5 * slope/(e - eold - slope); end % Prevent LAM from becoming too small. lam2 = lam; lam = max(lamtmp, 0.1*lam); epr = e; eepr = eold; if (lam < LMIN) y = yold + lambest .* p; [e,a] = mds_samstress(opt.q,y,yrep,Ds,[],opt.nanid,opt.isratio); break; end end it = it + 1; err = [err; e]; prwaitbar(opt.maxiter,it,[tt num2str(it) ', error: ' num2str(e)]); %if opt.st > 0, fprintf(opt.st,'iteration: %4i stress: %3.8d\n',it,e); end end case 'scg', % Scaled Conjugate Gradient minimization. sigma0 = 1e-8; lambda = 1e-8; % Regularization parameter. lambda1 = 0; % Calculate initial stress and direction of decrease P. [e,a] = mds_samstress(opt.q,y,yrep,Ds,D,opt.nanid,opt.isratio); g1 = mds_gradients(opt.q,y,yrep,a*Ds,D,opt.nanid); % Gradient p = -g1; % Direction of decrease gnorm2 = g1(:)' * g1(:); success = 1; % Sanity check. if (gnorm2 < 1e-15) prwarning(2,['Gradient is nearly zero: ' gnorm2 ' (unlikely); initial configuration is the right one.']); return end % Loop until the error change falls below the tolerance or until % the maximum number of iterations is reached. while (abs(eold - e) > opt.etol * (1 + e)) & (it < opt.maxiter) g0 = g1; % Previous gradient pnorm2 = p(:)' * p(:); eold = e; % Plot progress if requested. % if (opt.inspect > 0) & (mod(it,opt.inspect) == 0) % if (it == 0), figure(1); clf; end % mds_plot(y,yrep,opt.plotlab,replab,e); % end if (success) sigma = sigma0/sqrt(pnorm2); % SIGMA: a small step from y to yy yy = y + sigma .*p; [e,a] = mds_samstress(opt.q,yy,yrep,Ds,[],opt.nanid,opt.isratio); g2 = mds_gradients(opt.q,yy,yrep,a*Ds,[],opt.nanid); % Gradient for yy s = (g2-g1)/sigma; % Approximation of Hessian*P directly, instead of computing delta = p(:)' * s(:); % the Hessian, since only DELTA = P'*Hessian*P is in fact needed end % Regularize the Hessian indirectly to make it positive definite. % DELTA is now computed as P'* (Hessian + regularization * Identity) * P delta = delta + (lambda1 - lambda) * pnorm2; % Indicate if the Hessian is negative definite; the regularization above was too small. if (delta < 0) lambda1 = 2 * (lambda - delta/pnorm2); % Now the Hessian will be positive definite delta = -delta + lambda * pnorm2; % This is obtained after plugging lambda1 lambda = lambda1; % into the formulation a few lines above. end mi = - p(:)' * g1(:); yy = y + (mi/delta) .*p; % mi/delta is a step size [ee,a] = mds_samstress(opt.q,yy,yrep,Ds,[],opt.nanid,opt.isratio); % This minimization procedure is based on the second order approximation of the % stress function by using the gradient and the Hessian approximation. The Hessian % is regularized, but maybe not sufficiently. The ratio Dc (<=1) below indicates % a proper approximation, if Dc is close to 1. Dc = 2 * delta/mi^2 * (e - ee); e = ee; % If Dc > 0, then the stress can be successfully decreased. success = (Dc >= 0); if (success) y = yy; [ee,a] = mds_samstress(opt.q,yy,yrep,Ds,[],opt.nanid,opt.isratio); g1 = mds_gradients(opt.q,yy,yrep,a*Ds,[],opt.nanid); gnorm2 = g1(:)' * g1(:); lambda1 = 0; beta = max(0,(g1(:)'*(g1(:)-g0(:)))/mi); p = -g1 + beta .* p; % P is a new conjugate direction if (g1(:)'*p(:) >= 0 | mod(it-1,n*m) == 0), p = -g1; % No much improvement, restart end if (Dc >= 0.75) lambda = 0.25 * lambda; % Make the regularization smaller end it = it + 1; err = [err; e]; prwaitbar(opt.maxiter,it,[tt num2str(it) ', error: ' num2str(e)]); %fprintf (opt.st,'iteration: %4i stress: %3.8d\n',it,e); else % Dc < 0 % Note that for Dc < 0, the iteration number IT is not increased, % so the Hessian is further regularized until SUCCESS equals 1. lambda1 = lambda; end % The approximation of the Hessian was poor or the stress was not % decreased (Dc < 0), hence the regularization lambda is enlarged. if (Dc < 0.25) lambda = lambda + delta * (1 - Dc)/pnorm2; end end end prwaitbar(0); return % ********************************************************************************** %MDS_STRESS - Calculate Sammon stress during optimization % % E = MDS_SAMSTRESS(Q,Y,YREP,Ds,D,NANINDEX) % % INPUT % Q Indicator of the Sammon stress; Q = -2,-1,0,1,2 % Y Current lower-dimensional configuration % YREP Configuration of the representation objects; it should be % empty when no representation set is considered % Ds Original distance matrix % D Approximate distance matrix (optional; otherwise computed from Y) % NANINDEX Index of the missing values; marked in Ds by NaN (optional; to % be found in Ds) % % OUTPUT % E Sammon stress % % DESCRIPTION % Computes the Sammon stress between the original distance matrix Ds and the % approximated distance matrix D between the mapped configuration Y and the % configuration of the representation set YREP, expressed as follows: % % E = 1/(sum_{i eps) values to be included % for the computation of the stress. I = 1:m2; if (~isempty(nanindex)) I(nanindex) = []; end O = []; if (todefine), O = 1:m+1:m2; end I = setdiff(I,O); % If OPTIONS.isratio is set, calculate optimal ALPHA to scale with. if (isratio) alpha = sum((Ds(I).^q).*D(I).^2)/sum((Ds(I).^(q+1)).*D(I)); Ds = alpha*Ds; else alpha = 1; end % C is the normalization factor. c = sum(Ds(I).^(q+2)); % If Q == 0, prevent unnecessary calculation (X^0 == 1). if (q ~= 0) e = sum(Ds(I).^q .*((Ds(I)-D(I)).^2))/c; else e = sum(((Ds(I)-D(I)).^2))/c; end return % ********************************************************************************** %MDS_GRADIENTS - Gradients for variants of the Sammon stress % % [G1,G2,CC] = MDS_GRADIENTS(Q,Y,YREP,Ds,D,NANINDEX) % % INPUT % Q Indicator of the Sammon stress; Q = -2,-1,0,1,2 % Y Current lower-dimensional configuration % YREP Configuration of the representation objects; it should be % empty when no representation set is considered % Ds Original distance matrix % D Approximate distance matrix (optional; otherwise computed from Y) % nanindex Index of missing values; marked in Ds by NaN (optional; % otherwise found from Ds) % % OUTPUT % G1 Gradient direction % G2 Approximation of the Hessian by its diagonal % % DESCRIPTION % This is a routine used directly in the MDS_SAMMAP routine. % % SEE ALSO (PRTools Guide) % MDS, MDS_INIT, MDS_SAMMAP, MDS_SAMSTRESS % % Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela@ph.tn.tudelft.nl, 2000-2003 % Faculty of Applied Sciences, Delft University of Technology % function [g1,g2,c] = mds_gradients(q,y,yrep,Ds,D,nanindex) % If D is not given, calculate it from Y, the current mapped points. y = +y; yrep = +yrep; if (nargin < 5) | (isempty(D)) if (~isempty(yrep)) D = sqrt(distm(y,yrep)); else D = sqrt(distm(y)); end end % If NANINDEX is not given, find it from Ds. if (nargin < 6) nanindex = find(isnan(Ds(:))==1); end % YREP is empty if no representation set is defined yet. % This happens when the mapping should be defined. todefine = (isempty(yrep)); if (todefine) yrep = y; end [m,n] = size(y); [mm,k] = size(Ds); if (m ~= mm) error('The sizes of Y and Ds do not match.'); end if (any(size(D) ~= size(Ds))) error ('The sizes of D and Ds do not match.'); end m2 = m*k; % Convert to doubles. D = +D; Ds = +Ds; % I is the index of non-NaN, non-zero (> eps) values to be included % for the computation of the gradient and the Hessians diagonal. I = (1:m2)'; if (~isempty(nanindex)) I(nanindex) = []; end K = find(Ds(I) <= eps | D(I) <= eps); I(K) = []; % C is a normalization factor. c = -2/sum(Ds(I).^(q+2)); % Compute G1, the gradient. h1 = zeros(m2,1); if (q == 0) % Prevent unnecessary computation when Q = 0. h1(I) = (Ds(I)-D(I)) ./ D(I); else h1(I) = (Ds(I)-D(I)) ./ (D(I).*(Ds(I).^(-q))); end h1 = reshape (h1',m,k); g2 = h1 * ones(k,n); % Here G2 is assigned only temporarily, g1 = c * (g2.*y - h1*yrep); % for the computation of G1. % Compute G2, the diagonal of the Hessian, if requested. if (nargout > 1) h2 = zeros(m2,1); switch (q) case -2, h2(I) = -1./(Ds(I).*D(I).^3); case -1, h2(I) = -1./(D(I).^3); case 0, h2(I) = - Ds(I)./(D(I).^3); case 1, h2(I) = - Ds(I).^2./(D(I).^3); case 2, h2(I) = -(Ds(I)./D(I)).^3; end h2 = reshape (h2',m,k); g2 = c * (g2 + (h2*ones(k,n)).*y.^2 + h2*yrep.^2 - 2*(h2*yrep).*y); end return