%PROXM Proximity mapping
%
%   W = PROXM(A,TYPE,P,WEIGHTS)
%   W = A*PROXM([],TYPE,P,WEIGHTS)
%   W = A*PROXM(TYPE,P,WEIGHTS)
%   D = B*W
%
% INPUT
%   A        Dataset used for representation
%   B        Dataset applied to the representation set
%   TYPE     Type of the proximity (optional; default: 'distance')
%   P        Parameter of the proximity (optional; default: 1)
%   WEIGHTS  Weights (optional; default: all 1)
%
% OUTPUT
%   W        Proximity mapping
%   D        Dataset, proximity matrix between B and A
%
% DESCRIPTION
% Computation of the [K x M] proximity mapping (or kernel) defined by
% the [M x K] dataset A. Unlabeled objects in A are neglected.
% If B is an [N x K] dataset, then D=B*W is the [N x M] proximity matrix
% between B and A. The proximities can be defined by the following types
% (A and B are here 1 x K vectors):
%
%   'polynomial'   | 'p': (A*B'+1).^P
%   'homogeneous'  | 'h': (A*B').^P
%   'exponential'  | 'e': EXP(-(||A-B||)/P)
%   'radial_basis' | 'r': EXP(-(||A-B||.^2)/(P*P))
%   'sigmoid'      | 's': SIGM((A*B')/P)
%   'distance'     | 'd': ||A-B||.^P
%   'minkowski'    | 'm': SUM(|A-B|.^P).^(1/P)
%   'city-block'   | 'c': SUM(|A-B|)
%   'cosine'       | 'o': 1 - (A*B')/(||A||*||B||)
%   'ndiff'        | 'n': SUM(A~=B)
%   'hellinger'    | 'g': ||A.^0.5-B.^0.5||
%
% In the polynomial and homogeneous cases for a non-integer P, the
% proximity is computed as D = SIGN(S).*ABS(S).^P, with S = A*B'+1
% (polynomial) or S = A*B' (homogeneous), in order to avoid problems with
% negative inner products. The features of the objects in A and B may be
% weighted by the weights in the vector WEIGHTS.
%
% Note that for computing (squared) Euclidean distances DISTM is much
% faster than PROXM.
%
% EXAMPLES
%   W = A*proxm('m',1);               % define L1 distances
%   W = A*proxm('m',1); D = B*W;      % L1 distances between B and A
%   W = proxm('r',2)*mapex; D = A*W;  % Radial basis proximities between A and itself
%
% SEE ALSO (PRTools Guide)
% MAPPINGS, DATASETS, DISTM

% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O.
% Box 5031, 2600 GA Delft, The Netherlands

% 27-11-2008, Marc Hulsman, MR1
% Added kernel normalization.

% $Id: proxm.m,v 1.10 2010/01/15 13:39:01 duin Exp $

function W = proxm(varargin)
% PROXM entry point. Depending on the arguments this either
%   - returns an untrained mapping definition (no data supplied),
%   - stores the representation set A together with TYPE, the parameter
%     and the weights in a trained mapping (TYPE is a character string), or
%   - applies a trained mapping (passed in the TYPE slot) to the data A
%     and returns the proximities between A and the stored set.

  varargin = shiftargin(varargin,'char');
  argin = setdefaults(varargin,[],'d',1,[]);
  if mapping_task(argin,'definition')
    W = define_mapping(argin,'untrained','Proximity mapping');
    return
  end

  [A,type,s,weights] = deal(argin{:});
  [m,k] = size(A);

  if (ischar(type))

    % Definition of the mapping, just store parameters.
    % NOTE(review): 'uniform'/'u' is accepted here, but the execution
    % branch below has no matching case, so applying such a mapping ends
    % in the 'Unknown proximity type' error. Confirm whether it should be
    % implemented or removed from this list.
    knowntypes = char('polynomial','p','homogeneous','h','exponential','e','radial_basis','r', ...
                      'sigmoid','s','distance','d','minkowski','m','city-block','c',...
                      'cosine','o','uniform','u','ndiff','n','hellinger','g');
    if (~any(strcmp(cellstr(knowntypes),type)))
      error(['Unknown proximity type: ' type])
    end

    A = cdats(A,1);
    [m,k] = size(A);

    %if (isa(A,'prdataset'))
    if (isdataset(A))
      A = testdatasize(A);
      W = prmapping(mfilename,'trained',{A,type,s,weights},getlab(A), ...
                    getfeatsize(A),getobjsize(A));
    else
      W = prmapping(mfilename,'trained',{A,type,s,weights},[],k,m);
    end
    W = setname(W,'Proximity mapping');

  elseif isa(type,'prmapping')

    % Execution of the mapping: compute a proximity matrix
    % between the input data A and B; output stored in W.
    w = type;
    [B,type,s,weights] = deal(w.data{1},w.data{2},w.data{3},w.data{4});
    [kk,n] = size(w);
    if (k ~= kk)
      error('Mismatch in sizes between the mapping and its data stored internally.');
    end

    % Optional feature weighting: scale every column of A and B.
    if (~isempty(weights))
      if (length(weights) ~= k),
        error('Weight vector has a wrong length.');
      end
      A = A.*(ones(m,1)*weights(:)');
      B = B.*(ones(n,1)*weights(:)');
    end

    testdatasize(m*n);

    switch type

      case {'polynomial','p'}
        D = +(A*B');
        D = D + ones(m,n);
        if (s ~= round(s))
          % Non-integer power: keep the sign to avoid complex results.
          D = +sign(D).*abs(D).^s;
        elseif (s ~= 1)
          D = D.^s;
        end

      case {'homogeneous','h'}
        D = +(A*B');
        if (s ~= round(s))
          % Non-integer power: keep the sign to avoid complex results.
          D = +sign(D).*abs(D).^s;
        elseif (s ~= 1)
          D = D.^s;
        end

      case {'sigmoid','s'}
        D = +(A*B');
        D = sigm(D/s);

      case {'city-block','c'}
        D = zeros(m,n);
        t = sprintf('Processing %i objects: ',n);
        prwaitbar(n,t);
        for j=1:n
          prwaitbar(n,j,[t int2str(j)]);
          D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)),2);
        end
        prwaitbar(0);

      case {'minkowski','m'}
        D = zeros(m,n);
        t = sprintf('Minkowski distance for %i objects: ',n);
        prwaitbar(n,t);
        for j=1:n
          prwaitbar(n,j,[t int2str(j)]);
          D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)).^s,2).^(1/s);
        end
        prwaitbar(0);

      case {'exponential','e'}
        D = dist2(B,A);
        D = exp(-sqrt(D)/s);

      case {'radial_basis','r'}
        D = dist2(B,A);
        D = exp(-D/(s*s));

      case {'distance','d'}
        % dist2 returns squared distances, so s == 2 needs no further work.
        D = dist2(B,A);
        if s ~= 2
          D = sqrt(D).^s;
        end

      case {'cosine','o'}
        D = +(A*B');
        lA = sqrt(sum(A.*A,2));
        lB = sqrt(sum(B.*B,2));
        D = 1 - D./(lA*lB');

      case {'ndiff','n'}
        D = zeros(m,n);
        t = sprintf('Processing %i objects: ',n);
        prwaitbar(n,t);
        for j=1:n
          prwaitbar(n,j,[t int2str(j)]);
          D(:,j) = sum(+A ~= repmat(+B(j,:),m,1),2);
        end
        prwaitbar(0);

      case {'hellinger','g'}
        D = dist2(sqrt(B),sqrt(A));
        D = sqrt(D);

      otherwise
        error('Unknown proximity type')
    end

    % Return a dataset when the input was a dataset, a plain matrix otherwise.
    if isdataset(A)
      W = setdat(A,D,w);
    else
      W = D;
    end

  else
    error('Illegal TYPE argument.')
  end

return;

function D = dist2(B,A)
% Computes square Euclidean distance, avoiding large matrices for high
% dimensional data
%
%   D = DIST2(B,A)
%
% D is a [size(A,1) x size(B,1)] matrix: rows correspond to the objects
% in A, columns to the objects in B. Note the argument order (B first).

  ma = size(A,1);
  mb = size(B,1);

  if isdatafile(A)

    % Process A in chunks; J is used as the row index of each chunk in D.
    D = zeros(ma,mb);
    next = 1;
    while next > 0
      [a,next,J] = readdatafile(A,next);
      % Keep the (B,A) argument order so the result is [numel(J) x mb].
      D(J,:) = +dist2(B,a);
    end

  elseif isdatafile(B)

    % Process B in chunks; J is used as the column index of each chunk in D.
    D = zeros(ma,mb);
    next = 1;
    while next > 0
      % We need another version of readdatafile here, as due to
      % persistent variables double looping can not be handled correctly.
      [b,next,J] = readdatafile2(B,next);
      % Keep the (B,A) argument order so the result is [ma x numel(J)].
      D(:,J) = +dist2(b,A);
    end

  else % A and B are not datafiles

    % ||a-b||^2 = ||b||^2 + ||a||^2 - 2*a*b'
    D = ones(ma,1)*sum(B'.*B',1);
    D = D + sum(A.*A,2)*ones(1,mb);
    D = D - 2 .* (+A)*(+B)';

    % Clean result: set negative entries of D to zero.
    J = find(D<0);
    D(J) = zeros(size(J));

  end

return