Nonlinear Support Vector Machine using quadprog

Hello, I recently studied how to code an SVM in MATLAB using the quadprog function (for the optimization) instead of svmtrain. I've searched through a lot of literature; one example is http://www.yom-tov.info/Uploads/SVM.m
I don't understand the meaning of this part: z = 2*(train_targets>0) - 1;
Can you please explain what this means?
Thanks, and sorry for my bad English.
Here is the full code:
function [test_targets, a_star] = SVM(train_patterns, train_targets, test_patterns, params)
% Classify using (a very simple implementation of) the support vector machine algorithm
%
% Inputs:
%   train_patterns - Train patterns
%   train_targets  - Train targets
%   test_patterns  - Test patterns
%   params         - [kernel, kernel parameter, solver type, slack]
%                    Kernel can be one of: Gauss, RBF (same as Gauss), Poly, Sigmoid, or Linear
%                    The kernel parameters are:
%                      RBF kernel - Gaussian width (one parameter)
%                      Poly kernel - Polynomial degree
%                      Sigmoid    - The slope and constant of the sigmoid (in the format [1 2], with no separating commas)
%                      Linear     - None needed
%                    Solver type can be one of: Perceptron, Quadprog, Lagrangian, SEQ
%
% Outputs:
%   test_targets - Predicted targets
%   a_star       - SVM coefficients
%
% Note: The number of support vectors found will usually be larger than is actually
% needed because the first two solvers are approximate.
%Augment the patterns with a constant row so the bias is folded into the weights
[Dim, Nf] = size(train_patterns);
Dim = Dim + 1;
train_patterns(Dim,:) = ones(1,Nf);
test_patterns(Dim,:) = ones(1, size(test_patterns,2));
%For two-class problems, remap the labels: targets > 0 become +1, all others -1
if (length(unique(train_targets)) == 2)
z = 2*(train_targets>0) - 1;
else
z = train_targets;
end
%Get kernel parameters
[kernel, ker_param, solver, slack] = process_params(params);
%Transform the input patterns
y = zeros(Nf);
switch kernel,
case {'Gauss','RBF'},
for i = 1:Nf,
y(:,i) = exp(-sum((train_patterns-train_patterns(:,i)*ones(1,Nf)).^2)'/(2*ker_param^2));
end
case {'Poly', 'Linear'}
if strcmp(kernel, 'Linear')
ker_param = 1;
end
for i = 1:Nf,
y(:,i) = (train_patterns'*train_patterns(:,i) + 1).^ker_param;
end
case 'Sigmoid'
ker_param = str2num(ker_param);
if (length(ker_param) ~= 2)
error('This kernel needs two parameters to operate!')
end
for i = 1:Nf,
y(:,i) = tanh(train_patterns'*train_patterns(:,i)*ker_param(1)+ker_param(2));
end
otherwise
error('Unknown kernel. Can be Gauss (RBF), Linear, Poly, or Sigmoid.')
end
%Find the SVM coefficients
switch solver
case 'Quadprog'
%Quadratic programming
alpha_star = quadprog(diag(z)*y'*y*diag(z), -ones(1, Nf), zeros(1, Nf), 1, z, 0, zeros(1, Nf), slack*ones(1, Nf))';
a_star = (alpha_star.*z)*y';
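%For reference: here quadprog(H,f,A,b,Aeq,beq,lb,ub) solves the standard soft-margin dual
%min 0.5*alpha'*H*alpha - sum(alpha), with H = diag(z)*K*diag(z) where K = y'*y is the
%Gram matrix of the transformed patterns; Aeq = z, beq = 0 enforce z*alpha = 0, and
%lb = 0, ub = slack give the box constraint 0 <= alpha_i <= slack. The inequality
%A = zeros(1,Nf), b = 1 reads 0 <= 1 and is just a placeholder.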
%Find the bias
sv_for_bias = find((alpha_star > 0) & (alpha_star < slack - 0.001*slack));
%sv_for_bias = find((alpha_star > 0.001*slack) & (alpha_star < slack - 0.001*slack));
if isempty(sv_for_bias),
bias = 0;
else
B = z(sv_for_bias) - a_star(sv_for_bias);
bias = mean(B);
end
sv = find(alpha_star > 0);
%sv = find(alpha_star > 0.001*slack);
case 'Perceptron'
max_iter = 1e4;
iter = 0;
rate = 0.01;
xi = ones(1,Nf)/Nf*slack;
if ~isfinite(slack),
slack = 0;
end
%Find a start point
processed_y = [y; ones(1,Nf)] .* (ones(Nf+1,1)*z);
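%Each column of processed_y is a bias-augmented kernel vector multiplied by its
%label, so a sample counts as correctly classified (within its slack xi) when
%a_star'*processed_y + xi - 1 > 0; the loop below updates a_star until this
%holds for all samples or max_iter is reached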
a_star = mean(processed_y')';
while ((sum(sign(a_star'*processed_y+xi-1)~=1)>0) & (iter < max_iter))
iter = iter + 1;
if (iter/5000 == floor(iter/5000)),
disp(['Working on iteration number ' num2str(iter)])
end
%Find the worst classified sample (the one farthest from the border)
dist = a_star'*processed_y+xi;
[m, indice] = min(dist);
a_star = a_star + rate*processed_y(:,indice);
%Calculate the new slack vector
xi(indice) = xi(indice) + rate;
xi = xi / sum(xi) * slack;
end
if (iter == max_iter),
disp(['Maximum iteration (' num2str(max_iter) ') reached']);
else
disp(['Converged after ' num2str(iter) ' iterations.'])
end
bias = 0;
a_star = a_star(1:Nf)';
sv = find(((z.*a_star) < slack/Nf) & ((z.*a_star) > 0));
%sv = find(abs(a_star) > slack*1e-3);
case 'Lagrangian'
%Lagrangian SVM (see Mangasarian & Musicant, "Lagrangian Support Vector Machines")
tol = 1e-5;
max_iter = 1e5;
nu = 1/Nf;
iter = 0;
D = diag(z);
alpha = 1.9/nu;
e = ones(Nf,1);
I = speye(Nf);
Q = I/nu + D*y'*D;
P = inv(Q);
u = P*e;
oldu = u + 1;
while ((iter<max_iter) & (sum(sum((oldu-u).^2)) > tol)),
iter = iter + 1;
if (iter/5000 == floor(iter/5000)),
disp(['Working on iteration number ' num2str(iter)])
end
oldu = u;
f = Q*u-1-alpha*u;
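%(abs(f)+f)/2 is the plus function max(f,0), so the next line is the Lagrangian
%SVM fixed-point step u = Q\(1 + max(Q*u - 1 - alpha*u, 0)) from Mangasarian & Musicant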
u = P*(1+(abs(f)+f)/2);
end
a_star = y*D*u(1:Nf);
bias = -e'*D*u;
sv = find(((z'.*a_star) < slack/Nf) & ((z'.*a_star) > 0));
%sv = find(abs(a_star) < slack*1e-3);
case 'SEQ'
%Sequential SVM, as per Sethu Vijayakumar and Si Wu, "Sequential
%support vector classifiers and regression"
lambda = 0;
max_diff = 1e-5;
D = (z'*z).*(y + lambda^2);
max_iter = 100;
iter = 0;
stop = 0;
h = zeros(Nf, 1);
gamma = 0.2 / max(D(:));
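%The loop below performs gradient ascent on the dual objective
%sum(h) - 0.5*h'*D*h (the commented-out disp prints it), with each step
%clipped so that 0 <= h <= slack always holds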
while ~stop & (iter < max_iter)
E = h'*D;
d_h = min(max(gamma*(1 - E'), -h), slack - h);
h = h + d_h;
iter = iter + 1;
stop = (max(abs(d_h)) < max_diff);
%disp(sum(h) - 0.5*h'*D*h)
%disp(['Maximum difference: ' num2str(max(abs(d_h)))])
end
a_star = h.*z';
sv = find(h > slack/Nf);
bias = 0;
case 'Cascade'
%Cascade SVM
Nlevels = 4;
is_sv = ones(1, Nf);
new_params = strrep(params, 'Cascade', 'Lagrangian');
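%Each level splits the data into 2^(Nlevels-i) chunks, trains a Lagrangian SVM
%on each chunk, and keeps only its support vectors for the next, coarser level;
%the final model is then trained on the surviving support vectors only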
for i = 1:Nlevels,
new_is_sv = zeros(1, Nf);
for j = 1:2^(Nlevels-i),
in_cur = [max(1, floor((j-1)*Nf/(2^(Nlevels-i)+1))):min(Nf, floor(j*Nf/(2^(Nlevels-i))))];
in_cur = in_cur(find(is_sv(in_cur)));
try
[temp, cur_alpha] = SVM(train_patterns(:, in_cur), train_targets(in_cur), test_patterns(:, 1:2), new_params);
new_is_sv(in_cur) = ((cur_alpha.*z(in_cur)' < slack) & (cur_alpha.*z(in_cur)' > 0));
catch
end
end
is_sv = new_is_sv;
end
[temp, cur_a_star] = SVM(train_patterns(:, find(is_sv)), train_targets(find(is_sv)), test_patterns(:, 1:2), new_params);
a_star = zeros(1, Nf);
a_star(find(is_sv)) = cur_a_star;
sv = find(is_sv);
bias = 0;%mean(z(sv) - a_star(sv));
otherwise
error('Unknown solver. Can be Quadprog, Perceptron, Lagrangian, SEQ, or Cascade')
end
%Find support vectors
Nsv = length(sv);
if isempty(sv),
error('No support vectors found');
else
disp(['Found ' num2str(Nsv) ' support vectors'])
end
%Margin
b = 1/sqrt(sum(a_star.^2));
disp(['The margin is ' num2str(b)])
%Classify test patterns
N = size(test_patterns, 2);
y = zeros(1, N);
for i = 1:Nsv,
switch kernel,
case {'Gauss','RBF'},
y = y + a_star(sv(i)) * exp(-sum((test_patterns-train_patterns(:,sv(i))*ones(1,N)).^2)'/(2*ker_param^2))';
case {'Poly', 'Linear'}
y = y + a_star(sv(i)) * (test_patterns'*train_patterns(:,sv(i))+1)'.^ker_param;
case 'Sigmoid'
y = y + a_star(sv(i)) * tanh(test_patterns'*train_patterns(:,sv(i))*ker_param(1)+ker_param(2))';
end
end
test_targets = y + bias;
if (length(unique(train_targets)) == 2)
test_targets = test_targets > 0;
end
2 Comments
Maike on 7 Apr 2014
I tried this code. The reason might be that if your data is originally labeled with 0 for class 1 and 1 for class 2, that line changes the labels to -1 and +1 for classes one and two, respectively. If your patterns are already labeled -1/+1, nothing changes. So far that's the only purpose that is obvious to me.
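For example (a quick check in the command window):
train_targets = [0 1 1 0]; %labels coded as 0/1
z = 2*(train_targets>0) - 1 %returns [-1 1 1 -1]
train_targets = [-1 1 1 -1]; %labels already coded as -1/+1
z = 2*(train_targets>0) - 1 %returns [-1 1 1 -1], i.e. unchanged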
Nasreen Rahmat on 17 Feb 2016
Can anyone send me the function [kernel, ker_param, solver, slack] = process_params(params); for SVM so that I can understand how it works? My email id is Student_AUP_n33@yahoo.com. Thanks
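Note: process_params is not included in the linked file. Below is only a guessed sketch of what it might do, inferred from how its outputs are used above (the strrep on params and the str2num(ker_param) in the Sigmoid branch suggest params is a comma-separated string such as 'RBF,1,Quadprog,1000'); the parsing details are assumptions, not the original function:
function [kernel, ker_param, solver, slack] = process_params(params)
%Hypothetical reconstruction -- NOT the original toolbox function.
%Assumes params is a comma-separated string, e.g. 'RBF,1,Quadprog,1000'.
parts = strsplit(params, ',');
kernel = strtrim(parts{1});
ker_param = strtrim(parts{2}); %kept as text so the Sigmoid branch can str2num it
v = str2double(ker_param);
if ~isnan(v)
ker_param = v; %scalar parameters (RBF width, Poly degree) become numbers
end
solver = strtrim(parts{3});
slack = str2double(parts{4});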


Answers (0)
