Reason for disagreement between glmnet results from Matlab and R
1 view (last 30 days)
Show older comments
My question is about comparing glmnet results from Matlab and R.
The following code estimates $\beta$ using the glmnet function in Matlab.
clear;
load acetylene;
X = [x1 x2 x3]; cy = y - mean(y);
% write files as csv so that we can import them in R
csvwrite('testX.csv', X);
csvwrite('testY.csv', y);
% the lambda seq
lseq = exp(linspace(2, 8, 16));
csvwrite('testLam.csv', lseq)
% fit using glmnet [B fitInfo] = lasso(X, cy, 'Lambda', fliplr(lseq), 'Alpha', 0.55, 'Standardize', false); % notice 'Standardize', false %
% compare the zeros with R's zeros later
csvwrite('matlab_b.csv', B)
The R version to accomplish this is as follows.
rm(list = ls())
xmat <- as.matrix(read.csv("testX.csv", header = FALSE))
colnames(xmat) <- c("x1", "x2", "x3")
ymat <- as.matrix(read.csv("testY.csv", header = FALSE))
yvec <- as.numeric(ymat)
cyvec <- yvec - mean(yvec)
lseq <- as.numeric(unlist(read.csv("testLam.csv", header = FALSE)))
library(glmnet)
fits <- glmnet(xmat, cyvec, family = "gaussian", alpha = 0.55, lambda = rev(lseq), standardize = FALSE)
bb <- as.matrix(fits$beta)[, seq(16, 1, by = -1)]
mbb <- as.matrix(read.csv("matlab_b.csv", header = FALSE))
We see that while the magnitudes differ slightly across the λ sequence, the number of coefficients set to 0 agree.
# check if #zeros are equal all.equal(which(mbb == 0), which(bb == 0)) # [1] TRUE all.equal(as.vector(mbb), as.vector(bb)) # [1] "Mean relative difference: 0.03296765"
The results, however, disagree significantly if I try to do what I think matlab is trying to do (assuming I understand the documentation correctly).
clear; load acetylene;
X = [x1 x2 x3];
% standardize and center sxmat = (X - repmat(mean(X, 1), size(X, 1), 1)) ./ repmat(std(X, 0, 1), size(X, 1), 1); % center ys cy = y - mean(y);
% write files
csvwrite('testSX.csv', sxmat);
csvwrite('testCY.csv', cy);
% the lambda seq
lseq = exp(linspace(2, 8, 16));
csvwrite('testLam.csv', lseq)
% fit using glmnet and without standardization. [B fitInfo] = lasso(sxmat, cy, 'Lambda', fliplr(lseq), 'Alpha', 0.55, 'Standardize', false);
% save beta for comparison
csvwrite('matlab_sb.csv', B)
Now repeat this process in R.
rm(list =ls())
sxmat <- as.matrix(read.csv("testSX.csv", header = FALSE))
colnames(sxmat) <- c("x1", "x2", "x3")
ymat <- as.matrix(read.csv("testCY.csv", header = FALSE))
cyvec <- as.numeric(ymat)
lseq <- as.numeric(unlist(read.csv("testLam.csv", header = FALSE)))
library(glmnet)
fits <- glmnet(sxmat, cyvec, family = "gaussian", alpha = 0.55, lambda = rev(lseq), standardize = FALSE)
sbb <- as.matrix(fits$beta)[, seq(16, 1, by = -1)]
smbb <- as.matrix(read.csv("matlab_sb.csv", header = FALSE))
The results are now wildly different.
all.equal(as.vector(smbb), as.vector(sbb)) # [1] "Mean relative difference: 1.703174" The entries that are 0s still agree, however.
all.equal(which(as.vector(smbb) == 0), which(as.vector(sbb) == 0)) [1] TRUE I am sure I am missing something but unable to spot the reason. (Possibly, default standardization of y by default in R and not so in Matlab?)
0 Comments
Answers (0)
See Also
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!