costFunctionReg.m

function [J, grad] = costFunctionReg(theta, X, y, lambda)
%COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
%   J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
%   theta as the parameter for regularized logistic regression and the
%   gradient of the cost w.r.t. the parameters.
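
% For reference, the quantities computed below (with h = sigmoid(X * theta);
% theta(1), the intercept term, is never regularized):
%
%   J = (1/m) * sum(-y .* log(h) - (1 - y) .* log(1 - h))
%       + (lambda / (2*m)) * sum(theta(2:end) .^ 2)
%
%   grad(1) = (1/m) * X(:, 1)' * (h - y)
%   grad(j) = (1/m) * X(:, j)' * (h - y) + (lambda/m) * theta(j),   j >= 2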
% Initialize some useful values
m = length(y); % number of training examples

% You need to return the following variables correctly
J = 0;
grad = zeros(size(theta));

% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost of a particular choice of theta.
%               You should set J to the cost.
%               Compute the partial derivatives and set grad to the partial
%               derivatives of the cost w.r.t. each parameter in theta.
% first attempt (buggy: it added a column vector of penalty terms to the
% scalar cost, and indexed only rows 2 and end of theta rather than 2:end):
%J = 1 / m * ((-1 .* y') * log(sigmoid(X * theta)) - (1 .- y)' * log(1 .- sigmoid(X * theta))) ...
%    + lambda / (2 * m) * [0; (theta([2, rows(theta)], :) .^ 2)];
% Vectorized regularized cost; theta(1) is excluded from the penalty term.
% (Octave's deprecated '.-' operator is replaced with plain '-'.)
J = 1 / m * (-y' * log(sigmoid(X * theta)) - (1 - y)' * log(1 - sigmoid(X * theta))) ...
    + lambda / (2 * m) * sum(theta(2:end) .^ 2);
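
% Equivalent element-wise form (a sketch; 'h' is a local name introduced
% here, not part of the original stub):
%   h = sigmoid(X * theta);
%   J = (1 / m) * sum(-y .* log(h) - (1 - y) .* log(1 - h)) ...
%       + lambda / (2 * m) * sum(theta(2:end) .^ 2);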
%size(J)
%
% the dumb way
% first attempt:
% two-step process: compute the gradient for theta(1) without regularization,
% then the gradients for theta(2:end) with regularization
% 1. set up a matrix X1 of the same size as X: copy the values from the first
%    column of X into X1 and fill the remaining columns with zeros. Apply the
%    gradient formula, using X (not X1) as the argument of the sigmoid.
%    This yields an n x 1 vector whose first entry is the unregularized
%    gradient for theta(1); the remaining entries are zero.
% 2. set up a second matrix X2N: zeros in its first column, and columns 2
%    through end copied from X. Apply the gradient formula with the
%    regularization term added; this yields another n x 1 vector
%    (28 x 1 for the ex2 data).
% finally, discard the first entry of the step-2 gradient grad2N, since
% theta(1) must not be regularized, and assemble grad by concatenating
% grad1(1) with grad2N(2:end). (A finite-difference check is sketched below.)
%
%
% Step 1: first column of X kept, remaining columns zeroed.
X1 = [X(:, 1) zeros(m, columns(X) - 1)];
grad1 = 1 / m * X1' * (sigmoid(X * theta) - y);
%fprintf ('grad1: %f\n', grad1);
% Step 2: first column zeroed, columns 2:end copied from X.
X2N = [zeros(m, 1) X(:, 2:end)];
%X2N
grad2N = 1 / m * X2N' * (sigmoid(X * theta) - y) + lambda / m * theta;
%grad2N
% Unregularized gradient for theta(1), regularized gradients for the rest.
grad = [grad1(1, 1); grad2N(2:end, 1)];
%fprintf ('------------> %f\n', grad(1:5));
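
% Optional sanity check (a sketch; eps0, e1, and numgrad1 are hypothetical
% names, not part of the assignment): compare grad(1) against a
% central-difference estimate of the cost's first partial derivative.
%   eps0 = 1e-4;
%   e1 = [eps0; zeros(length(theta) - 1, 1)];
%   numgrad1 = (costFunctionReg(theta + e1, X, y, lambda) - ...
%               costFunctionReg(theta - e1, X, y, lambda)) / (2 * eps0);
%   % numgrad1 should agree with grad(1) to several decimal places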
%
%
% the quick way
%
%
%grad = 1 / m * X' * (sigmoid(X * theta) - y) + lambda / m * theta .* [0; ones(length(theta) - 1, 1)];
%fprintf ('------------> %f\n', grad(1:5));
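
% An equivalent one-liner that builds the penalty vector directly instead of
% masking theta (a sketch, not taken from the assignment hand-out):
%   grad = 1 / m * X' * (sigmoid(X * theta) - y) + lambda / m * [0; theta(2:end)];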
% =============================================================
end
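
% Usage sketch (assumptions: X already has its intercept column of ones,
% sigmoid.m is on the path, and the solver settings mirror what the ex2
% exercise script typically uses):
%   initial_theta = zeros(size(X, 2), 1);
%   lambda = 1;
%   options = optimset('GradObj', 'on', 'MaxIter', 400);
%   [theta, J] = fminunc(@(t) costFunctionReg(t, X, y, lambda), ...
%                        initial_theta, options);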