astron hai 7 anos
pai
achega
1d957e582d

BIN=BIN
machine learning/Machine Learning - Home _ Coursera-quiz-lesson-9-2.pdf


+ 66 - 1
machine learning/machine-learning-ex8/ex8/cofiCostFunc.m

@@ -7,6 +7,8 @@ function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ...
 %
 
 % Unfold the U and W matrices from params
+
+
 X = reshape(params(1:num_movies*num_features), num_movies, num_features);
 Theta = reshape(params(num_movies*num_features+1:end), ...
                 num_users, num_features);
@@ -15,8 +17,9 @@ Theta = reshape(params(num_movies*num_features+1:end), ...
 % You need to return the following values correctly
 J = 0;
 X_grad = zeros(size(X));
-Theta_grad = zeros(size(Theta));
 
+Theta_grad = zeros(size(Theta));
+%fprintf('-----------size  num movies %d\n',  num_movies);
 % ====================== YOUR CODE HERE ======================
 % Instructions: Compute the cost function and gradient for collaborative
 %               filtering. Concretely, you should first implement the cost
@@ -41,9 +44,71 @@ Theta_grad = zeros(size(Theta));
 %
 
 
+% Add intercept term to X
+%X = [ones(num_movies, 1) X];
+%X_grad = zeros(size(X));
+% Initialize fitting parameters
+%Theta = [zeros(num_users, 1) Theta];
+%Theta_grad = zeros(size(X));
+
+
+regT = lambda / 2 * sum(sum(Theta.^2));
+regX = lambda / 2 * sum(sum(X.^2));
+
+
+% step 1 -> select only Rij == 1 -> X (5x 3+1) * Theta'(3+1 x4) -> Y (5x4)
+% step 2 sum i of Y -> sum j of Y -> Y -> (1x1)
+% refer to lecture notes pg 18
+J = 1 / 2 * sum(sum( R .* (X * Theta' - Y).^2)) + regT + regX;
+
+
+
+
+for i = 1:num_movies
+  idx = find(R(i,:) == 1);
+  %num_users_rated = size(idx);
+  %dumpsize('idx', idx);
+  %fprintf('there are %d users that rated the movie at %dth row (tot movies %d)\n', columns(num_users_rated), i, num_movies);
+  
+  Theta_temp = Theta(idx,:);
+  %dumpsize('Theta_temp', Theta_temp);
+  Y_temp = Y(i,idx);
+  %dumpsize('Y_temp', Y_temp);
+  X_grad(i,:) = (X(i,:) * Theta_temp' - Y_temp) * Theta_temp + lambda * X(i,:);
+  
+
+endfor
+
+
+% for each user j, 
+% -- determine the theta_gradient of user j
+for j = 1:num_users
+  % find all movies rated by user j
+  % that means, select column j of R, go down through each row from 1 to num_movies, record the 
+  % array index where the cell(i,j) == 1
+  idx = find(R(:,j) == 1);
+  %fprintf('there are %d movies that were rated by user %d (tot users %d)\n', columns(idx), i, num_users);
+  % example:
+  % R = [0;1;0;1;0]
+  % then idx =  [2, 4, 5]
+  % and Y at the jth column = [0;4;0;3;5]
+  % thus Y_temp = [4;3;5]
+  % X_temp row 1 = contents of row 2 and all columns from X
+  % X_temp row 2 = contents of row 4 and all columns from X
+  % X_temp row 3 = contents of row 5 and all columns from X
+  Y_temp = Y(idx,j);
+  X_temp = X(idx,:);
+  Theta_grad(j,:) = (X_temp * Theta(j,:)' - Y_temp)' * X_temp + lambda * Theta(j,:);;
+  
+endfor
+
+
 
 
 
+% get rid of the intercept term X0 and theta0
+%X_grad = X_grad(:,2:end);
+%Theta_grad = Theta_grad(:,2:end);
 
 
 

+ 126 - 0
machine learning/machine-learning-ex8/ex8/cofiCostFunc_bak.m

@@ -0,0 +1,126 @@
+function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ...
+                                  num_features, lambda)
+%COFICOSTFUNC Collaborative filtering cost function
+%   [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
+%   num_features, lambda) returns the cost and gradient for the
+%   collaborative filtering problem.
+%
+%   Inputs:
+%     params - unrolled parameters [X(:); Theta(:)]
+%     Y      - num_movies x num_users matrix of ratings
+%     R      - num_movies x num_users matrix, R(i, j) = 1 if user j rated
+%              movie i; only those entries enter the cost and gradients
+%     num_users, num_movies, num_features - sizes used to reshape params
+%     lambda - regularization strength
+%
+%   Outputs:
+%     J    - scalar regularized cost
+%     grad - unrolled gradients [X_grad(:); Theta_grad(:)], same length
+%            as params
+%
+%   Implementation note: a column of ones is temporarily prepended to X and
+%   a column of zeros to Theta; it does not change the predictions and is
+%   removed from the gradients before they are returned.
+%
+
+% Unfold the X and Theta matrices from params
+X = reshape(params(1:num_movies*num_features), num_movies, num_features);
+Theta = reshape(params(num_movies*num_features+1:end), ...
+                num_users, num_features);
+
+
+% You need to return the following values correctly
+J = 0;
+X_grad = zeros(size(X));
+Theta_grad = zeros(size(Theta));
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Compute the cost function and gradient for collaborative
+%               filtering. Concretely, you should first implement the cost
+%               function (without regularization) and make sure it
+%               matches our costs. After that, you should implement the
+%               gradient and use the checkCostFunction routine to check
+%               that the gradient is correct. Finally, you should implement
+%               regularization.
+%
+% Notes: X - num_movies x num_features matrix of movie features
+%        Theta - num_users x num_features matrix of user features
+%        Y - num_movies x num_users matrix of user ratings of movies
+%        R - num_movies x num_users matrix, where R(i, j) = 1 if the
+%            i-th movie was rated by the j-th user
+%
+% You should set the following variables correctly:
+%
+%        X_grad - num_movies x num_features matrix, containing the
+%                 partial derivatives w.r.t. each element of X
+%        Theta_grad - num_users x num_features matrix, containing the
+%                     partial derivatives w.r.t. each element of Theta
+%
+
+
+% Prepend a constant column to X and a zero column to Theta. The product of
+% the two extra columns is zero, so X * Theta' is unchanged; both columns
+% are stripped from the gradients again below.
+X = [ones(num_movies, 1) X];
+X_grad = zeros(size(X));
+Theta = [zeros(num_users, 1) Theta];
+% One gradient row per user: the size must follow Theta, not X
+Theta_grad = zeros(size(Theta));
+
+
+% Regularization penalties; the prepended first column is excluded
+regT = lambda / 2 * sum(sum(Theta(:,2:end).^2));
+regX = lambda / 2 * sum(sum(X(:,2:end).^2));
+
+
+% Squared prediction error, counted only where R(i, j) == 1, plus penalties
+J = 1 / 2 * sum(sum( R .* (X * Theta' - Y).^2)) + regT + regX;
+
+
+% Gradient w.r.t. the features of each movie i
+for i = 1:num_movies
+  % users that rated movie i
+  idx = find(R(i,:) == 1);
+  Theta_temp = Theta(idx,:);
+  Y_temp = Y(i,idx);
+
+  % error over the rated entries, plus the derivative of the regX penalty
+  X_grad(i,:) = (X(i,:) * Theta_temp' - Y_temp) * Theta_temp + lambda * X(i,:);
+endfor
+
+
+% Gradient w.r.t. the parameters of each user j
+for j = 1:num_users
+  % movies rated by user j: row indices of column j of R that equal 1
+  % example:
+  %   R(:,j) = [0;1;0;1;1]  ->  idx = [2;4;5]
+  %   Y(:,j) = [0;4;0;3;5]  ->  Y_temp = [4;3;5]
+  %   X_temp = rows 2, 4 and 5 of X (all columns)
+  idx = find(R(:,j) == 1);
+  Y_temp = Y(idx,j);
+  X_temp = X(idx,:);
+
+  % error over the rated entries, plus the derivative of the regT penalty
+  Theta_grad(j,:) = (X_temp * Theta(j,:)' - Y_temp)' * X_temp + lambda * Theta(j,:);
+endfor
+
+
+% Drop the prepended first column from both gradients
+X_grad = X_grad(:,2:end);
+Theta_grad = Theta_grad(:,2:end);
+
+
+
+
+
+
+
+
+
+
+
+% =============================================================
+
+grad = [X_grad(:); Theta_grad(:)];
+
+end

+ 5 - 0
machine learning/machine-learning-ex8/ex8/dumpsize.m

@@ -0,0 +1,5 @@
+function dumpsize(label, X)
+  % DUMPSIZE Print the row and column count of X, tagged with label.
+  dims = [size(X, 1), size(X, 2)];
+  fprintf('matrix %s size:%d %d\n',  label, dims(1), dims(2));
+

+ 2 - 1
machine learning/machine-learning-ex8/ex8/estimateGaussian.m

@@ -23,7 +23,8 @@ sigma2 = zeros(n, 1);
 
 
 
-
+mu = 1/m * sum(X, 1);  % per-column mean as a row vector; explicit dim keeps a single-row X from collapsing to a scalar
+sigma2 = 1/m * sum(bsxfun(@minus, X, mu) .^ 2, 1);  % per-column variance (1/m); bsxfun replaces the removed '.-' operator
 
 
 

+ 16 - 12
machine learning/machine-learning-ex8/ex8/ex8_cofi.m

@@ -128,22 +128,26 @@ my_ratings = zeros(1682, 1);
 
 % Check the file movie_idx.txt for id of each movie in our dataset
 % For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
-my_ratings(1) = 4;
+%my_ratings(1) = 4;
 
 % Or suppose did not enjoy Silence of the Lambs (1991), you can set
-my_ratings(98) = 2;
+%my_ratings(98) = 2;
 
 % We have selected a few movies we liked / did not like and the ratings we
 % gave are as follows:
-my_ratings(7) = 3;
-my_ratings(12)= 5;
-my_ratings(54) = 4;
-my_ratings(64)= 5;
-my_ratings(66)= 3;
-my_ratings(69) = 5;
-my_ratings(183) = 4;
-my_ratings(226) = 5;
-my_ratings(355)= 5;
+%my_ratings(7) = 3;
+%my_ratings(12)= 5;
+%my_ratings(54) = 4;
+%my_ratings(64)= 5;
+%my_ratings(66)= 3;
+%my_ratings(69) = 5;
+%my_ratings(183) = 4;
+%my_ratings(226) = 5;
+%my_ratings(355)= 5;
+
+
+movie_rating = [69 4;94 4;96 3;98 5;143 4;157 3;187 3;164 4;161 5;1 5;179 3;181 3;22 2;226 4;241 3;249 2;258 5;257 3];
+my_ratings(movie_rating(:,1)) = movie_rating(:,2);
 
 fprintf('\n\nNew user ratings:\n');
 for i = 1:length(my_ratings)
@@ -222,7 +226,7 @@ movieList = loadMovieList();
 
 [r, ix] = sort(my_predictions, 'descend');
 fprintf('\nTop recommendations for you:\n');
-for i=1:10
+for i=1:length(my_predictions)
     j = ix(i);
     fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ...
             movieList{j});

+ 115 - 0
machine learning/machine-learning-ex8/ex8/myex8.m

@@ -0,0 +1,115 @@
+%% Machine Learning Online Class
+%  Exercise 8 | Anomaly Detection and Collaborative Filtering
+%  Script: runs Parts 1-5 below (load ratings, evaluate and gradient-check cofiCostFunc).
+%  Instructions
+%  ------------
+%
+%  This file contains code that helps you get started on the
+%  exercise. You will need to complete the following functions:
+%
+%     estimateGaussian.m
+%     selectThreshold.m
+%     cofiCostFunc.m
+%
+%  For this exercise, you will not need to change any code in this file,
+%  or any files other than those mentioned above.
+%
+
+%% =============== Part 1: Loading movie ratings dataset ================
+%  You will start by loading the movie ratings dataset to understand the
+%  structure of the data.
+%
+fprintf('Loading movie ratings dataset.\n\n');
+
+%  Load data (the comments below describe the variables Y and R it provides)
+load ('ex8_movies.mat');
+
+%  Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
+%  943 users
+%
+%  R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
+%  rating to movie i
+
+%  From the matrix, we can compute statistics like average rating.
+fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ...
+        mean(Y(1, R(1, :))));
+% NOTE(review): R(1, :) is used as an index mask above - assumes R is logical; confirm.
+%  We can "visualize" the ratings matrix by plotting it with imagesc
+imagesc(Y);
+ylabel('Movies');
+xlabel('Users');
+
+fprintf('\nProgram paused. Press enter to continue.\n');
+% NOTE: no pause call follows this message; execution continues immediately.
+
+%% ============ Part 2: Collaborative Filtering Cost Function ===========
+%  You will now implement the cost function for collaborative filtering.
+%  To help you debug your cost function, we have included a set of weights
+%  that we trained on that data. Specifically, you should complete the code in
+%  cofiCostFunc.m to return J.
+
+%  Load pre-trained weights (X, Theta, num_users, num_movies, num_features)
+load ('ex8_movieParams.mat');
+
+%  Reduce the data set size so that this runs faster
+num_users = 4; num_movies = 5; num_features = 3;
+X = X(1:num_movies, 1:num_features);
+Theta = Theta(1:num_users, 1:num_features);
+Y = Y(1:num_movies, 1:num_users);
+R = R(1:num_movies, 1:num_users);
+
+%  Evaluate cost function on the unrolled parameters with lambda = 0
+J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ...
+               num_features, 0);
+           
+fprintf(['Cost at loaded parameters: %f '...
+         '\n(this value should be about 22.22)\n'], J);
+
+fprintf('\nProgram paused. Press enter to continue.\n');
+% NOTE: no pause call follows this message; execution continues immediately.
+
+
+%% ============== Part 3: Collaborative Filtering Gradient ==============
+%  Once your cost function matches up with ours, you should now implement
+%  the collaborative filtering gradient function. Specifically, you should
+%  complete the code in cofiCostFunc.m to return the grad argument.
+%
+fprintf('\nChecking Gradients (without regularization) ... \n');
+
+%  Check gradients by running checkCostFunction (called without arguments)
+checkCostFunction;
+
+fprintf('\nProgram paused. Press enter to continue.\n');
+% NOTE: no pause call follows this message; execution continues immediately.
+
+
+
+%% ========= Part 4: Collaborative Filtering Cost Regularization ========
+%  Now, you should implement regularization for the cost function for
+%  collaborative filtering. You can implement it by adding the cost of
+%  regularization to the original cost computation.
+%
+
+%  Evaluate cost function on the same reduced data with lambda = 1.5
+J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ...
+               num_features, 1.5);
+           
+fprintf(['Cost at loaded parameters (lambda = 1.5): %f '...
+         '\n(this value should be about 31.34)\n'], J);
+
+fprintf('\nProgram paused. Press enter to continue.\n');
+% NOTE: no pause call follows this message; execution continues immediately.
+%% ======= Part 5: Collaborative Filtering Gradient Regularization ======
+%  Once your cost matches up with ours, you should proceed to implement
+%  regularization for the gradient.
+%
+
+%
+fprintf('\nChecking Gradients (with regularization) ... \n');
+
+%  Check gradients by running checkCostFunction with the argument 1.5
+checkCostFunction(1.5);
+
+fprintf('\nProgram paused. Press enter to continue.\n');
+pause;
+

BIN=BIN
machine learning/machine-learning-ex8/ex8/octave-workspace


+ 9 - 11
machine learning/machine-learning-ex8/ex8/selectThreshold.m

@@ -23,17 +23,15 @@ for epsilon = min(pval):stepsize:max(pval)
     % Note: You can use predictions = (pval < epsilon) to get a binary vector
     %       of 0's and 1's of the outlier predictions
 
-
-
-
-
-
-
-
-
-
-
-
+    tp = sum((pval < epsilon) & (yval == 1));
+    tn = sum((pval >= epsilon) & (yval == 0));
+    fp = sum((pval < epsilon) & (yval == 0));
+    fn = sum((pval >= epsilon) & (yval == 1));
+
+    prec = tp / (tp + fp);
+    rec = tp / (tp + fn);
+    F1 = 2 * prec * rec / (prec + rec);
+   
 
     % =============================================================
 

+ 15 - 0
machine learning/machine-learning-ex8/ex8/token.mat

@@ -0,0 +1,15 @@
+# Created by Octave 4.2.2, Sat Dec 01 19:08:07 2018 HKT <astron@astron>
+# name: email
+# type: sq_string
+# elements: 1
+# length: 20
+larry1chan@gmail.com
+
+
+# name: token
+# type: sq_string
+# elements: 1
+# length: 16
+QAJ6zYZ6G9iK9OTW
+
+