Q1_final.m
clear all; close all; clc;
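% Note: mvnrnd (used in generate_data below) requires the Statistics and
% Machine Learning Toolbox.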
%% Set-Up: given parameters and validation data
% Given parameters
n = 2; % number of feature dimensions
N_train = [10;100;1000]; % number of training samples
N_val = 10000; % number of validation samples
p = [0.9,0.1]; % class priors
mu = [-2 2;0 0];
Sigma(:,:,1) = [1 -0.9;-0.9 2]; Sigma(:,:,2) = [2 0.9;0.9 1];
% Generate true class labels and draw samples from each class pdf
label_val = (rand(1,N_val) >= p(1));
Nc_val = [length(find(label_val==0)),length(find(label_val==1))];
x_val = generate_data(n,N_val,label_val,mu,Sigma,Nc_val);
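% With p = [0.9 0.1] and N_val = 10000, roughly 9000 class-0 and 1000
% class-1 samples are expected in the validation set.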
%% Part 1: Minimum P-Error Classifier
% Calculate discriminant scores and tau based on classification rule
discriminantScore = log(evalGaussian(x_val,mu(:,2),Sigma(:,:,2)))- ...
log(evalGaussian(x_val,mu(:,1),Sigma(:,:,1)));
tau = log(sort(discriminantScore(discriminantScore >= 0)));
% Find midpoints of tau to use as threshold values
mid_tau = [tau(1)-1 tau(1:end-1)+diff(tau)./2 tau(length(tau))+1];
% Make decision for every threshold and calculate error values
for i = 1:length(mid_tau)
decision = (discriminantScore >= mid_tau(i));
pFA(i) = sum(decision==1 & label_val==0)/Nc_val(1); % False alarm prob.
pCD(i) = sum(decision==1 & label_val==1)/Nc_val(2); % Correct detection prob.
pE(i) = pFA(i)*p(1) + (1-pCD(i))*p(2); % Total error prob.
end
% Find minimum error and corresponding threshold decisions
[min_error,min_index] = min(pE);
min_decision = (discriminantScore >= mid_tau(min_index));
min_FA = pFA(min_index); min_CD = pCD(min_index);
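% Sanity check (assuming the standard 0-1 loss rule): the theoretical
% minimum-P(error) threshold on this log-likelihood ratio is
% gamma = ln(p(1)/p(2)) = ln(9); the empirically selected threshold
% should land near it.
fprintf('Empirical min-error threshold: %.4f (theoretical ln(p(1)/p(2)) = %.4f)\n', ...
mid_tau(min_index), log(p(1)/p(2)));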
% Plot ROC curve with minimum error point labeled
figure(1); plot(pFA,pCD,'-',min_FA,min_CD,'o');
title('Minimum Expected Risk ROC Curve');
legend('ROC Curve', 'Calculated Min Error');
xlabel('P_{False Alarm}'); ylabel('P_{Correct Detection}');
% Create grid to cover all data points
h_grid = linspace(floor(min(x_val(1,:)))-2,ceil(max(x_val(1,:)))+2);
v_grid = linspace(floor(min(x_val(2,:)))-2,ceil(max(x_val(2,:)))+2);
[h,v] = meshgrid(h_grid,v_grid);
% Calculate discriminant score for each grid point (0 = decision bound.)
d_score_grid = log(evalGaussian([h(:)';v(:)'],mu(:,2),Sigma(:,:,2)))- ...
log(evalGaussian([h(:)';v(:)'],mu(:,1),Sigma(:,:,1)))-mid_tau(min_index);
d_scores = reshape(d_score_grid,length(h_grid),length(v_grid));
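% Note: this reshape relies on h_grid and v_grid having equal length
% (linspace defaults to 100 points); for unequal grid sizes the two
% length arguments would need to be swapped, since contour expects rows
% to follow the vertical grid.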
% Plot classified data points and decision boundary
figure(2);
plot_classified_data(min_decision, label_val, Nc_val, p, [1 1 1], ...
x_val', [h_grid;v_grid;d_scores], 'Q');
%% Part 2 & 3: Linear and Quadratic Regression
for i = 1:length(N_train)
% Generate true labels and training data for each sample size
label = (rand(1,N_train(i)) >= p(1));
Nc = [length(find(label==0)),length(find(label==1))];
x = generate_data(n,N_train(i),label,mu,Sigma,Nc);
% Initialize training parameters (map x to linear and quadratic func.)
x_L = [ones(N_train(i), 1) x']; % linear parameters
initial_theta_L = zeros(n+1, 1);
x_Q = [ones(N_train(i), 1) x(1,:)' x(2,:)' (x(1,:).^2)' ...
(x(1,:).*x(2,:))' (x(2,:).^2)']; % quadratic parameters
initial_theta_Q = zeros(6, 1);
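% The quadratic mapping z = [1, x1, x2, x1^2, x1*x2, x2^2] lets the
% logistic model learn a conic-section boundary theta'*z = 0, which can
% match the curved optimal boundary between two Gaussians with unequal
% covariances.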
label = double(label)';
% Minimize the logistic cost with fminsearch to get theta values for linear and quad.
[theta_L, cost_L] = fminsearch(@(t)(cost_func(t, x_L, label, ...
N_train(i))), initial_theta_L);
[theta_Q, cost_Q] = fminsearch(@(t)(cost_func(t, x_Q, label, ...
N_train(i))), initial_theta_Q);
% Linear: choose points to draw straight boundary line
plot_x1 = [min(x_L(:,2))-2, max(x_L(:,2))+2];
plot_x2 = (-1./theta_L(3)).*(theta_L(2).*plot_x1 + theta_L(1));
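% (The boundary theta(1) + theta(2)*x1 + theta(3)*x2 = 0 is solved for
% x2 above, giving the two endpoints of the line.)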
% Linear: plot training data and trained classifier
figure(3); plot_training_data(label, i, x_L, [plot_x1;plot_x2], 'L');
title(['Training Based on ',num2str(N_train(i)),' Samples']);
% Linear: use validation data (10k points) and make decisions
test_set_L = [ones(N_val, 1) x_val'];
decision_L = test_set_L*theta_L >= 0;
% Linear: plot all decisions and boundary line
figure(4);
error_L(i) = plot_classified_data(decision_L, label_val', Nc_val, p, ...
[1,3,i], test_set_L(:,2:3), [plot_x1;plot_x2],'L');
title(['Classification Based on ',num2str(N_train(i)),' Samples']);
% Quadratic: create grid to cover all data points and calculate scores
figure(5); subplot(2,3,i);
h_grid = linspace(min(x_Q(:,2))-6, max(x_Q(:,2))+6);
v_grid = linspace(min(x_Q(:,3))-6, max(x_Q(:,3))+6);
score = get_boundary(h_grid,v_grid,theta_Q); % score of 0 = decision bound.
% Quadratic: plot training data and trained classifier
plot_training_data(label, i, x_Q, [h_grid;v_grid;score], 'Q')
title(['Training Based on ',num2str(N_train(i)),' Samples']);
% Quadratic: use validation data (10k points) and make decisions
test_set_Q = [ones(N_val, 1) x_val(1,:)' x_val(2,:)' (x_val(1,:).^2)' ...
(x_val(1,:).*x_val(2,:))' (x_val(2,:).^2)'];
decision_Q = test_set_Q*theta_Q >= 0;
% Quadratic: plot all decisions and boundary contour
figure(6);
error_quad(i) = plot_classified_data(decision_Q, label_val', Nc_val, ...
p, [1,3,i], test_set_Q(:,2:3),[h_grid;v_grid;score],'Q');
title(['Classification Based on ',num2str(N_train(i)),' Samples']);
end
%% Print all calculated error values
fprintf('<strong>Minimum P(error) Achievable:</strong> %.2f%%\n\n',min_error*100);
fprintf('<strong>Logistic Regression Total Error Values</strong>\n');
fprintf('Training Set Size\tLinear Approximation Error (%%)\tQuadratic Approximation Error (%%)\n');
fprintf('\t %i\t\t\t\t\t %.2f%%\t\t\t\t\t\t\t\t%.2f%%\n',[N_train';error_L;error_quad]);
%% Functions
function x = generate_data(n, N, label, mu, Sigma, Nc)
% Generate N Gaussian samples based on distribution of class priors
x = zeros(n,N);
for L = 0:1
x(:,label==L) = mvnrnd(mu(:,L+1),Sigma(:,:,L+1),Nc(L+1))';
end
end
function plot_training_data(label, fig, x, bound, type)
% Plots original class labels and decision boundary
subplot(1,3,fig); hold on;
plot(x(label==0,2),x(label==0,3),'o',x(label==1,2),x(label==1,3),'+');
if type == 'L'
% Plot straight line if boundary is linear
plot(bound(1,:), bound(2,:));
elseif type == 'Q'
% Plot decision contour if non-linear (discriminant scores are 0)
contour(bound(1,:), bound(2,:), bound(3:end,:), [0, 0]);
end
% Restrict axis and add all labels
axis([min(x(:,2))-2, max(x(:,2))+2, min(x(:,3))-2, max(x(:,3))+2]);
legend('Class 0','Class 1','Classifier');
xlabel('x_1'); ylabel('x_2'); hold on;
end
function error = plot_classified_data(decision, label, Nc, p, fig, x, bound, type)
% Plots incorrect and correct decisions (and boundary) based on original class labels
% Find all correct and incorrect decisions
TN = find(decision==0 & label==0); % true negative
FP = find(decision==1 & label==0); pFA = length(FP)/Nc(1); % false positive
FN = find(decision==0 & label==1); pMD = length(FN)/Nc(2); % false negative
TP = find(decision==1 & label==1); % true positive
error = (pFA*p(1) + pMD*p(2))*100; % calculate total error
% Plot all decisions (green = correct, red = incorrect)
subplot(fig(1),fig(2),fig(3));
plot(x(TN,1),x(TN,2),'og'); hold on;
plot(x(FP,1),x(FP,2),'or'); hold on;
plot(x(FN,1),x(FN,2),'+r'); hold on;
plot(x(TP,1),x(TP,2),'+g'); hold on;
% Plot boundary based on whether it's linear (L) or non-linear (Q)
if type == 'L'
% Plot straight line from 2 points
plot(bound(1,:), bound(2,:));
elseif type == 'Q'
% Plot decision contour (when discriminant scores are 0)
contour(bound(1,:), bound(2,:), bound(3:end,:), [0, 0]);
end
% Restrict axis and add all labels
axis([min(x(:,1))-2, max(x(:,1))+2, min(x(:,2))-2, max(x(:,2))+2])
legend('Class 0 Correct Decisions','Class 0 Wrong Decisions', ...
'Class 1 Wrong Decisions','Class 1 Correct Decisions','Classifier');
xlabel('x_1'); ylabel('x_2');
end
function cost = cost_func(theta, x, label,N)
% Cost function to be minimized to get best fitting parameters
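% J(theta) = -(1/N) * sum( y.*log(h) + (1-y).*log(1-h) )  (cross-entropy)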
h = 1 ./ (1 + exp(-x*theta)); % Sigmoid function
cost = (-1/N)*((sum(label' * log(h))) + (sum((1-label)' * log(1-h))));
end
function score = get_boundary(hGrid, vGrid, theta)
% Generates grid of scores that spans the full range of data (where
% a score of 0 indicates decision boundary level)
z = zeros(length(hGrid), length(vGrid));
for i = 1:length(hGrid)
for j = 1:length(vGrid)
% Map to a quadratic function
x_bound = [1 hGrid(i) vGrid(j) hGrid(i)^2 hGrid(i)*vGrid(j) vGrid(j)^2];
% Calculate score
z(i,j) = x_bound*theta;
end
end
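% Transpose so rows follow vGrid (the y-axis), matching contour's convention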
score = z';
end
function g = evalGaussian(x,mu,Sigma)
% Evaluates the Gaussian pdf N(mu,Sigma) at each column of x
[n,N] = size(x);
C = ((2*pi)^n * det(Sigma))^(-1/2); % coefficient
E = -0.5*sum((x-repmat(mu,1,N)).*(inv(Sigma)*(x-repmat(mu,1,N))),1); % exponent
g = C*exp(E); % final gaussian evaluation
end