From the MATLAB command window, load the iris dataset using the load command: load('fisheriris.mat'). This will load two variables: species and meas. The former contains the list of species (types) of iris, while the latter is a double matrix with 4 features and 150 data samples. You are required to do the following:
Answer & Explanation
Properly labelled MATLAB code and the figures are attached in the explanation. Please run this code to get answers to all parts.
Answer to Part 4: From the figures, it seems that feature pairs 2 & 3 and 3 & 4 can be used to separate the species, because the differently coloured points corresponding to flowers of different species show very little overlap in Figure 2 and Figure 3.
% Start from a clean session: remove workspace variables, clear the
% command window and close any open figure windows.
% clearvars is used instead of "clear all" because the latter also flushes
% cached functions and MEX files, which is unnecessary here and slows down
% the code that runs afterwards.
clearvars
clc
close all

% Load only the two variables this script uses:
%   meas    - numeric matrix, one row per flower, one column per feature
%   species - cell array with the species name of each row of meas
load('fisheriris.mat', 'meas', 'species')

% Logical masks selecting the rows of meas that belong to each species.
idx_setosa     = strcmp(species, 'setosa');
idx_versicolor = strcmp(species, 'versicolor');
idx_virginica  = strcmp(species, 'virginica');
%% PART 1
% Feature 1 (x-axis) against feature 2 (y-axis), one marker style per species.
xyCols  = [1 2];                                   % columns of meas to plot
groups  = {idx_setosa, idx_versicolor, idx_virginica};
markers = {'ro', 'bs', 'g*'};
names   = {'setosa', 'versicolor', 'virginica'};
figure;
for k = 1:numel(groups)
    rows = groups{k};
    plot(meas(rows, xyCols(1)), meas(rows, xyCols(2)), markers{k}, 'LineStyle', 'none')
    hold on   % issued after the first plot so later species are added to the same axes
end
legend(names{:})
xlabel('Feature 1')
ylabel('Feature 2')
%% PART 2
% Feature 2 (x-axis) against feature 3 (y-axis), one marker style per species.
xyCols  = [2 3];                                   % columns of meas to plot
groups  = {idx_setosa, idx_versicolor, idx_virginica};
markers = {'ro', 'bs', 'g*'};
names   = {'setosa', 'versicolor', 'virginica'};
figure;
for k = 1:numel(groups)
    rows = groups{k};
    plot(meas(rows, xyCols(1)), meas(rows, xyCols(2)), markers{k}, 'LineStyle', 'none')
    hold on   % issued after the first plot so later species are added to the same axes
end
legend(names{:})
xlabel('Feature 2')
ylabel('Feature 3')
%% PART 3
% Feature 3 (x-axis) against feature 4 (y-axis), one marker style per species.
xyCols  = [3 4];                                   % columns of meas to plot
groups  = {idx_setosa, idx_versicolor, idx_virginica};
markers = {'ro', 'bs', 'g*'};
names   = {'setosa', 'versicolor', 'virginica'};
figure;
for k = 1:numel(groups)
    rows = groups{k};
    plot(meas(rows, xyCols(1)), meas(rows, xyCols(2)), markers{k}, 'LineStyle', 'none')
    hold on   % issued after the first plot so later species are added to the same axes
end
legend(names{:})
xlabel('Feature 3')
ylabel('Feature 4')
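%% PART 4
% No code is needed for this part: the answer is obtained by visually
% inspecting the figures produced in Parts 1-3.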
%% PART 5 - Euclidean distance calculation
% Dist is the N-by-N matrix whose (r,c)th element is the Euclidean distance
% between the features of the r-th and c-th iris flower in the data set,
% where N is the number of rows (samples) of meas.
numSamples = size(meas, 1);
Dist = zeros(numSamples);
% The distance is symmetric and a sample is at distance 0 from itself
% (already provided by zeros, assuming the measurements are finite), so
% only the upper triangle is computed and then mirrored.
% The loop indices are named r/c rather than i/j so that MATLAB's
% imaginary unit is not shadowed.
for r = 1:numSamples-1
    for c = r+1:numSamples
        d = myEucDis(meas(r,:), meas(c,:)); % distance between r-th and c-th data sample
        Dist(r,c) = d;
        Dist(c,r) = d;
    end
end
%% PART 6 - Mean calculation
% Per-species mean of each feature (one value per column of meas).
% The dimension argument is given explicitly so that the average is always
% taken over the samples (rows). Without it, mean would average across the
% features if a mask happened to select exactly one row.
Mean_setosa     = mean(meas(idx_setosa,:), 1);     % mean of individual features for setosa iris
Mean_versicolor = mean(meas(idx_versicolor,:), 1); % mean of individual features for versicolor iris
Mean_virginica  = mean(meas(idx_virginica,:), 1);  % mean of individual features for virginica iris
%%% MATLAB FUNCTION FILE
function dis = myEucDis(pt1,pt2)
%MYEUCDIS Euclidean distance between two data samples.
%   dis = myEucDis(pt1,pt2) returns the square root of the sum of the
%   squared element-wise differences between pt1 and pt2. The calling
%   script passes two rows of the feature matrix, i.e. two vectors of the
%   same size.
delta = pt1 - pt2;            % element-wise difference between the samples
squared = delta .* delta;     % squared difference for each feature
dis = sqrt(sum(squared));
end