#############################################################################
### REFERENCES
### https://cran.r-project.org/web/packages/factoextra/factoextra.pdf
##############################################################################



##############################################################################
########### PCA - MALES  #####################################################
##############################################################################
##############################################################################

# Load the male measurement dataset. `file` must give the exact location
# (path) of the CSV-formatted file on your computer; `row.names = 1`
# designates the 1st CSV column as the specimen ID.
d <- read.csv(
  file = "D:/Supplementary/Measurement_Epidaus_male.csv",
  sep = ",",
  dec = ".",
  header = TRUE,
  row.names = 1
)

# Display dataset
d

# Create a new dataset (d2) for the PCA by dropping columns 30 and 31 of d.
# Column 30 is the taxon name (extracted separately below).
# NOTE(review): the content of column 31 is not visible from this script -
# confirm that it is a non-measurement column.
# When counting columns, ignore the 1st column of the CSV file, because it is
# already used as row names (e.g. the 27th column of the CSV file must be
# designated as 26). d[, -(a:b)] creates a new dataset without the columns
# from a to b, where a and b are column numbers.
d2 <- d[, -(30:31)]

# Extract the taxon name of every specimen (column 30 of d); it is used below
# to fill the points by taxon.
taxon_names <- d[, 30]

# Display the new dataset d2
d2

# Display the field "taxon_names"
taxon_names

# Display the number of rows and columns (field of variables) of d2
dim(d2)

#### Principal component analysis (PCA)
# Load the required package(s)
library(factoextra)

# Run the PCA on d2. The argument is written out in full as `scale.` (its
# actual name in prcomp) instead of relying on partial matching of `scale`.
res.pca <- prcomp(d2, scale. = TRUE)

## Graph of individuals
# PCA 2D-plot. Use 'point' to indicate individuals: filled circles
# (pointshape = 21) with a black outline, filled by taxon using the 12
# colours supplied in `palette`. `addEllipses = TRUE` requests ellipses
# around the groups, and the title is centred with `hjust = 0.5`.
fviz_pca_ind(
  res.pca,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2,
  fill.ind = taxon_names,
  col.ind = "black",
  palette = c(
    "#de84a7ff", "#5f8dd3ff", "#5fbcd3ff", "#ff3535ff",
    "#ffd42aff", "#5aa02cff", "#2986cc", "#16537e",
    "#6a329f", "#c90076", "#cc0000", "#f3e400"
  ),
  addEllipses = TRUE,
  label = "var",
  col.var = "black",
  repel = TRUE,
  legend.title = "taxon"
) +
  ggtitle("2D PCA-plot from morphological dataset") +
  theme(plot.title = element_text(hjust = 0.5))


##############################################################################
########### PCA - MALES (EXTENDED)############################################
##############################################################################
##############################################################################

# Load the extended male measurement dataset. `file` must give the exact
# location (path) of the CSV-formatted file on your computer; `row.names = 1`
# designates the 1st CSV column as the specimen ID.
d <- read.csv(
  file = "D:/Supplementary/Measurement_Epidaus_male_extended.csv",
  sep = ",",
  dec = ".",
  header = TRUE,
  row.names = 1
)

# Display dataset
d

# Create a new dataset (d2) for the PCA by dropping columns 30 and 31 of d.
# Column 30 is the taxon name (extracted separately below).
# NOTE(review): the content of column 31 is not visible from this script -
# confirm that it is a non-measurement column.
# When counting columns, ignore the 1st column of the CSV file, because it is
# already used as row names (e.g. the 27th column of the CSV file must be
# designated as 26). d[, -(a:b)] creates a new dataset without the columns
# from a to b, where a and b are column numbers.
d2 <- d[, -(30:31)]

# Extract the taxon name of every specimen (column 30 of d); it is used below
# to fill the points by taxon.
taxon_names <- d[, 30]

# Display the new dataset d2
d2

# Display the field "taxon_names"
taxon_names

# Display the number of rows and columns (field of variables) of d2
dim(d2)

#### Principal component analysis (PCA)
# Load the required package(s)
library(factoextra)

# Run the PCA on d2. The argument is written out in full as `scale.` (its
# actual name in prcomp) instead of relying on partial matching of `scale`.
res.pca <- prcomp(d2, scale. = TRUE)

## Graph of individuals
# PCA 2D-plot. Use 'point' to indicate individuals: filled circles
# (pointshape = 21) with a black outline, filled by taxon using the 12
# colours supplied in `palette`. `addEllipses = TRUE` requests ellipses
# around the groups, and the title is centred with `hjust = 0.5`.
fviz_pca_ind(
  res.pca,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2,
  fill.ind = taxon_names,
  col.ind = "black",
  palette = c(
    "#de84a7ff", "#5f8dd3ff", "#5fbcd3ff", "#ff3535ff",
    "#ffd42aff", "#5aa02cff", "#2986cc", "#16537e",
    "#6a329f", "#c90076", "#cc0000", "#f3e400"
  ),
  addEllipses = TRUE,
  label = "var",
  col.var = "black",
  repel = TRUE,
  legend.title = "taxon"
) +
  ggtitle("2D PCA-plot from morphological dataset") +
  theme(plot.title = element_text(hjust = 0.5))

##############################################################################
########### PCA - FEMALES  ###################################################
##############################################################################
##############################################################################

# Load the female measurement dataset. `file` must give the exact location
# (path) of the CSV-formatted file on your computer; `row.names = 1`
# designates the 1st CSV column as the specimen ID.
d <- read.csv(
  file = "D:/Supplementary/Measurement_Epidaus_female.csv",
  sep = ",",
  dec = ".",
  header = TRUE,
  row.names = 1
)

# Display dataset
d

# Create a new dataset (d2) for the PCA by dropping columns 28 and 29 of d.
# Column 28 is the taxon name (extracted separately below).
# NOTE(review): the content of column 29 is not visible from this script -
# confirm that it is a non-measurement column.
# When counting columns, ignore the 1st column of the CSV file, because it is
# already used as row names (e.g. the 27th column of the CSV file must be
# designated as 26). d[, -(a:b)] creates a new dataset without the columns
# from a to b, where a and b are column numbers.
d2 <- d[, -(28:29)]

# Extract the taxon name of every specimen (column 28 of d); it is used below
# to fill the points by taxon.
taxon_names <- d[, 28]

# Display the new dataset d2
d2

# Display the field "taxon_names"
taxon_names

# Display the number of rows and columns (field of variables) of d2
dim(d2)

#### Principal component analysis (PCA)
# Load the required package(s)
library(factoextra)

# Run the PCA on d2. The argument is written out in full as `scale.` (its
# actual name in prcomp) instead of relying on partial matching of `scale`.
res.pca <- prcomp(d2, scale. = TRUE)

## Graph of individuals
# PCA 2D-plot. Use 'point' to indicate individuals: filled circles
# (pointshape = 21) with a black outline, filled by taxon using the 12
# colours supplied in `palette`. `addEllipses = TRUE` requests ellipses
# around the groups, and the title is centred with `hjust = 0.5`.
fviz_pca_ind(
  res.pca,
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2,
  fill.ind = taxon_names,
  col.ind = "black",
  palette = c(
    "#de84a7ff", "#5f8dd3ff", "#5fbcd3ff", "#ff3535ff",
    "#ffd42aff", "#5aa02cff", "#2986cc", "#16537e",
    "#6a329f", "#c90076", "#cc0000", "#f3e400"
  ),
  addEllipses = TRUE,
  label = "var",
  col.var = "black",
  repel = TRUE,
  legend.title = "taxon"
) +
  ggtitle("2D PCA-plot from morphological dataset") +
  theme(plot.title = element_text(hjust = 0.5))