Genetic diversity and interaction between the maintainers of commercial Soybean cultivars using self-organizing maps
library("FactoMineR")
library("factoextra") #Correspondence Analysis
library(readxl) #Read data files in xlsx
library(randomForest) #Random Forest
library(tidyverse)
require(magrittr)
require(reshape2)
library(ggforce) #Data and graphics manipulation
library("kohonen") #Kohonen SOM
library(RColorBrewer) #Color palette
require("ggrepel")
library(gridExtra) #Graphs of Kohonen Analysis
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual', ]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
data <- read_excel("data/data.xlsx")
lower5 <- data %>%
group_by(Maintainer_coded) %>%
count() %>%
filter(n <= 5)
data <- data[!data$Maintainer_coded %in%
lower5$Maintainer_coded,]
for (i in 1:ncol(data)) {
data[[i]] = as.factor(data[[i]])
}
data = data[, -c(1, 2, 5)] #Excluding Name, Maintainer and Year variables
ntraits <- (ncol(data) - 1) / 3 #Number of traits divided by 3
ntree <- 500 #Number of trees
rf1 <- randomForest(
data$Maintainer_coded ~ .,
data = data,
mtry = ntraits,
importance = TRUE,
proximity = TRUE,
ntree = ntree,
na.action = na.roughfix
)
imp <- as.data.frame(varImpPlot(rf1))
imp$traitnames <- rownames(imp)
imp_trait <- melt(imp, id.var = "traitnames")
imp_trait$traitnames <- as.factor(imp_trait$traitnames)
levels(imp_trait$traitnames) <- c(
"Event Type",
"Flower Color",
"Growth Type",
"Hilo Color",
"Hypocotyl",
"Legum Color",
"Peroxidase Reaction",
"Pubescence Color",
"Pubescence Density",
"Seed Brightness",
"Seed Shape",
"Tegument Color"
)
imp_trait %>%
mutate(traitnames = fct_reorder(traitnames, value)) %>%
ggplot(aes(x = traitnames, y = value)) +
geom_point(aes(colour = variable, shape = variable), size = 4) +
scale_colour_discrete(labels = c("Accuracy", "Gini Indice")) +
scale_shape_discrete(labels = c("Accuracy", "Gini Indice")) +
geom_segment(aes(
x = traitnames,
xend = traitnames,
y = 0,
yend = value
)) +
ylab("Increase in node purity") +
xlab("") +
theme_minimal() +
theme(
legend.position = c(0.8, 0.15),
legend.title = element_blank(),
legend.background = element_rect(fill = "white", colour = "black"),
legend.key.size = unit(0.5, "cm"),
axis.text.y = element_text(
angle = 15,
vjust = 0.5,
hjust = 1
)
) +
coord_flip()
x <- imp_trait %>% group_by(traitnames) %>%
summarise(value = mean(value)) %>%
slice_min(value, n = 4, with_ties = FALSE) %>%
ungroup()
colnames(data) <- c(
"Maintainer_coded",
"Event Type",
"Flower Color",
"Legum Color",
"Seed Shape",
"Tegument Color",
"Hypocotyl",
"Pubescence Color",
"Pubescence Density",
"Hilo Color",
"Peroxidase Reaction",
"Growth Type",
"Seed Brightness"
)
data <-
data %>% select(!any_of(x$traitnames)) #tegument color, flower color, peroxidase reaction, and peroxidase
res.mca <- MCA(data,
quali.sup = 1,
graph = FALSE,
ncp = 54)
eig.val <- get_eigenvalue(res.mca)
eig.val
eig.val <-
as.data.frame(eig.val) #creating data frame with eigenvalues
eig.val$Dimension <-
as.numeric(str_replace(rownames(eig.val), "Dim.", ""))
ggplot(data = eig.val, aes(x = Dimension, y = (cumulative.variance.percent))) +
geom_bar(stat = "identity", fill = "#00AFBB", alpha = 0.5) +
geom_point(colour = "#FC4E07") +
geom_line(colour = "#FC4E07") +
scale_x_continuous(breaks = c(1:nrow(eig.val)), expand = c(0.01, 0)) +
scale_y_continuous(expand = expansion(mult = c(0, .05))) +
theme_bw() +
labs(x = "Dimensions",
y = "Cumulative variance percent")
# Network topology and parameters
map_dimension = 4
n_iterations = 2000
recalculate_map = T
recalculate_no_clusters = T
#Kohonen SOM
som_grid = kohonen::somgrid(
xdim = map_dimension,
ydim = map_dimension,
topo = "hexagonal",
toroidal = FALSE
)
b <- res.mca[["quali.sup"]][["coord"]] #COORDINATES OF THE MAINTAINERS IN THE 54 DIMENSIONS
set.seed(123) #Seed for reproduction of results
m = kohonen::supersom(
scale(b),
grid = som_grid,
rlen = n_iterations,
alpha = c(0.05, 0.01),
dist.fcts = 'euclidean'
)
progress <- as.data.frame(cbind(m$changes, 1:nrow(m$changes)))
colnames(progress) <- c("Mean distance to closest unit", "Iteration")
ggplot(progress, aes(Iteration, progress[[1]])) +
geom_line(color = "#00AFBB", size = 1) +
stat_smooth(color = "#FC4E07",
fill = "#FC4E07",
method = "loess") +
theme_minimal() +
labs(y = colnames(progress[1]))
grouping <- as_tibble(cbind(m$unit.classif, rownames(b)))
colnames(grouping) <- c("group", "Maintainer_coded")
grouping$group <- as.factor(grouping$group)
grouping$Maintainer_coded <- as.factor(grouping$Maintainer_coded)
data <- data %>% left_join(grouping)
som_coord <- m[[4]]$pts %>%
as_tibble %>%
mutate(group = as.integer(row_number()))
som_pts <- tibble(group = as.integer(m[[2]]),
dist = m[[3]])
ndist <- unit.distances(m$grid)
cddist <- as.matrix(object.distances(m, type = "codes"))
cddist[abs(ndist - 1) > .001] <- NA
neigh.dists <- colMeans(cddist, na.rm = TRUE)
som_coord <- som_coord %>% mutate(dist = neigh.dists)
neigdists <- som_coord %>%
ggplot(aes(x0 = x, y0 = y)) +
geom_regon(aes(
r = 0.5,
angle = 11,
sides = 6,
fill = dist
),
expand = unit(0.1, 'cm')) +
scale_fill_gradient(low = "red", high = "yellow", name = "Distance") +
theme(
panel.background = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
legend.position = "bottom"
) +
geom_text_repel(
data = som_coord,
aes(x = x, y = y, label = group),
hjust = 0.5,
force = 0,
color = "black",
show.legend = FALSE,
segment.color = NA
)
neigdists
p <- som_coord %>%
ggplot(aes(x0 = x, y0 = y)) +
geom_regon(aes(
r = 0.5,
angle = 11,
sides = 6,
fill = factor(group)
),
expand = unit(0.1, 'cm')) +
theme(
panel.background = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
legend.position = "none"
) +
scale_fill_manual(values = col_vector[40:56])
plotdata <- data %>%
group_by(group, Maintainer_coded) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
MC <- p +
geom_label_repel(
data = plotdata,
aes(x = x, y = y, label = Maintainer_coded),
max.overlaps = Inf,
box.padding = 0.1,
hjust = 0.5,
direction = "y",
color = "black",
show.legend = FALSE,
segment.color = NA
) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank())
MC
p <- som_coord %>%
ggplot(aes(x0 = x, y0 = y)) +
geom_regon(
aes(r = 0.5, angle = 11, sides = 6),
expand = unit(0.1, 'cm'),
alpha = 0,
color = "black"
) +
theme(
panel.background = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
legend.position = "bottom"
)
plotdata <- data %>%
group_by(group, `Event Type`) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$`Event Type`) <-
c(
"Conventional",
"CV127",
"Intacta™ Roundup Ready™ 2 Pro",
"Liberty Link",
"Roundup Ready"
)
EV <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = `Event Type`
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[1:5]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
EV
plotdata <- data %>%
group_by(group, `Legum Color`) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$`Legum Color`) <-
c(
"Brown",
"Clear",
"Dark Brown",
"Dark Gray",
"Gray",
"Light Brown",
"Light Gray",
"Medium Brown"
)
CL <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = `Legum Color`
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[6:13]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
CL
plotdata <- data %>%
group_by(group, `Seed Shape`) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$`Seed Shape`) <- c(
"Elongated flat",
"Elongated spherical",
"Spherical flat",
"Ovulated spherical",
"Round",
"Rounded",
"Spherical",
"Elongated"
)
FS <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = `Seed Shape`
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[14:21]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
FS
plotdata <- data %>%
group_by(group, Hypocotyl) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$Hypocotyl) <- c(
"Ausente",
"Ausente/green",
"Presente",
"Presente/green",
"Presente/white",
"Presente/bronze" ,
"Presente/purple"
)
HI <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = Hypocotyl
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[22:28]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
HI
plotdata <- data %>%
group_by(group, `Pubescence Color`) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$`Pubescence Color`) <-
c("Average",
"Brown",
"Dark Brown",
"Gray",
"Light Brown",
"Light Gray",
"Medium Brown")
CP <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = `Pubescence Color`
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[29:35]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
CP
plotdata <- data %>%
group_by(group, `Hilo Color`) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$`Hilo Color`) <-
c(
"Black",
"Brown",
"Dark Brown",
"Faded Black",
"Gray",
"Imperfect Black",
"Light Brown",
"Medium Brown",
"Yellow"
)
CH <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = `Hilo Color`
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[36:44]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
CH
plotdata <- data %>%
group_by(group, `Growth Type`) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$`Growth Type`) <- c("Determined",
"Undetermined",
"Semi-determined")
TC <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = `Growth Type`
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[45:47]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
TC
plotdata <- data %>%
group_by(group, `Seed Brightness`) %>%
summarize(n = n()) %>%
mutate(pct = (n / sum(n)),
lbl = scales::percent(pct)) %>%
ungroup()
plotdata$group <- as.integer(levels(plotdata$group))[plotdata$group]
plotdata <- plotdata %>% left_join(som_coord, by = "group")
plotdata <- na.omit(plotdata)
levels(plotdata$`Seed Brightness`) <-
c("Average",
"Bright",
"Shiny",
"High",
"Low",
"Low/Average",
"Matte" ,
"Opaque")
BS <- p +
geom_arc_bar(
data = plotdata,
aes(
x0 = x,
y0 = y,
r0 = 0,
r = 0.4,
amount = n,
fill = `Seed Brightness`
),
stat = 'pie',
alpha = 0.8
) +
scale_fill_manual(values = col_vector[48:55]) +
geom_text(
data = plotdata,
aes(
x = x,
y = (y - 0.45),
label = group
),
color = "black",
show.legend = FALSE
) +
theme(plot.title = element_blank(), legend.title = element_blank())
BS
yield <- read_excel("data/yield.xlsx")
data <- read_excel("data/data.xlsx")
#Transforming variables into factors
yield$Year <- as.factor(yield$Year)
for (i in 1:ncol(data)) {
data[[i]] = as.factor(data[[i]])
}
# Count graph relating event to year
levels(data$Event) <-
c(
"Conventional",
"CV127",
"Intacta™ Roundup Ready™ 2 Pro",
"Liberty Link",
"Roundup Ready"
)
ggplot() +
geom_bar(data = data, aes(Year, fill = Event)) +
geom_point(data = yield,
aes(x = Year, y = (Yield * (175 / 125000))),
group = 1,
color = "#a50026") +
geom_line(
data = yield,
aes(x = Year, y = Yield * (175 / 125000)),
group = 1,
color = "#a50026"
) +
scale_y_continuous(name = "Count",
sec.axis = sec_axis( ~ . * 125000 / 175, name = "Yield (Thousand Tons)")) +
theme_bw() +
xlab("Year") +
scale_fill_brewer(palette = "Dark2") +
labs(fill = "Event Type") +
theme(
legend.position = c(0.125, 0.85),
legend.background = element_rect(fill = "white", colour = "black")
)
# Count graph relating Growth type to year
levels(data$Growth_type) <- c("Determined",
"Undetermined",
"Semi-determined")
data %>%
filter(Growth_type != "NA") %>%
ggplot() +
geom_bar(aes(Year, fill = Growth_type)) +
theme_bw() +
xlab("Year") +
ylab("Count") +
scale_fill_brewer(palette = "Set1") +
labs(fill = "Growth type") +
theme(
legend.position = c(0.125, 0.85),
legend.background = element_rect(fill = "white", colour = "black")
)