library(RColorBrewer)
library(ggridges)
library(ggplot2)
library(BSDA)
library(dplyr)
library(kableExtra)
Setup data
#import protein data
tmt.6H <- readRDS("tmt.6H")
tmt.RIL <- readRDS("tmt.RIL")
#import RNA data
cpm.6H <- readRDS("cpm.6H")
cpm.RIL <- readRDS("cpm.RIL")
#import plant height data
PlantHeights.6h.MP <- readRDS("PlantHeights.6h.MP")
PlantHeights.ril.MP <- readRDS("PlantHeights.ril.MP")
#combine RIL and 6 hybrids
tmt <- merge.data.frame(tmt.RIL[,c(6,145:153)], tmt.6H[,c(6,66:71)], by.x = "Accession", by.y = "Accession", all = F)
cpm <- merge.data.frame(cpm.RIL[,c(1,94:102)], cpm.6H[,c(1,54:59)], by.x = "Gene", by.y = "Gene", all = F)
#Add gene column to protein data
tmt$Gene <- substr(tmt$Accession, start = 1, stop = 14)
Define function for calculating correlations
CorFun <- function(x, xcols, y){
for(i in 1:nrow(x)){
v <- as.numeric(x[i,xcols])
c <- cor(v, y, method = "pearson")
x[i,"cor"]<- c
}
x
}
Calculate correlations between hybrid/mid-parent expression levels with hybrid/mid-parent plant height
#run correlation function
tmt <- CorFun(tmt, 2:16, c(PlantHeights.ril.MP, PlantHeights.6h.MP))
cpm <- CorFun(cpm, 2:16, c(PlantHeights.ril.MP, PlantHeights.6h.MP))
#Combine data for plotting
cpm$data <- "mRNA"
cpm <- cpm[,c("Gene", "cor", "data")]
tmt$data <- "Protein"
tmt <- tmt[,c("Gene", "cor", "data")]
tmt.cpm <- rbind(tmt, cpm)
Assign gene categories
#import genes of interist (GOIs) list
GOIs <- readRDS("GOIs")
#Assign GOIs
tmt.cpm$Category <- "Other"
tmt.cpm[(tmt.cpm$Gene %in% GOIs$CytoRibo),"Category"] <- "Cytosolic ribosome"
tmt.cpm[(tmt.cpm$Gene %in% c(GOIs$NE.PtRibo, GOIs$PE.ribo)),"Category"] <- "Plastid ribosome"
tmt.cpm[(tmt.cpm$Gene %in% c(GOIs$PhANGs, GOIs$PhAPGs)),"Category"] <- "Photosynthesis"
tmt.cpm[(tmt.cpm$Gene %in% GOIs$BiosynSecMet),"Category"] <- "Biosynthesis of secondary metabolites"
tmt.cpm[(tmt.cpm$Gene %in% GOIs$AlphaLinAcidMet),"Category"] <- "Alpha-linoleic acid metabolism"
tmt.cpm$Category <- factor(tmt.cpm$Category, levels = c("Other", "Plastid ribosome", "Cytosolic ribosome", "Photosynthesis","Biosynthesis of secondary metabolites","Alpha-linoleic acid metabolism"))
Plot!
ggplot(tmt.cpm, mapping=aes(y=Category, x=cor, linetype=data, fill=stat(x)))+
geom_density_ridges_gradient(gradient_lwd = 2)+
scale_fill_gradientn(colors = alpha(rev(c(brewer.pal(n = 11, "RdBu"))), alpha = .4), guide = "none")+
theme_ridges() +
scale_linetype_manual(values = c("mRNA"="dashed", "Protein"="solid"), name="")+
theme(axis.text =element_text(size=8), axis.title = element_text(size=8), legend.position = "bottom", legend.text = element_text(size=8), axis.title.y = element_blank(), axis.title.x = element_text(hjust = 0.5, size=8))+
coord_cartesian(expand = F)+
xlab("Pearson correlation")+
geom_vline(xintercept = 0, color="grey50")+
xlim(-1,1)
Calculate statistics
GOIs.cor <- list("AlphaLinAcidMet"=GOIs$AlphaLinAcidMet, "BiosynSecMet"=GOIs$BiosynSecMet,"Photosynthesis"=c(GOIs$PhANGs, GOIs$PhAPGs), "pRibo"=c(GOIs$PE.ribo, GOIs$NE.PtRibo),"CytoRibo"=GOIs$CytoRibo)
ZtestResults <- data.frame("Alpha-linoleic acid metabolism"=numeric(), "Biosynthesis of secondary metabolites"=numeric(), "Photosynthesis"=numeric(), "Plastid ribosome"=numeric(),"Cytosolic ribosome"=numeric())
for(i in 1:length(GOIs.cor)){
sigx <- sd(tmt$cor[tmt$Gene %in% as.vector(GOIs.cor[[i]])])
sigy <- sd(tmt$cor[!tmt$Gene %in% as.vector(GOIs.cor[[i]])])
ztest <- z.test(x = tmt$cor[tmt$Gene %in% as.vector(GOIs.cor[[i]])], y = tmt$cor[!tmt$Gene %in% as.vector(GOIs.cor[[i]])], sigma.x = sigx, sigma.y = sigy)
ZtestResults[1,i]<- round(ztest$estimate[[1]], digits = 5)
ZtestResults[2,i]<- round(ztest$estimate[[2]], digits = 5)
ZtestResults[3,i]<- round(sigx, digits = 5)
ZtestResults[4,i]<- round(sigy, digits = 5)
ZtestResults[5,i]<- ztest$p.value
ZtestResults[6,i]<- round(ztest$statistic[[1]], digits = 5)
}
colnames(ZtestResults) <- c("ALA metabolism", "Biosynthesis of secondary metabolites", "Photosynthesis","Plastid ribosome", "Cytosolic ribosome")
rownames(ZtestResults) <- c("mean (group)", "mean (others)", "SD (group)", "SD (others)", "p-value", "Z-statistic")
Format statistics results in table
ZtestResults %>%
kbl(digits = 5) %>%
kable_styling(full_width = T)
ALA metabolism | Biosynthesis of secondary metabolites | Photosynthesis | Plastid ribosome | Cytosolic ribosome | |
---|---|---|---|---|---|
mean (group) | -0.68259 | -0.63130 | 0.31594 | 0.72515 | 0.53199 |
mean (others) | 0.03675 | 0.04618 | 0.02987 | 0.02753 | 0.02561 |
SD (group) | 0.05137 | 0.08811 | 0.24108 | 0.11392 | 0.26022 |
SD (others) | 0.40965 | 0.40465 | 0.41103 | 0.40547 | 0.40649 |
p-value | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 |
Z-statistic | -30.02576 | -49.90528 | 9.97572 | 37.42416 | 16.22632 |