library(RColorBrewer)
library(ggridges)
library(ggplot2)
library(BSDA)
library(dplyr)
library(kableExtra)

Setup data

#import protein data
tmt.6H <- readRDS("tmt.6H")
tmt.RIL <- readRDS("tmt.RIL")
#import RNA data
cpm.6H <- readRDS("cpm.6H")
cpm.RIL <- readRDS("cpm.RIL")
#import plant height data
PlantHeights.6h.MP <- readRDS("PlantHeights.6h.MP")
PlantHeights.ril.MP <- readRDS("PlantHeights.ril.MP")
#combine RIL and 6 hybrids 
tmt <- merge.data.frame(tmt.RIL[,c(6,145:153)], tmt.6H[,c(6,66:71)], by.x = "Accession", by.y = "Accession", all = F)
cpm <- merge.data.frame(cpm.RIL[,c(1,94:102)], cpm.6H[,c(1,54:59)], by.x = "Gene", by.y = "Gene", all = F)
#Add gene column to protein data
tmt$Gene <- substr(tmt$Accession, start = 1, stop = 14)

Define function for calculating correlations

CorFun <- function(x, xcols, y){
  for(i in 1:nrow(x)){
    v <- as.numeric(x[i,xcols])
    c <- cor(v, y, method = "pearson")
    x[i,"cor"]<- c
  }
  x
}

Calculate correlations between hybrid/mid-parent expression levels with hybrid/mid-parent plant height

#run correlation function
tmt <- CorFun(tmt, 2:16, c(PlantHeights.ril.MP, PlantHeights.6h.MP))
cpm <- CorFun(cpm, 2:16, c(PlantHeights.ril.MP, PlantHeights.6h.MP))
#Combine data for plotting
cpm$data <- "mRNA"
cpm <- cpm[,c("Gene", "cor", "data")]
tmt$data <- "Protein"
tmt <- tmt[,c("Gene", "cor", "data")]
tmt.cpm <- rbind(tmt, cpm)

Assign gene categories

#import genes of interist (GOIs) list
GOIs <- readRDS("GOIs")
#Assign GOIs
tmt.cpm$Category <- "Other"
tmt.cpm[(tmt.cpm$Gene %in% GOIs$CytoRibo),"Category"] <- "Cytosolic ribosome"
tmt.cpm[(tmt.cpm$Gene %in% c(GOIs$NE.PtRibo, GOIs$PE.ribo)),"Category"] <- "Plastid ribosome"
tmt.cpm[(tmt.cpm$Gene %in% c(GOIs$PhANGs, GOIs$PhAPGs)),"Category"] <- "Photosynthesis"
tmt.cpm[(tmt.cpm$Gene %in% GOIs$BiosynSecMet),"Category"] <- "Biosynthesis of secondary metabolites"
tmt.cpm[(tmt.cpm$Gene %in% GOIs$AlphaLinAcidMet),"Category"] <- "Alpha-linoleic acid metabolism"
tmt.cpm$Category <- factor(tmt.cpm$Category, levels = c("Other", "Plastid ribosome", "Cytosolic ribosome", "Photosynthesis","Biosynthesis of secondary metabolites","Alpha-linoleic acid metabolism"))

Plot!

ggplot(tmt.cpm, mapping=aes(y=Category, x=cor, linetype=data, fill=stat(x)))+
  geom_density_ridges_gradient(gradient_lwd = 2)+
  scale_fill_gradientn(colors = alpha(rev(c(brewer.pal(n = 11, "RdBu"))), alpha = .4), guide = "none")+
  theme_ridges() +
  scale_linetype_manual(values = c("mRNA"="dashed", "Protein"="solid"), name="")+
  theme(axis.text =element_text(size=8), axis.title = element_text(size=8), legend.position = "bottom", legend.text = element_text(size=8), axis.title.y = element_blank(), axis.title.x = element_text(hjust = 0.5, size=8))+
  coord_cartesian(expand = F)+
  xlab("Pearson correlation")+
  geom_vline(xintercept = 0, color="grey50")+
  xlim(-1,1)

Calculate statistics

GOIs.cor <- list("AlphaLinAcidMet"=GOIs$AlphaLinAcidMet, "BiosynSecMet"=GOIs$BiosynSecMet,"Photosynthesis"=c(GOIs$PhANGs, GOIs$PhAPGs), "pRibo"=c(GOIs$PE.ribo, GOIs$NE.PtRibo),"CytoRibo"=GOIs$CytoRibo)
ZtestResults <- data.frame("Alpha-linoleic acid metabolism"=numeric(), "Biosynthesis of secondary metabolites"=numeric(), "Photosynthesis"=numeric(), "Plastid ribosome"=numeric(),"Cytosolic ribosome"=numeric())

for(i in 1:length(GOIs.cor)){
  
  sigx <- sd(tmt$cor[tmt$Gene %in% as.vector(GOIs.cor[[i]])])
  sigy <- sd(tmt$cor[!tmt$Gene %in% as.vector(GOIs.cor[[i]])])
  
  ztest <- z.test(x = tmt$cor[tmt$Gene %in% as.vector(GOIs.cor[[i]])], y = tmt$cor[!tmt$Gene %in% as.vector(GOIs.cor[[i]])], sigma.x = sigx, sigma.y = sigy)
  
  ZtestResults[1,i]<- round(ztest$estimate[[1]], digits = 5)
  ZtestResults[2,i]<- round(ztest$estimate[[2]], digits = 5)
  ZtestResults[3,i]<- round(sigx, digits = 5)
  ZtestResults[4,i]<- round(sigy, digits = 5)
  ZtestResults[5,i]<- ztest$p.value
  ZtestResults[6,i]<- round(ztest$statistic[[1]], digits = 5)
  
}

colnames(ZtestResults) <- c("ALA metabolism", "Biosynthesis of secondary metabolites", "Photosynthesis","Plastid ribosome", "Cytosolic ribosome")
rownames(ZtestResults) <- c("mean (group)", "mean (others)", "SD (group)", "SD (others)", "p-value", "Z-statistic")

Format statistics results in table

ZtestResults %>%
  kbl(digits = 5) %>%
  kable_styling(full_width = T)
ALA metabolism Biosynthesis of secondary metabolites Photosynthesis Plastid ribosome Cytosolic ribosome
mean (group) -0.68259 -0.63130 0.31594 0.72515 0.53199
mean (others) 0.03675 0.04618 0.02987 0.02753 0.02561
SD (group) 0.05137 0.08811 0.24108 0.11392 0.26022
SD (others) 0.40965 0.40465 0.41103 0.40547 0.40649
p-value 0.00000 0.00000 0.00000 0.00000 0.00000
Z-statistic -30.02576 -49.90528 9.97572 37.42416 16.22632