setwd("//files.wustl.edu/Shares/DOM/ONC/Hirbe_Lab/Diana/UBR5 KO RNASeq/analysis/HOM VS WT_2")
Counts <- read.csv("Counts.csv")
# Remove duplicate rows from Counts
Counts <- Counts[!duplicated(Counts[, 1]), ]
rownames(Counts) <- Counts[, 1]
Counts<- Counts [, -1]
# Calculate row means
row_means <- rowMeans(Counts)
# Order genes by row means in descending order
ordered_counts <- Counts[rev(order(row_means)), ]
#alternative code to the above is (ordered_counts <- Counts[order(row_means, decreasing = TRUE), ])
# Filter out rows with row means less than 10
filtered_counts <- ordered_counts[rowMeans(ordered_counts) >= 10, ]
#save filtered data frame
write.csv(filtered_counts, "filtered_counts.csv")
#prepare metadata telling R the conditions (columns)
metadata <- data.frame(
sample_id = colnames(filtered_counts), # Assuming you have loaded the filtered expression data
condition = c(rep("UBR5 WT", 3), rep("UBR5 HOM", 3)), # Treatment conditions
replicate = c(1, 2, 3, 1, 2, 3) # Sample replicates
)
metadata$condition <- factor(metadata$condition, levels = c("UBR5 WT", "UBR5 HOM"))
#Load DESEQ2 for normalization
library(DESeq2)
#Use the DESeqDataSetFromMatrix function from DESeq2 to create a DESeqDataSet object
dds <- DESeqDataSetFromMatrix(countData = filtered_counts,
colData = metadata,
design = ~ condition)
#Perform normalization and estimation of dispersions: Use the DESeq() function to perform normalization and estimation of dispersions.
dds <- DESeq(dds)
results <- results(dds, alpha = 0.05)
DEGs <- subset(results, abs(log2FoldChange) > 1 & padj < 0.05)
#save the de_genes data frame
write.csv(DEGs, file = "DEG_HOM_VS_WT.csv",)
write.csv(results, file = "DeseqResults_HOM_VS_WT.csv",)
#create volcano plot
library(ggplot2)
# Add column to classify genes as DEG or not
results_df <- as.data.frame(results)
results_df$gene <- rownames(results_df)
results_df$threshold <- "Unchanged"
results_df$threshold[results_df$padj < 0.05 & abs(results_df$log2FoldChange) > 1] <- "DEG"
library(ggrepel)
# Volcano plot
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HOM vs WT",
x = "log2 Fold Change (HOM vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black")
# Select top 50 significant genes by padj
top100 <- results_df[order(results_df$padj), ][1:100, ]
# Volcano plot
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HOM vs WT",
x = "log2 Fold Change (HOM vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(data = top100,
aes(label = gene),
size = 3,
max.overlaps = Inf,
box.padding = 0.3,
point.padding = 0.2,
segment.color = "grey50")
write.csv(top100, file = "Top100_HOM_vs_WT.csv", row.names = FALSE)
# Define genes of interest
genes_of_interest <- c("Egfr", "Hsp90ab1", "Map2k2", "Cerk", "Pdgfra", "Tyk2", "Jak1",
"Yap1", "Taz", "Kdr", "Aurka", "Pten", "Csf1r","Ptch1", "Smo", "Gli2", "Gli3", "Wnt10a", "Rac2", "Rspo2", "Apc",
"Cd274", "Pdcd1", "Id1", "Id3", "Cdh1", "Cdc73", "Hrpt2","Csf1","Golph3", "Cdk1", "Acsl4", "Ptk2b", "Akt1", "Akt2", "Akt3", "Pik3ca", "Pik3c2a", "Pik3cb" , "Pik3c3", "Pik3c2b", "Pik3cd", "Atmin", "Cdkn1a", "Cdk9", "Rela", "Nfkb1", "Nfkb2", "Capza1", "Stat1", "Stat3", "Irf1", "Irf3")
# Subset DEGs for these genes (case-sensitive match!)
genes_subset <- results_df[rownames(results_df) %in% genes_of_interest, ]
# Save genes of interest with stats
write.csv(genes_subset, file = "GenesOfInterest_HOM_vs_WT.csv", row.names = TRUE)
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HET vs WT",
x = "log2 Fold Change (HET vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(
data = genes_subset,
aes(label = gene),
color = "black", # <-- force label text to black
size = 3,
max.overlaps = Inf,
box.padding = 0.3,
point.padding = 0.2,
segment.color = "grey50"
)
# Subset DEGs only
DEGs_df <- as.data.frame(DEGs)
DEGs_df$gene <- rownames(DEGs_df)
# Find overlap between DEGs and genes of interest
genes_subset <- DEGs_df[rownames(DEGs_df) %in% genes_of_interest, ]
# Save overlapping genes with stats
write.csv(genes_subset, file = "GenesOfInterest_DEGs_HOM_vs_WT.csv", row.names = TRUE)
# Volcano plot with labels ONLY for genes of interest that are DEGs
ggplot(results_df, aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
geom_point(alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Unchanged" = "grey", "DEG" = "red")) +
theme_minimal(base_size = 14) +
labs(title = "Volcano Plot: HET vs WT",
x = "log2 Fold Change (HET vs WT)",
y = "-log10 Adjusted p-value",
color = "Gene status") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(
data = genes_subset,
aes(label = gene),
color = "black", # labels in black
size = 5,
max.overlaps = Inf,
box.padding = 0.3,
point.padding = 0.2,
segment.color = "grey50"
)
if (!requireNamespace("clusterProfiler", quietly = TRUE)) {
BiocManager::install("clusterProfiler")
}
if (!requireNamespace("msigdbr", quietly = TRUE)) {
install.packages("msigdbr")
}
library(clusterProfiler)
library(msigdbr)
# Convert results to dataframe
res_df <- as.data.frame(results)
# Remove NA log2FC
res_df <- res_df[!is.na(res_df$log2FoldChange), ]
# Create named vector: names = gene symbols, values = log2FC
gene_list <- res_df$log2FoldChange
names(gene_list) <- rownames(res_df)
# Sort decreasing for clusterProfiler
gene_list <- sort(gene_list, decreasing = TRUE)
# Mouse Hallmark gene sets
hallmark_sets <- msigdbr(species = "Mus musculus", category = "H") # H = Hallmark
# Use as two-column dataframe: gs_name (pathway), gene_symbol
term2gene <- hallmark_sets[, c("gs_name", "gene_symbol")]
# Make sure your DESeq2 results have no NA log2FC
res_df <- as.data.frame(results)
res_df <- res_df[!is.na(res_df$log2FoldChange), ]
# Named vector: names = gene symbols, values = log2FC
gene_list <- res_df$log2FoldChange
names(gene_list) <- rownames(res_df)
gene_list <- sort(gene_list, decreasing = TRUE)
gsea_res <- GSEA(
geneList = gene_list,
TERM2GENE = term2gene, # <- must be dataframe, not list
pvalueCutoff = 0.1,
verbose = FALSE
)
# View top pathways
head(as.data.frame(gsea_res))
# Save results
write.csv(as.data.frame(gsea_res), "GSEA_Hallmark_Mouse_HOM_vs_WT.csv", row.names = FALSE)
library(enrichplot)
# Convert GSEA results to dataframe
gsea_df <- as.data.frame(gsea_res)
# Suppose the top (and only) enriched pathway:
top_pathway <- gsea_df$ID[5] # or use $Description if you prefer
# Classic GSEA plot for the top pathway
gseaplot2(
gsea_res,
geneSetID = top_pathway, # pathway ID
title = gsea_df$Description[5], # nice descriptive title
color = "red"
)
#PLOT HALLMARK PATHWAYS
library(ggplot2)
# Convert GSEA results to dataframe
gsea_df <- as.data.frame(gsea_res)
# Order pathways by NES (normalized enrichment score)
gsea_df <- gsea_df[order(gsea_df$NES, decreasing = TRUE), ]
# Plot ALL enriched pathways
ggplot(gsea_df, aes(x = reorder(Description, NES), y = NES, fill = -log10(p.adjust))) +
geom_col() +
coord_flip() +
labs(
title = "GSEA: All Enriched Hallmark Pathways",
x = "Pathway",
y = "Normalized Enrichment Score (NES)",
fill = "-log10 adj p-value"
) +
theme_minimal(base_size = 14)
#plot GSEA KEGG, GO, REACTOME --------------------------
# Prepare ranked gene list with Entrez IDs for GSEA
# --------------------------
library(clusterProfiler)
library(org.Mm.eg.db)
library(ReactomePA)
# Convert gene symbols to Entrez IDs
entrez_map <- bitr(names(gene_list), fromType="SYMBOL", toType="ENTREZID", OrgDb=org.Mm.eg.db)
gene_list_df <- merge(entrez_map, data.frame(log2FC = gene_list), by.x="SYMBOL", by.y="row.names")
gene_list_df <- gene_list_df[!duplicated(gene_list_df$ENTREZID), ]
gene_list_named <- gene_list_df$log2FC
names(gene_list_named) <- gene_list_df$ENTREZID
# Sort decreasing for GSEA
gene_list_named <- sort(gene_list_named, decreasing = TRUE)
# --------------------------
# 1) GSEA: KEGG Pathways
# --------------------------
gsea_kegg <- gseKEGG(
geneList = gene_list_named,
organism = "mmu",
minGSSize = 10,
pvalueCutoff = 0.1,
verbose = TRUE
)
# Save KEGG GSEA results
write.csv(as.data.frame(gsea_kegg), "GSEA_KEGG_HOM_vs_WT.csv", row.names = FALSE)
# Top 30 KEGG pathways barplot
library(enrichplot)
library(ggplot2)
# Convert gseaResult to dataframe to see top pathways
gsea_df <- as.data.frame(gsea_kegg)
# Select top 30 pathways by NES or pvalue
top30 <- gsea_df[order(gsea_df$NES, decreasing = TRUE)[1:30], ]
# Ridgeplot (shows enrichment distribution for multiple pathways)
ridgeplot(gsea_kegg, showCategory = 30) +
ggtitle("GSEA: KEGG Top 30 Pathways") +
theme_minimal(base_size = 14)
# Optional: classic GSEA plot for the top pathway
top_pathway <- top30$ID[1]
gseaplot2(gsea_kegg, geneSetID = top_pathway,
title = top30$Description[1], color = "red")
# --------------------------
# 2) GSEA: GO Biological Process (BP)
# --------------------------
gsea_go_bp <- gseGO(
geneList = gene_list_named,
OrgDb = org.Mm.eg.db,
ont = "ALL",
keyType = "ENTREZID",
minGSSize = 10,
maxGSSize = 500,
pvalueCutoff = 0.1,
verbose = TRUE
)
# Save GO BP GSEA results
write.csv(as.data.frame(gsea_go_bp), "GSEA_GO_BP_HOM_vs_WT.csv", row.names = FALSE)
# Top 30 GO BP pathways barplot
barplot(gsea_go_bp, showCategory = 30, title = "GSEA: GO BP Top 30 Pathways")
# --------------------------
# 3) GSEA: Reactome Pathways
# --------------------------
gsea_reactome <- gsePathway(
geneList = gene_list_named,
organism = "mouse",
minGSSize = 10,
pvalueCutoff = 0.1,
verbose = TRUE
)
# Save Reactome GSEA results
write.csv(as.data.frame(gsea_reactome), "GSEA_Reactome_HOM_vs_WT.csv", row.names = FALSE)
# Top 30 Reactome pathways barplot
barplot(gsea_reactome, showCategory = 30, title = "GSEA: Reactome Top 30 Pathways")
# --------------------------
library(ggplot2)
library(dplyr)
# Convert GSEA Reactome results to dataframe
gsea_reactome_df <- as.data.frame(gsea_reactome)
# Select top 30 pathways by NES magnitude
top30_reactome <- gsea_reactome_df %>%
arrange(desc(abs(NES))) %>%
slice(1:30)
# Reorder for plotting (highest NES on top)
top30_reactome$Description <- factor(top30_reactome$Description, levels = rev(top30_reactome$Description))
# Plot barplot: NES on x-axis, pathways on y-axis, fill by -log10(padj)
ggplot(top30_reactome, aes(x = NES, y = Description, fill = -log10(p.adjust))) +
geom_bar(stat = "identity") +
scale_fill_gradient(low = "red", high = "darkred") +
theme_minimal(base_size = 14) +
labs(title = "GSEA: Top 30 Reactome Pathways",
x = "Normalized Enrichment Score (NES)",
y = "",
fill = "-log10(adj.p)")
#ORA Enrichment analysis
### --- DEG-based Over-Representation Analysis (ORA) ---
library(clusterProfiler)
library(org.Mm.eg.db)
library(msigdbr)
library(ReactomePA)
library(enrichplot)
library(ggplot2)
library(dplyr)
# Subset DEGs you already defined
DEGs_df <- as.data.frame(DEGs)
DEGs_df$gene <- rownames(DEGs_df)
# Convert SYMBOL -> ENTREZ
deg_entrez <- bitr(
DEGs_df$gene,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Mm.eg.db
)
# Remove duplicates
deg_entrez <- deg_entrez[!duplicated(deg_entrez$ENTREZID), ]
### --- ORA: GO Biological Process ---
ego_bp <- enrichGO(
gene = deg_entrez$ENTREZID,
OrgDb = org.Mm.eg.db,
keyType = "ENTREZID",
ont = "BP", # Biological Process
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.2,
readable = TRUE # back to SYMBOL
)
# Save results
write.csv(as.data.frame(ego_bp), "ORA_GO_BP_HET_vs_WT.csv", row.names = FALSE)
# Plot top 20
dotplot(ego_bp, showCategory = 20) + ggtitle("ORA: GO Biological Process (DEGs)")
### --- ORA: KEGG Pathways ---
ekegg <- enrichKEGG(
gene = deg_entrez$ENTREZID,
organism = "mmu",
pvalueCutoff = 0.05
)
# Save results
write.csv(as.data.frame(ekegg), "ORA_KEGG_HET_vs_WT.csv", row.names = FALSE)
# Plot top 20
dotplot(ekegg, showCategory = 20) + ggtitle("ORA: KEGG Pathways (DEGs)")
### --- ORA: Reactome Pathways ---
ereact <- enrichPathway(
gene = deg_entrez$ENTREZID,
organism = "mouse",
pvalueCutoff = 0.05,
readable = TRUE
)
# Save results
write.csv(as.data.frame(ereact), "ORA_Reactome_HET_vs_WT.csv", row.names = FALSE)
# Plot top 20
dotplot(ereact, showCategory = 20) + ggtitle("ORA: Reactome Pathways (DEGs)")
### --- ORA: Hallmark Gene Sets ---
hallmark_sets <- msigdbr(species = "Mus musculus", category = "H")
term2gene <- hallmark_sets[, c("gs_name", "entrez_gene")]
term2gene$entrez_gene <- as.character(term2gene$entrez_gene)
ora_hallmark <- enricher(
gene = deg_entrez$ENTREZID,
TERM2GENE = term2gene,
pvalueCutoff = 0.05
)
# Save results
write.csv(as.data.frame(ora_hallmark), "ORA_Hallmark_HET_vs_WT.csv", row.names = FALSE)
# Plot top 20
dotplot(ora_hallmark, showCategory = 20) + ggtitle("ORA: Hallmark Pathways (DEGs)")
### --- DEG-based ORA: Up vs Downregulated DEGs ---
# Split DEGs
up_DEGs <- rownames(DEGs[DEGs$log2FoldChange > 1, ])
down_DEGs <- rownames(DEGs[DEGs$log2FoldChange < -1, ])
# Convert SYMBOL -> ENTREZ for both
up_entrez <- bitr(up_DEGs, fromType="SYMBOL", toType="ENTREZID", OrgDb=org.Mm.eg.db)
up_entrez <- up_entrez[!duplicated(up_entrez$ENTREZID), ]
down_entrez <- bitr(down_DEGs, fromType="SYMBOL", toType="ENTREZID", OrgDb=org.Mm.eg.db)
down_entrez <- down_entrez[!duplicated(down_entrez$ENTREZID), ]
### --- ORA: GO Biological Process ---
ego_up <- enrichGO(gene=up_entrez$ENTREZID, OrgDb=org.Mm.eg.db, keyType="ENTREZID",
ont="BP", pAdjustMethod="BH", pvalueCutoff=0.05, qvalueCutoff=0.2, readable=TRUE)
ego_down <- enrichGO(gene=down_entrez$ENTREZID, OrgDb=org.Mm.eg.db, keyType="ENTREZID",
ont="BP", pAdjustMethod="BH", pvalueCutoff=0.05, qvalueCutoff=0.2, readable=TRUE)
write.csv(as.data.frame(ego_up), "ORA_GO_BP_Upregulated.csv", row.names=FALSE)
write.csv(as.data.frame(ego_down), "ORA_GO_BP_Downregulated.csv", row.names=FALSE)
dotplot(ego_up, showCategory=20) + ggtitle("ORA: GO BP (Upregulated)")
dotplot(ego_down, showCategory=20) + ggtitle("ORA: GO BP (Downregulated)")
### --- ORA: Hallmark Gene Sets ---
hallmark_sets <- msigdbr(species="Mus musculus", category="H")
term2gene <- hallmark_sets[, c("gs_name", "entrez_gene")]
term2gene$entrez_gene <- as.character(term2gene$entrez_gene)
ora_hallmark_up <- enricher(gene=up_entrez$ENTREZID, TERM2GENE=term2gene, pvalueCutoff=0.05)
ora_hallmark_down <- enricher(gene=down_entrez$ENTREZID, TERM2GENE=term2gene, pvalueCutoff=0.05)
write.csv(as.data.frame(ora_hallmark_up), "ORA_Hallmark_Upregulated.csv", row.names=FALSE)
write.csv(as.data.frame(ora_hallmark_down), "ORA_Hallmark_Downregulated.csv", row.names=FALSE)
dotplot(ora_hallmark_up, showCategory=20) + ggtitle("ORA: Hallmark (Upregulated)")
dotplot(ora_hallmark_down, showCategory=20) + ggtitle("ORA: Hallmark (Downregulated)")
### --- ORA: Reactome ---
ereact_up <- enrichPathway(gene=up_entrez$ENTREZID, organism="mouse", pvalueCutoff=0.05, readable=TRUE)
ereact_down <- enrichPathway(gene=down_entrez$ENTREZID, organism="mouse", pvalueCutoff=0.05, readable=TRUE)
write.csv(as.data.frame(ereact_up), "ORA_Reactome_Upregulated.csv", row.names=FALSE)
write.csv(as.data.frame(ereact_down), "ORA_Reactome_Downregulated.csv", row.names=FALSE)
dotplot(ereact_up, showCategory=20) + ggtitle("ORA: Reactome (Upregulated)")
dotplot(ereact_down, showCategory=20) + ggtitle("ORA: Reactome (Downregulated)")
### --- ORA: KEGG ---
ekegg_up <- enrichKEGG(gene=up_entrez$ENTREZID, organism="mmu", pvalueCutoff=0.05)
ekegg_down <- enrichKEGG(gene=down_entrez$ENTREZID, organism="mmu", pvalueCutoff=0.05)
write.csv(as.data.frame(ekegg_up), "ORA_KEGG_Upregulated.csv", row.names=FALSE)
write.csv(as.data.frame(ekegg_down), "ORA_KEGG_Downregulated.csv", row.names=FALSE)
dotplot(ekegg_up, showCategory=20) + ggtitle("ORA: KEGG (Upregulated)")
dotplot(ekegg_down, showCategory=20) + ggtitle("ORA: KEGG (Downregulated)")
#Heatmap
ifna_genes <- c(
"Serpine2","Sntb1","Mmp14","Tnc","Fstl1","Vcan","Il15","Scg2","Ecm1","Vegfa",
"Cxcl12","Dst","Tgfbr3","Ptx3","Pcolce","Spock1","Adam12","Tagln","Loxl1","Cdh6",
"Pvr","Gadd45b","Gadd45a","Rhob","Rgs4","Fzd8","Tfpi2","Vegfc","Oxtr","Dab2",
"Lum","Col16a1","Tnfrsf11b","Fap","Matn2","Snai2","Cxcl5","Calu","Capg","Emp3",
"Nnmt","Gpc1","Tnfaip3","Nid2","Mcm7","Slit3","Slit2","Matn3","Fmod","Edil3",
"Dkk1","Qsox1","Copa","Cxcl15","Crlf1","Grem1","Fbln5","Sgcb","Sgcg","Sgcd",
"Plod2","Plod3","Foxc2","Sdc1","Sdc4","Dpysl3","Tnfrsf12a","Pdlim4","Mfap5","Col5a3",
"P3h1","Cadm1","Fstl3","Efemp2","Cap2","Gpx7","Cthrc1","Basp1","Glipr1","Lrrc15",
"Pcolce2","Colgalt1","Postn","Htra1","Pmepa1","Myl9","Slc6a8","Magee1","Wipf1","Fermt2",
"Abi3bp","Ntm","Tpm4","Ecm2","Anpep","Gm21451","Acta2","Aplp1","Areg","Bdnf",
"Bgn","Bmp1","Cald1","Serpinh1","Cd44","Cdh2","Col11a1","Col12a1","Col3a1","Col4a1",
"Col4a2","Col5a1","Col5a2","Col6a2","Col6a3","Col7a1","Col8a2","Col1a1","Col1a2","Comp",
"Ccn1","Sfrp4","Sfrp1","Mylk","Dcn","Eln","Eno2","Fas","Fbln1","Fbln2",
"Fbn1","Fbn2","Fgf2","Ccn2","Flna","Fn1","Fuca1","Gas1","Gja1","Id2",
"Igfbp2","Igfbp3","Igfbp4","Il6","Inhba","Itga2","Itga5","Itgav","Itgb1","Itgb3",
"Itgb5","Jun","Lgals1","Lox","Lrp1","Mest","Mgp","Mmp2","Mmp3","Msx1","Notch2",
"Pdgfrb","Pfn2","Serpine1","Plaur","Pmp22","Prrx1","Ppib","Pthlh","Sat1","Sparc",
"Spp1","Tgfb1","Tgm2","Thbs1","Thbs2","Thy1","Timp1","Timp3","Tpm1","Tpm2",
"Vcam1","Vim","Wnt5a","Cdh11","Nt5e","Gem","Lama1","Plod1","Lama3","Lama2",
"Lamc2","Lamc1","Tgfbi"
)
# Get normalized counts from DESeq2
norm_counts <- counts(dds, normalized=TRUE)
# Subset for IFN-α response genes (keep only genes present in your dataset)
ifna_counts <- norm_counts[rownames(norm_counts) %in% ifna_genes, ]
# Optionally, z-score normalize each gene for heatmap visualization
ifna_counts_z <- t(scale(t(ifna_counts)))
# Create annotation for columns
ann_col <- data.frame(
Condition = metadata$condition
)
rownames(ann_col) <- metadata$sample_id
library(pheatmap)
# Make sure columns are in your desired order
desired_order <- c("sample.WT_1", "sample.WT_2", "sample.WT_3",
"sample.neg.neg_1", "sample.neg.neg_2", "sample.neg.neg_3")
ifna_counts_z <- ifna_counts_z[, desired_order]
# Make sure annotation matches
ann_col <- ann_col[desired_order, , drop = FALSE]
# Heatmap
pheatmap(ifna_counts_z,
annotation_col = ann_col,
show_rownames = TRUE,
show_colnames = TRUE,
cluster_rows = TRUE,
cluster_cols = FALSE, # keep the column order fixed
scale = "row",
fontsize_row = 8,
main = "Interferon Alpha Response Genes")