# MS Proteomics: STRING network analysis in R (mouse)
#
# [photo embed]
#
# Wed May 06 2026 20:12:24 GMT+0000 (Coordinated Universal Time)
#
# Saved by @1234_5

# Run the whole analysis from the shared-drive project folder, so that the
# bare file names used for reading and writing in this script resolve there.
setwd(
  dir = "//files.wustl.edu/Shares/DOM/ONC/Hirbe_Lab/Diana/IP-MS/HEK_JW23 IP_3-4-2026 - Analysis2"
)

# Load libraries
library(STRINGdb)
library(dplyr)
library(readr)
library(igraph)

# -------------------------------
# 1. Load data
# -------------------------------
# Read the filtered prey table from the working directory.
# (The file name suggests BFDR / SAINT / spectral-count cutoffs were already
# applied upstream -- TODO confirm; no filtering is done here.)
JW_WT <- read_csv(
  "JW_WT_filtered_BFDR0.05_Saint0.8_Spec2.csv",
  show_col_types = FALSE
)

# Fail early with a clear message if the expected column is absent; `$` on a
# tibble would otherwise hand back NULL and the error would surface much later.
if (!"PreyGene" %in% names(JW_WT)) {
  stop(
    "Input table has no 'PreyGene' column; found: ",
    paste(names(JW_WT), collapse = ", "),
    call. = FALSE
  )
}

# Extract unique gene symbols (first-seen order is preserved by unique()).
genes <- unique(as.character(JW_WT[["PreyGene"]]))

# unique() keeps NA as a value, and blank cells come through as "" -- neither
# is a gene symbol, so drop them before they are sent to the STRING mapper.
genes <- genes[!is.na(genes) & nzchar(genes)]

if (length(genes) == 0) {
  stop("No usable gene symbols found in 'PreyGene'.", call. = FALSE)
}

# One-column data frame; the column name "gene" is what the mapping step uses.
gene_df <- data.frame(gene = genes)

# -------------------------------
# 2. Initialize STRING (MATCH OLD RUN CONDITIONS)
# -------------------------------
# Build the STRINGdb handle used by every later step. All three settings are
# pinned so the results line up with the earlier run; do not change them
# casually.
# NOTE(review): the project folder name mentions "HEK" while the taxon below
# is mouse -- confirm that mapping against the mouse database is intended.
string_db <- STRINGdb$new(
  species = 10090,         # NCBI taxon id: mouse
  version = "11.5",        # STRING release
  score_threshold = 400    # IMPORTANT: same cutoff as the original regime
)

# -------------------------------
# 3. Map genes to STRING IDs
# -------------------------------
# Translate the symbols in gene_df$gene to STRING identifiers. Rows without a
# match are discarded (removeUnmappedRows) and takeFirst is switched on, as in
# the original run.
mapped <- string_db$map(
  gene_df,
  "gene",
  takeFirst = TRUE,
  removeUnmappedRows = TRUE
)

# STRING identifiers, one per mapped row (may repeat if symbols collapse onto
# the same protein).
hits <- mapped[["STRING_id"]]

# Sanity check: number of distinct proteins that were mapped.
cat("Mapped proteins:", length(unique(hits)), "\n")

# -------------------------------
# 4. Get interactions (NO extra filtering)
# -------------------------------
# Pull the interactions among the mapped proteins exactly as STRINGdb returns
# them; the table is deliberately left untouched afterwards.
network <- string_db$get_interactions(hits)

# Report the number of interaction rows retrieved.
cat("Total interactions:", dim(network)[1], "\n")

# -------------------------------
# 5. Save outputs
# -------------------------------
# Both tables are written to the working directory without row names.

# Edge list: the interaction table.
write.csv(x = network, file = "JW_WT_STRING_network_REPRODUCED.csv", row.names = FALSE)

# Node list: input genes together with their STRING identifiers.
write.csv(x = mapped, file = "JW_WT_STRING_nodes_REPRODUCED.csv", row.names = FALSE)

# -------------------------------
# 6. Plot network (basic STRING plot)
# -------------------------------
# Draw the default STRING network picture for the mapped proteins on the
# active graphics device.
# NOTE(review): the STRING image service may refuse very large ID lists --
# confirm the hit list is within its limit if this call errors.
string_db$plot_network(hits)

# -------------------------------
# 7. Final sanity check
# -------------------------------
# Re-print the count of distinct mapped proteins as a closing check.
cat("Unique proteins:", length(unique(mapped[["STRING_id"]])), "\n")

# (end of copied snippet)