Similarity Network Fusion (SNF)

Introduction

This file provides code for performing SNF on all available combinations of modalities. Adding further modalities to the modalities list causes SNF to be performed on every possible combination of them, and each resulting fused graph is written out using the naming convention expected for downstream inclusion in the MOGDx pipeline.

Specify the dataset, project, trait, index column, and modalities

# Dataset and project identifiers
dataset <- 'TCGA'
project <- 'BRCA'

# Metadata column names: the trait of interest and the per-patient index
trait <- 'paper_BRCA_Subtype_PAM50'
index_col <- 'patient'

# Modalities to combine
modalities <- c('mRNA', 'miRNA')

Generate the list of all combinations of modalities

# Build every combination (without repetition) of two or more modalities.
# Each element of mod_list is a character vector naming one combination,
# ordered by combination size and then in combn() order.
mod_list <- list()

# seq_len(...)[-1] yields 2, 3, ..., length(modalities) and is empty when
# fewer than two modalities are supplied. `2:length(modalities)` would count
# down to c(2, 1) in that case and make combn() fail.
for (comb_length in seq_len(length(modalities))[-1]) {
  # simplify = FALSE returns one character vector per combination, so the
  # results can be appended directly without indexing matrix columns.
  mod_list <- c(mod_list, combn(modalities, comb_length, simplify = FALSE))
}

Import adjacency matrices and metadata for each combination of modalities and perform SNF

# SNF parameters. They do not depend on the modality combination, so they are
# set once rather than on every pass through the loop.
K <- 15       # number of neighbors, usually (10~30)
n_iter <- 10  # number of iterations, usually (10~20); deliberately not
              # named `T`, which would mask R's built-in alias for TRUE

# For every combination of modalities: gather the patient metadata, load the
# per-modality patient networks, fuse them with SNF and write the fused graph
# to '<modalities joined by _>_graph.csv' in the project output directory.
for (sub_mod_list in mod_list) {
  # Metadata columns kept for each modality. The index column comes from
  # index_col (not a hard-coded 'patient') so it always matches the column
  # used for the row names below.
  meta_cols <- c(index_col, 'race', 'gender', trait)

  # Read the metadata of every modality in the combination and stack it in
  # one step instead of growing a data frame with rbind() inside a loop.
  meta_per_mod <- lapply(sub_mod_list, function(mod) {
    print(mod)
    meta_path <- paste0('./../data/', dataset, '/raw/', project,
                        '/output/datMeta_', mod, '.csv')
    read.csv(meta_path, row.names = 1)[, meta_cols]
  })
  datMeta <- do.call(rbind, meta_per_mod)

  # Patients present in several modalities contribute identical rows; keep
  # one copy. Rows are compared on all kept columns, so a patient whose
  # metadata differs between modalities survives twice and the row-name
  # assignment below then fails on the duplicated identifier.
  datMeta <- datMeta[!duplicated(datMeta), ]
  rownames(datMeta) <- datMeta[[index_col]]
  print(dim(datMeta))

  # Load every network file whose name starts with '<modality>_' for a
  # modality in this combination, and collect the union of patient ids.
  all_idx <- c()
  g_list <- list()
  for (net in list.files('./../Network/SNF/')) {
    if (unlist(strsplit(net, '_'))[1] %in% sub_mod_list) {
      print(net)
      net_graph <- read.csv(paste0('./../Network/SNF/', net), row.names = 1)

      # Vertex table: one row per distinct (patient id, class) pair found at
      # either end of an edge.
      patients <- unique(data.frame(
        id = c(net_graph$from_name, net_graph$to_name),
        class = c(net_graph$from_class, net_graph$to_class)
      ))
      relation <- data.frame(from = net_graph$from_name,
                             to = net_graph$to_name)

      # Undirected graph with parallel edges and self-loops removed.
      g_net <- graph_from_data_frame(relation, directed = FALSE,
                                     vertices = patients)
      g_net <- simplify(g_net, remove.multiple = TRUE, remove.loops = TRUE)

      g_list[[net]] <- g_net
      all_idx <- union(all_idx, V(g_net)$name)
    }
  }

  # Extract the adjacency (similarity/affinity) matrix from each graph.
  # Patients missing from a modality are added as isolated vertices so that
  # every matrix has the same rows and columns, in the order of all_idx.
  adjacency_graphs <- list()
  for (graph_name in names(g_list)) {
    missing_idx <- setdiff(all_idx, V(g_list[[graph_name]])$name)
    g_list[[graph_name]] <- add_vertices(g_list[[graph_name]],
                                         length(missing_idx),
                                         name = missing_idx)

    adjacency_graphs[[graph_name]] <-
      as.matrix(as_adjacency_matrix(g_list[[graph_name]]))[all_idx, all_idx]
  }

  # Original author note: change this to similarity matrix
  # (the inputs here are adjacency matrices extracted from the graphs).
  W <- SNF(adjacency_graphs, K, n_iter)
  # Subtract 0.5 from every diagonal entry; presumably this cancels the
  # self-similarity value SNF leaves on the diagonal - TODO confirm.
  W <- W - diag(0.5, nrow(W))

  g <- snf.to.graph(W, datMeta, trait, all_idx, sub_mod_list)

  print(length(V(g)))
  out_path <- paste0('./../data/', dataset, '/raw/', project, '/output/',
                     paste0(sub_mod_list, collapse = '_'), '_graph.csv')
  write.csv(as_long_data_frame(g), file = out_path)
}

## [1] "mRNA"
## [1] "miRNA"
## [1] 1080    4
## [1] "miRNA_graph.csv"
## [1] "mRNA_graph.csv"

## [1] 1080