MOGDx Network Generation

This is the most computationally expensive architecture, but it requires the least amount of prior knowledge.

The processing step occurs within each cross-validation split, so that similarity features are identified only from the data in that split.

import pandas as pd
import numpy as np
import os
import sys 

sys.path.insert(0 , './../')
from MAIN.utils import *
from MAIN.train import *
import MAIN.preprocess_functions
from MAIN.GNN_MME import GCN_MME , GSage_MME , GAT_MME

import torch
import torch.nn.functional as F
import dgl
from dgl.dataloading import MultiLayerFullNeighborSampler
import matplotlib.pyplot as plt
from palettable import wesanderson
from sklearn.model_selection import StratifiedKFold , train_test_split
import networkx as nx
from datetime import datetime
import joblib
import warnings
import gc
import copy

# Suppress all warnings from this point on.
warnings.simplefilter("ignore")

# Marker printed once every import above has completed.
print("Finished Library Import \n")

Finished Library Import

# --- Run configuration --------------------------------------------------------
# Directory handed to data_parsing() as the location of the input data.
data_input = './../../data/TCGA/BRCA/raw/'
# Graph file name; not referenced in the visible part of this notebook.
snf_net = 'RPPA_mRNA_graph.graphml'
# Name of the sample-identifier column (used when re-indexing `meta`).
index_col = 'patient'
# Name of the label column to predict.
target = 'paper_BRCA_Subtype_PAM50'

# Use the GPU when one is available, otherwise fall back to the CPU.
# (The previous expression `'cpu' if False else 'cuda'` always selected CUDA.)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using %s device" % device)
# The GPU memory report is only meaningful when running on CUDA.
if device.type == 'cuda':
    get_gpu_memory()


# Load the RPPA and mRNA modalities together with the target labels
# (PROCESSED=False is forwarded unchanged to data_parsing).
modalities = ['RPPA' , 'mRNA']
datModalities , meta = data_parsing(data_input , modalities , target , index_col , PROCESSED=False)

# Put the labels in sorted sample-identifier order.
sorted_ids = sorted(meta.index)
meta = meta.loc[sorted_ids]

# Two-fold stratified cross validation. shuffle=True without a random_state
# means the folds differ from run to run.
skf = StratifiedKFold(n_splits=2 , shuffle=True)

print(skf)

# Per-fold bookkeeping: selected features / fitted selectors and test results.
data_to_save = {}
output_metrics = []
test_logits = []
test_labels = []

# Similarity metric used to build each modality's patient network. Move a
# modality name into another list to change its metric; the first matching
# entry (in the order written here) is used.
similarity_methods = {
    'bicorr': [],
    'pearson': ['mRNA', 'RPPA', 'miRNA', 'CNV', 'DNAm'],
    'euclidean': [],
}

for i, (train_index, test_index) in enumerate(skf.split(meta.index, meta)):
    h = {}                  # modality -> feature matrix for this fold
    MME_input_shapes = []   # number of input features per modality, in modality order

    # Hold out 20% of the training fold as a validation set, stratified on the labels.
    train_index, val_index = train_test_split(
        train_index, train_size=0.8, test_size=None, stratify=meta.iloc[train_index]
    )

    # Translate positional indices into sample identifiers.
    train_idx = list(meta.iloc[train_index].index)
    val_idx = list(meta.iloc[val_index].index)
    test_idx = list(meta.iloc[test_index].index)

    all_graphs = {}
    data_to_save[i] = {}
    for mod in datModalities:
        count_mtx = datModalities[mod]

        # Align labels and measurements on the same, sorted, set of samples.
        datMeta = meta.loc[count_mtx.index]
        datMeta = datMeta.loc[sorted(datMeta.index)]
        count_mtx = count_mtx.loc[datMeta.index]

        # Training / validation samples that are present in this modality.
        train_index_mod = list(set(train_idx) & set(datMeta.index))
        val_index_mod = list(set(val_idx) & set(datMeta.index))

        # mRNA and miRNA go through the DESeq pipeline; every other modality
        # uses logistic regression for feature selection.
        pipeline = 'DESeq' if mod in ['mRNA', 'miRNA'] else 'LogReg'
        # Only mRNA is flagged as gene expression.
        gene_exp = mod == 'mRNA'

        if pipeline == 'DESeq':
            print('Performing Differential Gene Expression for Feature Selection')

            # Pass the per-modality flag computed above; it was previously
            # computed but ignored in favour of a hard-coded True.
            count_mtx, datMeta = MAIN.preprocess_functions.data_preprocess(
                count_mtx.astype(int).astype(np.float32), datMeta, gene_exp=gene_exp
            )

            # Preprocessing may drop samples, so re-intersect the training ids.
            train_index_mod = list(set(train_index_mod) & set(count_mtx.index))
            dds, vsd, top_genes = MAIN.preprocess_functions.DESEQ(
                count_mtx, datMeta, target, n_genes=500, train_index=train_index_mod
            )

            data_to_save[i][f'{mod}_extracted_feats'] = list(set(top_genes))
            datExpr = pd.DataFrame(data=vsd, index=count_mtx.index, columns=count_mtx.columns)

        elif pipeline == 'LogReg':
            print('Performing Logistic Regression for Feature Selection')

            n_genes = count_mtx.shape[1]
            datExpr = count_mtx.loc[:, (count_mtx != 0).any(axis=0)]  # remove any genes with all 0 expression

            train_index_mod = list(set(train_index_mod) & set(datExpr.index))
            val_index_mod = list(set(val_index_mod) & set(datExpr.index))

            extracted_feats, model = MAIN.preprocess_functions.elastic_net(
                datExpr, datMeta,
                train_index=train_index_mod,
                val_index=val_index_mod,
                l1_ratio=1, num_epochs=1000,
            )

            data_to_save[i][f'{mod}_extracted_feats'] = list(set(extracted_feats))
            data_to_save[i][f'{mod}_model'] = {'model': model}

            del model

        # Look up the similarity metric for this modality. Fail loudly for an
        # unconfigured modality instead of hitting an unbound name or silently
        # reusing the metric chosen for the previous modality.
        method = next(
            (name for name, mods in similarity_methods.items() if mod in mods), None
        )
        if method is None:
            raise ValueError(f'No similarity method configured for modality {mod!r}')

        datExpr = datExpr.loc[datMeta.index]

        # Restrict the graph to the selected features when there are any,
        # otherwise let knn_graph_generation use extracted_feats=None.
        selected_feats = data_to_save[i][f'{mod}_extracted_feats']
        G = MAIN.preprocess_functions.knn_graph_generation(
            datExpr, datMeta, method=method,
            extracted_feats=selected_feats if len(selected_feats) > 0 else None,
            node_size=150, knn=15,
        )

        all_graphs[mod] = G

        h[mod] = datExpr
        MME_input_shapes.append(datExpr.shape[1])

    # Union of the samples present in at least one modality.
    all_idx = list(set().union(*[list(h[mod].index) for mod in h]))
    train_index = indices_removal_adjust(train_index, meta.reset_index()[index_col], all_idx)
    val_index = indices_removal_adjust(val_index, meta.reset_index()[index_col], all_idx)
    test_index = indices_removal_adjust(test_index, meta.reset_index()[index_col], all_idx)

    # The validation samples were only passed to the feature selection step
    # above; the GNN is trained on train + validation together.
    train_index = np.concatenate((train_index, val_index))

    gc.collect()
    torch.cuda.empty_cache()
    print('Clearing gpu memory')
    if device.type == 'cuda':
        get_gpu_memory()

    if len(all_graphs) > 1:
        # Expand every modality graph to the full sample set (zeros for
        # samples missing from that modality) and fuse them with SNF.
        full_graphs = []

        for mod, graph in all_graphs.items():
            full_graph = pd.DataFrame(
                data=np.zeros((len(all_idx), len(all_idx))), index=all_idx, columns=all_idx
            )
            graph = nx.to_pandas_adjacency(graph)
            full_graph.loc[graph.index, graph.index] = graph.values

            full_graphs.append(full_graph)

        adj_snf = MAIN.preprocess_functions.SNF(full_graphs)

        # One colour per class label, taken from the FantasticFox2_5 palette.
        node_colour = meta.loc[adj_snf.index].astype('category').cat.set_categories(
            wesanderson.FantasticFox2_5.hex_colors, rename=True
        )

        g = MAIN.preprocess_functions.plot_knn_network(
            adj_snf, 15, meta.loc[adj_snf.index],
            node_colours=node_colour, node_size=150,
        )
    else:
        g = list(all_graphs.values())[0]

    # One-hot labels, in sorted sample-identifier order.
    label = F.one_hot(
        torch.Tensor(list(meta.loc[sorted(all_idx)].astype('category').cat.codes)).to(torch.int64)
    )

    # Merge the per-modality feature matrices into one table and sort its
    # rows by sample identifier.
    h = reduce(merge_dfs, list(h.values()))
    h = h.loc[sorted(h.index)]

    g = dgl.from_networkx(g, node_attrs=['idx', 'label'])

    g.ndata['feat'] = torch.Tensor(h.to_numpy())

    g.ndata['label'] = label

    # NOTE(review): positional arguments look like per-modality input sizes,
    # encoder sizes, shared latent size, GNN hidden sizes and number of
    # classes - confirm against GCN_MME's signature.
    model = GCN_MME(MME_input_shapes, [16, 16], 64, [32], len(meta.unique())).to(device)

    print(model)
    print(g)

    g = g.to(device)

    # NOTE(review): 2000 / 1e-3 / 100 are presumably max epochs, learning rate
    # and early-stopping patience - confirm against train()'s signature.
    loss_plot = train(g, train_index, device, model, label, 2000, 1e-3, 100)
    plt.title(f'Loss for split {i}')
    plt.show()
    plt.clf()

    sampler = NeighborSampler(
        [15] * len(model.gnnlayers),  # fanout for each layer
        prefetch_node_feats=['feat'],
        prefetch_labels=['label'],
    )
    test_dataloader = DataLoader(
        g,
        torch.Tensor(test_index).to(torch.int64).to(device),
        sampler,
        device=device,
        batch_size=1024,
        shuffle=True,
        drop_last=False,
        num_workers=0,
        use_uva=False,
    )

    test_output_metrics = evaluate(model, g, test_dataloader)

    print(
        "Fold : {:01d} | Test Accuracy = {:.4f} | F1 = {:.4f} ".format(
            i + 1, test_output_metrics[1], test_output_metrics[2])
    )

    # The last two entries of the metrics are the test logits and labels.
    test_logits.extend(test_output_metrics[-2])
    test_labels.extend(test_output_metrics[-1])

    output_metrics.append(test_output_metrics)
    # Keep the model from the fold with the highest test accuracy so far.
    if i == 0 or output_metrics[best_idx][1] < test_output_metrics[1]:
        best_model = model
        best_idx = i

    if device.type == 'cuda':
        get_gpu_memory()
    # Drops only this name; `best_model` may still reference the same object.
    del model
    gc.collect()
    torch.cuda.empty_cache()
    print('Clearing gpu memory')
    if device.type == 'cuda':
        get_gpu_memory()

# Combine the per-sample test logits and labels from all folds.
test_logits = torch.stack(test_logits)
test_labels = torch.stack(test_labels)

Using cuda device
Total = 42.4Gb 	 Reserved = 0.0Gb 	 Allocated = 0.0Gb
StratifiedKFold(n_splits=2, random_state=None, shuffle=True)
Performing Logistic Regression for Feature Selection

Loss : 0.6042: 100%|██████████| 1000/1000 [00:02<00:00, 494.66epoch/s]

Model score : 0.877

../../_images/410ed68fc4e085b52e7797af63e257822eda0b7600bef1c6d62aa8b325b6f47c.png

Performing Differential Gene Expression for Feature Selection
Keeping 29995 genes
Removed 30665 genes
Keeping 1047 Samples
Removed 36 Samples

Fitting size factors...

Using None as control genes, passed at DeseqDataSet initialization

... done in 1.23 seconds.

Fitting dispersions...
... done in 10.47 seconds.

Fitting dispersion trend curve...
... done in 0.76 seconds.

Fitting MAP dispersions...
... done in 10.90 seconds.

Fitting LFCs...
... done in 7.12 seconds.

Calculating cook's distance...
... done in 1.82 seconds.

Replacing 2948 outlier genes.

Fitting dispersions...
... done in 1.11 seconds.

Fitting MAP dispersions...
... done in 1.02 seconds.

Fitting LFCs...
... done in 1.02 seconds.

Performing contrastive analysis for LumA vs. Her2

Running Wald tests...
... done in 2.55 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs Her2
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.045843  0.109944 -0.416972   
ENSG00000000005.6     72.999641        2.364056  0.293465  8.055675   
ENSG00000000419.13  2386.496112       -0.604066  0.062602 -9.649254   
ENSG00000000457.14  1596.783158       -0.062327  0.063247 -0.985452   
ENSG00000000460.17   715.140714       -0.435593  0.080773 -5.392793   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026       -0.955682  0.240269 -3.977559   
ENSG00000288663.1     29.867524        0.627702  0.099567  6.304313   
ENSG00000288670.1    422.158728       -0.075705  0.079346 -0.954105   
ENSG00000288674.1      8.197550        0.337858  0.120301  2.808436   
ENSG00000288675.1     32.370671       -0.912699  0.116411 -7.840287   

                          pvalue          padj  
ENSG00000000003.15  6.766989e-01  7.320512e-01  
ENSG00000000005.6   7.904166e-16  9.221527e-15  
ENSG00000000419.13  4.951465e-22  1.092053e-20  
ENSG00000000457.14  3.244019e-01  3.944238e-01  
ENSG00000000460.17  6.937103e-08  3.022199e-07  
...                          ...           ...  
ENSG00000288658.1   6.962648e-05  1.878268e-04  
ENSG00000288663.1   2.894748e-10  1.756229e-09  
ENSG00000288670.1   3.400305e-01  4.104478e-01  
ENSG00000288674.1   4.978277e-03  9.501362e-03  
ENSG00000288675.1   4.495165e-15  4.869356e-14  

[29995 rows x 6 columns]
Performing contrastive analysis for LumA vs. LumB

Running Wald tests...
... done in 2.71 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs LumB
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130        0.510947  0.075755   6.744707   
ENSG00000000005.6     72.999641        0.680790  0.200628   3.393294   
ENSG00000000419.13  2386.496112       -0.496193  0.043131 -11.504238   
ENSG00000000457.14  1596.783158        0.070789  0.043570   1.624718   
ENSG00000000460.17   715.140714       -0.607113  0.055622 -10.914917   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -0.808325  0.165651  -4.879702   
ENSG00000288663.1     29.867524       -0.053662  0.066356  -0.808701   
ENSG00000288670.1    422.158728       -0.228263  0.054607  -4.180137   
ENSG00000288674.1      8.197550        0.269729  0.080393   3.355135   
ENSG00000288675.1     32.370671        0.324366  0.081632   3.973538   

                          pvalue          padj  
ENSG00000000003.15  1.533361e-11  1.663405e-10  
ENSG00000000005.6   6.905757e-04  1.840247e-03  
ENSG00000000419.13  1.255933e-30  1.034937e-28  
ENSG00000000457.14  1.042227e-01  1.570698e-01  
ENSG00000000460.17  9.781815e-28  6.378381e-26  
...                          ...           ...  
ENSG00000288658.1   1.062460e-06  5.130976e-06  
ENSG00000288663.1   4.186870e-01  5.071215e-01  
ENSG00000288670.1   2.913337e-05  1.041544e-04  
ENSG00000288674.1   7.932621e-04  2.084624e-03  
ENSG00000288675.1   7.081273e-05  2.341559e-04  

[29995 rows x 6 columns]
Performing contrastive analysis for LumA vs. Normal

Running Wald tests...
... done in 2.58 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs Normal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.149391  0.151267 -0.987602   
ENSG00000000005.6     72.999641       -0.070272  0.400423 -0.175495   
ENSG00000000419.13  2386.496112        0.052635  0.086204  0.610591   
ENSG00000000457.14  1596.783158        0.460586  0.087118  5.286911   
ENSG00000000460.17   715.140714        0.442141  0.111457  3.966908   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026       -0.735339  0.331180 -2.220363   
ENSG00000288663.1     29.867524        0.106228  0.134479  0.789924   
ENSG00000288670.1    422.158728        0.475525  0.109506  4.342477   
ENSG00000288674.1      8.197550       -0.105403  0.159991 -0.658810   
ENSG00000288675.1     32.370671       -0.340401  0.161826 -2.103501   

                          pvalue      padj  
ENSG00000000003.15  3.233477e-01  0.443638  
ENSG00000000005.6   8.606909e-01  0.906762  
ENSG00000000419.13  5.414706e-01  0.651681  
ENSG00000000457.14  1.243991e-07  0.000002  
ENSG00000000460.17  7.281114e-05  0.000401  
...                          ...       ...  
ENSG00000288658.1   2.639415e-02  0.059055  
ENSG00000288663.1   4.295720e-01  0.548929  
ENSG00000288670.1   1.408855e-05  0.000097  
ENSG00000288674.1   5.100176e-01  0.624223  
ENSG00000288675.1   3.542197e-02  0.075375  

[29995 rows x 6 columns]
Performing contrastive analysis for LumA vs. Basal

Running Wald tests...
... done in 3.07 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs Basal
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130       -0.445231  0.080360  -5.540469   
ENSG00000000005.6     72.999641        0.121811  0.212744   0.572570   
ENSG00000000419.13  2386.496112       -0.502644  0.045765 -10.983180   
ENSG00000000457.14  1596.783158        0.450413  0.046261   9.736320   
ENSG00000000460.17   715.140714       -1.016645  0.058992 -17.233544   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -1.948089  0.174996 -11.132162   
ENSG00000288663.1     29.867524        0.589213  0.072151   8.166402   
ENSG00000288670.1    422.158728        0.042231  0.058005   0.728061   
ENSG00000288674.1      8.197550        0.081888  0.085157   0.961606   
ENSG00000288675.1     32.370671       -0.328268  0.085826  -3.824821   

                          pvalue          padj  
ENSG00000000003.15  3.016618e-08  5.818872e-08  
ENSG00000000005.6   5.669360e-01  6.009346e-01  
ENSG00000000419.13  4.604242e-28  2.082392e-27  
ENSG00000000457.14  2.110588e-22  7.768694e-22  
ENSG00000000460.17  1.487504e-66  1.983009e-65  
...                          ...           ...  
ENSG00000288658.1   8.748769e-29  4.064106e-28  
ENSG00000288663.1   3.177226e-16  9.097937e-16  
ENSG00000288670.1   4.665759e-01  5.029268e-01  
ENSG00000288674.1   3.362475e-01  3.704861e-01  
ENSG00000288675.1   1.308669e-04  2.016518e-04  

[29995 rows x 6 columns]
Performing contrastive analysis for Her2 vs. LumB

Running Wald tests...
... done in 2.57 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Her2 vs LumB
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130        0.556791  0.121299  4.590230   
ENSG00000000005.6     72.999641       -1.683266  0.323369 -5.205406   
ENSG00000000419.13  2386.496112        0.107873  0.069059  1.562059   
ENSG00000000457.14  1596.783158        0.133116  0.069775  1.907792   
ENSG00000000460.17   715.140714       -0.171520  0.089082 -1.925423   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026        0.147357  0.264721  0.556650   
ENSG00000288663.1     29.867524       -0.681364  0.109132 -6.243507   
ENSG00000288670.1    422.158728       -0.152558  0.087504 -1.743435   
ENSG00000288674.1      8.197550       -0.068130  0.132312 -0.514919   
ENSG00000288675.1     32.370671        1.237065  0.128962  9.592483   

                          pvalue          padj  
ENSG00000000003.15  4.427577e-06  2.483732e-05  
ENSG00000000005.6   1.935725e-07  1.438248e-06  
ENSG00000000419.13  1.182741e-01  1.871212e-01  
ENSG00000000457.14  5.641806e-02  1.010847e-01  
ENSG00000000460.17  5.417652e-02  9.782234e-02  
...                          ...           ...  
ENSG00000288658.1   5.777668e-01  6.693490e-01  
ENSG00000288663.1   4.278680e-10  5.115145e-09  
ENSG00000288670.1   8.125771e-02  1.371595e-01  
ENSG00000288674.1   6.066099e-01  6.935602e-01  
ENSG00000288675.1   8.599064e-22  4.931720e-20  

[29995 rows x 6 columns]
Performing contrastive analysis for Her2 vs. Normal

Running Wald tests...
... done in 2.29 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Her2 vs Normal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.103548  0.178483 -0.580154   
ENSG00000000005.6     72.999641       -2.434328  0.473977 -5.135963   
ENSG00000000419.13  2386.496112        0.656701  0.101685  6.458198   
ENSG00000000457.14  1596.783158        0.522913  0.102761  5.088644   
ENSG00000000460.17   715.140714        0.877735  0.131394  6.680153   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026        0.220343  0.390278  0.564578   
ENSG00000288663.1     29.867524       -0.521474  0.159972 -3.259780   
ENSG00000288670.1    422.158728        0.551230  0.129099  4.269815   
ENSG00000288674.1      8.197550       -0.443262  0.191417 -2.315687   
ENSG00000288675.1     32.370671        0.572297  0.190145  3.009796   

                          pvalue          padj  
ENSG00000000003.15  5.618107e-01  6.612585e-01  
ENSG00000000005.6   2.807025e-07  2.722169e-06  
ENSG00000000419.13  1.059569e-10  2.622258e-09  
ENSG00000000457.14  3.606320e-07  3.381419e-06  
ENSG00000000460.17  2.386925e-11  7.074684e-10  
...                          ...           ...  
ENSG00000288658.1   5.723605e-01  6.706232e-01  
ENSG00000288663.1   1.114985e-03  3.756484e-03  
ENSG00000288670.1   1.956349e-05  1.138767e-04  
ENSG00000288674.1   2.057536e-02  4.536925e-02  
ENSG00000288675.1   2.614234e-03  7.828087e-03  

[29995 rows x 6 columns]
Performing contrastive analysis for Her2 vs. Basal

Running Wald tests...
... done in 2.21 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Her2 vs Basal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.399387  0.124227 -3.214986   
ENSG00000000005.6     72.999641       -2.242245  0.331022 -6.773697   
ENSG00000000419.13  2386.496112        0.101422  0.070733  1.433868   
ENSG00000000457.14  1596.783158        0.512741  0.071486  7.172564   
ENSG00000000460.17   715.140714       -0.581052  0.091224 -6.369509   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026       -0.992407  0.270668 -3.666515   
ENSG00000288663.1     29.867524       -0.038489  0.112749 -0.341369   
ENSG00000288670.1    422.158728        0.117936  0.089665  1.315300   
ENSG00000288674.1      8.197550       -0.255971  0.135260 -1.892439   
ENSG00000288675.1     32.370671        0.584431  0.131657  4.439052   

                          pvalue          padj  
ENSG00000000003.15  1.304511e-03  2.998375e-03  
ENSG00000000005.6   1.255320e-11  8.772910e-11  
ENSG00000000419.13  1.516099e-01  2.123529e-01  
ENSG00000000457.14  7.360624e-13  5.828456e-12  
ENSG00000000460.17  1.896341e-10  1.165828e-09  
...                          ...           ...  
ENSG00000288658.1   2.458782e-04  6.493324e-04  
ENSG00000288663.1   7.328255e-01  7.904312e-01  
ENSG00000288670.1   1.884089e-01  2.559486e-01  
ENSG00000288674.1   5.843248e-02  9.229501e-02  
ENSG00000288675.1   9.035578e-06  3.005347e-05  

[29995 rows x 6 columns]
Performing contrastive analysis for LumB vs. Normal

Running Wald tests...
... done in 2.46 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumB vs Normal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.660339  0.159710 -4.134599   
ENSG00000000005.6     72.999641       -0.751062  0.422829 -1.776279   
ENSG00000000419.13  2386.496112        0.548828  0.091000  6.031049   
ENSG00000000457.14  1596.783158        0.389797  0.091967  4.238455   
ENSG00000000460.17   715.140714        1.049255  0.117618  8.920861   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026        0.072986  0.349326  0.208934   
ENSG00000288663.1     29.867524        0.159890  0.141706  1.128321   
ENSG00000288670.1    422.158728        0.703788  0.115554  6.090571   
ENSG00000288674.1      8.197550       -0.375132  0.169207 -2.216998   
ENSG00000288675.1     32.370671       -0.664767  0.171077 -3.885788   

                          pvalue          padj  
ENSG00000000003.15  3.555757e-05  1.318795e-04  
ENSG00000000005.6   7.568690e-02  1.186675e-01  
ENSG00000000419.13  1.628993e-09  1.457548e-08  
ENSG00000000457.14  2.250635e-05  8.702824e-05  
ENSG00000000460.17  4.626757e-19  2.505046e-17  
...                          ...           ...  
ENSG00000288658.1   8.345000e-01  8.730974e-01  
ENSG00000288663.1   2.591845e-01  3.374382e-01  
ENSG00000288670.1   1.125090e-09  1.042542e-08  
ENSG00000288674.1   2.662322e-02  4.801079e-02  
ENSG00000288675.1   1.019985e-04  3.432181e-04  

[29995 rows x 6 columns]
Performing contrastive analysis for LumB vs. Basal

Running Wald tests...
... done in 2.19 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumB vs Basal
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130       -0.956178  0.095308 -10.032555   
ENSG00000000005.6     72.999641       -0.558979  0.252400  -2.214654   
ENSG00000000419.13  2386.496112       -0.006451  0.054263  -0.118890   
ENSG00000000457.14  1596.783158        0.379624  0.054849   6.921272   
ENSG00000000460.17   715.140714       -0.409531  0.069938  -5.855629   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -1.139764  0.207297  -5.498215   
ENSG00000288663.1     29.867524        0.642875  0.084864   7.575379   
ENSG00000288670.1    422.158728        0.270494  0.068745   3.934774   
ENSG00000288674.1      8.197550       -0.187841  0.101419  -1.852129   
ENSG00000288675.1     32.370671       -0.652634  0.102204  -6.385618   

                          pvalue          padj  
ENSG00000000003.15  1.096424e-23  6.605189e-23  
ENSG00000000005.6   2.678385e-02  3.592073e-02  
ENSG00000000419.13  9.053625e-01  9.182818e-01  
ENSG00000000457.14  4.476050e-12  1.394755e-11  
ENSG00000000460.17  4.752078e-09  1.200628e-08  
...                          ...           ...  
ENSG00000288658.1   3.836546e-08  9.064050e-08  
ENSG00000288663.1   3.580819e-14  1.268982e-13  
ENSG00000288670.1   8.327492e-05  1.475736e-04  
ENSG00000288674.1   6.400724e-02  8.153584e-02  
ENSG00000288675.1   1.707070e-10  4.801086e-10  

[29995 rows x 6 columns]
Performing contrastive analysis for Normal vs. Basal

Running Wald tests...
... done in 2.34 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Normal vs Basal
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130       -0.295840  0.161945  -1.826789   
ENSG00000000005.6     72.999641        0.192083  0.428710   0.448048   
ENSG00000000419.13  2386.496112       -0.555279  0.092278  -6.017474   
ENSG00000000457.14  1596.783158       -0.010172  0.093272  -0.109060   
ENSG00000000460.17   715.140714       -1.458786  0.119249 -12.233141   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -1.212750  0.353853  -3.427268   
ENSG00000288663.1     29.867524        0.482985  0.144510   3.342218   
ENSG00000288670.1    422.158728       -0.433294  0.117198  -3.697105   
ENSG00000288674.1      8.197550        0.187291  0.171522   1.091936   
ENSG00000288675.1     32.370671        0.012134  0.173117   0.070089   

                          pvalue          padj  
ENSG00000000003.15  6.773147e-02  1.155634e-01  
ENSG00000000005.6   6.541185e-01  7.328111e-01  
ENSG00000000419.13  1.771600e-09  1.772487e-08  
ENSG00000000457.14  9.131548e-01  9.377274e-01  
ENSG00000000460.17  2.067839e-34  5.743039e-32  
...                          ...           ...  
ENSG00000288658.1   6.096865e-04  1.915328e-03  
ENSG00000288663.1   8.311184e-04  2.519648e-03  
ENSG00000288670.1   2.180718e-04  7.638751e-04  
ENSG00000288674.1   2.748612e-01  3.696571e-01  
ENSG00000288675.1   9.441229e-01  9.600946e-01  

[29995 rows x 6 columns]
Fit type used for VST : parametric

Fitting dispersions...
... done in 9.30 seconds.

../../_images/cc2b1f004fd9f248d5b4697e2b77ee67a43145fe5279aee22b5b584c4cf3d3a8.png

Clearing gpu memory
Total = 42.4Gb 	 Reserved = 0.0Gb 	 Allocated = 0.0Gb

../../_images/5642c18539af1ee23067da0be3b8628dda73c583af31028404d9f045e3e28539.png

GCN_MME(
  (encoder_dims): ModuleList(
    (0): Encoder(
      (encoder): ModuleList(
        (0): Linear(in_features=464, out_features=500, bias=True)
        (1): Linear(in_features=500, out_features=16, bias=True)
      )
      (norm): ModuleList(
        (0): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (decoder): Sequential(
        (0): Linear(in_features=16, out_features=64, bias=True)
      )
      (drop): Dropout(p=0.5, inplace=False)
    )
    (1): Encoder(
      (encoder): ModuleList(
        (0): Linear(in_features=29995, out_features=500, bias=True)
        (1): Linear(in_features=500, out_features=16, bias=True)
      )
      (norm): ModuleList(
        (0): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (decoder): Sequential(
        (0): Linear(in_features=16, out_features=64, bias=True)
      )
      (drop): Dropout(p=0.5, inplace=False)
    )
  )
  (gnnlayers): ModuleList(
    (0): GraphConv(in=64, out=32, normalization=both, activation=None)
    (1): GraphConv(in=32, out=5, normalization=both, activation=None)
  )
  (batch_norms): ModuleList(
    (0): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (drop): Dropout(p=0.5, inplace=False)
)
Graph(num_nodes=1076, num_edges=18340,
      ndata_schemes={'idx': Scheme(shape=(), dtype=torch.int64), 'label': Scheme(shape=(5,), dtype=torch.int64), 'feat': Scheme(shape=(30459,), dtype=torch.float32)}
      edata_schemes={})
Epoch 00000 | Loss 1.8291 | Train Acc. 0.2230 | 
Epoch 00005 | Loss 0.6409 | Train Acc. 0.7974 | 
Epoch 00010 | Loss 0.5584 | Train Acc. 0.8309 | 
Epoch 00015 | Loss 0.4914 | Train Acc. 0.8383 | 
Epoch 00020 | Loss 0.4561 | Train Acc. 0.8457 | 
Epoch 00025 | Loss 0.4449 | Train Acc. 0.8327 | 
Epoch 00030 | Loss 0.4324 | Train Acc. 0.8309 | 
Epoch 00035 | Loss 0.3926 | Train Acc. 0.8494 | 
Epoch 00040 | Loss 0.4163 | Train Acc. 0.8569 | 
Epoch 00045 | Loss 0.3781 | Train Acc. 0.8532 | 
Epoch 00050 | Loss 0.3679 | Train Acc. 0.8550 | 
Epoch 00055 | Loss 0.3676 | Train Acc. 0.8439 | 
Epoch 00060 | Loss 0.3519 | Train Acc. 0.8662 | 
Epoch 00065 | Loss 0.3529 | Train Acc. 0.8643 | 
Epoch 00070 | Loss 0.3619 | Train Acc. 0.8643 | 
Epoch 00075 | Loss 0.3438 | Train Acc. 0.8662 | 
Epoch 00080 | Loss 0.3323 | Train Acc. 0.8717 | 
Epoch 00085 | Loss 0.3296 | Train Acc. 0.8755 | 
Epoch 00090 | Loss 0.3310 | Train Acc. 0.8736 | 
Epoch 00095 | Loss 0.3275 | Train Acc. 0.8736 | 
Epoch 00100 | Loss 0.3142 | Train Acc. 0.8717 | 
Epoch 00105 | Loss 0.3110 | Train Acc. 0.8792 | 
Epoch 00110 | Loss 0.3265 | Train Acc. 0.8662 | 
Epoch 00115 | Loss 0.3329 | Train Acc. 0.8625 | 
Epoch 00120 | Loss 0.3043 | Train Acc. 0.8866 | 
Epoch 00125 | Loss 0.3079 | Train Acc. 0.8792 | 
Epoch 00130 | Loss 0.2987 | Train Acc. 0.8810 | 
Epoch 00135 | Loss 0.3162 | Train Acc. 0.8773 | 
Epoch 00140 | Loss 0.3199 | Train Acc. 0.8755 | 
Epoch 00145 | Loss 0.3026 | Train Acc. 0.8792 | 
Epoch 00150 | Loss 0.3020 | Train Acc. 0.8755 | 
Epoch 00155 | Loss 0.3111 | Train Acc. 0.8773 | 
Epoch 00160 | Loss 0.3019 | Train Acc. 0.8866 | 
Epoch 00165 | Loss 0.3063 | Train Acc. 0.8903 | 
Epoch 00170 | Loss 0.2964 | Train Acc. 0.8773 | 
Epoch 00175 | Loss 0.2888 | Train Acc. 0.8885 | 
Epoch 00180 | Loss 0.2945 | Train Acc. 0.8922 | 
Epoch 00185 | Loss 0.2923 | Train Acc. 0.8996 | 
Epoch 00190 | Loss 0.2908 | Train Acc. 0.8922 | 
Epoch 00195 | Loss 0.2977 | Train Acc. 0.8903 | 
Epoch 00200 | Loss 0.2945 | Train Acc. 0.8848 | 
Epoch 00205 | Loss 0.2964 | Train Acc. 0.8810 | 
Epoch 00210 | Loss 0.2782 | Train Acc. 0.8996 | 
Epoch 00215 | Loss 0.2982 | Train Acc. 0.8680 | 
Epoch 00220 | Loss 0.2880 | Train Acc. 0.8848 | 
Epoch 00225 | Loss 0.2932 | Train Acc. 0.8866 | 
Epoch 00230 | Loss 0.2993 | Train Acc. 0.8773 | 
Epoch 00235 | Loss 0.2817 | Train Acc. 0.8978 | 
Epoch 00240 | Loss 0.2857 | Train Acc. 0.8866 | 
Epoch 00245 | Loss 0.2928 | Train Acc. 0.8941 | 
Epoch 00250 | Loss 0.2903 | Train Acc. 0.8810 | 
Epoch 00255 | Loss 0.2977 | Train Acc. 0.8866 | 
Epoch 00260 | Loss 0.3070 | Train Acc. 0.8792 | 
Epoch 00265 | Loss 0.3077 | Train Acc. 0.8922 | 
Epoch 00270 | Loss 0.2879 | Train Acc. 0.8885 | 
Epoch 00275 | Loss 0.2780 | Train Acc. 0.8829 | 
Epoch 00280 | Loss 0.2829 | Train Acc. 0.8829 | 
Epoch 00285 | Loss 0.2986 | Train Acc. 0.8755 | 
Epoch 00290 | Loss 0.2913 | Train Acc. 0.8922 | 
Epoch 00295 | Loss 0.2870 | Train Acc. 0.8996 | 
Epoch 00300 | Loss 0.2759 | Train Acc. 0.8866 | 
Epoch 00305 | Loss 0.2921 | Train Acc. 0.8903 | 
Epoch 00310 | Loss 0.2996 | Train Acc. 0.8941 | 
Epoch 00315 | Loss 0.2841 | Train Acc. 0.8885 | 
Epoch 00320 | Loss 0.3018 | Train Acc. 0.8903 | 
Epoch 00325 | Loss 0.2918 | Train Acc. 0.8903 | 
Epoch 00330 | Loss 0.2901 | Train Acc. 0.8829 | 
Epoch 00335 | Loss 0.2753 | Train Acc. 0.8866 | 
Epoch 00340 | Loss 0.2861 | Train Acc. 0.8885 | 
Epoch 00345 | Loss 0.2779 | Train Acc. 0.8922 | 
Epoch 00350 | Loss 0.2898 | Train Acc. 0.8922 | 
Epoch 00355 | Loss 0.2846 | Train Acc. 0.9015 | 
Epoch 00360 | Loss 0.2750 | Train Acc. 0.8959 | 
Epoch 00365 | Loss 0.2774 | Train Acc. 0.8885 | 
Epoch 00370 | Loss 0.2826 | Train Acc. 0.8885 | 
Epoch 00375 | Loss 0.2694 | Train Acc. 0.8922 | 
Epoch 00380 | Loss 0.3013 | Train Acc. 0.8773 | 
Epoch 00385 | Loss 0.2818 | Train Acc. 0.8903 | 
Epoch 00390 | Loss 0.2829 | Train Acc. 0.8866 | 
Epoch 00395 | Loss 0.2822 | Train Acc. 0.8885 | 
Epoch 00400 | Loss 0.2829 | Train Acc. 0.8903 | 
Epoch 00405 | Loss 0.2885 | Train Acc. 0.8848 | 
Epoch 00410 | Loss 0.2770 | Train Acc. 0.8922 | 
Epoch 00415 | Loss 0.2764 | Train Acc. 0.8959 | 
Epoch 00420 | Loss 0.2841 | Train Acc. 0.8885 | 
Epoch 00425 | Loss 0.2682 | Train Acc. 0.8978 | 
Epoch 00430 | Loss 0.2932 | Train Acc. 0.8773 | 
Epoch 00435 | Loss 0.2790 | Train Acc. 0.8922 | 
Epoch 00440 | Loss 0.2649 | Train Acc. 0.8848 | 
Epoch 00445 | Loss 0.3039 | Train Acc. 0.8866 | 
Epoch 00450 | Loss 0.2829 | Train Acc. 0.8829 | 
Epoch 00455 | Loss 0.2901 | Train Acc. 0.8829 | 
Epoch 00460 | Loss 0.2908 | Train Acc. 0.8792 | 
Epoch 00465 | Loss 0.2677 | Train Acc. 0.8941 | 
Epoch 00470 | Loss 0.2917 | Train Acc. 0.8978 | 
Early stopping! No improvement for 100 consecutive epochs.

../../_images/0b3bff19087c564d8b6db6d361aa9132cf58aeec7956e576034c2234e337836a.png

Fold : 1 | Test Accuracy = 0.8364 | F1 = 0.8039 
Total = 42.4Gb 	 Reserved = 0.6Gb 	 Allocated = 0.3Gb
Clearing gpu memory
Total = 42.4Gb 	 Reserved = 0.3Gb 	 Allocated = 0.3Gb
Performing Logistic Regression for Feature Selection

Loss : 0.6041: 100%|██████████| 1000/1000 [00:02<00:00, 406.47epoch/s]

Model score : 0.877

<Figure size 640x480 with 0 Axes>

../../_images/b23457bdc33b843a183fc1e6a89ab127b74a065e9015ceacc8bd3b13d83c8719.png

Performing Differential Gene Expression for Feature Selection
Keeping 29995 genes
Removed 30665 genes
Keeping 1047 Samples
Removed 36 Samples

Fitting size factors...

Using None as control genes, passed at DeseqDataSet initialization

... done in 1.19 seconds.

Fitting dispersions...
... done in 10.30 seconds.

Fitting dispersion trend curve...
... done in 0.59 seconds.

Fitting MAP dispersions...
... done in 11.29 seconds.

Fitting LFCs...
... done in 6.66 seconds.

Calculating cook's distance...
... done in 2.13 seconds.

Replacing 2948 outlier genes.

Fitting dispersions...
... done in 1.10 seconds.

Fitting MAP dispersions...
... done in 1.05 seconds.

Fitting LFCs...
... done in 1.00 seconds.

Performing contrastive analysis for LumA vs. Her2

Running Wald tests...
... done in 2.68 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs Her2
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.045843  0.109944 -0.416972   
ENSG00000000005.6     72.999641        2.364056  0.293465  8.055675   
ENSG00000000419.13  2386.496112       -0.604066  0.062602 -9.649254   
ENSG00000000457.14  1596.783158       -0.062327  0.063247 -0.985452   
ENSG00000000460.17   715.140714       -0.435593  0.080773 -5.392793   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026       -0.955682  0.240269 -3.977559   
ENSG00000288663.1     29.867524        0.627702  0.099567  6.304313   
ENSG00000288670.1    422.158728       -0.075705  0.079346 -0.954105   
ENSG00000288674.1      8.197550        0.337858  0.120301  2.808436   
ENSG00000288675.1     32.370671       -0.912699  0.116411 -7.840287   

                          pvalue          padj  
ENSG00000000003.15  6.766989e-01  7.320512e-01  
ENSG00000000005.6   7.904166e-16  9.221527e-15  
ENSG00000000419.13  4.951465e-22  1.092053e-20  
ENSG00000000457.14  3.244019e-01  3.944238e-01  
ENSG00000000460.17  6.937103e-08  3.022199e-07  
...                          ...           ...  
ENSG00000288658.1   6.962648e-05  1.878268e-04  
ENSG00000288663.1   2.894748e-10  1.756229e-09  
ENSG00000288670.1   3.400305e-01  4.104478e-01  
ENSG00000288674.1   4.978277e-03  9.501362e-03  
ENSG00000288675.1   4.495165e-15  4.869356e-14  

[29995 rows x 6 columns]
Performing contrastive analysis for LumA vs. LumB

Running Wald tests...
... done in 2.66 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs LumB
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130        0.510947  0.075755   6.744707   
ENSG00000000005.6     72.999641        0.680790  0.200628   3.393294   
ENSG00000000419.13  2386.496112       -0.496193  0.043131 -11.504238   
ENSG00000000457.14  1596.783158        0.070789  0.043570   1.624718   
ENSG00000000460.17   715.140714       -0.607113  0.055622 -10.914917   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -0.808325  0.165651  -4.879702   
ENSG00000288663.1     29.867524       -0.053662  0.066356  -0.808701   
ENSG00000288670.1    422.158728       -0.228263  0.054607  -4.180137   
ENSG00000288674.1      8.197550        0.269729  0.080393   3.355135   
ENSG00000288675.1     32.370671        0.324366  0.081632   3.973538   

                          pvalue          padj  
ENSG00000000003.15  1.533361e-11  1.663405e-10  
ENSG00000000005.6   6.905757e-04  1.840247e-03  
ENSG00000000419.13  1.255933e-30  1.034937e-28  
ENSG00000000457.14  1.042227e-01  1.570698e-01  
ENSG00000000460.17  9.781815e-28  6.378381e-26  
...                          ...           ...  
ENSG00000288658.1   1.062460e-06  5.130976e-06  
ENSG00000288663.1   4.186870e-01  5.071215e-01  
ENSG00000288670.1   2.913337e-05  1.041544e-04  
ENSG00000288674.1   7.932621e-04  2.084624e-03  
ENSG00000288675.1   7.081273e-05  2.341559e-04  

[29995 rows x 6 columns]
Performing contrastive analysis for LumA vs. Normal

Running Wald tests...
... done in 2.08 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs Normal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.149391  0.151267 -0.987602   
ENSG00000000005.6     72.999641       -0.070272  0.400423 -0.175495   
ENSG00000000419.13  2386.496112        0.052635  0.086204  0.610591   
ENSG00000000457.14  1596.783158        0.460586  0.087118  5.286911   
ENSG00000000460.17   715.140714        0.442141  0.111457  3.966908   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026       -0.735339  0.331180 -2.220363   
ENSG00000288663.1     29.867524        0.106228  0.134479  0.789924   
ENSG00000288670.1    422.158728        0.475525  0.109506  4.342477   
ENSG00000288674.1      8.197550       -0.105403  0.159991 -0.658810   
ENSG00000288675.1     32.370671       -0.340401  0.161826 -2.103501   

                          pvalue      padj  
ENSG00000000003.15  3.233477e-01  0.443638  
ENSG00000000005.6   8.606909e-01  0.906762  
ENSG00000000419.13  5.414706e-01  0.651681  
ENSG00000000457.14  1.243991e-07  0.000002  
ENSG00000000460.17  7.281114e-05  0.000401  
...                          ...       ...  
ENSG00000288658.1   2.639415e-02  0.059055  
ENSG00000288663.1   4.295720e-01  0.548929  
ENSG00000288670.1   1.408855e-05  0.000097  
ENSG00000288674.1   5.100176e-01  0.624223  
ENSG00000288675.1   3.542197e-02  0.075375  

[29995 rows x 6 columns]
Performing contrastive analysis for LumA vs. Basal

Running Wald tests...
... done in 2.11 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumA vs Basal
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130       -0.445231  0.080360  -5.540469   
ENSG00000000005.6     72.999641        0.121811  0.212744   0.572570   
ENSG00000000419.13  2386.496112       -0.502644  0.045765 -10.983180   
ENSG00000000457.14  1596.783158        0.450413  0.046261   9.736320   
ENSG00000000460.17   715.140714       -1.016645  0.058992 -17.233544   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -1.948089  0.174996 -11.132162   
ENSG00000288663.1     29.867524        0.589213  0.072151   8.166402   
ENSG00000288670.1    422.158728        0.042231  0.058005   0.728061   
ENSG00000288674.1      8.197550        0.081888  0.085157   0.961606   
ENSG00000288675.1     32.370671       -0.328268  0.085826  -3.824821   

                          pvalue          padj  
ENSG00000000003.15  3.016618e-08  5.818872e-08  
ENSG00000000005.6   5.669360e-01  6.009346e-01  
ENSG00000000419.13  4.604242e-28  2.082392e-27  
ENSG00000000457.14  2.110588e-22  7.768694e-22  
ENSG00000000460.17  1.487504e-66  1.983009e-65  
...                          ...           ...  
ENSG00000288658.1   8.748769e-29  4.064106e-28  
ENSG00000288663.1   3.177226e-16  9.097937e-16  
ENSG00000288670.1   4.665759e-01  5.029268e-01  
ENSG00000288674.1   3.362475e-01  3.704861e-01  
ENSG00000288675.1   1.308669e-04  2.016518e-04  

[29995 rows x 6 columns]
Performing contrastive analysis for Her2 vs. LumB

Running Wald tests...
... done in 2.42 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Her2 vs LumB
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130        0.556791  0.121299  4.590230   
ENSG00000000005.6     72.999641       -1.683266  0.323369 -5.205406   
ENSG00000000419.13  2386.496112        0.107873  0.069059  1.562059   
ENSG00000000457.14  1596.783158        0.133116  0.069775  1.907792   
ENSG00000000460.17   715.140714       -0.171520  0.089082 -1.925423   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026        0.147357  0.264721  0.556650   
ENSG00000288663.1     29.867524       -0.681364  0.109132 -6.243507   
ENSG00000288670.1    422.158728       -0.152558  0.087504 -1.743435   
ENSG00000288674.1      8.197550       -0.068130  0.132312 -0.514919   
ENSG00000288675.1     32.370671        1.237065  0.128962  9.592483   

                          pvalue          padj  
ENSG00000000003.15  4.427577e-06  2.483732e-05  
ENSG00000000005.6   1.935725e-07  1.438248e-06  
ENSG00000000419.13  1.182741e-01  1.871212e-01  
ENSG00000000457.14  5.641806e-02  1.010847e-01  
ENSG00000000460.17  5.417652e-02  9.782234e-02  
...                          ...           ...  
ENSG00000288658.1   5.777668e-01  6.693490e-01  
ENSG00000288663.1   4.278680e-10  5.115145e-09  
ENSG00000288670.1   8.125771e-02  1.371595e-01  
ENSG00000288674.1   6.066099e-01  6.935602e-01  
ENSG00000288675.1   8.599064e-22  4.931720e-20  

[29995 rows x 6 columns]
Performing contrastive analysis for Her2 vs. Normal

Running Wald tests...
... done in 2.60 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Her2 vs Normal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.103548  0.178483 -0.580154   
ENSG00000000005.6     72.999641       -2.434328  0.473977 -5.135963   
ENSG00000000419.13  2386.496112        0.656701  0.101685  6.458198   
ENSG00000000457.14  1596.783158        0.522913  0.102761  5.088644   
ENSG00000000460.17   715.140714        0.877735  0.131394  6.680153   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026        0.220343  0.390278  0.564578   
ENSG00000288663.1     29.867524       -0.521474  0.159972 -3.259780   
ENSG00000288670.1    422.158728        0.551230  0.129099  4.269815   
ENSG00000288674.1      8.197550       -0.443262  0.191417 -2.315687   
ENSG00000288675.1     32.370671        0.572297  0.190145  3.009796   

                          pvalue          padj  
ENSG00000000003.15  5.618107e-01  6.612585e-01  
ENSG00000000005.6   2.807025e-07  2.722169e-06  
ENSG00000000419.13  1.059569e-10  2.622258e-09  
ENSG00000000457.14  3.606320e-07  3.381419e-06  
ENSG00000000460.17  2.386925e-11  7.074684e-10  
...                          ...           ...  
ENSG00000288658.1   5.723605e-01  6.706232e-01  
ENSG00000288663.1   1.114985e-03  3.756484e-03  
ENSG00000288670.1   1.956349e-05  1.138767e-04  
ENSG00000288674.1   2.057536e-02  4.536925e-02  
ENSG00000288675.1   2.614234e-03  7.828087e-03  

[29995 rows x 6 columns]
Performing contrastive analysis for Her2 vs. Basal

Running Wald tests...
... done in 2.37 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Her2 vs Basal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.399387  0.124227 -3.214986   
ENSG00000000005.6     72.999641       -2.242245  0.331022 -6.773697   
ENSG00000000419.13  2386.496112        0.101422  0.070733  1.433868   
ENSG00000000457.14  1596.783158        0.512741  0.071486  7.172564   
ENSG00000000460.17   715.140714       -0.581052  0.091224 -6.369509   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026       -0.992407  0.270668 -3.666515   
ENSG00000288663.1     29.867524       -0.038489  0.112749 -0.341369   
ENSG00000288670.1    422.158728        0.117936  0.089665  1.315300   
ENSG00000288674.1      8.197550       -0.255971  0.135260 -1.892439   
ENSG00000288675.1     32.370671        0.584431  0.131657  4.439052   

                          pvalue          padj  
ENSG00000000003.15  1.304511e-03  2.998375e-03  
ENSG00000000005.6   1.255320e-11  8.772910e-11  
ENSG00000000419.13  1.516099e-01  2.123529e-01  
ENSG00000000457.14  7.360624e-13  5.828456e-12  
ENSG00000000460.17  1.896341e-10  1.165828e-09  
...                          ...           ...  
ENSG00000288658.1   2.458782e-04  6.493324e-04  
ENSG00000288663.1   7.328255e-01  7.904312e-01  
ENSG00000288670.1   1.884089e-01  2.559486e-01  
ENSG00000288674.1   5.843248e-02  9.229501e-02  
ENSG00000288675.1   9.035578e-06  3.005347e-05  

[29995 rows x 6 columns]
Performing contrastive analysis for LumB vs. Normal

Running Wald tests...
... done in 2.62 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumB vs Normal
                       baseMean  log2FoldChange     lfcSE      stat  \
ENSG00000000003.15  3087.392130       -0.660339  0.159710 -4.134599   
ENSG00000000005.6     72.999641       -0.751062  0.422829 -1.776279   
ENSG00000000419.13  2386.496112        0.548828  0.091000  6.031049   
ENSG00000000457.14  1596.783158        0.389797  0.091967  4.238455   
ENSG00000000460.17   715.140714        1.049255  0.117618  8.920861   
...                         ...             ...       ...       ...   
ENSG00000288658.1     22.532026        0.072986  0.349326  0.208934   
ENSG00000288663.1     29.867524        0.159890  0.141706  1.128321   
ENSG00000288670.1    422.158728        0.703788  0.115554  6.090571   
ENSG00000288674.1      8.197550       -0.375132  0.169207 -2.216998   
ENSG00000288675.1     32.370671       -0.664767  0.171077 -3.885788   

                          pvalue          padj  
ENSG00000000003.15  3.555757e-05  1.318795e-04  
ENSG00000000005.6   7.568690e-02  1.186675e-01  
ENSG00000000419.13  1.628993e-09  1.457548e-08  
ENSG00000000457.14  2.250635e-05  8.702824e-05  
ENSG00000000460.17  4.626757e-19  2.505046e-17  
...                          ...           ...  
ENSG00000288658.1   8.345000e-01  8.730974e-01  
ENSG00000288663.1   2.591845e-01  3.374382e-01  
ENSG00000288670.1   1.125090e-09  1.042542e-08  
ENSG00000288674.1   2.662322e-02  4.801079e-02  
ENSG00000288675.1   1.019985e-04  3.432181e-04  

[29995 rows x 6 columns]
Performing contrastive analysis for LumB vs. Basal

Running Wald tests...
... done in 2.88 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 LumB vs Basal
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130       -0.956178  0.095308 -10.032555   
ENSG00000000005.6     72.999641       -0.558979  0.252400  -2.214654   
ENSG00000000419.13  2386.496112       -0.006451  0.054263  -0.118890   
ENSG00000000457.14  1596.783158        0.379624  0.054849   6.921272   
ENSG00000000460.17   715.140714       -0.409531  0.069938  -5.855629   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -1.139764  0.207297  -5.498215   
ENSG00000288663.1     29.867524        0.642875  0.084864   7.575379   
ENSG00000288670.1    422.158728        0.270494  0.068745   3.934774   
ENSG00000288674.1      8.197550       -0.187841  0.101419  -1.852129   
ENSG00000288675.1     32.370671       -0.652634  0.102204  -6.385618   

                          pvalue          padj  
ENSG00000000003.15  1.096424e-23  6.605189e-23  
ENSG00000000005.6   2.678385e-02  3.592073e-02  
ENSG00000000419.13  9.053625e-01  9.182818e-01  
ENSG00000000457.14  4.476050e-12  1.394755e-11  
ENSG00000000460.17  4.752078e-09  1.200628e-08  
...                          ...           ...  
ENSG00000288658.1   3.836546e-08  9.064050e-08  
ENSG00000288663.1   3.580819e-14  1.268982e-13  
ENSG00000288670.1   8.327492e-05  1.475736e-04  
ENSG00000288674.1   6.400724e-02  8.153584e-02  
ENSG00000288675.1   1.707070e-10  4.801086e-10  

[29995 rows x 6 columns]
Performing contrastive analysis for Normal vs. Basal

Running Wald tests...
... done in 2.37 seconds.

Log2 fold change & Wald test p-value: paper_BRCA_Subtype_PAM50 Normal vs Basal
                       baseMean  log2FoldChange     lfcSE       stat  \
ENSG00000000003.15  3087.392130       -0.295840  0.161945  -1.826789   
ENSG00000000005.6     72.999641        0.192083  0.428710   0.448048   
ENSG00000000419.13  2386.496112       -0.555279  0.092278  -6.017474   
ENSG00000000457.14  1596.783158       -0.010172  0.093272  -0.109060   
ENSG00000000460.17   715.140714       -1.458786  0.119249 -12.233141   
...                         ...             ...       ...        ...   
ENSG00000288658.1     22.532026       -1.212750  0.353853  -3.427268   
ENSG00000288663.1     29.867524        0.482985  0.144510   3.342218   
ENSG00000288670.1    422.158728       -0.433294  0.117198  -3.697105   
ENSG00000288674.1      8.197550        0.187291  0.171522   1.091936   
ENSG00000288675.1     32.370671        0.012134  0.173117   0.070089   

                          pvalue          padj  
ENSG00000000003.15  6.773147e-02  1.155634e-01  
ENSG00000000005.6   6.541185e-01  7.328111e-01  
ENSG00000000419.13  1.771600e-09  1.772487e-08  
ENSG00000000457.14  9.131548e-01  9.377274e-01  
ENSG00000000460.17  2.067839e-34  5.743039e-32  
...                          ...           ...  
ENSG00000288658.1   6.096865e-04  1.915328e-03  
ENSG00000288663.1   8.311184e-04  2.519648e-03  
ENSG00000288670.1   2.180718e-04  7.638751e-04  
ENSG00000288674.1   2.748612e-01  3.696571e-01  
ENSG00000288675.1   9.441229e-01  9.600946e-01  

[29995 rows x 6 columns]
Fit type used for VST : parametric

Fitting dispersions...
... done in 9.63 seconds.

../../_images/f49901ee415f2d1167a5107857f61b8f6cf9b2bac11952798afedbff6d3c5816.png

Clearing gpu memory
Total = 42.4Gb 	 Reserved = 0.3Gb 	 Allocated = 0.3Gb

../../_images/014f71fbf9621706b1f4ecd20af8668d96d28838c63ec0219ae129ba034610cd.png

GCN_MME(
  (encoder_dims): ModuleList(
    (0): Encoder(
      (encoder): ModuleList(
        (0): Linear(in_features=464, out_features=500, bias=True)
        (1): Linear(in_features=500, out_features=16, bias=True)
      )
      (norm): ModuleList(
        (0): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (decoder): Sequential(
        (0): Linear(in_features=16, out_features=64, bias=True)
      )
      (drop): Dropout(p=0.5, inplace=False)
    )
    (1): Encoder(
      (encoder): ModuleList(
        (0): Linear(in_features=29995, out_features=500, bias=True)
        (1): Linear(in_features=500, out_features=16, bias=True)
      )
      (norm): ModuleList(
        (0): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (decoder): Sequential(
        (0): Linear(in_features=16, out_features=64, bias=True)
      )
      (drop): Dropout(p=0.5, inplace=False)
    )
  )
  (gnnlayers): ModuleList(
    (0): GraphConv(in=64, out=32, normalization=both, activation=None)
    (1): GraphConv(in=32, out=5, normalization=both, activation=None)
  )
  (batch_norms): ModuleList(
    (0): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (drop): Dropout(p=0.5, inplace=False)
)
Graph(num_nodes=1076, num_edges=18346,
      ndata_schemes={'idx': Scheme(shape=(), dtype=torch.int64), 'label': Scheme(shape=(5,), dtype=torch.int64), 'feat': Scheme(shape=(30459,), dtype=torch.float32)}
      edata_schemes={})
Epoch 00000 | Loss 1.8456 | Train Acc. 0.2435 | 
Epoch 00005 | Loss 0.5908 | Train Acc. 0.8030 | 
Epoch 00010 | Loss 0.5297 | Train Acc. 0.8197 | 
Epoch 00015 | Loss 0.5006 | Train Acc. 0.8178 | 
Epoch 00020 | Loss 0.4776 | Train Acc. 0.8327 | 
Epoch 00025 | Loss 0.4348 | Train Acc. 0.8494 | 
Epoch 00030 | Loss 0.4500 | Train Acc. 0.8309 | 
Epoch 00035 | Loss 0.4246 | Train Acc. 0.8364 | 
Epoch 00040 | Loss 0.4182 | Train Acc. 0.8439 | 
Epoch 00045 | Loss 0.4020 | Train Acc. 0.8532 | 
Epoch 00050 | Loss 0.3798 | Train Acc. 0.8439 | 
Epoch 00055 | Loss 0.4173 | Train Acc. 0.8439 | 
Epoch 00060 | Loss 0.3841 | Train Acc. 0.8625 | 
Epoch 00065 | Loss 0.3747 | Train Acc. 0.8532 | 
Epoch 00070 | Loss 0.3782 | Train Acc. 0.8550 | 
Epoch 00075 | Loss 0.3646 | Train Acc. 0.8662 | 
Epoch 00080 | Loss 0.3678 | Train Acc. 0.8513 | 
Epoch 00085 | Loss 0.3664 | Train Acc. 0.8550 | 
Epoch 00090 | Loss 0.3517 | Train Acc. 0.8717 | 
Epoch 00095 | Loss 0.3592 | Train Acc. 0.8606 | 
Epoch 00100 | Loss 0.3514 | Train Acc. 0.8606 | 
Epoch 00105 | Loss 0.3428 | Train Acc. 0.8625 | 
Epoch 00110 | Loss 0.3339 | Train Acc. 0.8680 | 
Epoch 00115 | Loss 0.3399 | Train Acc. 0.8699 | 
Epoch 00120 | Loss 0.3467 | Train Acc. 0.8699 | 
Epoch 00125 | Loss 0.3423 | Train Acc. 0.8755 | 
Epoch 00130 | Loss 0.3418 | Train Acc. 0.8680 | 
Epoch 00135 | Loss 0.3335 | Train Acc. 0.8792 | 
Epoch 00140 | Loss 0.3254 | Train Acc. 0.8755 | 
Epoch 00145 | Loss 0.3189 | Train Acc. 0.8755 | 
Epoch 00150 | Loss 0.3212 | Train Acc. 0.8736 | 
Epoch 00155 | Loss 0.3202 | Train Acc. 0.8717 | 
Epoch 00160 | Loss 0.3114 | Train Acc. 0.8736 | 
Epoch 00165 | Loss 0.3391 | Train Acc. 0.8736 | 
Epoch 00170 | Loss 0.3198 | Train Acc. 0.8755 | 
Epoch 00175 | Loss 0.3160 | Train Acc. 0.8717 | 
Epoch 00180 | Loss 0.3202 | Train Acc. 0.8736 | 
Epoch 00185 | Loss 0.3093 | Train Acc. 0.8829 | 
Epoch 00190 | Loss 0.3128 | Train Acc. 0.8736 | 
Epoch 00195 | Loss 0.3171 | Train Acc. 0.8829 | 
Epoch 00200 | Loss 0.2978 | Train Acc. 0.8792 | 
Epoch 00205 | Loss 0.3125 | Train Acc. 0.8625 | 
Epoch 00210 | Loss 0.3087 | Train Acc. 0.8885 | 
Epoch 00215 | Loss 0.3027 | Train Acc. 0.8829 | 
Epoch 00220 | Loss 0.2999 | Train Acc. 0.8829 | 
Epoch 00225 | Loss 0.3013 | Train Acc. 0.8848 | 
Epoch 00230 | Loss 0.2968 | Train Acc. 0.8903 | 
Epoch 00235 | Loss 0.3221 | Train Acc. 0.8848 | 
Epoch 00240 | Loss 0.3107 | Train Acc. 0.8773 | 
Epoch 00245 | Loss 0.2929 | Train Acc. 0.8959 | 
Epoch 00250 | Loss 0.2958 | Train Acc. 0.8792 | 
Epoch 00255 | Loss 0.2954 | Train Acc. 0.8848 | 
Epoch 00260 | Loss 0.2980 | Train Acc. 0.8922 | 
Epoch 00265 | Loss 0.3129 | Train Acc. 0.8810 | 
Epoch 00270 | Loss 0.3026 | Train Acc. 0.8829 | 
Epoch 00275 | Loss 0.2947 | Train Acc. 0.8866 | 
Epoch 00280 | Loss 0.3082 | Train Acc. 0.8829 | 
Epoch 00285 | Loss 0.3050 | Train Acc. 0.8792 | 
Epoch 00290 | Loss 0.3064 | Train Acc. 0.8680 | 
Epoch 00295 | Loss 0.3073 | Train Acc. 0.8755 | 
Epoch 00300 | Loss 0.3084 | Train Acc. 0.8792 | 
Epoch 00305 | Loss 0.2885 | Train Acc. 0.8978 | 
Epoch 00310 | Loss 0.2965 | Train Acc. 0.8829 | 
Epoch 00315 | Loss 0.3088 | Train Acc. 0.8662 | 
Epoch 00320 | Loss 0.3027 | Train Acc. 0.8903 | 
Epoch 00325 | Loss 0.3060 | Train Acc. 0.8848 | 
Epoch 00330 | Loss 0.3038 | Train Acc. 0.8885 | 
Epoch 00335 | Loss 0.3032 | Train Acc. 0.8736 | 
Epoch 00340 | Loss 0.2948 | Train Acc. 0.8810 | 
Epoch 00345 | Loss 0.3166 | Train Acc. 0.8810 | 
Epoch 00350 | Loss 0.3035 | Train Acc. 0.8885 | 
Epoch 00355 | Loss 0.2869 | Train Acc. 0.8959 | 
Epoch 00360 | Loss 0.3090 | Train Acc. 0.8810 | 
Epoch 00365 | Loss 0.3108 | Train Acc. 0.8848 | 
Epoch 00370 | Loss 0.3101 | Train Acc. 0.8848 | 
Epoch 00375 | Loss 0.3036 | Train Acc. 0.8773 | 
Epoch 00380 | Loss 0.2995 | Train Acc. 0.8829 | 
Epoch 00385 | Loss 0.3024 | Train Acc. 0.8848 | 
Epoch 00390 | Loss 0.2995 | Train Acc. 0.8848 | 
Epoch 00395 | Loss 0.2863 | Train Acc. 0.8922 | 
Epoch 00400 | Loss 0.2827 | Train Acc. 0.9052 | 
Epoch 00405 | Loss 0.2953 | Train Acc. 0.8773 | 
Epoch 00410 | Loss 0.2914 | Train Acc. 0.8773 | 
Epoch 00415 | Loss 0.2812 | Train Acc. 0.8959 | 
Epoch 00420 | Loss 0.2828 | Train Acc. 0.8829 | 
Early stopping! No improvement for 100 consecutive epochs.

../../_images/1088e3437251ae6ff9db4f178b57ee8bb2d55cc73d40e385744c5c53b697a9e8.png

Fold : 2 | Test Accuracy = 0.8290 | F1 = 0.7999 
Total = 42.4Gb 	 Reserved = 0.8Gb 	 Allocated = 0.4Gb
Clearing gpu memory
Total = 42.4Gb 	 Reserved = 0.3Gb 	 Allocated = 0.3Gb

<Figure size 640x480 with 0 Axes>

# Summarise the per-fold test metrics collected in `output_metrics`.
# Entries are read positionally: index 1 is used as accuracy, index 2 as F1
# (presumably one entry per cross-validation fold -- confirm against the loop
# that fills `output_metrics`).
accuracy = [metric[1] for metric in output_metrics]
F1 = [metric[2] for metric in output_metrics]
i = 0  # NOTE(review): not read in this cell; kept so module-level state is unchanged

# Report the number of folds that were actually evaluated instead of a
# hard-coded 5, which mislabels the summary whenever n_splits is not 5.
n_folds = len(output_metrics)

print("%i Fold Cross Validation Accuracy = %2.2f \u00B1 %2.2f" %(n_folds , np.mean(accuracy)*100 , np.std(accuracy)*100))
print("%i Fold Cross Validation F1 = %2.2f \u00B1 %2.2f" %(n_folds , np.mean(F1)*100 , np.std(F1)*100))

# Category labels of `meta`; used both as the confusion-matrix class names
# and to map class indices back to readable labels below.
display_label = meta.astype('category').cat.categories

# Plot the confusion matrix over the collected test logits/labels and title
# it with the mean accuracy across folds.
confusion_matrix(test_logits , test_labels , display_label)
plt.title('Test Accuracy = %2.1f %%' % (np.mean(accuracy)*100))

# AUROC returns the plot object together with per-sample prediction scores.
precision_recall_plot , all_predictions_conf = AUROC(test_logits, test_labels , meta)

# Pair each predicted class index with the matching true class index.
# NOTE(review): `test_labels` looks one-hot encoded (argmax over axis 1
# recovers the class index) -- confirm against where it is assembled.
predicted_idx = all_predictions_conf.argmax(1)
actual_idx = test_labels.argmax(1).detach().cpu()
paired = list(zip(predicted_idx , actual_idx))

# Translate the index pairs into label names.
node_predictions = [display_label[p] for p , _ in paired]
true_label = [display_label[t.item()] for _ , t in paired]

# Side-by-side table of actual vs. predicted labels for every test sample.
preds = pd.DataFrame({'Actual' : true_label , 'Predicted' : node_predictions})

5 Fold Cross Validation Accuracy = 83.27 ± 0.37
5 Fold Cross Validation F1 = 80.19 ± 0.20

../../_images/21770708103173363f8d9b39d40b045113546bc2aec748772f7896badb0faf9f.png

../../_images/6a1e0908e050849ac549502897ceed861b668af3ec45f751caea7cdfb9d101dc.png