Skip to content

Commit

Permalink
newer updates
Browse files Browse the repository at this point in the history
  • Loading branch information
SaniyaKhullar committed Oct 23, 2023
1 parent 39329ae commit 307111c
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 21 deletions.
58 changes: 58 additions & 0 deletions demo/another_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import sys
sys.path.append("../code") # assuming "code" is one directory up and then down into "code"

from DemoDataBuilderXandY import generate_dummy_data
from Netrem_model_builder import netrem, netremCV
import PriorGraphNetwork as graph
import error_metrics as em
import essential_functions as ef
import netrem_evaluation_functions as nm_eval
import Netrem_model_builder as nm

# ---------------------------------------------------------------------------
# 1. Simulate a small demo dataset.
#    Six values are passed in corrVals; 100 samples are generated and 70% of
#    them are reserved for training (the remaining 30% for testing).
# ---------------------------------------------------------------------------
dummy_data = generate_dummy_data(
    corrVals=[0.9, 0.5, 0.1, -0.2, -0.8, -0.3],
    standardize_X=False,
    center_y=False,
    num_samples_M=100,
    train_data_percent=70,
)

# Full predictor (X) and response (y) tables.
# NOTE: a bare `.head()` call only displays its result in an interactive
# session (e.g. a notebook); when run as a script the preview is discarded.
X_df = dummy_data.X_df
X_df.head()

y_df = dummy_data.y_df
y_df.head()

# Training split: 70 samples, used to fit the NetREm model.
X_train = dummy_data.view_X_train_df()
y_train = dummy_data.view_y_train_df()

# Testing split: the 30 held-out samples.
X_test = dummy_data.view_X_test_df()
y_test = dummy_data.view_y_test_df()

# ---------------------------------------------------------------------------
# 2. Describe the prior network.
#    Each entry is [node_a, node_b] with an optional third element giving an
#    explicit edge weight.
# ---------------------------------------------------------------------------
edge_list = [
    ["TF1", "TF2", 0.9],
    ["TF4", "TF5", 0.75],
    ["TF1", "TF3"],
    ["TF1", "TF4"],
    ["TF1", "TF5"],
    ["TF2", "TF3"],
    ["TF2", "TF4"],
    ["TF2", "TF5"],
    ["TF3", "TF4"],
    ["TF3", "TF5"],
]

# Hyperparameters: network-regularization strength and Lasso penalty.
# By default cv_for_alpha is False, so alpha_lasso_val is passed straight
# through as the alpha_lasso parameter.
beta_network_val = 3
alpha_lasso_val = 0.01

# ---------------------------------------------------------------------------
# 3. Build the network-regularized regression model.
#    overlapped_nodes_only=False keeps nodes that appear in the gene
#    expression data but not in the PPI network (TF6 in this demo); with
#    True, only TF1 to TF5 would be used.
# ---------------------------------------------------------------------------
netrem_demo = nm.netrem(
    edge_list=edge_list,
    beta_net=beta_network_val,
    alpha_lasso=alpha_lasso_val,
    overlapped_nodes_only=False,  # so we include TF6
    view_network=True,
)

# ---------------------------------------------------------------------------
# 4. Fit on the training data, then evaluate on the held-out data.
# ---------------------------------------------------------------------------
netrem_demo.fit(X_train, y_train)

pred_y_test = netrem_demo.predict(X_test)  # predicted values for y_test
mse_test = netrem_demo.test_mse(X_test, y_test)
print(f"Please note that the testing Mean Square Error (MSE) is {mse_test}")

# ---------------------------------------------------------------------------
# 5. Inspect the fitted model: per-predictor coefficients and the
#    B_interaction_df table (bare expressions; visible only interactively).
# ---------------------------------------------------------------------------
netrem_demo.model_coef_df

netrem_demo.B_interaction_df
49 changes: 28 additions & 21 deletions demo/demo_toy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,45 +9,47 @@
import netrem_evaluation_functions as nm_eval
import Netrem_model_builder as nm

dummy_data = generate_dummy_data(corrVals = [0.9, 0.5, 0.1, -0.2, -0.8, -0.3],
standardize_X = False,
center_y = False,
num_samples_M = 100,
train_data_percent = 70)
dummy_data = generate_dummy_data(corrVals = [0.9, 0.5, 0.4, -0.3, -0.8], # the # of elements in corrVals is the # of predictors (X)
num_samples_M = 100000, # the number of samples M
train_data_percent = 70) # the remainder out of 100,000 will be kept for testing. If 100, then ALL data is used for training and testing.

X_df = dummy_data.X_df
X_df.head()

y_df = dummy_data.y_df
y_df.head()

# 70 samples for training data (used to train and fit GRegulNet model)
# 70,000 samples for training data (used to train and fit the NetREm model)
X_train = dummy_data.view_X_train_df()
y_train = dummy_data.view_y_train_df()

# 30 samples for testing data
# 30,000 samples for testing data
X_test = dummy_data.view_X_test_df()
y_test = dummy_data.view_y_test_df()

# prior network edge_list:
edge_list = [["TF1", "TF2", 0.9], ["TF4", "TF5", 0.75], ["TF1", "TF3"], ["TF1", "TF4"], ["TF1", "TF5"],
["TF2", "TF3"], ["TF2", "TF4"], ["TF2", "TF5"], ["TF3", "TF4"], ["TF3", "TF5"]]
X_train.corr() # pairwise correlations among the predictors in the training data
X_test.corr() # pairwise correlations among the predictors in the testing data

beta_network_val = 3
# by default, cv_for_alpha is False, so alpha_lasso_val will be specified for the alpha_lasso parameter.
alpha_lasso_val = 0.01

# prior network edge_list (missing edges, or edges listed without an edge weight, are assigned the default_edge_weight so the network is fully connected):
edge_list = [["TF1", "TF2", 0.8], ["TF4", "TF5", 0.95], ["TF1", "TF3"], ["TF1", "TF4"], ["TF1", "TF5"],
["TF2", "TF3"], ["TF2", "TF4"], ["TF2", "TF5"], ["TF3", "TF4"], ["TF3", "TF5"]]

beta_network_val = 1
# by default, model_type is Lasso, so alpha_lasso_val will be specified for the alpha_lasso parameter.
# However, we will specify model_type = LassoCV, so our alpha_lasso is determined by cross-validation on the training data.

# Building the network regularized regression model:
# Please note: To include nodes found in the gene expression data that are not found in the PPI Network (e.g. TF6 in our case), we use False for the overlapped_nodes_only argument (otherwise, we would only use TFs 1 to 5):
netrem_demo = nm.netrem(edge_list = edge_list,
beta_net = beta_network_val,
alpha_lasso = alpha_lasso_val,
overlapped_nodes_only = False, # so we include TF6
view_network = True)

# Fitting the gregulnet model on training data: X_train and y_train:
# By default, edges are constructed between all of the nodes; nodes with a missing edge are assigned the default_edge_weight.
netrem_demo = netrem(edge_list = edge_list,
beta_net = beta_network_val,
model_type = "LassoCV",
view_network = True)

# Fitting the NetREm model on training data: X_train and y_train:
netrem_demo.fit(X_train, y_train)


pred_y_test = netrem_demo.predict(X_test) # predicted values for y_test
mse_test = netrem_demo.test_mse(X_test, y_test)
print(f"Please note that the testing Mean Square Error (MSE) is {mse_test}")
Expand All @@ -56,3 +58,8 @@
netrem_demo.model_coef_df

netrem_demo.B_interaction_df


netrem_demo.final_corr_vs_coef_df
netrem_demo.combined_df
organize_B_interaction_network(netrem_demo)

0 comments on commit 307111c

Please sign in to comment.