Skip to content

Commit

Permalink
newest updates
Browse files Browse the repository at this point in the history
  • Loading branch information
SaniyaKhullar committed May 2, 2024
1 parent 7f2460f commit b9d6acf
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 37 deletions.
66 changes: 39 additions & 27 deletions code/Netrem_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,24 +379,32 @@ def fit(self, X, y): # fits a model Function used for model training
if len(selected_cols) == 0:
self.model_nonzero_coef_df = None
self.num_final_predictors = 0
self.final_corr_vs_coef_df = pd.DataFrame()
self.combined_df = pd.DataFrame()
else:
self.model_nonzero_coef_df = self.model_coef_df[selected_cols]
if len(selected_cols) > 1: # and self.model_type != "Linear":
self.netrem_model_predictor_results(y)
self.num_final_predictors = len(selected_cols)
if "y_intercept" in selected_cols:
self.num_final_predictors = self.num_final_predictors - 1
else:
self.final_corr_vs_coef_df = pd.DataFrame()
self.combined_df = pd.DataFrame()

self.organize_B_interaction_list()
self.TF_coord_scores_pairwise_df = return_TF_coord_scores_df(self)
return self


def netrem_model_predictor_results(self, y): # olders
""" :) Please note that this function by Saniya works on a netrem model and returns information about the predictors
such as their Pearson correlations with y, their rankings as well.
It returns: sorted_df, final_corr_vs_coef_df, combined_df """
It returns: sorted_df, final_corr_vs_coef_df, combined_df """
abs_df = self.model_nonzero_coef_df.replace("None", np.nan).apply(pd.to_numeric, errors='coerce').abs()
if abs_df.shape[0] == 1:
abs_df = pd.DataFrame([abs_df.squeeze()])

sorted_series = abs_df.squeeze().sort_values(ascending=False)
sorted_df = pd.DataFrame(sorted_series) # convert the sorted series back to a DataFrame
sorted_df['Rank'] = range(1, len(sorted_df) + 1) # add a column for the rank
Expand All @@ -417,11 +425,11 @@ def netrem_model_predictor_results(self, y): # olders
sorting["input_data"] = "X_train"
all_df = pd.concat([all_df, sorting])
self.corr_vs_coef_df = all_df
self.final_corr_vs_coef_df = self.corr_vs_coef_df[["info", "input_data"] + self.model_nonzero_coef_df.columns.tolist()[1:]]
netrem_model_df = self.model_nonzero_coef_df.transpose()
netrem_model_df.columns = ["coef"]
netrem_model_df["TF"] = netrem_model_df.index.tolist()
netrem_model_df["TG"] = tg
self.final_corr_vs_coef_df = self.corr_vs_coef_df[["info", "input_data"] + self.model_nonzero_coef_df.columns.tolist()[1:]]
if self.y_intercept:
netrem_model_df["info"] = "netrem_with_intercept"
else:
Expand Down Expand Up @@ -1167,6 +1175,13 @@ def netremCV(edge_list, X, y,
return newest_netrem


# Function to lookup coefficient
def lookup_coef(tf, netrem_model):
model_nonzero_coef_df = netrem_model.model_nonzero_coef_df
coef_series = model_nonzero_coef_df.iloc[0].drop('y_intercept')
return coef_series.get(tf, 0) # Returns 0 if TF is not found


def organize_predictor_interaction_network(netrem_model):
if "model_nonzero_coef_df" not in vars(netrem_model).keys():
print(":( No NetREm model was built")
Expand All @@ -1175,6 +1190,9 @@ def organize_predictor_interaction_network(netrem_model):
if "model_type" in TF_interaction_df.columns.tolist():
TF_interaction_df = TF_interaction_df.drop(columns = ["model_type"])
num_TFs = TF_interaction_df.shape[0]
TF_coord_scores_pairwise_df = netrem_model.TF_coord_scores_pairwise_df
if TF_coord_scores_pairwise_df.shape[0] == 0:
return None
TF_interaction_df = netrem_model.TF_coord_scores_pairwise_df.drop(columns = ["absVal_coord_score"])
TF_interaction_df = TF_interaction_df.rename(columns = {"coordination_score":"coord_score_cs"})

Expand Down Expand Up @@ -1205,8 +1223,9 @@ def organize_predictor_interaction_network(netrem_model):

# Step 3: Calculate the percentile
TF_interaction_df['cs_magnitude_percentile'] = (1 - (TF_interaction_df['cs_magnitude_rank'] / TF_interaction_df['absVal_coordScore'].count())) * 100
TF_interaction_df["TF_TF"] = TF_interaction_df["node_1"] + "_" + TF_interaction_df["node_2"]

TF_interaction_df["TF_TF"] = TF_interaction_df["TF1"] + "_" + TF_interaction_df["TF2"]
#TF_interaction_df["TF_TF"] = TF_interaction_df["TF1"] + "_" + TF_interaction_df["TF2"]

standardized_data = True
if "old_X_df" in vars(netrem_model).keys():
Expand All @@ -1216,43 +1235,36 @@ def organize_predictor_interaction_network(netrem_model):
original_X_corr_mat = netrem_model.X_df.corr()
standardized_data = False
# Melting the DataFrame into a 3-column edge list
original_X_corr_df = original_X_corr_mat.reset_index().melt(id_vars=["index"], var_name="TF2", value_name="original_corr")
original_X_corr_df.rename(columns={"index": "TF1"}, inplace=True)
original_X_corr_df = original_X_corr_df[original_X_corr_df["TF1"] != original_X_corr_df["TF2"]]
original_X_corr_df["TF_TF"] = original_X_corr_df["TF1"] + "_" + original_X_corr_df["TF2"]
original_X_corr_df = original_X_corr_mat.reset_index().melt(id_vars=["index"], var_name="node_2", value_name="original_corr")
original_X_corr_df.rename(columns={"index": "node_1"}, inplace=True)
original_X_corr_df = original_X_corr_df[original_X_corr_df["node_1"] != original_X_corr_df["node_2"]]
original_X_corr_df["TF_TF"] = original_X_corr_df["node_1"] + "_" + original_X_corr_df["node_2"]
# Display the first few rows to verify the format

if standardized_data:
# Melting the DataFrame into a 3-column edge list
standardized_X_corr_df = standardized_X_corr_mat.reset_index().melt(id_vars=["index"], var_name="TF2", value_name="standardized_corr")
standardized_X_corr_df.rename(columns={"index": "TF1"}, inplace=True)
standardized_X_corr_df = standardized_X_corr_df[standardized_X_corr_df["TF1"] != standardized_X_corr_df["TF2"]]
standardized_X_corr_df["TF_TF"] = standardized_X_corr_df["TF1"] + "_" + standardized_X_corr_df["TF2"]
standardized_X_corr_df.drop(columns = ["TF1", "TF2"], inplace = True)
original_X_corr_df = pd.merge(original_X_corr_df, standardized_X_corr_df).drop(columns = ["TF1", "TF2"])


standardized_X_corr_df = standardized_X_corr_mat.reset_index().melt(id_vars=["index"], var_name="node_2", value_name="standardized_corr")
standardized_X_corr_df.rename(columns={"index": "node_1"}, inplace=True)
standardized_X_corr_df = standardized_X_corr_df[standardized_X_corr_df["node_1"] != standardized_X_corr_df["node_2"]]
standardized_X_corr_df["TF_TF"] = standardized_X_corr_df["node_1"] + "_" + standardized_X_corr_df["node_2"]
standardized_X_corr_df.drop(columns = ["node_1", "node_2"], inplace = True)
original_X_corr_df = pd.merge(original_X_corr_df, standardized_X_corr_df).drop(columns = ["node_1", "node_2"])
TF_interaction_df = pd.merge(TF_interaction_df, original_X_corr_df)

default_edge_w = netrem_model.network.default_edge_weight
if "W_df" in vars(netrem_model.network).keys():
W_df = netrem_model.network.W_df
else:
W_df = netrem_model.W_df

ppi_net_df = W_df.reset_index().melt(id_vars=["index"], var_name="TF2", value_name="PPI_score")
ppi_net_df.rename(columns={"index": "TF1"}, inplace=True)

ppi_net_df = W_df.reset_index().melt(id_vars=["index"], var_name="node_2", value_name="PPI_score")
ppi_net_df.rename(columns={"index": "node_1"}, inplace=True)
ppi_net_df["novel_link"] = np.where((ppi_net_df.PPI_score == default_edge_w), "yes", "no")
ppi_net_df["TF_TF"] = ppi_net_df["TF1"] + "_" + ppi_net_df["TF2"]
ppi_net_df = ppi_net_df[ppi_net_df["TF1"] != ppi_net_df["TF2"]] # 42849 rows × 3 columns
ppi_net_df = ppi_net_df.drop(columns = ["TF1", "TF2"])

ppi_net_df["TF_TF"] = ppi_net_df["node_1"] + "_" + ppi_net_df["node_2"]
ppi_net_df = ppi_net_df[ppi_net_df["node_1"] != ppi_net_df["node_2"]] # 42849 rows × 3 columns
ppi_net_df = ppi_net_df.drop(columns = ["node_1", "node_2"])
TF_interaction_df = pd.merge(TF_interaction_df, ppi_net_df)
TF_interaction_df["absVal_diff_cs_and_originalCorr"] = abs(TF_interaction_df["coord_score_cs"] - TF_interaction_df["standardized_corr"])

TF_interaction_df['c_1'] = TF_interaction_df['TF1'].apply(lookup_coef, args=(netrem_model,))
TF_interaction_df['c_2'] = TF_interaction_df['TF2'].apply(lookup_coef, args=(netrem_model,))
TF_interaction_df['c_1'] = TF_interaction_df['node_1'].apply(lookup_coef, args=(netrem_model,))
TF_interaction_df['c_2'] = TF_interaction_df['node_2'].apply(lookup_coef, args=(netrem_model,))
return TF_interaction_df


Expand Down
4 changes: 2 additions & 2 deletions code/netrem_evaluation_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
from skopt.utils import use_named_args
#import Netrem_model_builder as nm
from sklearn import linear_model, preprocessing # 9/19
import NetremmerFinal2024 as nm
from NetremmerFinal2024 import *
import Netrem_model_builder as nm
from Netrem_model_builder import *

class BayesianObjective_Lasso:
def __init__(self, X, y, cv_folds, model, scorer="mse", print_network=False):
Expand Down
65 changes: 65 additions & 0 deletions code/previous_version/demo_toy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import sys
sys.path.append("../code") # assuming "code" is one directory up and then down into "code"

from DemoDataBuilderXandY import generate_dummy_data
from Netrem_model_builder import netrem, netremCV
import PriorGraphNetwork as graph
import error_metrics as em
import essential_functions as ef
import netrem_evaluation_functions as nm_eval
import Netrem_model_builder as nm

dummy_data = generate_dummy_data(corrVals = [0.9, 0.5, 0.4, -0.3, -0.8], # the # of elements in corrVals is the # of predictors (X)
num_samples_M = 100000, # the number of samples M
train_data_percent = 70) # the remainder out of 100,000 will be kept for testing. If 100, then ALL data is used for training and testing.

X_df = dummy_data.X_df
X_df.head()

y_df = dummy_data.y_df
y_df.head()

# 70,000 samples for training data (used to train and fit GRegulNet model)
X_train = dummy_data.view_X_train_df()
y_train = dummy_data.view_y_train_df()

# 30,000 samples for testing data
X_test = dummy_data.view_X_test_df()
y_test = dummy_data.view_y_test_df()

X_train.corr() # pairwise correlations among the training samples
X_test.corr() # pairwise correlations among the training samples


# prior network edge_list (missing edges or edges with no edge weight will be added with the default_edge_list so the network is fully-connected):
edge_list = [["TF1", "TF2", 0.8], ["TF4", "TF5", 0.95], ["TF1", "TF3"], ["TF1", "TF4"], ["TF1", "TF5"],
["TF2", "TF3"], ["TF2", "TF4"], ["TF2", "TF5"], ["TF3", "TF4"], ["TF3", "TF5"]]

beta_network_val = 1
# by default, model_type is Lasso, so alpha_lasso_val will be specified for the alpha_lasso parameter.
# However, we will specify model_type = LassoCV, so our alpha_lasso is determined by cross-validation on training data).

# Building the network regularized regression model:
# By default, edges are constructed between all of the nodes; nodes with a missing edge are assigned the default_edge_weight.
netrem_demo = netrem(edge_list = edge_list,
beta_net = beta_network_val,
model_type = "LassoCV",
view_network = True)

# Fitting the NetREm model on training data: X_train and y_train:
netrem_demo.fit(X_train, y_train)


pred_y_test = netrem_demo.predict(X_test) # predicted values for y_test
mse_test = netrem_demo.test_mse(X_test, y_test)
print(f"Please note that the testing Mean Square Error (MSE) is {mse_test}")

# To view and extract the predicted model coefficients for the predictors:
netrem_demo.model_coef_df

netrem_demo.B_interaction_df


netrem_demo.final_corr_vs_coef_df
netrem_demo.combined_df
organize_B_interaction_network(netrem_demo)
20 changes: 12 additions & 8 deletions demo/demo_toy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,24 @@
import essential_functions as ef
import netrem_evaluation_functions as nm_eval
import Netrem_model_builder as nm

dummy_data = generate_dummy_data(corrVals = [0.9, 0.5, 0.4, -0.3, -0.8], # the # of elements in corrVals is the # of predictors (X)
M = 10000 # number of samples (e.g. single-cells)
corrs_list = [0.9, 0.5, 0.4, -0.3, -0.8] # the individual correlations (r) of each X variable with y
dummy_data = generate_dummy_data(corrVals = corrs_list, # the # of elements in corrVals is the # of predictors (X)
num_samples_M = 100000, # the number of samples M
train_data_percent = 70) # the remainder out of 100,000 will be kept for testing. If 100, then ALL data is used for training and testing.
sparsity_factor_perc = 40, # approximately what % of data for each variable is sparse (0)
train_data_percent = 70) # the remainder out of 10,000 will be kept for testing. If 100, then ALL data is used for training and testing.

X_df = dummy_data.X_df
X_df.head()

y_df = dummy_data.y_df
y_df.head()

# 70,000 samples for training data (used to train and fit GRegulNet model)
# 7,000 samples for training data (used to train and fit GRegulNet model)
X_train = dummy_data.view_X_train_df()
y_train = dummy_data.view_y_train_df()

# 30,000 samples for testing data
# 3,000 samples for testing data
X_test = dummy_data.view_X_test_df()
y_test = dummy_data.view_y_test_df()

Expand All @@ -36,14 +38,16 @@
["TF2", "TF3"], ["TF2", "TF4"], ["TF2", "TF5"], ["TF3", "TF4"], ["TF3", "TF5"]]

beta_network_val = 1
alpha_lasso_val = 0.1
# by default, model_type is Lasso, so alpha_lasso_val will be specified for the alpha_lasso parameter.
# However, we will specify model_type = LassoCV, so our alpha_lasso is determined by cross-validation on training data).

# Building the network regularized regression model:
# By default, edges are constructed between all of the nodes; nodes with a missing edge are assigned the default_edge_weight.
netrem_demo = netrem(edge_list = edge_list,
beta_net = beta_network_val,
model_type = "LassoCV",
alpha_lasso = alpha_lasso_val,
model_type = "Lasso",
view_network = True)

# Fitting the NetREm model on training data: X_train and y_train:
Expand All @@ -57,9 +61,9 @@
# To view and extract the predicted model coefficients for the predictors:
netrem_demo.model_coef_df

netrem_demo.B_interaction_df
netrem_demo.TF_interaction_df


netrem_demo.final_corr_vs_coef_df
netrem_demo.combined_df
organize_B_interaction_network(netrem_demo)
nm.organize_predictor_interaction_network(netrem_demo)

0 comments on commit b9d6acf

Please sign in to comment.