-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathupsert_tsv.py
33 lines (26 loc) · 1.01 KB
/
upsert_tsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
import sys
import os
def upsert_df(df: pd.DataFrame, old_df: pd.DataFrame, key: "str | list[str]") -> pd.DataFrame:
    """Upsert the rows of ``df`` into ``old_df``, matching rows on ``key``.

    Both frames are indexed on ``key`` and merged with
    ``DataFrame.combine_first``, so values from ``df`` take precedence and
    any cell that is missing (NaN) in ``df`` is filled from ``old_df``.
    Rows and columns that exist in only one of the frames are kept.

    Args:
        df: Frame holding the new or updated rows; it wins on conflicts.
        old_df: Frame holding the existing rows; it only fills the gaps.
        key: Column name, or list of column names for a composite key,
            present in both frames. It is passed straight to
            ``DataFrame.set_index``.

    Returns:
        The combined frame, indexed by ``key``. Neither input is modified.

    Raises:
        KeyError: If ``key`` names a column missing from either frame
            (raised by ``DataFrame.set_index``).
    """
    # set_index returns a new frame, so the caller's objects stay untouched.
    new_indexed = df.set_index(key)
    old_indexed = old_df.set_index(key)
    # Caller order matters: the frame combine_first is called on has priority.
    return new_indexed.combine_first(old_indexed)
if __name__ == "__main__":
    # Maps each table name to the column (or list of columns) used as the
    # upsert key for that table.
    files = {"potentials": "variables", "nodes": "name", "links": ["source", "target"]}
    for table, key in files.items():
        old_df_file = os.path.join("./source", f"{table}.tsv")
        new_df_file = os.path.join("./pgmx_output", f"pgmx_output_{table}.tsv")
        old_df = pd.read_csv(old_df_file, sep="\t")
        new_df = pd.read_csv(new_df_file, sep="\t")
        combined = upsert_df(new_df, old_df, key)
        # The merged table overwrites the existing source file in place;
        # missing values are written out as the literal text NULL.
        combined.to_csv(old_df_file, sep="\t", na_rep='NULL')