-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtsv_to_neo4j.py
34 lines (22 loc) · 970 Bytes
/
tsv_to_neo4j.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
import os
# Default directory that receives one TSV file per node/link label.
output_folder = "./neo4j"


def _write_label_files(df: pd.DataFrame, target_dir) -> None:
    """Write one ``<label>.tsv`` file into *target_dir* per distinct ``label`` in *df*."""
    labels = df["label"]
    for label in labels.unique():
        subset = df[labels == label]
        # str() lets non-string labels (e.g. integers) produce a file name
        # instead of failing on a missing ``.lower()`` method.
        file_name = f"{str(label).lower()}.tsv"
        subset.to_csv(
            os.path.join(target_dir, file_name),
            sep="\t",
            index=False,
            na_rep="NULL",
        )


def to_neo4j(nodes_df: pd.DataFrame, links_df: pd.DataFrame, output_dir=None):
    """
    Split the node and link tables by label and write one TSV file per label.

    Each distinct value of the ``label`` column yields a tab-separated file
    named ``<label lower-cased>.tsv`` holding the rows with that label. All
    columns are written, the index is omitted, and missing values are written
    as the literal ``NULL``.

    Note: node files and link files share one directory, so a node label and
    a link label that lower-case to the same name write to the same path and
    the link file replaces the node file.

    Args:
        nodes_df (pd.DataFrame): node table; must contain a ``label`` column
            (other columns, e.g. name, type, role, states, are passed through)
        links_df (pd.DataFrame): link table; must contain a ``label`` column
            (other columns, e.g. source, target, are passed through)
        output_dir: directory to write into; defaults to the module-level
            ``output_folder``. It is created if it does not exist.

    Returns:
        None

    Raises:
        KeyError: if either table has no ``label`` column.
        ValueError: if any row of either table has a missing label.
    """
    # Validate both tables up front so a bad row cannot leave a half-written export.
    for table_name, table in (("nodes_df", nodes_df), ("links_df", links_df)):
        if table["label"].isna().any():
            raise ValueError(
                f"{table_name} has rows with a missing 'label'; "
                "every row needs a label to choose its output file"
            )

    target_dir = output_folder if output_dir is None else output_dir
    # to_csv does not create parent directories; without this a fresh
    # checkout fails with OSError on the first write.
    os.makedirs(target_dir, exist_ok=True)

    _write_label_files(nodes_df, target_dir)
    _write_label_files(links_df, target_dir)
if __name__ == "__main__":
    # Script entry point: read the tab-separated node table and then the link
    # table from ./source, and hand both to to_neo4j for export.
    nodes, links = (
        pd.read_csv(tsv_path, sep="\t")
        for tsv_path in ("./source/nodes.tsv", "./source/links.tsv")
    )
    to_neo4j(nodes, links)