Skip to content

Commit

Permalink
updated notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
HercoZauZau committed Jun 1, 2024
1 parent e3bfbac commit ee31570
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 47 deletions.
30 changes: 9 additions & 21 deletions notebooks/01. data_collect/get_repos_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,17 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"id": "3cad27e9",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables from .env file\n",
"load_dotenv()\n",
"\n",
"dir_name = os.getenv('DIR_NAME')"
"dir_name = os.getenv('DIR_NAME')\n",
"base_path = f'../../data/processed/{dir_name}'\n",
"os.makedirs(base_path, exist_ok=True)"
]
},
{
Expand Down Expand Up @@ -124,35 +126,21 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "bc9dfc19-e98d-4619-8856-273d97882f57",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Progress Bar: 22%|██▏ | 378/1729 [10:10<28:08, 1.25s/it] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Request Error: 502\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Progress Bar: 100%|██████████| 1729/1729 [39:41<00:00, 1.38s/it] \n"
"Progress Bar: 100%|██████████| 1734/1734 [41:48<00:00, 1.45s/it] \n"
]
}
],
"source": [
"# Load existing data from CSV file into a DataFrame\n",
"users_ids = pd.read_csv(f\"../../data/processed/{dir_name}/users_ids.csv\", encoding='latin1')\n",
"users_ids = pd.read_csv(f\"{base_path}/users_ids.csv\", encoding='latin1')\n",
"users_ids = users_ids['user_id'].to_list()\n",
"\n",
"# Retrieve repository data for user IDs\n",
Expand All @@ -161,13 +149,13 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "7d89602d-1477-4d4a-95e4-ee2dbcedce0c",
"metadata": {},
"outputs": [],
"source": [
"# Save repository data to a CSV file\n",
"file_name = f\"../../data/processed/{dir_name}/repos_data.csv\"\n",
"file_name = f\"{base_path}/repos_data.csv\"\n",
"users_data = pd.DataFrame(final_data)\n",
"users_data.to_csv(file_name, index=False, encoding='latin1')\n",
"\n",
Expand Down
Loading

0 comments on commit ee31570

Please sign in to comment.