-
Notifications
You must be signed in to change notification settings - Fork 37
/
short-sale-volume-data-importer.py
executable file
·125 lines (104 loc) · 3.63 KB
/
short-sale-volume-data-importer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env -S uv run --quiet --script
# /// script
# dependencies = [
# "pandas",
# ]
# ///
"""
A simple script to import short sale volume data into SQLite database
The data is available from https://www.finra.org/finra-data/browse-catalog/short-sale-volume-data/daily-short-sale-volume-files
Usage:
./short-sale-volume-data-importer.py -h
./short-sale-volume-data-importer.py -v # To log INFO messages
./short-sale-volume-data-importer.py -vv # To log DEBUG messages
"""
import logging
import sqlite3
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from pathlib import Path
from sqlite3 import IntegrityError
import pandas as pd
from common.logger import setup_logging
def parse_args():
    """Declare the command-line options and parse the process arguments.

    Options:
        -v / --verbose   counted flag stored as ``verbose`` (0 when absent).
        -i / --input     required; converted to ``Path``.
        -d / --database  converted to ``Path``; falls back to
                         ``short_sale_volume.db`` when not given.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    cli = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=__doc__,
    )

    # One (flags, keyword options) entry per option, registered in this order
    # so the generated help text lists them the same way.
    option_table = (
        (
            ("-v", "--verbose"),
            {
                "action": "count",
                "default": 0,
                "dest": "verbose",
                "help": "Increase verbosity of logging output",
            },
        ),
        (
            ("-i", "--input"),
            {
                "type": Path,
                "required": True,
                "help": "Path to the input directory containing short sale volume data files",
            },
        ),
        (
            ("-d", "--database"),
            {
                "type": Path,
                "default": "short_sale_volume.db",
                "help": "Path to the SQLite database file (default: short_sale_volume.db)",
            },
        ),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    return cli.parse_args()
def create_database_schema(db_path):
    """Create the ``short_sale_volume`` table if it doesn't exist.

    The table has one row per (date, symbol) pair, enforced by a composite
    primary key. Calling this on a database that already has the table is a
    no-op.

    Args:
        db_path: Location of the SQLite database file, passed to
            ``sqlite3.connect``.
    """
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` only wraps the statement in a transaction (commit on
        # success, rollback on error); it does not close the connection, so
        # the close is done explicitly in `finally`.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS short_sale_volume (
                    date TEXT NOT NULL,
                    symbol TEXT NOT NULL,
                    short_volume INTEGER,
                    short_exempt_volume INTEGER,
                    total_volume INTEGER,
                    market TEXT,
                    PRIMARY KEY (date, symbol)
                )
            """)
    finally:
        conn.close()
def import_data(input_file, db_path):
    """Import one pipe-delimited short sale volume file into SQLite.

    The file is read with pandas, its columns are renamed to the database
    column names, rows without a symbol are dropped, and the remaining rows
    are appended to the ``short_sale_volume`` table.

    Args:
        input_file: Path of the ``|``-delimited text file to read.
        db_path: Path of the SQLite database file to append to.

    Raises:
        ValueError: If the file lacks any of the expected columns.

    An ``sqlite3.IntegrityError`` raised while writing is logged as an error
    and not re-raised, so the caller can carry on with the next file.
    """
    # Source header name -> database column name
    column_mapping = {
        "Date": "date",
        "Symbol": "symbol",
        "ShortVolume": "short_volume",
        "ShortExemptVolume": "short_exempt_volume",
        "TotalVolume": "total_volume",
        "Market": "market",
    }
    required_columns = list(column_mapping.values())

    # Only treat truly empty fields as missing. With pandas' default NA
    # tokens a literal symbol such as "NA" would be parsed as NaN and then
    # silently discarded by the dropna() below.
    df = pd.read_csv(
        input_file,
        delimiter="|",
        keep_default_na=False,
        na_values=[""],
    )
    df = df.rename(columns=column_mapping)

    # Validate required columns
    if not all(col in df.columns for col in required_columns):
        raise ValueError(
            f"Input file must contain these columns: {required_columns}"
        )

    # Filter out rows with missing symbols and keep only the columns the
    # table defines, so an unexpected extra column cannot break the insert.
    df = df.dropna(subset=["symbol"])[required_columns]

    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits or rolls back but never closes the connection;
        # the explicit close in `finally` releases the database handle.
        with conn:
            df.to_sql("short_sale_volume", conn, if_exists="append", index=False)
    except IntegrityError as e:
        logging.error(f"Error importing data: {str(e)}")
    else:
        logging.info(f"Successfully imported data from {input_file} to {db_path}")
    finally:
        conn.close()
def main(args):
    """Prepare the database, then import every ``*.txt`` file in ``args.input``.

    Args:
        args: Parsed options; ``args.database`` and ``args.input`` are used
            as ``Path`` objects.
    """
    database = args.database

    # Ensure the folder that will hold the SQLite file is present before
    # the schema is created in it.
    database.parent.mkdir(parents=True, exist_ok=True)
    create_database_schema(database)

    # Hand each text file found directly inside the input directory to the
    # importer, one at a time.
    for txt_path in args.input.glob("*.txt"):
        import_data(txt_path, database)
if __name__ == "__main__":
    # Script entry point: parse the options, configure logging from the
    # verbosity count, then run the import.
    cli_args = parse_args()
    setup_logging(cli_args.verbose)
    main(cli_args)