-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
260 lines (239 loc) · 9.27 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import io
import abc
import zipfile
import psycopg2
import requests
import pandas as pd
from column_mapping import *
# NOTE: this class is also "out of place" because it does not use any of the
# builders; it is shaped this way because of how the historical data is laid out.
class inf_diario(cvm_dados):
    """Historical daily-report (INF_DIARIO) archive for a single year."""

    def __init__(self, ano):
        """Hand the yearly historical zip URL to the ``cvm_dados`` base class.

        ``ano`` is interpolated into the archive file name as-is.
        """
        hist_url = f"http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/HIST/inf_diario_fi_{ano}.zip"
        super().__init__(hist_url)
class cvm_helper:
    """
    Incomplete: the idea is for this to be an alternative to actually going
    to the website.

    List types:
        package
        group
        tag
    """

    def __init__(self, list_type="package"):
        """Request the ``{list_type}_list`` action and keep its ``result``.

        Raises ``Exception`` when the HTTP response is not OK or when the
        JSON payload reports ``success`` as falsy.
        """
        response = requests.get(f"http://dados.cvm.gov.br/api/3/action/{list_type}_list")
        if not response.ok:
            raise Exception("Erro na request, verifique o nome do tipo de lista")

        payload = response.json()
        if not payload["success"]:
            raise Exception("Erro na request, verifique o nome do tipo de lista")

        self.result = payload["result"]

    def get_package(self, resource_name):
        """Placeholder: not implemented yet, always returns ``None``."""
        return None
class cvm_pkg:
    """One package of the CVM open-data API, addressed through ``package_show``."""

    def __init__(self, pkg):
        """Store the package id and the ``package_show`` URL built from it."""
        self.pkg = pkg
        self.url = f"http://dados.cvm.gov.br/api/3/action/package_show?id={pkg}"

    def show_resources(self):
        """Fetch the package description and keep its resource list in ``self.resources``."""
        payload = requests.get(self.url).json()
        self.resources = payload["result"]["resources"]

    def get_all_data(self, concat=False):
        """Load every resource that has a data getter into ``self.dfs``.

        Resources are fetched first when ``show_resources`` has not run yet.
        Resources for which ``data_factory`` returns a falsy value are skipped.
        With ``concat=True`` the collected frames are also concatenated into
        ``self.df``.
        """
        self.dfs = []
        if not hasattr(self, 'resources'):
            self.show_resources()

        for resource in self.resources:
            print(resource["name"], resource["format"])
            getter = data_factory(resource["format"])
            if not getter:
                continue
            frame = getter(resource["url"]).make_df().df
            self.dfs.append(frame)

        if concat:
            self.df = pd.concat(self.dfs)

    def getone(self, resource):
        """
        Load a single resource into ``self.df``.

        Resource must have at least a `format` and an `url`. Nothing is
        assigned when ``data_factory`` returns a falsy value for the format.
        """
        getter = data_factory(resource["format"])
        if getter:
            loaded = getter(resource["url"]).make_df()
            self.df = loaded.df
class bcb_sgs:
    """Client for one series of the BCB SGS endpoint (``bcdata.sgs.<serie>``)."""

    def __init__(self, serie, ultimos=None, ini_fim=(None, None)):
        """Build the request URL.

        ``ultimos`` (when truthy) selects the ``/ultimos/<n>`` path and takes
        precedence over ``ini_fim``. Otherwise, when both items of ``ini_fim``
        are truthy, they are formatted with ``strftime('%d/%m/%Y')`` into the
        ``dataInicial`` / ``dataFinal`` query parameters.
        """
        endpoint = f"http://api.bcb.gov.br/dados/serie/bcdata.sgs.{serie}/dados"
        if ultimos:
            endpoint += f"/ultimos/{ultimos}"

        # Every variant of the request asks for JSON.
        endpoint += "?formato=json"

        if not ultimos and all(ini_fim):
            start, end = ini_fim[0], ini_fim[1]
            endpoint += f"&dataInicial={start.strftime('%d/%m/%Y')}"
            endpoint += f"&dataFinal={end.strftime('%d/%m/%Y')}"

        self.url = endpoint

    def get_data(self):
        """Download the series and store it in ``self.df``.

        The records are indexed by their ``data`` field, cast to float and
        divided by 100; the index is then parsed as ``%d/%m/%Y`` dates.
        """
        response = requests.get(self.url)
        frame = pd.DataFrame.from_records(response.json(), index="data")
        frame = frame.astype(float) / 100
        frame.index = pd.to_datetime(frame.index, format="%d/%m/%Y")
        self.df = frame
class db_handler():
    """Insert the DataFrame held by a ``cvm_pkg`` object into PostgreSQL.

    Parameters
    ----------
    dbname, user, password:
        Passed to ``psycopg2.connect`` unchanged.
    cvm_pkg:
        Object exposing ``df`` (the data to insert). Its ``pkg`` attribute,
        when present, is the package id used to pick the INSERT generator;
        a plain package-id string is not enough because ``df`` is required.
    """

    def __init__(self, dbname, user, password, cvm_pkg):
        self.dbname = dbname
        self.user = user
        self.password = password
        self.cvm_pkg = cvm_pkg
        # Defined up front so insert_data/__del__ can test them safely.
        self.conn = None
        self.query = None

    def _open_conn(self):
        """Open a new connection and keep it in ``self.conn``."""
        self.conn = psycopg2.connect(dbname=self.dbname,
                                     user=self.user,
                                     password=self.password
                                     )

    def _gen_insert(self):
        """
        Build ``self.query`` with the generator registered for the package.

        to-do: factory for different tables

        Raises ``ValueError`` when no generator exists for the package id.
        """
        # insert_factory dispatches on the package *id* string, so hand it the
        # id rather than the package object (an object never equals a string).
        pkg_id = getattr(self.cvm_pkg, "pkg", self.cvm_pkg)
        gen_query = insert_factory(pkg_id)
        if gen_query is None:
            raise ValueError(f"no INSERT generator registered for package {pkg_id!r}")
        self.query = gen_query(self.cvm_pkg.df).gen().query

    def insert_data(self):
        """Execute ``self.query`` and commit, always closing the connection.

        The query is generated on demand when ``_gen_insert`` has not been
        called yet.
        """
        if self.query is None:
            self._gen_insert()
        self._open_conn()
        try:
            c = self.conn.cursor()
            try:
                c.execute(self.query)
            finally:
                c.close()
            self.conn.commit()
        finally:
            # Close even when execute/commit raised, and forget the handle so
            # __del__ does not try to close it a second time.
            self.conn.close()
            self.conn = None

    def __del__(self):
        # getattr: __del__ may run on an instance whose __init__ never finished.
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
class insert_gen(abc.ABC):
    """Abstract base for objects that turn a DataFrame into an INSERT query."""

    def __init__(self, df):
        """Keep the DataFrame that ``gen`` will read from."""
        self.df = df

    @abc.abstractmethod
    def gen(self):
        """Build the query; concrete subclasses must override this method."""
class insert_inf_diario(insert_gen):
    """Build one multi-row ``INSERT INTO inf_diario`` statement from ``self.df``."""

    # Column order of the INSERT; ``self.df`` must expose every one of these.
    COLUMNS = (
        "data", "cnpj_fundo", "vl_cota",
        "pl", "captacao", "resgate",
        "tipo", "nr_cotst", "vl_total",
    )
    # Columns written as quoted string literals; the rest are written bare.
    QUOTED_COLUMNS = frozenset({"data", "cnpj_fundo", "tipo"})

    @staticmethod
    def _literal(value, quoted):
        """Render one cell as a SQL literal."""
        if pd.isna(value):
            # Missing cells would otherwise be emitted as nan/None text.
            return "NULL"
        if quoted:
            # Double embedded single quotes so the literal stays well-formed.
            return "'" + str(value).replace("'", "''") + "'"
        return str(value)

    def gen(self):
        """Store the statement in ``self.query`` and return ``self``.

        Raises ``ValueError`` when the DataFrame has no rows: an INSERT with
        an empty VALUES list is not valid SQL (the previous slicing silently
        produced a truncated statement in that case).
        """
        # NOTE(review): values are still inlined into the SQL text; switching
        # to driver-side parameters would need a change in db_handler too.
        rows = [
            "("
            + ", ".join(
                self._literal(row[col], col in self.QUOTED_COLUMNS)
                for col in self.COLUMNS
            )
            + ")"
            for _, row in self.df.iterrows()
        ]
        if not rows:
            raise ValueError("cannot build an INSERT statement from an empty DataFrame")
        self.query = (
            f"INSERT INTO inf_diario ({', '.join(self.COLUMNS)}) VALUES "
            + ", ".join(rows)
            + ";"
        )
        return self
class insert_cad_fnd:
    """Build one multi-row ``INSERT`` statement for the ``fi-cad`` columns.

    Exposes the same ``__init__(df)`` / ``gen()`` / ``query`` surface as the
    other insert generators in this module.
    """

    # NOTE(review): the target table is the same "inf_diario" used by the
    # daily-report generator although the column list below is different;
    # this looks like a copy-paste slip. Kept unchanged because the real
    # table name is not visible here - confirm and adjust.
    TABLE = "inf_diario"

    # Column order of the INSERT; ``self.df`` must expose every one of these.
    COLUMNS = (
        "cnpj_fundo", "tipo", "nome",
        "dt_reg", "dt_const", "cd_cvm",
        "dt_cancel", "sit", "dt_ini_sit",
        "dt_ini_ativ", "dt_ini_exerc", "dt_fim_exerc",
        "classe", "dt_ini_classe", "rentab_fundo",
        "condom", "cotas", "exclusivo",
        "trib_lprazo", "publico_alvo", "entid_invest",
        "taxa_perfm", "inf_taxa_perfm", "taxa_adm",
        "inf_taxa_adm", "diretor", "cnpj_admin",
        "admin", "pf_pj_gestor", "cpf_cnpj_gestor",
        "gestor", "cnpj_auditor", "auditor",
        "cnpj_custodiante", "custodiante", "cnpj_controlador",
        "controlador",
    )
    # Columns written bare (no quotes); every other column is quoted.
    NUMERIC_COLUMNS = frozenset({"cd_cvm", "taxa_perfm", "taxa_adm"})

    def __init__(self, df):
        # The class previously had no constructor, so gen() failed on
        # self.df. Same signature as insert_gen.__init__.
        # NOTE(review): consider subclassing insert_gen instead.
        self.df = df

    @staticmethod
    def _literal(value, quoted):
        """Render one cell as a SQL literal."""
        if pd.isna(value):
            # Missing cells would otherwise be emitted as nan/None text.
            return "NULL"
        if quoted:
            # Double embedded single quotes so the literal stays well-formed.
            return "'" + str(value).replace("'", "''") + "'"
        return str(value)

    def gen(self):
        """Store the statement in ``self.query`` and return ``self``.

        Each row becomes ``(v1, v2, ...)`` with quotes placed around the
        value (the previous template opened the first quote before the
        parenthesis and closed the last one after it, producing invalid SQL).

        Raises ``ValueError`` when the DataFrame has no rows.
        """
        # NOTE(review): values are still inlined into the SQL text; switching
        # to driver-side parameters would need a change in db_handler too.
        rows = [
            "("
            + ", ".join(
                self._literal(row[col], col not in self.NUMERIC_COLUMNS)
                for col in self.COLUMNS
            )
            + ")"
            for _, row in self.df.iterrows()
        ]
        if not rows:
            raise ValueError("cannot build an INSERT statement from an empty DataFrame")
        self.query = (
            f"INSERT INTO {self.TABLE} ({', '.join(self.COLUMNS)}) VALUES "
            + ", ".join(rows)
            + ";"
        )
        return self
class insert_factory:
    """Pick the INSERT generator class that matches a package id.

    Calling ``insert_factory(pkg)`` does not create an ``insert_factory``
    instance: ``__new__`` returns the generator class itself, or ``None``
    when the id is not recognised.
    """

    def __new__(cls, pkg):
        if pkg == "fi-doc-inf_diario":
            return insert_inf_diario
        if pkg == "fi-cad":
            return insert_cad_fnd
        # Unknown package id: no generator available.
        return None
if __name__ == "__main__":
    # Ad-hoc exploration script: downloads the daily-report package, prints a
    # summary of it, then lists the resources of the fund-registry package.

    # # fetch the funds' historical series
    # dfs = []
    # for i in range(2005, 2021, 1):
    #     a = inf_diario(i)
    #     a.get_data()
    #     print(a.list_files())
    #     for i in a.list_files():
    #         dfs.append(pd.read_csv(io.BytesIO(a.select_file(i)), sep=";", encoding="ansi"))
    #         print(len(dfs))
    # print(len(dfs))
    # df = pd.concat(dfs)
    # df = df.rename(columns=lambda x: x.lower())
    # df = df.rename({"vl_patrim_liq": "vl_pl",
    #                 "captc_dia": "cpt",
    #                 "resg_dia": "rgt",
    #                 "nr_cotst": "n_cts",
    #                 })
    # print(df.shape)
    # print(df.info())

    # print(cvm_helper().result)

    diario_pkg = cvm_pkg("fi-doc-inf_diario")
    diario_pkg.show_resources()
    diario_pkg.get_all_data()

    diario_df = pd.concat(diario_pkg.dfs)
    # Lower-case the column names first, then apply the project column mapping.
    diario_df = diario_df.rename(columns=lambda name: name.lower())
    diario_df = diario_df.rename(columns=inf_diario_cols)
    diario_df["data"] = diario_df["data"].dt.strftime("%Y-%m-%d")
    # df = df.rename(columns=lambda x: x.lower())
    # df = df.rename({"vl_patrim_liq": "vl_pl",
    #                 "captc_dia": "cpt",
    #                 "resg_dia": "rgt",
    #                 "nr_cotst": "n_cts",
    #                 })
    print(diario_df.shape)
    print(diario_df.info())
    print(diario_df.head(2))
    print(diario_df.tail(2))

    # df = pd.read_csv("fi_doc_info_diario.csv", nrows=5)
    # print(df.shape)
    # print(df.info())
    # print(df.head(2))
    # print(df.tail(2))

    # a = cvm_helper()
    # a.result
    #
    # fi-cad history
    cad_pkg = cvm_pkg("fi-cad")
    cad_pkg.show_resources()
    cad_pkg.resources
    for resource in cad_pkg.resources:
        print(resource["name"])

    # b.getone(b.resources[1])
    # b.df.info()
    # b.df = b.df.drop(["VL_PATRIM_LIQ", "DT_PATRIM_LIQ"], axis=1)
    # b.df["CD_CVM"] = b.df["CD_CVM"].astype("Int64")
    # b.df["CONDOM"] = df["CONDOM"].apply(lambda x: x[0] if type(x)==str else x)
    # # both are 23 anyway
    # if b.df["SIT"].str.len().max() > 23:
    #     print("bruH sit")
    # if b.df["CLASSE"].str.len().max() > 23:
    #     print("bruH classe")
    # if b.df["RENTAB_FUNDO"].str.len().max() > 55:
    #     print("bruh rentab_fundo")
    # if b.df["PUBLICO_ALVO"].str.len().max() > 13:
    #     print("bruh rentab_fundo")
    # b.df = b.df.rename(columns=lambda x: x.lower()).rename(columns=cad_fnd_cols)
    # b.df.info()
    #
    # [14,17,18,20,22,24,27,37,38]
    # # b.df.keys()
    # # # if the zip contains a txt instead of a csv, this is never checked
    # # # in get_all_data