Skip to content
This repository was archived by the owner on May 23, 2023. It is now read-only.

Commit

Permalink
switch to custom logger (1)
Browse files: browse the repository at this point in the history
  • Loading branch information
aaronkaplan committed May 20, 2021
1 parent 876edf5 commit 8f836d5
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 29 deletions.
9 changes: 4 additions & 5 deletions api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,15 +1019,14 @@ async def import_csv_spycloud(parent_ticket_id: str,
logger.info("skipping item (%s, %s), since it already existed in the DB." % (email, password))
continue # next item
except Exception as ex:
logger.exception(
"Could not deduplicate item (%s, %s). Skipping this row. Reason: %s" % (email, password, str(ex)))
logger.error("Could not deduplicate item (%s, %s). Skipping this row. Reason: %s" % (email, password, str(ex)))
continue
try:
item = enrich(item, leak_id = leak_id)
item.leak_id = leak_id
except Exception as ex:
errmsg = "Could not enrich item (%s, %s). Skipping this row. Reason: %s" % (email, password, str(ex),)
logger.exception(errmsg)
logger.error(errmsg)
item.error_msg = errmsg
item.needs_human_intervention = True
item.notify = False
Expand All @@ -1044,7 +1043,7 @@ async def import_csv_spycloud(parent_ticket_id: str,
db_output.process(out_item)
except Exception as ex:
errmsg = "Could not store row. Skipping this row. Reason: %s" % str(ex)
logger.exception(errmsg)
logger.error(errmsg)
out_item.error_msg = errmsg
out_item.needs_human_intervention = True
out_item.notify = False
Expand Down Expand Up @@ -1158,7 +1157,7 @@ async def import_csv_with_leak_id(leak_id: int,
t1 = time.time()
d = round(t1 - t0, 3)
num_deduped = len(inserted_ids)
logger.info("inserted %d rows, %d duplicates, %d new rows" % (i, i - num_deduped, num_deduped))
# logger.info("inserted %d rows, %d duplicates, %d new rows" % (i, i - num_deduped, num_deduped))

# now get the data of all the IDs / dedup
try:
Expand Down
2 changes: 1 addition & 1 deletion lib/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def getlogger(name: str, log_level=logging.INFO) -> logging.Logger:
ch.setFormatter(formatter)
logger.addHandler(ch)

logger.info('Setting up logger: DONE')
logger.info('Logger ready')

return logger

Expand Down
40 changes: 19 additions & 21 deletions modules/collectors/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""importer.parser """


import logging
from lib.helpers import getlogger
from pathlib import Path
import csv
import time
Expand All @@ -11,6 +11,8 @@

debug = True

logger = getlogger(__name__)


# noinspection PyTypeChecker
def peek_into_file(fname: Path) -> csv.Dialect:
Expand All @@ -23,16 +25,16 @@ def peek_into_file(fname: Path) -> csv.Dialect:

with fname.open(mode='r') as f:
sniffer = csv.Sniffer()
logging.debug("has apikeyheader: %s", sniffer.has_header(f.readline()))
logger.debug("has apikeyheader: %s", sniffer.has_header(f.readline()))
f.seek(0)
dialect = sniffer.sniff(f.readline(50))
logging.debug("delim: '%s'", dialect.delimiter)
logging.debug("quotechar: '%s'", dialect.quotechar)
logging.debug("doublequote: %s", dialect.doublequote)
logging.debug("escapechar: '%s'", dialect.escapechar)
logging.debug("lineterminator: %r", dialect.lineterminator)
logging.debug("quoting: %s", dialect.quoting)
logging.debug("skipinitialspace: %s", dialect.skipinitialspace)
logger.debug("delim: '%s'", dialect.delimiter)
logger.debug("quotechar: '%s'", dialect.quotechar)
logger.debug("doublequote: %s", dialect.doublequote)
logger.debug("escapechar: '%s'", dialect.escapechar)
logger.debug("lineterminator: %r", dialect.lineterminator)
logger.debug("quoting: %s", dialect.quoting)
logger.debug("skipinitialspace: %s", dialect.skipinitialspace)
return dialect


Expand All @@ -52,23 +54,23 @@ def parse_file(self, fname: Path, leak_id: int = None, csv_dialect=None) -> pd.D
a DataFrame
number of errors while parsing
"""
logging.info("Parsing file %s..." % fname)
logger.info("Parsing file %s..." % fname)
try:
if csv_dialect:
dialect = csv_dialect
else:
dialect = peek_into_file(fname) # try to guess
df = pd.read_csv(fname, dialect=dialect, error_bad_lines=False, warn_bad_lines=True) # , usecols=range(2))
logging.debug(df.head())
logging.debug(df.info())
logging.debug("Parsing file 2...")
logger.debug(df.head())
logger.debug(df.info())
logger.debug("Parsing file 2...")
df.insert(0, 'leak_id', leak_id)
logging.debug(df.head())
logging.debug("parsed %s", fname)
logger.debug(df.head())
logger.debug("parsed %s", fname)
return df

except Exception as ex:
logging.error("could not pandas.read_csv(%s). Reason: %s. Skipping file." % (fname, str(ex)))
logger.error("could not pandas.read_csv(%s). Reason: %s. Skipping file." % (fname, str(ex)))
raise ex # pass it on

def normalize_data(self, df: pd.DataFrame, leak_id: int = None) -> pd.DataFrame:
Expand All @@ -85,13 +87,9 @@ def normalize_data(self, df: pd.DataFrame, leak_id: int = None) -> pd.DataFrame:

if __name__ == "__main__":

logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)
if debug:
logging.getLogger().setLevel(logging.DEBUG)

p = BaseParser()
t0 = time.time()
# p.parse_recursively('test_leaks', '*.txt')
t1 = time.time()
logging.info("processed everything in %f [sec]", (t1 - t0))
logger.info("processed everything in %f [sec]", (t1 - t0))
8 changes: 6 additions & 2 deletions modules/output/db.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Database output module. Stores an IDF item to the DB."""
import logging
from lib.helpers import getlogger

import psycopg2
import psycopg2.extras

Expand All @@ -8,6 +9,9 @@
from models.outdf import LeakData


logger = getlogger(__name__)


class PostgresqlOutput(BaseOutput):
dbconn = None

Expand Down Expand Up @@ -46,6 +50,6 @@ def process(self, data: LeakData) -> bool:
leak_data_id = int(cur.fetchone()['id'])
print("leak_data_id: %s" % leak_data_id)
except psycopg2.Error as ex:
logging.error("%s(): error: %s" % (self.process.__name__, ex.pgerror))
logger.error("%s(): error: %s" % (self.process.__name__, ex.pgerror))
raise ex
return True
4 changes: 4 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from lib.helpers import getlogger

import urllib.parse
import uuid
import unittest
Expand All @@ -11,6 +13,7 @@
VALID_AUTH = {'x-api-key': 'random-test-api-key'}
INVALID_AUTH = {'x-api-key': 'random-test-api-XXX'}

logger = getlogger(__name__)
client = TestClient(app) # , base_url='http://localhost:8080/')


Expand Down Expand Up @@ -428,6 +431,7 @@ def test_import_csv_with_leak_id():
fixtures_file = "./tests/fixtures/data.csv"
f = open(fixtures_file, "rb")
response = client.post('/import/csv/by_leak/%s' % (_id,), files = {"_file": f}, headers = VALID_AUTH)
logger.info("response = %r" % response.text)
assert 200 <= response.status_code < 300
assert response.json()['meta']['count'] >= 0

Expand Down

0 comments on commit 8f836d5

Please sign in to comment.