# main.py
import os
import re
import codecs
from sys import argv
import datetime
# Timestamp captured once, when the module is loaded; it is embedded in both
# file names below so the log and the results of one run share the same stamp.
time = datetime.datetime.now().strftime("%Y.%m.%d_%H.%M.%S")
LOG_FILE = f"[{time}]_LOG.log" # Log file name
output_filename = f"[{time}]_FOUND.txt" # Results file that extract_emails appends found addresses to
# Encodings tried in order until one decodes the whole file.
# NOTE(review): "latin-1" maps every possible byte, so it never raises
# UnicodeDecodeError; with this ordering the entries after it are never
# reached. The order is deliberately left unchanged here because reordering
# would change which decoding wins for existing inputs.
_CANDIDATE_ENCODINGS = (
    "utf-8",
    "latin-1",
    "koi8-r",
    "windows-1251",
    "ascii",
    "cp1251",
    "utf-16",
    "utf-32",
    "iso-8859-1",
)

# Address pattern. Group 1 is the whole address; the separator is either "@"
# or a spelled-out " at " / " (at) " form. The trailing alternative "|$" lets
# an address that ends the text (no newline after it) match as well.
_EMAIL_RE = re.compile(
    r'''(?:\.?)([\w\-_+#~!$&\'\.]+(?<!\.)
    (@|[ ]\(?[ ]?(at|AT)[ ]?\)?[ ])(?<!\.)
    [\w]+[\w\-\.]*\.[a-zA-Z-]{2,5})(?:[^\w]|$)''',
    re.VERBOSE,
)

# Characters stripped from a matched address before it is reported.
_UNWANTED_CHARS_RE = re.compile(r'[><;\'"%?&]')


def _find_emails(fpath, encoding):
    """Return the cleaned addresses found in *fpath* decoded as *encoding*.

    Raises UnicodeDecodeError if any part of the file cannot be decoded, in
    which case nothing has been printed or written yet.
    """
    found = []
    with open(fpath, "r", encoding=encoding) as f:
        for line in f:
            for match in _EMAIL_RE.findall(line):
                found.append(_UNWANTED_CHARS_RE.sub("", match[0]))
    return found


def _record_emails(fpath, found):
    """Print each address and append it to the results file and the log."""
    # The results file is opened even when nothing was found, so it exists
    # after any successfully decoded file (same as the previous behaviour).
    with codecs.open(output_filename, "a", encoding="utf-8") as mail_file:
        if not found:
            return
        # One open per scanned file instead of one per address.
        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            for cleaned_email in found:
                print(f"{fpath}: {cleaned_email}")
                mail_file.write(cleaned_email + "\n")
                log_file.write(
                    f"Найден email: {cleaned_email} в файле: {fpath}\n"
                )


def extract_emails(fpath, extensions):
    """Scan one file for e-mail addresses and record every hit.

    Args:
        fpath: Path of the file to scan.
        extensions: Iterable of filename suffixes to accept, or a falsy value
            to accept every file. A file whose name ends with none of them
            is skipped silently.

    Returns:
        None in every case. Hits are printed, appended to ``output_filename``
        and logged to ``LOG_FILE``.

    The file is decoded completely with one candidate encoding before any
    result is emitted. If decoding fails part-way through, the partial
    findings are discarded, so retrying with the next encoding cannot
    produce duplicate entries.
    """
    if extensions and not fpath.endswith(tuple(extensions)):
        return None

    for encoding in _CANDIDATE_ENCODINGS:
        try:
            found = _find_emails(fpath, encoding)
        except UnicodeDecodeError:
            continue  # Wrong guess; try the next encoding
        _record_emails(fpath, found)
        return None

    print(f"Не удалось определить кодировку файла {fpath}.")
def process_path(path, extensions):
    """Dispatch *path* to the scanner depending on what it points at.

    A directory is walked recursively and every file in it is handed to
    ``process_file``; a regular file is scanned directly with
    ``extract_emails``; anything else is reported as an invalid path.
    ``extensions`` is passed through unchanged.
    """
    if os.path.isdir(path):
        print(f"Обработка папки: {path}")
        for dirpath, _subdirs, filenames in os.walk(path):
            for name in filenames:
                process_file(os.path.join(dirpath, name), extensions)
        return

    if os.path.isfile(path):
        print(f"Обработка файла: {path}")
        extract_emails(path, extensions)
        return

    # Neither a directory nor a regular file
    print(f"Неверный путь: {path}")
def process_file(fpath, extensions):
    """Announce *fpath* on stdout, then scan it with ``extract_emails``."""
    announcement = f"Обработка файла: {fpath}"
    print(announcement)
    extract_emails(fpath, extensions)
if __name__ == "__main__":
    # Usage: main.py <file-or-directory> [suffix ...]
    if len(argv) < 2:
        print("Необходимо указать путь к файлу или папке.")
        # The bare exit() helper is injected by the site module for
        # interactive use and may be missing (e.g. "python -S" or some
        # frozen builds). Raising SystemExit is always available and exits
        # with the same status.
        raise SystemExit

    path = argv[1]
    # Any further arguments are filename suffixes to restrict the scan to;
    # None means "scan every file".
    extensions = argv[2:] or None

    # Create (or truncate) the log file before the run starts
    with open(LOG_FILE, "w", encoding="utf-8"):
        pass

    process_path(path, extensions)

    # "pause" is a cmd.exe built-in that keeps the console window open; on
    # other platforms the command does not exist, so skip it there.
    if os.name == "nt":
        os.system("pause")