Skip to content

Commit

Permalink
feat: Completed migration from successful_applications.txt to .json
Browse files Browse the repository at this point in the history
  • Loading branch information
Vel-San committed Feb 17, 2024
1 parent 185a4f3 commit 4952cb0
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 43 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,9 @@ The filter list is designed to exclude listings based on specified keywords. Sim

## Logging

Successful applications are recorded in `logging/successful_applications.txt`.
Successful applications are recorded in `logging/successful_applications.json`.

**Important**: This log prevents reapplication to the same flats. Do not delete it unless you intend to reapply to all available flats.
**Important**: This log prevents reapplication to the same flats. ***DO NOT DELETE*** it unless you intend to re-apply to all available flats.

## Additional Information

Expand All @@ -177,8 +177,8 @@ As of now, there are no timeouts, bot checks, or captchas on the website (which
- [X] Make a docker container out of the bot
- [X] CI/CD for Github
- [X] Fix test-data
- [ ] Change "successful_applications.txt" to JSON type
- [ ] Add support for multi user wbm_config files
- [X] Change "successful_applications.txt" to JSON type
- [ ] ~~Add support for multi user wbm_config files~~
- [ ] Add "excluded_applications.json" that shows all applications that were excluded by the filter
- [ ] Automatically detect if internet network connection is down and pause/restart once back
- [ ] Make a compiled executable of the bot using PyInstaller
2 changes: 1 addition & 1 deletion wbmbot_v2/helpers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
wbm_test_config_name = f"{os.getcwd()}/test-data/wbm_test_config.json"

# Applications Logger that we applied for
log_file_path = f"{os.getcwd()}/logging/successful_applications.txt"
log_file_path = f"{os.getcwd()}/logging/successful_applications.json"

# Script Logging
script_log_path = f"{os.getcwd()}/logging/wbmbot-v2_{today}.log"
Expand Down
41 changes: 11 additions & 30 deletions wbmbot_v2/helpers/webDriverOperations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from utility import io_operations
from utility import io_operations, misc_operations

__appname__ = os.path.splitext(os.path.basename(__file__))[0]
color_me = wbm_logger.ColoredLogger(__appname__)
Expand Down Expand Up @@ -314,29 +314,6 @@ def find_flats(web_driver):
return web_driver.find_elements(By.CSS_SELECTOR, ".row.openimmo-search-list-item")


def check_flat_already_applied(flat_obj, email, log):
    """
    Tell whether the log already records an application for this flat.

    Parameters:
        flat_obj (object): Flat whose ``title`` and ``hash`` attributes
            identify the log entry.
        email (str): Applicant email; surrounding whitespace is stripped
            before matching.
        log: Log content searched with ``in``. The caller in this file
            passes the text returned by ``io_operations.read_log_file``.

    Returns:
        bool: True when the expected entry text occurs in ``log``.
    """

    applicant = email.strip()
    # The date prefix of a log line is deliberately not part of the match.
    expected_entry = (
        f"[{applicant}] - Application sent for flat: {flat_obj.title} | {flat_obj.hash}"
    )
    return expected_entry in log


def contains_filter_keywords(flat_elem, user_filters):
    """
    Check if the flat contains any of the filter keywords and return the keywords.

    Each keyword is stripped of surrounding whitespace and compared as a
    case-insensitive substring against ``flat_elem.text``. Keywords that are
    empty after stripping are skipped: an empty string is a substring of every
    text and would otherwise flag every flat.

    Parameters:
        flat_elem (object): Element whose ``text`` attribute holds the flat's
            listing text.
        user_filters (iterable | None): Keywords to look for; ``None`` or an
            empty collection means "no filters".

    Returns:
        tuple: ``(found, keywords)`` where ``keywords`` lists the matching
        filters exactly as they were given and ``found`` is True when that
        list is non-empty.
    """

    if not user_filters:
        return (False, [])

    # Read and lower-case the listing text once instead of once per keyword
    flat_text = flat_elem.text.lower()

    keywords_found = []
    for keyword in user_filters:
        needle = str(keyword).strip().lower()
        if needle and needle in flat_text:
            keywords_found.append(keyword)

    # Return a tuple of boolean and keywords found
    return (bool(keywords_found), keywords_found)


def apply_to_flat(
web_driver,
flat_element,
Expand Down Expand Up @@ -410,7 +387,6 @@ def process_flats(
constants.offline_angebote_path,
f"{constants.now}/page_{current_page}",
)
log_content = io_operations.read_log_file(constants.log_file_path)

for i, flat_elem in enumerate(all_flats):
time.sleep(2) # Sleep to mimic human behavior and avoid detection
Expand All @@ -427,11 +403,15 @@ def process_flats(

for email in user_profile.emails:
# Proceed to check whether we should apply to the flat or skip
if not check_flat_already_applied(flat_obj, email, log_content):
if contains_filter_keywords(flat_elem, user_profile.filter)[0]:
if not io_operations.check_flat_already_applied(
constants.log_file_path, email, flat_obj
):
if misc_operations.contains_filter_keywords(
flat_elem, user_profile.filter
)[0]:
LOG.warning(
color_me.yellow(
f"Ignoring flat '{flat_obj.title}' because it contains filter keyword(s) --> {contains_filter_keywords(flat_elem, user_profile.filter)[1]}"
f"Ignoring flat '{flat_obj.title}' because it contains filter keyword(s) --> {misc_operations.contains_filter_keywords(flat_elem, user_profile.filter)[1]}"
)
)
continue
Expand All @@ -450,8 +430,9 @@ def process_flats(
email,
test,
)
log_entry = f"[{constants.today}] - [{email}] - Application sent for flat: {flat_obj.title} | {flat_obj.hash}\n"
io_operations.write_log_file(constants.log_file_path, log_entry)
io_operations.write_log_file(
constants.log_file_path, email, flat_obj
)
LOG.info(color_me.green("Done!"))
time.sleep(1.5)
web_driver.get(start_url)
Expand Down
93 changes: 85 additions & 8 deletions wbmbot_v2/utility/io_operations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import os
import re

from helpers import constants
from logger import wbm_logger
from utility import interaction

Expand Down Expand Up @@ -58,16 +60,62 @@ def initialize_application_logger(log_file: str):
pass


def write_log_file(log_file: str, entry: str):
    """
    Append ``entry`` verbatim to the log file.

    Parameters:
        log_file (str): Path of the log file; it is opened in append mode.
        entry (str): Text to append. No newline is added by this function.

    Returns:
        None
    """
    with open(log_file, mode="a") as log_handle:
        log_handle.write(entry)
def _space_before_digits(text: str) -> str:
    """Insert a space between a non-digit character and a digit that directly follows it."""
    return re.sub(r"(\D)(\d)", r"\1 \2", text)


def write_log_file(log_file: str, email: str, flat_obj):
    """
    Write a nested dictionary log entry to the log file.

    The log maps each email to the flats applied for with it, keyed by the
    flat's hash: ``{email: {flat_hash: {details}}}``. An entry that already
    exists for the same email and hash is left untouched. The file is
    rewritten on every call.

    Parameters:
        log_file (str): The path to the JSON log file.
        email (str): The email associated with the log entry. Surrounding
            whitespace is stripped so the stored key matches the lookup done
            by check_flat_already_applied().
        flat_obj (object): An object containing information about the flat
            (``hash``, ``title``, ``street``, ``zip_code``, ``total_rent``,
            ``size``, ``rooms`` and ``wbs`` are read).

    Returns:
        None
    """

    # Read existing log entries; a missing, empty or corrupt file starts a fresh log
    try:
        with open(log_file, "r") as json_file:
            existing_log = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        existing_log = {}

    # Valid JSON that is not an object (e.g. "null" or a list) cannot hold entries
    if not isinstance(existing_log, dict):
        existing_log = {}

    # check_flat_already_applied() looks the email up stripped, so store it stripped
    email = email.strip()

    # Create the per-email mapping on first use, then add the flat only once
    applied_flats = existing_log.setdefault(email, {})
    if flat_obj.hash not in applied_flats:
        applied_flats[flat_obj.hash] = {
            "date": constants.today.isoformat(),
            "title": flat_obj.title,
            "street": flat_obj.street,
            "zip_code": flat_obj.zip_code,
            "rent": _space_before_digits(flat_obj.total_rent),
            "size": _space_before_digits(flat_obj.size),
            "rooms": _space_before_digits(flat_obj.rooms),
            "wbs?": flat_obj.wbs,
        }

    # Write the updated log back to the file
    with open(log_file, "w") as json_file:
        json.dump(existing_log, json_file, indent=4, ensure_ascii=False)


def read_log_file(log_file: str):
    """Read and return the content of the log file."""
    with open(log_file, "r") as myfile:
        return myfile.read()


def create_directory_if_not_exists(directory_path: str) -> None:
Expand All @@ -84,3 +132,32 @@ def create_directory_if_not_exists(directory_path: str) -> None:
os.makedirs(directory_path, exist_ok=True)
except OSError as e:
LOG.error(color_me.red(f"Error to create directory ({directory_path}): {e}"))


def check_flat_already_applied(log_file: str, email: str, flat_obj):
    """
    Check if an application for the flat has already been sent.

    Parameters:
        log_file (str): The path to the JSON log file.
        email (str): The email associated with the log entry; surrounding
            whitespace is ignored.
        flat_obj (object): An object containing information about the flat;
            only its ``hash`` attribute is read.

    Returns:
        bool: True if an application has already been sent, False otherwise.
    """

    try:
        with open(log_file, "r") as json_file:
            log = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        # A missing, empty or corrupt log means nothing has been recorded
        return False

    # Entries are looked up as log[email][flat_hash]; any other JSON shape
    # (e.g. "null" or a list) cannot contain a recorded application.
    if not isinstance(log, dict):
        return False

    applied_flats = log.get(email.strip())
    if not isinstance(applied_flats, dict):
        return False

    # Flats are keyed by hash, so a membership test replaces scanning the items
    return flat_obj.hash in applied_flats
12 changes: 12 additions & 0 deletions wbmbot_v2/utility/misc_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
def contains_filter_keywords(flat_elem, user_filters):
    """
    Check if the flat contains any of the filter keywords and return the keywords.

    Each keyword is stripped of surrounding whitespace and compared as a
    case-insensitive substring against ``flat_elem.text``. Keywords that are
    empty after stripping are skipped: an empty string is a substring of every
    text and would otherwise flag every flat.

    Parameters:
        flat_elem (object): Element whose ``text`` attribute holds the flat's
            listing text.
        user_filters (iterable | None): Keywords to look for; ``None`` or an
            empty collection means "no filters".

    Returns:
        tuple: ``(found, keywords)`` where ``keywords`` lists the matching
        filters exactly as they were given and ``found`` is True when that
        list is non-empty.
    """

    if not user_filters:
        return (False, [])

    # Read and lower-case the listing text once instead of once per keyword
    flat_text = flat_elem.text.lower()

    keywords_found = []
    for keyword in user_filters:
        needle = str(keyword).strip().lower()
        if needle and needle in flat_text:
            keywords_found.append(keyword)

    # Return a tuple of boolean and keywords found
    return (bool(keywords_found), keywords_found)

0 comments on commit 4952cb0

Please sign in to comment.