-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: optimize triplestore upload process
Optimize the upload_sparql_updates function: add connection pooling through a TriplestoreConnection singleton; implement hybrid JSON/Redis caching with CacheManager; speed up cache lookups by loading all data into memory at initialization; add graceful-shutdown handling for cache persistence; and add informative progress messages. The main optimizations reduce database-connection overhead and improve cache performance for large-scale uploads.
- Loading branch information
arcangelo7
committed
Jan 28, 2025
1 parent
491cc1e
commit 648091e
Showing
4 changed files
with
557 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
import json | ||
import os | ||
import signal | ||
import atexit | ||
from typing import Set | ||
import redis | ||
from redis.exceptions import ConnectionError as RedisConnectionError | ||
|
||
class CacheManager:
    """Tracks processed filenames in a JSON file, mirrored to Redis when available.

    The JSON file is the durable store; Redis (when reachable) provides a
    shared fast lookup set. All known entries are also kept in an in-memory
    set so membership tests never hit Redis.
    """

    REDIS_DB = 4  # Redis database index used for this cache
    REDIS_KEY = "processed_files"  # Redis set key holding processed filenames

    def __init__(self, json_cache_file: str, redis_host: str = 'localhost', redis_port: int = 6379):
        """
        Args:
            json_cache_file (str): Path of the JSON file used for persistence.
            redis_host (str): Redis server hostname.
            redis_port (int): Redis server port.
        """
        self.json_cache_file = json_cache_file
        self._redis = None
        self.redis_host = redis_host
        self.redis_port = redis_port
        self.processed_files: Set[str] = set()
        # Guard so _cleanup runs at most once: the signal handler calls it
        # and then exits, which would otherwise re-run it via atexit and
        # save the cache twice.
        self._cleanup_done = False

        # Load cache contents from the JSON file and Redis
        self._init_cache()

        # Persist the cache on normal exit and on SIGINT/SIGTERM
        self._register_shutdown_handlers()

    def _init_redis(self) -> None:
        """Open the Redis connection, falling back to JSON-only mode on failure."""
        try:
            self._redis = redis.Redis(
                host=self.redis_host,
                port=self.redis_port,
                db=self.REDIS_DB,
                decode_responses=True  # return str instead of bytes
            )
            self._redis.ping()  # verify the server is actually reachable
        except RedisConnectionError:
            print("Warning: Redis non disponibile. Using only JSON cache.")
            self._redis = None

    def _init_cache(self) -> None:
        """Populate the in-memory set from the JSON file and sync it with Redis."""
        self._init_redis()

        # Load from the JSON file; a corrupt file means we start empty
        # instead of crashing at startup.
        if os.path.exists(self.json_cache_file):
            try:
                with open(self.json_cache_file, 'r', encoding='utf8') as f:
                    self.processed_files.update(json.load(f))
            except json.JSONDecodeError:
                print(f"Warning: could not parse {self.json_cache_file}; starting with an empty JSON cache.")

        # If Redis is available, merge in both directions so the JSON file
        # and the Redis set end up agreeing.
        if self._redis:
            # Existing entries already stored in Redis
            existing_redis_files = self._redis.smembers(self.REDIS_KEY)
            # Push the JSON-known files into Redis
            if self.processed_files:
                self._redis.sadd(self.REDIS_KEY, *self.processed_files)
            # Pull the Redis-known files into the local set
            self.processed_files.update(existing_redis_files)

    def _save_to_json(self) -> None:
        """Persist the in-memory set to the JSON cache file."""
        with open(self.json_cache_file, 'w', encoding='utf8') as f:
            json.dump(list(self.processed_files), f)

    def _register_shutdown_handlers(self) -> None:
        """Register handlers so the cache is saved when the process stops."""
        atexit.register(self._cleanup)
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum, frame) -> None:
        """Save the cache and terminate on SIGINT/SIGTERM."""
        print(f"\nRicevuto segnale di interruzione {signum}")
        self._cleanup()
        # SystemExit is a builtin exception and is always available, unlike
        # the exit() helper which is only injected by the site module.
        raise SystemExit(0)

    def _cleanup(self) -> None:
        """Pull the latest entries from Redis and flush everything to JSON.

        Idempotent: runs at most once even if triggered by both the signal
        handler and the atexit hook.
        """
        if self._cleanup_done:
            return
        self._cleanup_done = True
        print("\nSalvataggio cache su file...")
        if self._redis:
            # Merge the freshest data from Redis before persisting
            self.processed_files.update(self._redis.smembers(self.REDIS_KEY))
        self._save_to_json()
        print("Cache salvato.")

    def add(self, filename: str) -> None:
        """
        Add a file to the cache.
        Args:
            filename (str): Name of the file to add.
        """
        self.processed_files.add(filename)
        if self._redis:
            self._redis.sadd(self.REDIS_KEY, filename)

    def __contains__(self, filename: str) -> bool:
        """
        Check whether a file is in the cache.
        Args:
            filename (str): Name of the file to check.
        Returns:
            bool: True if the file is cached, False otherwise.
        """
        return filename in self.processed_files

    def get_all(self) -> Set[str]:
        """
        Return all files in the cache.
        Returns:
            Set[str]: Set of processed file names.
        """
        if self._redis:
            # Refresh from Redis so callers see entries added by other processes
            self.processed_files.update(self._redis.smembers(self.REDIS_KEY))
        return self.processed_files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from SPARQLWrapper import SPARQLWrapper, POST | ||
from typing import Optional | ||
|
||
class TriplestoreConnection:
    """Singleton wrapper around a SPARQLWrapper update connection.

    Instantiating the class again returns the same shared object. Supplying
    an endpoint_url — on the first construction or on any later one —
    (re)points the shared connection at that endpoint.
    """

    _instance: Optional['TriplestoreConnection'] = None
    _sparql: Optional[SPARQLWrapper] = None

    def __new__(cls, endpoint_url: Optional[str] = None):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        # A truthy URL (first-time or replacement) rebinds the connection;
        # calling with no URL just hands back the existing singleton.
        if endpoint_url:
            cls._instance._init_connection(endpoint_url)
        return cls._instance

    def _init_connection(self, endpoint_url: str) -> None:
        """Build a POST-mode SPARQLWrapper for the given endpoint."""
        wrapper = SPARQLWrapper(endpoint_url)
        wrapper.setMethod(POST)
        self._sparql = wrapper

    @property
    def sparql(self) -> SPARQLWrapper:
        """The underlying SPARQLWrapper instance; raises if never initialized."""
        if self._sparql is None:
            raise RuntimeError("Connection not initialized. Provide endpoint_url when creating instance.")
        return self._sparql

    def execute_update(self, query: str) -> bool:
        """
        Run a SPARQL update against the triplestore.
        Args:
            query (str): SPARQL update query to execute.
        Returns:
            bool: True if execution succeeded, False otherwise.
        """
        try:
            connection = self.sparql
            connection.setQuery(query)
            connection.queryAndConvert()
        except Exception as e:
            print(f"Error executing query: {e}")
            return False
        return True
Oops, something went wrong.