diff --git a/homcc/server/cache.py b/homcc/server/cache.py
index 5cc39ed..a0abf97 100644
--- a/homcc/server/cache.py
+++ b/homcc/server/cache.py
@@ -3,32 +3,77 @@
 # https://github.com/celonis/homcc/blob/main/LICENSE
 """Caching module of the homcc server."""
+from collections import OrderedDict
 import logging
 from pathlib import Path
 from threading import Lock
-from typing import Dict

 logger = logging.getLogger(__name__)


+def mib_to_bytes(mib: int) -> int:
+    """Converts a size in MiB to bytes."""
+    return mib * 1024**2
+
+
 class Cache:
     """Represents the homcc server cache that is used to cache dependencies."""

-    cache: Dict[str, str]
+    cache: OrderedDict[str, str]
     """'Hash' -> 'File path' on server map for holding paths to cached files"""
     cache_mutex: Lock
     """Mutex for locking the cache."""
     cache_folder: Path
     """Path to the cache on the file system."""
+    max_size_bytes: int
+    """Maximum size in bytes of the cache."""
+    current_size: int
+    """Current size of the cache in bytes."""
+
+    def __init__(self, root_folder: Path, max_size_bytes: int):
+        if max_size_bytes <= 0:
+            raise RuntimeError("Maximum size of cache must be strictly positive.")

-    def __init__(self, root_folder: Path):
         self.cache_folder = self._create_cache_folder(root_folder)
-        self.cache: Dict[str, str] = {}
+        self.cache: OrderedDict[str, str] = OrderedDict()
         self.cache_mutex: Lock = Lock()
+        self.max_size_bytes = max_size_bytes
+        self.current_size = 0

-    def __contains__(self, key):
+    def _get_cache_file_path(self, hash: str) -> Path:
+        return self.cache_folder / hash
+
+    def __contains__(self, key: str) -> bool:
         with self.cache_mutex:
-            return key in self.cache
+            contained: bool = key in self.cache
+            if contained:
+                self.cache.move_to_end(key)
+
+            return contained
+
+    def __len__(self) -> int:
+        with self.cache_mutex:
+            return len(self.cache)
+
+    def _evict_oldest(self):
+        """
+        Evicts the least recently used entry from the cache.
+        Note: The caller of this method has to ensure that the cache is locked.
+        """
+        oldest_hash = next(iter(self.cache))
+        oldest_path = self._get_cache_file_path(oldest_hash)
+
+        try:
+            # determine the freed size before unlinking so the size bookkeeping stays accurate
+            oldest_size = oldest_path.stat().st_size
+            oldest_path.unlink()
+            self.current_size -= oldest_size
+        except FileNotFoundError:
+            # the cache file vanished unexpectedly; drop the entry but leave current_size untouched
+            logger.error(
+                "Tried to evict cache entry with hash '%s', but corresponding cache file at '%s' did not exist.",
+                oldest_hash,
+                oldest_path,
+            )
+
+        del self.cache[oldest_hash]

     @staticmethod
     def _create_cache_folder(root_temp_folder: Path) -> Path:
@@ -39,15 +84,29 @@ def _create_cache_folder(root_temp_folder: Path) -> Path:
         logger.info("Created cache folder in '%s'.", cache_folder.absolute())
         return cache_folder

-    def get(self, hash_value: str) -> str:
+    def get(self, hash: str) -> str:
         """Gets an entry (path) from the cache given a hash."""
         with self.cache_mutex:
-            return self.cache[hash_value]
+            self.cache.move_to_end(hash)
+            return self.cache[hash]

-    def put(self, hash_value: str, content: bytearray):
+    def put(self, hash: str, content: bytearray):
         """Stores a dependency in the cache."""
-        cached_file_path = self.cache_folder / hash_value
-        Path.write_bytes(cached_file_path, content)
+        if len(content) > self.max_size_bytes:
+            logger.error(
+                "File with hash '%s' cannot be added to the cache as it is larger than the maximum cache size "
+                "(size in bytes: %i, max. cache size in bytes: %i).",
+                hash,
+                len(content),
+                self.max_size_bytes,
+            )
+            raise RuntimeError("Cache size insufficient")
+        cached_file_path = self._get_cache_file_path(hash)

         with self.cache_mutex:
-            self.cache[hash_value] = str(cached_file_path)
+            while self.current_size + len(content) > self.max_size_bytes:
+                self._evict_oldest()
+
+            cached_file_path.write_bytes(content)
+            self.current_size += len(content)
+            self.cache[hash] = str(cached_file_path)
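For orientation, a minimal usage sketch of the size-bounded LRU cache introduced above; the hash value, the payload, and the 1 MiB limit are made up for illustration, and the module path is assumed from the file path in the diff:

    from pathlib import Path
    from tempfile import TemporaryDirectory

    from homcc.server.cache import Cache, mib_to_bytes

    with TemporaryDirectory() as tmp_dir:
        cache = Cache(root_folder=Path(tmp_dir), max_size_bytes=mib_to_bytes(1))
        cache.put("deadbeef", bytearray(b"example dependency"))  # writes <tmp_dir>/cache/deadbeef
        assert "deadbeef" in cache  # membership checks also refresh the entry's LRU position
        cached_file = Path(cache.get("deadbeef"))  # get() returns the cached file path as a string
        assert cached_file.read_bytes() == b"example dependency"
        # once the summed file sizes would exceed max_size_bytes, put() evicts least recently used entries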
diff --git a/homcc/server/server.py b/homcc/server/server.py
index bb5ee44..74c3bf6 100644
--- a/homcc/server/server.py
+++ b/homcc/server/server.py
@@ -77,7 +77,7 @@ def __init__(self, address: Optional[str], port: Optional[int], limit: Optional[
         self.current_amount_connections: int = 0  # indicates the amount of clients that are currently connected
         self.current_amount_connections_mutex: Lock = Lock()

-        self.cache = Cache(Path(self.root_temp_folder.name))
+        self.cache = Cache(root_folder=Path(self.root_temp_folder.name), max_size_bytes=mib_to_bytes(1000))  # TODO

     @staticmethod
     def send_message(request, message: Message):
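The constructor call above assumes that mib_to_bytes is imported into homcc/server/server.py alongside Cache; that import hunk is not part of this diff, so the line below is an assumption about its shape rather than a quote from the repository, and the 1000 MiB figure is only the placeholder flagged by the TODO, not a tuned default:

    from homcc.server.cache import Cache, mib_to_bytes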
diff --git a/tests/server/cache_test.py b/tests/server/cache_test.py
index 20d06eb..f81544b 100644
--- a/tests/server/cache_test.py
+++ b/tests/server/cache_test.py
@@ -13,30 +13,112 @@ class TestCache:
     """Tests the server cache."""

-    def test(self):
+    def test_simple(self):
         with TemporaryDirectory() as tmp_dir:
-            cache_dir = Path(tmp_dir)
-            cache = Cache(cache_dir)
+            root_dir = Path(tmp_dir)
+            cache = Cache(root_dir, 1000)
+            cache_dir = root_dir / "cache"

             file1 = bytearray([0x1, 0x2, 0x3, 0x9])
             cache.put("hash1", file1)
-            assert cache.get("hash1") == str(cache_dir / "cache" / "hash1")
+            assert cache.get("hash1") == str(cache_dir / "hash1")
             assert "hash1" in cache
             assert Path.read_bytes(Path(cache.get("hash1"))) == file1

             file2 = bytearray([0x3, 0x6, 0x3, 0x9])
             cache.put("hash2", file2)
-            assert cache.get("hash2") == str(cache_dir / "cache" / "hash2")
+            assert cache.get("hash2") == str(cache_dir / "hash2")
             assert "hash2" in cache
             assert Path.read_bytes(Path(cache.get("hash2"))) == file2

             file3 = bytearray([0x4, 0x2])
             cache.put("hash3", file3)
-            assert cache.get("hash3") == str(cache_dir / "cache" / "hash3")
+            assert cache.get("hash3") == str(cache_dir / "hash3")
             assert "hash3" in cache
             assert Path.read_bytes(Path(cache.get("hash3"))) == file3

             assert "other_hash" not in cache
+
+    def test_eviction_size_limit(self):
+        with TemporaryDirectory() as tmp_dir:
+            root_dir = Path(tmp_dir)
+            cache = Cache(root_dir, max_size_bytes=10)
+            cache_dir = root_dir / "cache"
+
+            cache.put("hash1", bytearray([0x1, 0x2, 0x3, 0x9]))
+            cache.put("hash2", bytearray([0x1, 0x2, 0x3, 0xA]))
+            cache.put("hash3", bytearray([0xFF, 0xFF]))
+            assert len(cache) == 3
+            assert (cache_dir / "hash1").exists()
+            assert (cache_dir / "hash2").exists()
+            assert (cache_dir / "hash3").exists()
+
+            cache.put("hash4", bytearray([0x1]))
+            assert len(cache) == 3
+            assert "hash2" in cache
+            assert "hash3" in cache
+            assert "hash4" in cache
+            assert not (cache_dir / "hash1").exists()
+            assert (cache_dir / "hash2").exists()
+            assert (cache_dir / "hash3").exists()
+            assert (cache_dir / "hash4").exists()
+
+            cache.put("hash5", bytearray([0x1]))
+            assert len(cache) == 4
+            assert "hash2" in cache
+            assert "hash3" in cache
+            assert "hash4" in cache
+            assert "hash5" in cache
+            assert (cache_dir / "hash2").exists()
+            assert (cache_dir / "hash3").exists()
+            assert (cache_dir / "hash4").exists()
+            assert (cache_dir / "hash5").exists()
+
+            cache.put("hash6", bytearray([0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9]))
+            assert len(cache) == 2
+            assert not (cache_dir / "hash2").exists()
+            assert not (cache_dir / "hash3").exists()
+            assert not (cache_dir / "hash4").exists()
+            assert "hash5" in cache
+            assert "hash6" in cache
+
+    def test_eviction_order_lru(self):
+        with TemporaryDirectory() as tmp_dir:
+            root_dir = Path(tmp_dir)
+            cache = Cache(root_dir, max_size_bytes=10)
+            cache_dir = root_dir / "cache"
+
+            cache.put("hash1", bytearray([0x1, 0x2, 0x3, 0x9]))
+            cache.put("hash2", bytearray([0x1, 0x2, 0x3, 0xA]))
+            cache.put("hash3", bytearray([0xFF, 0xFF]))
+            assert len(cache) == 3
+            assert (cache_dir / "hash1").exists()
+            assert (cache_dir / "hash2").exists()
+            assert (cache_dir / "hash3").exists()
+
+            cache.get("hash1")  # make "hash1" the most recently used element
+            cache.put("hash4", bytearray([0xFF, 0xFF, 0x0, 0x0]))
+            assert len(cache) == 3
+            assert "hash2" not in cache
+            assert "hash1" in cache
+            assert "hash3" in cache
+            assert "hash4" in cache
+            # TODO: add a helper combining the cache membership and file existence asserts to reduce boilerplate
+            assert not (cache_dir / "hash2").exists()
+            assert (cache_dir / "hash1").exists()
+            assert (cache_dir / "hash3").exists()
+            assert (cache_dir / "hash4").exists()
+
+            assert "hash3" in cache  # make "hash3" the most recently used element
+            cache.put("hash5", bytearray([0xFF, 0xFF, 0x0, 0x0, 0xFF, 0xFF, 0x0, 0x0]))
+            assert len(cache) == 2
+            assert "hash3" in cache
+            assert "hash5" in cache
+            assert not (cache_dir / "hash1").exists()
+            assert not (cache_dir / "hash2").exists()
+            assert (cache_dir / "hash3").exists()
+            assert not (cache_dir / "hash4").exists()
+            assert (cache_dir / "hash5").exists()
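The TODO inside test_eviction_order_lru asks for a helper that combines the cache-membership and on-disk checks; one possible shape, purely as a sketch (the helper name and signature are invented here, not part of the change):

    def assert_cache_entry(cache: Cache, cache_dir: Path, hash_value: str, present: bool) -> None:
        # checks the in-memory bookkeeping and the file on disk in one call;
        # note that the membership check refreshes the entry's LRU position
        assert (hash_value in cache) == present
        assert (cache_dir / hash_value).exists() == present

With such a helper, pairs like assert "hash4" in cache / assert (cache_dir / "hash4").exists() collapse into assert_cache_entry(cache, cache_dir, "hash4", present=True).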