Commit

WIP: lru cache
OliLay committed Oct 24, 2023
1 parent 071a840 commit f515cfe
Showing 3 changed files with 160 additions and 19 deletions.
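The changes below implement the LRU bookkeeping on top of collections.OrderedDict. For orientation, a minimal sketch of the underlying mechanism (standard-library behavior only, not homcc code):

from collections import OrderedDict

lru = OrderedDict()
lru["a"] = "/path/to/a"
lru["b"] = "/path/to/b"

lru.move_to_end("a")      # "a" becomes the most recently used entry
oldest = next(iter(lru))  # iteration starts at the least recently used entry
assert oldest == "b"
del lru[oldest]           # evict the least recently used entry

The diff uses exactly these primitives: move_to_end on lookups, and next(iter(...)) plus del for eviction.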
83 changes: 71 additions & 12 deletions homcc/server/cache.py
@@ -3,32 +3,77 @@
# https://github.com/celonis/homcc/blob/main/LICENSE

"""Caching module of the homcc server."""
from collections import OrderedDict
import logging
from pathlib import Path
from threading import Lock
from typing import Dict

logger = logging.getLogger(__name__)


def mib_to_bytes(mb: int) -> int:
return mb * 1024**2


class Cache:
"""Represents the homcc server cache that is used to cache dependencies."""

cache: Dict[str, str]
cache: OrderedDict[str, str]
"""'Hash' -> 'File path' on server map for holding paths to cached files"""
cache_mutex: Lock
"""Mutex for locking the cache."""
cache_folder: Path
"""Path to the cache on the file system."""
max_size_bytes: int
"""Maximum size in bytes of the cache."""
current_size: int
"""Current size in bytes"""

def __init__(self, root_folder: Path, max_size_bytes: int):
if max_size_bytes <= 0:
raise RuntimeError("Maximum size of cache must be strictly positive.")

def __init__(self, root_folder: Path):
self.cache_folder = self._create_cache_folder(root_folder)
self.cache: Dict[str, str] = {}
self.cache: OrderedDict[str, str] = OrderedDict()
self.cache_mutex: Lock = Lock()
self.max_size_bytes = max_size_bytes
self.current_size = 0

def __contains__(self, key):
def _get_cache_file_path(self, hash: str) -> Path:
return self.cache_folder / hash

def __contains__(self, key: str):
with self.cache_mutex:
return key in self.cache
contained: bool = key in self.cache
if contained:
self.cache.move_to_end(key)

return contained

def __len__(self) -> int:
with self.cache_mutex:
return len(self.cache)

def _evict_oldest(self):
"""
Evicts the least recently used entry from the cache.
Note: The caller of this method has to ensure that the cache is locked.
"""
oldest_hash = next(iter(self.cache))
oldest_path = self._get_cache_file_path(oldest_hash)
oldest_size = 0

try:
oldest_size = oldest_path.stat().st_size
oldest_path.unlink()
except FileNotFoundError:
logger.error(
"Tried to evict cache entry with hash '%s', but corresponding cache file at '%s' did not exist.",
oldest_hash,
oldest_path,
)

self.current_size -= oldest_size
del self.cache[oldest_hash]

@staticmethod
def _create_cache_folder(root_temp_folder: Path) -> Path:
@@ -39,15 +39,84 @@ def _create_cache_folder(root_temp_folder: Path) -> Path:
logger.info("Created cache folder in '%s'.", cache_folder.absolute())
return cache_folder

def get(self, hash_value: str) -> str:
def get(self, hash: str) -> str:
"""Gets an entry (path) from the cache given a hash."""
with self.cache_mutex:
return self.cache[hash_value]
self.cache.move_to_end(hash)
return self.cache[hash]

def put(self, hash_value: str, content: bytearray):
def put(self, hash: str, content: bytearray):
"""Stores a dependency in the cache."""
cached_file_path = self.cache_folder / hash_value
Path.write_bytes(cached_file_path, content)
if len(content) > self.max_size_bytes:
logger.error(
"""File with hash '%s' can not be added to cache as it is larger than the maximum cache size.
(size in bytes: %i, max. cache size in bytes: %i)""",
hash,
len(content),
self.max_size_bytes,
)
raise RuntimeError("Cache size insufficient")

cached_file_path = self._get_cache_file_path(hash)
with self.cache_mutex:
self.cache[hash_value] = str(cached_file_path)
while self.current_size + len(content) > self.max_size_bytes:
self._evict_oldest()

Path.write_bytes(cached_file_path, content)
self.current_size += len(content)
self.cache[hash] = str(cached_file_path)
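
Taken together, the class above acts as a size-bounded LRU map from file hash to cached file path. A minimal usage sketch, assuming the import path from the file header above; the hash names and byte sizes are made up:

from pathlib import Path
from tempfile import TemporaryDirectory

from homcc.server.cache import Cache

with TemporaryDirectory() as tmp_dir:
    cache = Cache(Path(tmp_dir), max_size_bytes=8)

    cache.put("hash_a", bytearray(b"1234"))  # 4 bytes cached
    cache.put("hash_b", bytearray(b"5678"))  # 8 bytes cached, at the limit
    cache.get("hash_a")                      # "hash_a" becomes the most recently used entry

    cache.put("hash_c", bytearray(b"90"))    # 2 more bytes needed -> evicts "hash_b"
    assert "hash_a" in cache and "hash_c" in cache
    assert "hash_b" not in cache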
2 changes: 1 addition & 1 deletion homcc/server/server.py
@@ -77,7 +77,7 @@ def __init__(self, address: Optional[str], port: Optional[int], limit: Optional[
self.current_amount_connections: int = 0 # indicates the amount of clients that are currently connected
self.current_amount_connections_mutex: Lock = Lock()

self.cache = Cache(Path(self.root_temp_folder.name))
self.cache = Cache(root_folder=Path(self.root_temp_folder.name), max_size_bytes=1000 * 1024**2)  # TODO: make the size limit configurable (1000 MiB is a placeholder)

@staticmethod
def send_message(request, message: Message):
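The hard-coded size limit above is marked TODO. A minimal sketch of how it could be wired up via the mib_to_bytes helper from homcc/server/cache.py; the constant name and the 1000 MiB default are assumptions, not part of this commit:

from pathlib import Path
from tempfile import TemporaryDirectory

from homcc.server.cache import Cache, mib_to_bytes

# Assumed default; a CLI flag or server config entry could replace this later.
DEFAULT_CACHE_SIZE_MIB = 1000

root_temp_folder = TemporaryDirectory()
cache = Cache(root_folder=Path(root_temp_folder.name), max_size_bytes=mib_to_bytes(DEFAULT_CACHE_SIZE_MIB))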
94 changes: 88 additions & 6 deletions tests/server/cache_test.py
@@ -13,30 +13,112 @@
class TestCache:
"""Tests the server cache."""

def test(self):
def test_simple(self):
with TemporaryDirectory() as tmp_dir:
cache_dir = Path(tmp_dir)
cache = Cache(cache_dir)
root_dir = Path(tmp_dir)
cache = Cache(root_dir, 1000)
cache_dir = root_dir / "cache"

file1 = bytearray([0x1, 0x2, 0x3, 0x9])
cache.put("hash1", file1)

assert cache.get("hash1") == str(cache_dir / "cache" / "hash1")
assert cache.get("hash1") == str(cache_dir / "hash1")
assert "hash1" in cache
assert Path.read_bytes(Path(cache.get("hash1"))) == file1

file2 = bytearray([0x3, 0x6, 0x3, 0x9])
cache.put("hash2", file2)

assert cache.get("hash2") == str(cache_dir / "cache" / "hash2")
assert cache.get("hash2") == str(cache_dir / "hash2")
assert "hash2" in cache
assert Path.read_bytes(Path(cache.get("hash2"))) == file2

file3 = bytearray([0x4, 0x2])
cache.put("hash3", file3)

assert cache.get("hash3") == str(cache_dir / "cache" / "hash3")
assert cache.get("hash3") == str(cache_dir / "hash3")
assert "hash3" in cache
assert Path.read_bytes(Path(cache.get("hash3"))) == file3

assert "other_hash" not in cache

def test_eviction_size_limit(self):
with TemporaryDirectory() as tmp_dir:
root_dir = Path(tmp_dir)
cache = Cache(root_dir, max_size_bytes=10)
cache_dir = root_dir / "cache"

cache.put("hash1", bytearray([0x1, 0x2, 0x3, 0x9]))
cache.put("hash2", bytearray([0x1, 0x2, 0x3, 0xA]))
cache.put("hash3", bytearray([0xFF, 0xFF]))
assert len(cache) == 3
assert (cache_dir / "hash1").exists()
assert (cache_dir / "hash2").exists()
assert (cache_dir / "hash3").exists()

cache.put("hash4", bytearray([0x1]))
assert len(cache) == 3
assert "hash2" in cache
assert "hash3" in cache
assert "hash4" in cache
assert not (cache_dir / "hash1").exists()
assert (cache_dir / "hash2").exists()
assert (cache_dir / "hash3").exists()
assert (cache_dir / "hash4").exists()

cache.put("hash5", bytearray([0x1]))
assert len(cache) == 4
assert "hash2" in cache
assert "hash3" in cache
assert "hash4" in cache
assert "hash5" in cache
assert (cache_dir / "hash2").exists()
assert (cache_dir / "hash3").exists()
assert (cache_dir / "hash4").exists()
assert (cache_dir / "hash5").exists()

cache.put("hash6", bytearray([0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9]))
assert len(cache) == 2
assert not (cache_dir / "hash2").exists()
assert not (cache_dir / "hash3").exists()
assert not (cache_dir / "hash4").exists()
assert "hash5" in cache
assert "hash6" in cache

def test_eviction_order_lru(self):
with TemporaryDirectory() as tmp_dir:
root_dir = Path(tmp_dir)
cache = Cache(root_dir, max_size_bytes=10)
cache_dir = root_dir / "cache"

cache.put("hash1", bytearray([0x1, 0x2, 0x3, 0x9]))
cache.put("hash2", bytearray([0x1, 0x2, 0x3, 0xA]))
cache.put("hash3", bytearray([0xFF, 0xFF]))
assert len(cache) == 3
assert (cache_dir / "hash1").exists()
assert (cache_dir / "hash2").exists()
assert (cache_dir / "hash3").exists()

cache.get("hash1") # make "hash1" the latest used element
cache.put("hash4", bytearray([0xFF, 0xFF, 0x0, 0x0]))
assert len(cache) == 3
assert "hash2" not in cache
assert "hash1" in cache
assert "hash3" in cache
assert "hash4" in cache
# TODO: add a helper combining the cache-membership and file-existence asserts to reduce boilerplate
assert not (cache_dir / "hash2").exists()
assert (cache_dir / "hash1").exists()
assert (cache_dir / "hash3").exists()
assert (cache_dir / "hash4").exists()

assert "hash3" in cache # make "hash3" the latest used element
cache.put("hash5", bytearray([0xFF, 0xFF, 0x0, 0x0, 0xFF, 0xFF, 0x0, 0x0]))
assert len(cache) == 2
assert "hash3" in cache
assert "hash5" in cache
assert not (cache_dir / "hash1").exists()
assert not (cache_dir / "hash2").exists()
assert (cache_dir / "hash3").exists()
assert not (cache_dir / "hash4").exists()
assert (cache_dir / "hash5").exists()
