Skip to content

Commit

Permalink
feat: LRU cache for dependencies (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
OliLay authored Oct 31, 2023
1 parent 071a840 commit 9877a29
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 45 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ Additionally, `HOMCC` provides sandboxed compiler execution for remote compilati
HOMCCD_ADDRESS
HOMCCD_LOG_LEVEL
HOMCCD_VERBOSE
HOMCCD_MAX_DEPENDENCY_CACHE_SIZE
</pre></sub></td>
<td><sub><pre lang="ini">
[homcc]
Expand All @@ -188,6 +190,7 @@ Additionally, `HOMCC` provides sandboxed compiler execution for remote compilati
address=0.0.0.0
log_level=DEBUG
verbose=True
max_dependency_cache_size=10G
</pre></sub></td>
<td><sub><pre>
# Client configuration
Expand All @@ -207,14 +210,14 @@ Additionally, `HOMCC` provides sandboxed compiler execution for remote compilati
IP address to listen on
Detail level for log messages: {DEBUG, INFO, WARNING, ERROR, CRITICAL}
Enable verbosity mode which implies detailed and colored logging
Maximum size of the dependency cache. You must specify either 'M' (Mebibyte) or 'G' (Gibibyte) as suffix.
</pre></sub></td>
</tr>
</table>
## Deployment hints
Things to keep in mind when deploying `homccd`:
- `homcc` currently does not support any transport encryption such as TLS, so source files would get transmitted over the internet in plain text if not using a VPN.
- `homccd` currently does not support cache eviction. The dependency cache is therefore growing until there is no space any more. We recommend to restart the `homccd` service every 24 hours (e.g. using a cronjob) so that the cache gets cleared regularly.
- `homccd` does not limit simultaneous connections of a single client. A malicious client could therefore block the service by always opening up connections until no server slots are available any more.
- `homccd` does not limit access to docker containers or chroot environments. A client can choose any docker container or chroot environment available on the server to execute the compilation in.
Expand Down
69 changes: 61 additions & 8 deletions homcc/server/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,70 @@

"""Caching module of the homcc server."""
import logging
from collections import OrderedDict
from pathlib import Path
from threading import Lock
from typing import Dict

logger = logging.getLogger(__name__)


class Cache:
"""Represents the homcc server cache that is used to cache dependencies."""

cache: Dict[str, str]
cache: OrderedDict[str, str]
"""'Hash' -> 'File path' on server map for holding paths to cached files"""
cache_mutex: Lock
"""Mutex for locking the cache."""
cache_folder: Path
"""Path to the cache on the file system."""
max_size_bytes: int
"""Maximum size of the cache in bytes."""
current_size_bytes: int
"""Current size of the cache in bytes."""

def __init__(self, root_folder: Path, max_size_bytes: int):
if max_size_bytes <= 0:
raise RuntimeError("Maximum size of cache must be strictly positive.")

def __init__(self, root_folder: Path):
self.cache_folder = self._create_cache_folder(root_folder)
self.cache: Dict[str, str] = {}
self.cache: OrderedDict[str, str] = OrderedDict()
self.cache_mutex: Lock = Lock()
self.max_size_bytes = max_size_bytes
self.current_size_bytes = 0

def _get_cache_file_path(self, hash_value: str) -> Path:
return self.cache_folder / hash_value

def __contains__(self, key: str):
with self.cache_mutex:
contained: bool = key in self.cache
if contained:
self.cache.move_to_end(key)

def __contains__(self, key):
return contained

def __len__(self) -> int:
with self.cache_mutex:
return key in self.cache
return len(self.cache)

def _evict_oldest(self):
"""
Evicts the oldest entry from the cache.
Note: The caller of this method has to ensure that the cache is locked.
"""
oldest_hash, oldest_path_str = self.cache.popitem(last=False)
oldest_path = Path(oldest_path_str)

try:
self.current_size_bytes -= oldest_path.stat().st_size
oldest_path.unlink(missing_ok=False)
except FileNotFoundError:
logger.error(
"""Tried to evict cache entry with hash '%s', but corresponding cache file at '%s' did not exist.
This may lead to an invalid cache size calculation.""",
oldest_hash,
oldest_path,
)

@staticmethod
def _create_cache_folder(root_temp_folder: Path) -> Path:
Expand All @@ -42,12 +81,26 @@ def _create_cache_folder(root_temp_folder: Path) -> Path:
def get(self, hash_value: str) -> str:
"""Gets an entry (path) from the cache given a hash."""
with self.cache_mutex:
self.cache.move_to_end(hash_value)
return self.cache[hash_value]

def put(self, hash_value: str, content: bytearray):
"""Stores a dependency in the cache."""
cached_file_path = self.cache_folder / hash_value
Path.write_bytes(cached_file_path, content)
if len(content) > self.max_size_bytes:
logger.error(
"""File with hash '%s' can not be added to cache as it is larger than the maximum cache size.
(size in bytes: %i, max. cache size in bytes: %i)""",
hash_value,
len(content),
self.max_size_bytes,
)
raise RuntimeError("Cache size insufficient")

cached_file_path = self._get_cache_file_path(hash_value)
with self.cache_mutex:
while self.current_size_bytes + len(content) > self.max_size_bytes:
self._evict_oldest()

Path.write_bytes(cached_file_path, content)
self.current_size_bytes += len(content)
self.cache[hash_value] = str(cached_file_path)
8 changes: 8 additions & 0 deletions homcc/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
ServerConfig,
parse_cli_args,
parse_config,
size_string_to_bytes,
)
from homcc.server.server import ( # pylint: disable=wrong-import-position
start_server,
Expand All @@ -67,6 +68,9 @@ def main():
level=LogLevel.INFO,
)

# TODO(o.layer): The argument parsing code below should be moved to/abstracted in parsing.py,
# similar to how it is done for the client

# LOG_LEVEL and VERBOSITY
log_level: Optional[str] = homccd_args_dict["log_level"]

Expand Down Expand Up @@ -100,6 +104,10 @@ def main():
if (address := homccd_args_dict["listen"]) is not None:
homccd_config.address = address

# MAX_DEPENDENCY_CACHE_SIZE
if (max_dependency_cache_size := homccd_args_dict["max_dependency_cache_size"]) is not None:
homccd_config.max_dependency_cache_size_bytes = size_string_to_bytes(max_dependency_cache_size)

# provide additional DEBUG information
logger.debug(
"%s - %s\n" "Caller:\t%s\n" "%s", # homccd location and version; homccd caller; config info
Expand Down
65 changes: 63 additions & 2 deletions homcc/server/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,28 @@

logger = logging.getLogger(__name__)


def mib_to_bytes(mb: int) -> int:
    """Converts an amount of Mebibytes to bytes (1 MiB = 1024 * 1024 bytes)."""
    bytes_per_mib = 1024 * 1024
    return mb * bytes_per_mib


def gib_to_bytes(gb: int) -> int:
    """Converts an amount of Gibibytes to bytes (1 GiB = 1024 * 1024 * 1024 bytes)."""
    bytes_per_gib = 1024 * 1024 * 1024
    return gb * bytes_per_gib


def size_string_to_bytes(size_string: str) -> int:
    """
    Converts e.g. 100M or 1G to bytes. Only supports M (Mebibyte) and G (Gibibyte).

    :param size_string: integer amount directly followed by the unit suffix
    :return: the size in bytes
    :raises ArgumentTypeError: if the string is empty, has an unsupported suffix or a non-integer amount
    """
    invalid_size_message = (
        f"Invalid size string: '{size_string}'. Specify either M (Mebibyte) or G (Gibibyte)."
    )

    # slicing instead of indexing, so that an empty string does not raise an IndexError
    unit = size_string[-1:]
    if unit not in ("M", "G"):
        raise ArgumentTypeError(invalid_size_message)

    try:
        amount = int(size_string[:-1])
    except ValueError as err:
        # report a malformed amount the same way as a malformed unit
        raise ArgumentTypeError(invalid_size_message) from err

    if unit == "M":
        return mib_to_bytes(amount)

    return gib_to_bytes(amount)


HOMCC_SERVER_CONFIG_SECTION: str = "homccd"

DEFAULT_ADDRESS: str = "0.0.0.0"
Expand All @@ -31,6 +53,7 @@
or os.cpu_count() # total number of physical CPUs on the machine
or -1 # fallback error value
)
DEFAULT_MAX_CACHE_SIZE_BYTES: int = gib_to_bytes(10)


class ShowVersion(Action):
Expand Down Expand Up @@ -74,6 +97,7 @@ class EnvironmentVariables:
HOMCCD_ADDRESS_ENV_VAR: ClassVar[str] = "HOMCCD_ADDRESS"
HOMCCD_LOG_LEVEL_ENV_VAR: ClassVar[str] = "HOMCCD_LOG_LEVEL"
HOMCCD_VERBOSE_ENV_VAR: ClassVar[str] = "HOMCCD_VERBOSE"
HOMCCD_MAX_DEPENDENCY_CACHE_SIZE: ClassVar[str] = "HOMCCD_MAX_DEPENDENCY_CACHE_SIZE"

@classmethod
def __iter__(cls) -> Iterator[str]:
Expand All @@ -83,6 +107,7 @@ def __iter__(cls) -> Iterator[str]:
cls.HOMCCD_ADDRESS_ENV_VAR,
cls.HOMCCD_LOG_LEVEL_ENV_VAR,
cls.HOMCCD_VERBOSE_ENV_VAR,
cls.HOMCCD_MAX_DEPENDENCY_CACHE_SIZE,
)

@classmethod
Expand Down Expand Up @@ -112,12 +137,20 @@ def get_verbose(cls) -> Optional[bool]:
return re.match(r"^(1)|(yes)|(true)|(on)$", verbose, re.IGNORECASE) is not None
return None

@classmethod
def get_max_dependency_cache_size(cls) -> Optional[int]:
    """Returns the maximum dependency cache size in bytes from the environment, or None if unset or empty."""
    size_string = os.getenv(cls.HOMCCD_MAX_DEPENDENCY_CACHE_SIZE)
    if not size_string:
        return None

    return size_string_to_bytes(size_string)

files: List[str]
address: Optional[str]
port: Optional[int]
limit: Optional[int]
log_level: Optional[LogLevel]
verbose: bool
max_dependency_cache_size_bytes: Optional[int]

def __init__(
self,
Expand All @@ -128,6 +161,7 @@ def __init__(
address: Optional[str] = None,
log_level: Optional[str] = None,
verbose: Optional[bool] = None,
max_dependency_cache_size_bytes: Optional[int] = None,
):
self.files = files

Expand All @@ -140,6 +174,10 @@ def __init__(
verbose = self.EnvironmentVariables.get_verbose() or verbose
self.verbose = verbose is not None and verbose

self.max_dependency_cache_size_bytes = (
self.EnvironmentVariables.get_max_dependency_cache_size() or max_dependency_cache_size_bytes
)

@classmethod
def empty(cls):
return cls(files=[])
Expand All @@ -151,8 +189,19 @@ def from_config_section(cls, files: List[str], homccd_config: SectionProxy) -> S
address: Optional[str] = homccd_config.get("address")
log_level: Optional[str] = homccd_config.get("log_level")
verbose: Optional[bool] = homccd_config.getboolean("verbose")

return ServerConfig(files=files, limit=limit, port=port, address=address, log_level=log_level, verbose=verbose)
max_dependency_cache_size: Optional[str] = homccd_config.get("max_dependency_cache_size")

return ServerConfig(
files=files,
limit=limit,
port=port,
address=address,
log_level=log_level,
verbose=verbose,
max_dependency_cache_size_bytes=None
if max_dependency_cache_size is None
else size_string_to_bytes(max_dependency_cache_size),
)

def __str__(self):
return (
Expand All @@ -162,6 +211,7 @@ def __str__(self):
f"\taddress:\t{self.address}\n"
f"\tlog_level:\t{self.log_level}\n"
f"\tverbose:\t{self.verbose}\n"
f"\tmax_dependency_cache_size_bytes:\t{self.max_dependency_cache_size_bytes}\n"
)


Expand All @@ -181,6 +231,9 @@ def min_job_limit(value: Union[int, str], minimum: int = 0) -> int:

raise ArgumentTypeError(f"LIMIT must be more than {minimum}")

def max_dependency_cache_size_bytes(value: str) -> int:
    """Argument type for --max-dependency-cache-size: turns a size string into bytes."""
    size_in_bytes: int = size_string_to_bytes(value)
    return size_in_bytes

general_options_group = parser.add_argument_group("Options")
networking_group = parser.add_argument_group(" Networking")
debug_group = parser.add_argument_group(" Debug")
Expand All @@ -206,6 +259,14 @@ def min_job_limit(value: Union[int, str], minimum: int = 0) -> int:
action="store_true",
help="enforce that only configurations provided via the CLI are used",
)
general_options_group.add_argument(
"--max-dependency-cache-size",
required=False,
metavar="SIZE",
type=max_dependency_cache_size_bytes,
help=f"""The maximum cache size for the dependency cache. Expects a size string, e.g. 100M or 10G.
Default: {DEFAULT_MAX_CACHE_SIZE_BYTES} bytes""",
)

# networking
networking_group.add_argument(
Expand Down
10 changes: 7 additions & 3 deletions homcc/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from homcc.server.parsing import (
DEFAULT_ADDRESS,
DEFAULT_LIMIT,
DEFAULT_MAX_CACHE_SIZE_BYTES,
DEFAULT_PORT,
ServerConfig,
)
Expand All @@ -56,7 +57,9 @@
class TCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
"""TCP Server instance, holding data relevant across compilations."""

def __init__(self, address: Optional[str], port: Optional[int], limit: Optional[int]):
def __init__(
self, address: Optional[str], port: Optional[int], limit: Optional[int], max_cache_size_bytes: Optional[int]
):
address = address or DEFAULT_ADDRESS
port = port or DEFAULT_PORT

Expand All @@ -77,7 +80,8 @@ def __init__(self, address: Optional[str], port: Optional[int], limit: Optional[
self.current_amount_connections: int = 0 # indicates the amount of clients that are currently connected
self.current_amount_connections_mutex: Lock = Lock()

self.cache = Cache(Path(self.root_temp_folder.name))
max_cache_size_bytes = max_cache_size_bytes or DEFAULT_MAX_CACHE_SIZE_BYTES
self.cache = Cache(root_folder=Path(self.root_temp_folder.name), max_size_bytes=max_cache_size_bytes)

@staticmethod
def send_message(request, message: Message):
Expand Down Expand Up @@ -518,7 +522,7 @@ def handle(self):

def start_server(config: ServerConfig) -> Tuple[TCPServer, threading.Thread]:
try:
server: TCPServer = TCPServer(config.address, config.port, config.limit)
server: TCPServer = TCPServer(config.address, config.port, config.limit, config.max_dependency_cache_size_bytes)
except OSError as err:
logger.error("Could not start TCP server: %s", err)
raise ServerInitializationError from err
Expand Down
Loading

0 comments on commit 9877a29

Please sign in to comment.