Skip to content

Commit

Permalink
Documentation, Dockerfile, cleaning
Browse files Browse the repository at this point in the history
* Improved documentation of some components
* Optimization of the Dockerfile
* Deletion of unused old scripts
  • Loading branch information
sv-giampa committed Dec 15, 2023
1 parent 66760c2 commit 8ba65a1
Show file tree
Hide file tree
Showing 8 changed files with 317 additions and 231 deletions.
19 changes: 12 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,22 @@ FROM ubuntu:20.04
USER root
WORKDIR /root

# RUN alias apt_install="DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y"
# RUN alias apt_clean="apt clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*"

COPY --chmod=777 apt_install /apt_install

# install utility software packages
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y software-properties-common&& rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y inetutils-ping net-tools wget && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y htop screen zip nano && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN /apt_install software-properties-common
RUN /apt_install inetutils-ping net-tools wget
RUN /apt_install htop screen zip nano

# install and configure git
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y git && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN /apt_install git
RUN DEBIAN_FRONTEND=noninteractive git config --global commit.gpgsign false

# configure ssh daemon
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y openssh-server && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN /apt_install openssh-server
RUN if ! [ -d /var/run/sshd ]; then mkdir /var/run/sshd; fi
RUN echo 'root:password!!' | chpasswd
RUN sed -i 's/^[# ]*PermitRootLogin .*$/PermitRootLogin yes/g' /etc/ssh/sshd_config
Expand Down Expand Up @@ -41,9 +46,9 @@ RUN echo 'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/' >> ~/.bashrc
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y python3 python3-pip python3-dev && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*

# install PyCOMPSs
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y graphviz xdg-utils libtool automake build-essential \
RUN /apt_install graphviz xdg-utils libtool automake build-essential \
python python-dev libpython2.7 libboost-serialization-dev libboost-iostreams-dev libxml2 libxml2-dev csh gfortran \
libgmp3-dev flex bison texinfo libpapi-dev && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
libgmp3-dev flex bison texinfo libpapi-dev
RUN python3 -m pip install --upgrade pip setuptools
RUN python3 -m pip install dill guppy3
RUN python3 -m pip install "pycompss==3.1" -v
Expand Down
13 changes: 13 additions & 0 deletions apt_install
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

# Helper used in Dockerfiles to install software through apt within a single
# layer: it refreshes the package lists, installs the packages passed as
# arguments, then removes the apt cache and lists so they are not stored in
# the resulting image layer.
#
# Usage: apt_install PACKAGE [PACKAGE...]

# Abort on the first failing command. Without this the exit status of the
# script is the one of the final `rm`, so a failed update/install would still
# report success to the calling `RUN` instruction.
set -e

# Applies to every apt invocation below.
export DEBIAN_FRONTEND=noninteractive

# update packages lists
apt update -y

# install required software; "$@" is quoted so every argument is forwarded
# unchanged (no word re-splitting or glob expansion)
apt install -y "$@"

# clean apt cache and lists
apt clean
rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
57 changes: 0 additions & 57 deletions parsoda/function/analysis/parallel_fp_growth.py

This file was deleted.

38 changes: 0 additions & 38 deletions parsoda/function/analysis/sequential_fp_growth.py

This file was deleted.

33 changes: 19 additions & 14 deletions parsoda/model/driver/parsoda_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,13 @@ def init_environment(self) -> None:
def set_num_partitions(self, num_partitions: int) -> None:
"""
Sets the number of data partitions
:return: None
"""
pass

@abstractmethod
def set_chunk_size(self, chunk_size: int) -> None:
"""
Sets the size of data partitions in bytes
:return: None
"""
pass

Expand All @@ -55,53 +53,60 @@ def crawl(self, crawler: List[Crawler]) -> None:
After invoking this function the implementor should hold a representation of an initial dataset
(e.g., on Spark a new RDD is populated with the SocialDataItem objects provided by crawlers)
:return: None
"""
pass

@abstractmethod
def filter(self, filter_func: Callable[[Any], bool]) -> None:
"""
Applies the given filter to the current dataset, dropping all items that do not satisfy the filter
:param filter_func: the filter to apply
:return: None
Args:
filter_func: the filter to apply
"""
pass

@abstractmethod
def flatmap(self, mapper: Callable[[Any], Iterable[Any]]) -> None:
"""
Executes a mapping of each item to a list of custom key-value pairs, represented as tuples of two elements each
:param mapper: the (object -> list[(K,V)]) mapping function to apply
:return: None
Args:
mapper: the (object -> list[(K,V)]) mapping function to apply
"""
pass

def map(self, mapper: Callable[[Any], Any]) -> None:
"""
Executes a mapping of each item in the current dataset to a new object
:param mapper: the (object -> list[(K,V)]) mapping function to apply
:return: None
Executes a mapping of each item in the current dataset to a new object.
Args:
mapper: the (object -> object) mapping function to apply
"""
self.flatmap(_flatmapper(mapper))

#TODO: documentation
def group_by_key(self) -> None:
"""Assumes that the current dataset is a bulk of key-value pairs and creates a new dataset which groups all the items with the same key. The new dataset will be a bulk of (key)-(list-of-values) pairs.
"""Assumes that the current dataset is a bulk of key-value pairs
and creates a new dataset which groups all the items with the same key.
The new dataset will be a bulk of (key)-(list-of-values) pairs.
"""
pass

def get_result(self) -> Any:
"""
Gets the current dataset
:return: the current dataset
Returns:
Any: the current dataset
"""
pass

@abstractmethod
def dispose_environment(self) -> None:
"""
Disposes instantiated resources of the underlying environment, after executing the ParSoDA application, in order to reuse this driver as a new fresh driver that should be re-initialized
:return: None
Disposes instantiated resources of the underlying environment,
after executing the ParSoDA application, in order to reuse
this driver as a new fresh driver that should be re-initialized
"""
pass
Loading

0 comments on commit 8ba65a1

Please sign in to comment.