Skip to content

Commit

Permalink
Merge branch 'release' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
sv-giampa committed Jan 29, 2024
2 parents 046b184 + 9723fb8 commit a800e68
Show file tree
Hide file tree
Showing 15 changed files with 468 additions and 259 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/create-prerelease.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
run: |
echo "::set-output name=version::$(date +'%Y-%m-%d' --utc)"
-
name: Create Stable Release
name: Create Pre-Release
id: create_release
uses: actions/create-release@v1
env:
Expand Down
19 changes: 12 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,22 @@ FROM ubuntu:20.04
USER root
WORKDIR /root

# RUN alias apt_install="DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y"
# RUN alias apt_clean="apt clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*"

COPY --chmod=777 apt_install /apt_install

# install utility software packages
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y software-properties-common&& rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y inetutils-ping net-tools wget && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y htop screen zip nano && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN /apt_install software-properties-common
RUN /apt_install inetutils-ping net-tools wget
RUN /apt_install htop screen zip nano

# install and configure git
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y git && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN /apt_install git
RUN DEBIAN_FRONTEND=noninteractive git config --global commit.gpgsign false

# configure ssh daemon
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y openssh-server && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
RUN /apt_install openssh-server
RUN if ! [ -d /var/run/sshd ]; then mkdir /var/run/sshd; fi
RUN echo 'root:password!!' | chpasswd
RUN sed -i 's/^[# ]*PermitRootLogin .*$/PermitRootLogin yes/g' /etc/ssh/sshd_config
Expand Down Expand Up @@ -41,9 +46,9 @@ RUN echo 'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/' >> ~/.bashrc
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y python3 python3-pip python3-dev && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*

# install PyCOMPSs
RUN DEBIAN_FRONTEND=noninteractive apt update -y && apt install -y graphviz xdg-utils libtool automake build-essential \
RUN /apt_install graphviz xdg-utils libtool automake build-essential \
python python-dev libpython2.7 libboost-serialization-dev libboost-iostreams-dev libxml2 libxml2-dev csh gfortran \
libgmp3-dev flex bison texinfo libpapi-dev && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
libgmp3-dev flex bison texinfo libpapi-dev
RUN python3 -m pip install --upgrade pip setuptools
RUN python3 -m pip install dill guppy3
RUN python3 -m pip install "pycompss==3.1" -v
Expand Down
13 changes: 13 additions & 0 deletions apt_install
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

# Script used for installing software through apt in Dockerfiles,
# avoiding layer cache and image size problems.
#
# Usage: /apt_install <package> [<package> ...]

# Abort immediately if any step fails; otherwise a failed `apt install`
# would be masked by the succeeding cleanup commands and the Docker
# layer would build "successfully" without the requested packages.
set -e

# update packages lists
DEBIAN_FRONTEND=noninteractive apt update -y

# install required software ("$@" preserves each package name as one argument)
DEBIAN_FRONTEND=noninteractive apt install -y "$@"

# clean apt cache and lists to keep the image layer small
DEBIAN_FRONTEND=noninteractive apt clean
rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
57 changes: 0 additions & 57 deletions parsoda/function/analysis/parallel_fp_growth.py

This file was deleted.

38 changes: 0 additions & 38 deletions parsoda/function/analysis/sequential_fp_growth.py

This file was deleted.

33 changes: 19 additions & 14 deletions parsoda/model/driver/parsoda_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,13 @@ def init_environment(self) -> None:
def set_num_partitions(self, num_partitions: int) -> None:
"""
Sets the number of data partitions
:return: None
"""
pass

@abstractmethod
def set_chunk_size(self, chunk_size: int) -> None:
"""
Sets the size of data partitions in bytes
:return: None
"""
pass

Expand All @@ -55,53 +53,60 @@ def crawl(self, crawler: List[Crawler]) -> None:
After invoking this function the implementor should hold a representation of an initial dataset
(e.g., on Spark a new RDD is populated with the SocialDataItem objects provided by crawlers)
:return: None
"""
pass

@abstractmethod
def filter(self, filter_func: Callable[[Any], bool]) -> None:
"""
        Applies the given filter to the current dataset, dropping all items that do not satisfy the filter
:param filter_func: the filter to apply
:return: None
Args:
filter_func: the filter to apply
"""
pass

@abstractmethod
def flatmap(self, mapper: Callable[[Any], Iterable[Any]]) -> None:
"""
Executes a mapping of each item to a list of custom key-value pairs, represented as tuples of two elements each
:param mapper: the (object -> list[(K,V)]) mapping function to apply
:return: None
Args:
mapper: the (object -> list[(K,V)]) mapping function to apply
"""
pass

def map(self, mapper: Callable[[Any], Any]) -> None:
"""
Executes a mapping of each item in the current dataset to a new object
:param mapper: the (object -> list[(K,V)]) mapping function to apply
:return: None
Executes a mapping of each item in the current dataset to a new object.
Args:
            mapper: the (object -> object) mapping function to apply
"""
self.flatmap(_flatmapper(mapper))

#TODO: documentation
def group_by_key(self) -> None:
"""Assumes that the current dataset is a bulk of key-value pairs and creates a new dataset which groups all the items with the same key. The new dataset will be a bulk of (key)-(list-of-values) pairs.
"""Assumes that the current dataset is a bulk of key-value pairs
and creates a new dataset which groups all the items with the same key.
The new dataset will be a bulk of (key)-(list-of-values) pairs.
"""
pass

def get_result(self) -> Any:
"""
Gets the current dataset
:return: the current dataset
Returns:
Any: the current dataset
"""
pass

@abstractmethod
def dispose_environment(self) -> None:
"""
Disposes instantiated resources of the underlying environment, after executing the ParSoDA application, in order to reuse this driver as a new fresh driver that should be re-initialized
:return: None
Disposes instantiated resources of the underlying environment,
after executing the ParSoDA application, in order to reuse
this driver as a new fresh driver that should be re-initialized
"""
pass
11 changes: 11 additions & 0 deletions parsoda/model/function/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,15 @@ class Analyzer(ABC, Generic[K, R, A]):

@abstractmethod
def analyze(self, driver: ParsodaDriver, data: Dict[K, R]) -> A:
"""Applies an analysis algorithm to the output data from reduction step.
The analyzer might be a sequential, parallel or distributed algorithm.
In the latter case, the algorithm would use the same driver used by the current application for running a new, nested, ParSoDA application.
Args:
driver (ParsodaDriver): the driver used during the execution of the parallel phase
            data (Dict[K, R]): output data from the reduction step, organized as a dictionary of key-value pairs
Returns:
            A: the output data from the analysis
"""
pass
5 changes: 5 additions & 0 deletions parsoda/model/function/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ def get_partitions(self, num_of_partitions=0, partition_size=1024*1024*1024) ->

@abstractmethod
def supports_remote_partitioning(self) -> bool:
"""Checks if the crawler supports remote partitioning, i.e. the ability to read data directly from the worker nodes
Returns:
            bool: True if the crawler supports remote partitioning of the data source.
"""
pass


Expand Down
13 changes: 8 additions & 5 deletions parsoda/model/function/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ class Filter(ABC):

@abstractmethod
def test(self, item: SocialDataItem) -> bool:
"""
Test if the item satisfies the predicate of the filter
:param item: the item to test
:return: True if the item satisfies the predicate, False otherwise
"""
"""Test if the item satisfies the predicate of the filter
Args:
item (SocialDataItem): the item to test
Returns:
bool: True if the item satisfies the predicate, False otherwise
"""
pass
11 changes: 7 additions & 4 deletions parsoda/model/function/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@ class Mapper(ABC, Generic[K, V]):

@abstractmethod
def map(self, item: SocialDataItem) -> Iterable[Tuple[K, V]]:
"""
Returns a list of key-value pairs computed from the given item.
"""Returns a list of key-value pairs computed from the given item.
Example result: [ (item.user_id, item.tags[0]), (item.user_id, item.tags[1]), ... ]
:param item: the item to map
:return: a list of key-value pairs
Args:
item (SocialDataItem): the item to map
Returns:
Iterable[Tuple[K, V]]: an iterable of key-value pairs
"""
pass
13 changes: 8 additions & 5 deletions parsoda/model/function/reducer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@ class Reducer(ABC, Generic[K, V, R]):
"""

def reduce(self, key: K, values: List[V]) -> R:
"""
Applies the reduction algorithm to values
:param key: the key all values are associated to
:param values: all the values associated to the key
:return: the reduced value
"""Applies the reduction algorithm to values
Args:
key (K): the key all values are associated to
values (List[V]): all the values associated to the key
Returns:
R: the reduced value
"""
pass
5 changes: 5 additions & 0 deletions parsoda/model/function/visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,9 @@ class Visualizer(ABC, Generic[A]):

@abstractmethod
def visualize(self, result: A) -> None:
"""Transforms data from the analysis step in some output format, then write them to some output device or system.
Args:
result (A): the data resulting from the analysis step
"""
pass
Loading

0 comments on commit a800e68

Please sign in to comment.