From 821b8608d0dfb5b9e51c3cfd389ab71479fa974d Mon Sep 17 00:00:00 2001 From: Stefano Dalla Palma Date: Tue, 27 Oct 2020 17:44:25 +0100 Subject: [PATCH] updated Dockerfile. Replaced is_valid_dir to is_valid_dir_or_url in cli --- Dockerfile | 19 +++++++++++++++---- README.md | 30 ++++++++++++++++++++++++++++-- docs/index.md | 2 +- repominer/cli.py | 19 ++++++++++++++++--- repominer/metrics/base.py | 13 ++++++++----- setup.py | 2 +- tests/test_cli_extract_metrics.py | 16 +++++++++++++--- 7 files changed, 82 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4f1b272..4592801 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,23 @@ -FROM python:3.8-buster +FROM ubuntu:20.04 MAINTAINER Stefano Dalla Palma -RUN python3.8 -m pip install --upgrade pip +# Install python +RUN apt-get update \ + && apt-get install -y python3-pip python3-dev \ + && cd /usr/local/bin \ + && ln -s /usr/bin/python3 python \ + && pip3 install --upgrade pip + +# Install git +RUN apt-get install git -y COPY . /app WORKDIR /app -RUN pip install repository-miner==0.7.0 +# Install application (latest) +RUN pip install -r requirements.txt +RUN pip install repository-miner -CMD repo-miner -h \ No newline at end of file +# Environment variable for temporary repositories +ENV TMP_REPOSITORIES_DIR=/tmp/ diff --git a/README.md b/README.md index 88b9b0c..7439dd2 100644 --- a/README.md +++ b/README.md @@ -27,11 +27,37 @@ pip install . # How to test -``` +```text pip install pytest unzip test_data.zip -d . -pytest tests/ +pytest ``` + +# How to build Docker container + +`docker build --tag repo-miner:latest .` + +# How to run Docker container + +First create or define a directory to mount inside the Docker container to access the results once generated. +For the sake of the example let's use `/tmp/`. + +## Mine + +Using the `github` argument: + +`docker run -v /tmp:/app -e GITHUB_ACCESS_TOKEN=$GITHUB_ACCESS_TOKEN repo-miner:latest repo-miner mine github ansible adriagalin/ansible.motd . 
--verbose` + +Using the `gitlab` argument: + +`docker run -v /tmp:/app -e GITLAB_ACCESS_TOKEN=$GITLAB_ACCESS_TOKEN repo-miner:latest repo-miner mine gitlab ansible adriagalin/ansible.motd . --verbose` + + +## Extract metrics + +`docker run -v /tmp:/app repo-miner:latest repo-miner extract-metrics https://github.com/<owner>/<repository>.git /tmp/failure_prone_files.json ansible all release . --verbose` + + ## CHANGELOG See the [CHANGELOG](CHANGELOG.md) for information about the release history. diff --git a/docs/index.md b/docs/index.md index 12ba794..9b45286 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,7 +17,7 @@ pip install . To avoid affecting the current environment, it is strongly recommended to create and activate a virtual environment: -``` +```text sudo apt install python3-venv python3 -m venv repo-miner-env source repo-miner-env/bin/activate diff --git a/repominer/cli.py b/repominer/cli.py index a6d930d..8bc136d 100644 --- a/repominer/cli.py +++ b/repominer/cli.py @@ -14,7 +14,19 @@ from repominer.mining.tosca import ToscaMiner from repominer.report import create_report -VERSION = '0.7.0' +VERSION = '0.7.1' + + +def valid_dir_or_url(x: str) -> str: + """ + Check if x is a directory and exists, or a remote url + :param x: a path + :return: the path if exists or is a remote url; raise an ArgumentTypeError otherwise + """ + if not (os.path.isdir(x) or x.startswith("git@") or x.startswith("https://")): + raise ArgumentTypeError('Insert a valid path or url') + + return x def valid_dir(x: str) -> str: @@ -84,7 +96,7 @@ def set_extract_metrics_parser(subparsers): parser.add_argument(action='store', dest='path_to_repo', - type=valid_dir, + type=valid_dir_or_url, help='the absolute path to a cloned repository or the url to a remote repository') parser.add_argument(action='store', @@ -204,7 +216,8 @@ def extract_metrics(args: Namespace): global extractor if args.verbose: - print(f'Extracting metrics from {args.path_to_repo} using report {args.src} [started at: 
{datetime.now().hour}:{datetime.now().minute}]') + print( + f'Extracting metrics from {args.path_to_repo} using report {args.src} [started at: {datetime.now().hour}:{datetime.now().minute}]') with open(args.src, 'r') as f: labeled_files = json.load(f, cls=FailureProneFileDecoder) diff --git a/repominer/metrics/base.py b/repominer/metrics/base.py index ce675bd..fc1a354 100644 --- a/repominer/metrics/base.py +++ b/repominer/metrics/base.py @@ -45,8 +45,13 @@ def __init__(self, path_to_repo: str, at: str = 'release'): self.path_to_repo = path_to_repo - # If path_to_repo is a remote url, then clone it in os.getenv('TMP_REPOSITORIES_DIR') - if is_remote(path_to_repo): + if os.path.isdir(path_to_repo): + self.repo_miner = RepositoryMining(path_to_repo=path_to_repo, + only_releases=True if at == 'release' else False, + order='date-order') + + elif is_remote(path_to_repo): + # If path_to_repo is a remote url, then clone it to os.getenv('TMP_REPOSITORIES_DIR') self.repo_miner = RepositoryMining(path_to_repo=path_to_repo, clone_repo_to=os.getenv('TMP_REPOSITORIES_DIR'), only_releases=True if at == 'release' else False, @@ -57,9 +62,7 @@ def __init__(self, path_to_repo: str, at: str = 'release'): self.path_to_repo = os.path.join(os.getenv('TMP_REPOSITORIES_DIR'), repo_name) else: - self.repo_miner = RepositoryMining(path_to_repo=path_to_repo, - only_releases=True if at == 'release' else False, - order='date-order') + raise ValueError(f'{path_to_repo} does not seem a path or url to a Git repository.') self.dataset = pd.DataFrame() diff --git a/setup.py b/setup.py index 0a32648..a29bdb2 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r") as fh: long_description = fh.read() -VERSION = '0.7.0' +VERSION = '0.7.1' setup(name='repository-miner', version=VERSION, diff --git a/tests/test_cli_extract_metrics.py b/tests/test_cli_extract_metrics.py index 8a87453..ef40fe3 100644 --- a/tests/test_cli_extract_metrics.py +++ b/tests/test_cli_extract_metrics.py @@ 
-11,19 +11,29 @@ class CLIExtractMetricsTestCase(unittest.TestCase): @classmethod def setUpClass(cls): cls.path_to_repo = os.path.join(os.getcwd(), 'test_data', 'repositories', 'ansible.motd') + cls.url_to_repo = 'https://github.com/adriagalin/ansible.motd.git' cls.path_to_report = os.path.join(os.getcwd(), 'test_data', 'ansible_report.json') cls.path_to_tmp_dir = os.path.join(os.getcwd(), 'test_data', 'tmp') os.mkdir(cls.path_to_tmp_dir) + os.environ["TMP_REPOSITORIES_DIR"] = cls.path_to_tmp_dir @classmethod def tearDownClass(cls): shutil.rmtree(cls.path_to_tmp_dir) + del os.environ["TMP_REPOSITORIES_DIR"] - def test_mine(self): + def test_remote_url(self): + result = os.system('repo-miner extract-metrics {0} {1} ansible all release {2}'.format(self.url_to_repo, + self.path_to_report, + self.path_to_tmp_dir)) + assert result == 0 + assert 'metrics.csv' in os.listdir(self.path_to_tmp_dir) + + def test_local_path(self): result = os.system('repo-miner extract-metrics {0} {1} ansible all release {2}'.format(self.path_to_repo, - self.path_to_report, - self.path_to_tmp_dir)) + self.path_to_report, + self.path_to_tmp_dir)) assert result == 0 assert 'metrics.csv' in os.listdir(self.path_to_tmp_dir)