Skip to content

Commit

Permalink
updated Dockerfile. Replaced is_valid_dir to is_valid_dir_or_url in cli
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanodallapalma committed Oct 27, 2020
1 parent fc048f9 commit 821b860
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 19 deletions.
19 changes: 15 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
FROM python:3.8-buster
FROM ubuntu:20.04

MAINTAINER Stefano Dalla Palma

RUN python3.8 -m pip install --upgrade pip
# Install python
RUN apt-get update \
&& apt-get install -y python3-pip python3-dev \
&& cd /usr/local/bin \
&& ln -s /usr/bin/python3 python \
&& pip3 install --upgrade pip

# Install git
RUN apt-get install git -y

COPY . /app
WORKDIR /app

RUN pip install repository-miner==0.7.0
# Install application (latest)
RUN pip install -r requirements.txt
RUN pip install repository-miner

CMD repo-miner -h
# Environment variable for temporary repositories
ENV TMP_REPOSITORIES_DIR=/tmp/
30 changes: 28 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,37 @@ pip install .

# How to test

```
```text
pip install pytest
unzip test_data.zip -d .
pytest tests/
pytest
```


# How to build Docker container

`docker build --tag repo-miner:test .`

# How to run Docker container

First create or define a directory to mount inside the Docker container to access the results once generated.
For the sake of the example let's use `/tmp/`.

## Mine

Using the `github` argument:

`docker run -v /tmp:/app -e GITHUB_ACCESS_TOKEN=$GITHUB_ACCESS_TOKEN repo-miner:test repo-miner mine github ansible adriagalin/ansible.motd . --verbose`

Using the `gitlab` argument:

`docker run -v /tmp:/app -e GITLAB_ACCESS_TOKEN=$GITLAB_ACCESS_TOKEN repo-miner:test repo-miner mine gitlab ansible adriagalin/ansible.motd . --verbose`


## Extract metrics

`docker run -v /tmp:/app repo-miner:test repo-miner extract-metrics https://github.com/<owner>/<repository>.git /tmp/failure_prone_files.json ansible all release . --verbose`


## CHANGELOG
See the [CHANGELOG](CHANGELOG.md) for information about the release history.
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pip install .

To avoid affecting the current environment, it is strongly recommended to create and activate a virtual environment:

```
```text
sudo apt install python3-venv
python3 -m venv repo-miner-env
source repo-miner-env/bin/activate
Expand Down
19 changes: 16 additions & 3 deletions repominer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,19 @@
from repominer.mining.tosca import ToscaMiner
from repominer.report import create_report

VERSION = '0.7.0'
VERSION = '0.7.1'


def valid_dir_or_url(x: str) -> str:
    """
    Argparse ``type`` callback accepting a local directory or a remote url.

    :param x: the value supplied on the command line
    :return: x unchanged when it is an existing directory or starts with "git@" or "https://"
    :raise ArgumentTypeError: when x is neither an existing directory nor such a url
    """
    # A local checkout wins first: any existing directory is accepted as-is.
    if os.path.isdir(x):
        return x

    # Remote repositories are recognised purely by their prefix (SSH or HTTPS).
    if x.startswith(("git@", "https://")):
        return x

    raise ArgumentTypeError('Insert a valid path or url')


def valid_dir(x: str) -> str:
Expand Down Expand Up @@ -84,7 +96,7 @@ def set_extract_metrics_parser(subparsers):

parser.add_argument(action='store',
dest='path_to_repo',
type=valid_dir,
type=valid_dir_or_url,
help='the absolute path to a cloned repository or the url to a remote repository')

parser.add_argument(action='store',
Expand Down Expand Up @@ -204,7 +216,8 @@ def extract_metrics(args: Namespace):
global extractor

if args.verbose:
print(f'Extracting metrics from {args.path_to_repo} using report {args.src} [started at: {datetime.now().hour}:{datetime.now().minute}]')
print(
f'Extracting metrics from {args.path_to_repo} using report {args.src} [started at: {datetime.now().hour}:{datetime.now().minute}]')

with open(args.src, 'r') as f:
labeled_files = json.load(f, cls=FailureProneFileDecoder)
Expand Down
13 changes: 8 additions & 5 deletions repominer/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,13 @@ def __init__(self, path_to_repo: str, at: str = 'release'):

self.path_to_repo = path_to_repo

# If path_to_repo is a remote url, then clone it in os.getenv('TMP_REPOSITORIES_DIR')
if is_remote(path_to_repo):
if os.path.isdir(path_to_repo):
self.repo_miner = RepositoryMining(path_to_repo=path_to_repo,
only_releases=True if at == 'release' else False,
order='date-order')

elif is_remote(path_to_repo):
# If path_to_repo is a remote url, then clone it to os.getenv('TMP_REPOSITORIES_DIR')
self.repo_miner = RepositoryMining(path_to_repo=path_to_repo,
clone_repo_to=os.getenv('TMP_REPOSITORIES_DIR'),
only_releases=True if at == 'release' else False,
Expand All @@ -57,9 +62,7 @@ def __init__(self, path_to_repo: str, at: str = 'release'):
self.path_to_repo = os.path.join(os.getenv('TMP_REPOSITORIES_DIR'), repo_name)

else:
self.repo_miner = RepositoryMining(path_to_repo=path_to_repo,
only_releases=True if at == 'release' else False,
order='date-order')
raise ValueError(f'{path_to_repo} does not seem a path or url to a Git repository.')

self.dataset = pd.DataFrame()

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
with open("README.md", "r") as fh:
long_description = fh.read()

VERSION = '0.7.0'
VERSION = '0.7.1'

setup(name='repository-miner',
version=VERSION,
Expand Down
16 changes: 13 additions & 3 deletions tests/test_cli_extract_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,29 @@ class CLIExtractMetricsTestCase(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.path_to_repo = os.path.join(os.getcwd(), 'test_data', 'repositories', 'ansible.motd')
cls.url_to_repo = 'https://github.com/adriagalin/ansible.motd.git'
cls.path_to_report = os.path.join(os.getcwd(), 'test_data', 'ansible_report.json')

cls.path_to_tmp_dir = os.path.join(os.getcwd(), 'test_data', 'tmp')
os.mkdir(cls.path_to_tmp_dir)
os.environ["TMP_REPOSITORIES_DIR"] = cls.path_to_tmp_dir

@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.path_to_tmp_dir)
del os.environ["TMP_REPOSITORIES_DIR"]

def test_mine(self):
def test_remote_url(self):
result = os.system('repo-miner extract-metrics {0} {1} ansible all release {2}'.format(self.url_to_repo,
self.path_to_report,
self.path_to_tmp_dir))
assert result == 0
assert 'metrics.csv' in os.listdir(self.path_to_tmp_dir)

def test_local_path(self):
result = os.system('repo-miner extract-metrics {0} {1} ansible all release {2}'.format(self.path_to_repo,
self.path_to_report,
self.path_to_tmp_dir))
self.path_to_report,
self.path_to_tmp_dir))
assert result == 0
assert 'metrics.csv' in os.listdir(self.path_to_tmp_dir)

Expand Down

0 comments on commit 821b860

Please sign in to comment.