-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun_all.sh
executable file
·37 lines (32 loc) · 1.06 KB
/
run_all.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# Create an n-node cluster with default configurations.
#
# Takes two options: the number of datasets that will be downloaded and the
# number of Spark cluster nodes.
#
# Usage: run_all.sh [-d N | --datasets=N] [-n N | --nodes=N]

#######################################
# Print a one-line usage summary.
# Outputs: usage text on stderr
#######################################
usage() {
  printf 'Usage: %s [-d N | --datasets=N] [-n N | --nodes=N]\n' "${0##*/}" >&2
}

#######################################
# Parse the command-line options of this script.
# Globals:   number_of_datasets (written when -d / --datasets=N is given)
#            number_of_nodes    (written when -n / --nodes=N is given)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   a diagnostic plus the usage line on stderr for invalid input
# Returns:   0 on success; exits with status 2 on an unknown option, a
#            missing option argument, or a long option not written as
#            --name=value. Non-option arguments end parsing and are ignored.
#######################################
parse_args() {
  # A local OPTIND makes every call start at the first argument and keeps
  # getopts state from leaking out of the function.
  local OPTIND=1 opt
  # Leading ':' selects getopts' silent mode (errors are reported below);
  # '-:' makes getopts hand "--name=value" over as option '-' with
  # OPTARG set to "name=value".
  local -r optspec=':d:n:-:'

  while getopts "$optspec" opt; do
    case "$opt" in
      d)
        number_of_datasets=$OPTARG
        ;;
      n)
        number_of_nodes=$OPTARG
        ;;
      -)
        case "$OPTARG" in
          datasets=*)
            number_of_datasets=${OPTARG#*=}
            ;;
          nodes=*)
            number_of_nodes=${OPTARG#*=}
            ;;
          *)
            # Covers unknown long options and the space-separated form
            # ("--datasets 5"), which getopts cannot split for us.
            printf '%s: unrecognized option --%s (long options take the form --name=value)\n' \
              "${0##*/}" "$OPTARG" >&2
            usage
            exit 2
            ;;
        esac
        ;;
      :)
        # Silent mode: OPTARG holds the option letter whose argument is missing.
        printf '%s: option -%s requires an argument\n' "${0##*/}" "$OPTARG" >&2
        usage
        exit 2
        ;;
      \?)
        # Silent mode: OPTARG holds the unknown option letter.
        printf '%s: unknown option -%s\n' "${0##*/}" "$OPTARG" >&2
        usage
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Run one pipeline stage with bash and stop the whole run if it fails, so
# later stages never operate on the result of a broken earlier stage.
# Arguments: $1 - path of the stage script; remaining args are passed to it
# Outputs:   a diagnostic on stderr when the stage fails
# Returns:   0 on success; exits with the stage's exit status on failure
#######################################
run_step() {
  local status=0
  bash "$@" || status=$?
  if (( status != 0 )); then
    printf '%s: step failed with exit status %d: %s\n' \
      "${0##*/}" "$status" "$*" >&2
    exit "$status"
  fi
}

# Load data from eddn.io and transform the JSON into a CSV file as
# preparation for the Cassandra load.
# The option values are quoted so they reach the stage script as one word.
run_step ./download_and_transform_data.sh --datasets="${number_of_datasets-}"

# Create the docker cluster, including the Cassandra DB and the Spark cluster.
run_step ./run_docker_compose.sh --nodes="${number_of_nodes-}"

# NOTE(review): fixed 30-second pause after the compose step — presumably it
# gives the containers time to finish starting; confirm and consider polling.
sleep 30

# Create the keyspace and table, then load the data into Cassandra.
run_step ./load_data_into_cassandra.sh

# Copy the PySpark scripts and execute them.
run_step ./exec_pyspark_scripts.sh