Skip to content

Commit

Permalink
Updated scripts / numbers for motivation.
Browse files Browse the repository at this point in the history
  • Loading branch information
sukritkalra committed Jan 29, 2024
1 parent cb0bc78 commit 8fd8302
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 1 deletion.
6 changes: 6 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,12 @@
"If `scheduler_selective_rescheduling` is True, then this flag defines the number "
"of TaskGraphs to sample for rescheduling.",
)
# Consumed by the TetriSched scheduler (schedulers/tetrisched_scheduler.py) as
# its TaskGraph reconsideration period.
# NOTE(review): the default (0.1) and the value passed by the experiment script
# (0.2) suggest this is a fraction in [0, 1] rather than a percentage in
# [0, 100] as the help text says -- confirm and reword the help text if so.
flags.DEFINE_float(
"scheduler_reconsideration_period",
0.1,
"The percentage of critical path duration until which the scheduler will try "
"placing the TaskGraph, and drop the TaskGraph if it cannot be placed after.",
)

# Workload definition related flags.
flags.DEFINE_integer(
Expand Down
6 changes: 6 additions & 0 deletions profiles/workers/alibaba_cluster_30_slots.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
- name: WorkerPool_1
workers:
- name: Worker_1_1
resources:
- name: Slot_1
quantity: 30
4 changes: 3 additions & 1 deletion schedulers/tetrisched_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,9 @@ def __init__(
# the release time and the deadline. So, if a TaskGraph was released at 100 and
# has a deadline of 500, it will be retried until scheduler invocations up to
# 180, and will be dropped after.
self._task_graph_reconsideration_period = 0.10
self._task_graph_reconsideration_period = (
0.10 if _flags is None else _flags.scheduler_reconsideration_period
)
self._previously_considered_task_graphs: Set[str] = set()

# A cache for the STRLs generated for individual tasks.
Expand Down
156 changes: 156 additions & 0 deletions scripts/run_alibaba_motivation_experiments.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#!/bin/bash
# Usage: run_alibaba_motivation_experiments.sh <LOG_DIR>
#
# Requires the ERDOS_SIMULATOR_DIR environment variable to be set. The script
# changes into that directory before doing anything else, so a relative LOG_DIR
# is resolved against ERDOS_SIMULATOR_DIR, not against the caller's directory.

# Move to the simulator directory.
if [[ -z "${ERDOS_SIMULATOR_DIR}" ]]; then
    echo "[x] ERROR: ERDOS_SIMULATOR_DIR is not set"
    exit 1
fi
# Abort if the directory cannot be entered; otherwise `python3 main.py` and the
# relative profile / trace paths would be resolved from the wrong location.
cd "${ERDOS_SIMULATOR_DIR}" || {
    echo "[x] ERROR: Could not change directory to ${ERDOS_SIMULATOR_DIR}"
    exit 1
}

# The first positional argument is the root directory for all experiment output.
LOG_DIR=$1
if [[ -z "${LOG_DIR}" ]]; then
    echo "[x] ERROR: Please provide a directory to output results to as the first argument."
    exit 2
fi


# Random seeds.
# Each seed yields a different selection of TaskGraphs from the trace and a
# different arrival pattern.
RANDOM_SEEDS=(
    420665456
    9165261
    106432512
    95498947
    105937176
    66362485
    416681780
)

# Schedulers.
# The baselines that DAGSched is compared against, plus DAGSched itself.
SCHEDULERS=(
    EDF
    TetriSched
    GraphenePrime
    DAGSched
)

# Poisson arrival rates.
# The two arrays are consumed pairwise by index: entry i of the medium rates is
# always run together with entry i of the hard rates.
MEDIUM_ARRIVAL_RATES=(
    0.0075
    0.005
    0.0046
    0.004
    0.0036
)
HARD_ARRIVAL_RATES=(
    0.0025
    0.0025
    0.002
    0.002
    0.0015
)

# Runs a single experiment and stores its configuration, log, CSV and stdout
# under
#   <LOG_DIR>/<SCHEDULER>/run_<RANDOM_SEED>/arrival_rate_<MEDIUM>_<HARD>/
#
# Arguments:
#   $1  Scheduler name: EDF, TetriSched, GraphenePrime or DAGSched.
#   $2  Random seed passed to the simulator.
#   $3  Root directory for the experiment output.
#   $4  Poisson arrival rate for the medium workload.
#   $5  Poisson arrival rate for the hard workload.
#
# Returns early if the experiment directory already contains the CSV output.
# Exits the whole script with status 1 on an unknown scheduler and with status
# 3 if the simulator invocation fails.
execute_experiment () {
    # Keep everything local so the function does not clobber the caller's
    # variables (the caller has a global LOG_DIR and loop variables of the
    # same names).
    local SCHEDULER=$1
    local RANDOM_SEED=$2
    local LOG_DIR=$3
    local LOG_BASE="run_${RANDOM_SEED}"
    local MEDIUM_ARRIVAL_RATE=$4
    local HARD_ARRIVAL_RATE=$5

    local EXPERIMENT_DIR="${LOG_DIR}/${SCHEDULER}/${LOG_BASE}/arrival_rate_${MEDIUM_ARRIVAL_RATE}_${HARD_ARRIVAL_RATE}"
    # Paths are quoted throughout so that a LOG_DIR containing whitespace works.
    mkdir -p "${EXPERIMENT_DIR}"

    # An existing CSV is treated as proof that this experiment already ran.
    if [ -f "${EXPERIMENT_DIR}/alibaba_trace_replay.csv" ]; then
        echo "[x] The experiment for ${SCHEDULER} with random seed ${RANDOM_SEED} has already been run."
        return
    fi

    # Build the baseline configuration for the experiment.
    # The leading whitespace on the lines below is stripped by sed when the
    # configuration is written to the flag file.
    local EXPERIMENT_CONF="\
    # Output configuration.
    --log_dir=${EXPERIMENT_DIR}
    --log_file_name=alibaba_trace_replay.log
    --csv_file_name=alibaba_trace_replay.csv
    --log_level=debug
    "

    EXPERIMENT_CONF+="
    # Worker configuration.
    --worker_profile_path=profiles/workers/alibaba_cluster_30_slots.yaml
    "

    EXPERIMENT_CONF+="
    # Workload configuration.
    --execution_mode=replay
    --replay_trace=alibaba
    --workload_profile_paths=traces/alibaba-cluster-trace-v2018/easy_dag_sukrit_10k.pkl,traces/alibaba-cluster-trace-v2018/medium_dag_sukrit_10k.pkl,traces/alibaba-cluster-trace-v2018/hard_dag_sukrit_10k.pkl
    --workload_profile_path_labels=easy,medium,hard
    --override_release_policies=poisson,poisson,poisson
    --override_num_invocations=0,200,100
    --override_poisson_arrival_rates=0.0075,${MEDIUM_ARRIVAL_RATE},${HARD_ARRIVAL_RATE}
    --randomize_start_time_max=50
    --min_deadline=5
    --max_deadline=500
    --min_deadline_variances=25,50,10
    --max_deadline_variances=50,100,25
    # Loader configuration.
    --alibaba_loader_task_cpu_divisor=10
    --alibaba_loader_min_critical_path_runtimes=200,500,600
    --alibaba_loader_max_critical_path_runtimes=500,1000,1000
    "

    # Append the scheduler-specific flags.
    if [[ ${SCHEDULER} == "EDF" ]]; then
        EXPERIMENT_CONF+="
        # Scheduler configuration.
        --scheduler=EDF
        --enforce_deadlines
        "
    elif [[ ${SCHEDULER} == "TetriSched" ]]; then
        EXPERIMENT_CONF+="
        # Scheduler configuration.
        --scheduler=TetriSched
        --enforce_deadlines
        --scheduler_time_discretization=1
        --scheduler_enable_optimization_pass
        --retract_schedules
        --scheduler_time_limit=120
        "
    elif [[ ${SCHEDULER} == "GraphenePrime" ]]; then
        EXPERIMENT_CONF+="
        # Scheduler configuration.
        --scheduler=GraphenePrime
        --scheduler_selective_rescheduling
        --scheduler_selective_rescheduling_sample_size=2
        --scheduler_time_discretization=2
        --scheduler_enable_optimization_pass
        --scheduler_plan_ahead=1000
        --retract_schedules
        --scheduler_time_limit=120
        "
    elif [[ ${SCHEDULER} == "DAGSched" ]]; then
        # DAGSched is run as the TetriSched scheduler with TaskGraph release
        # enabled and a reconsideration period of 0.2.
        EXPERIMENT_CONF+="
        # Scheduler configuration.
        --scheduler=TetriSched
        --release_taskgraphs
        --enforce_deadlines
        --scheduler_time_discretization=1
        --scheduler_enable_optimization_pass
        --scheduler_reconsideration_period=0.2
        --retract_schedules
        --scheduler_time_limit=120
        "
    else
        echo "[x] ERROR: Unknown scheduler ${SCHEDULER}"
        exit 1
    fi

    EXPERIMENT_CONF+="\
    --scheduler_runtime=0
    --random_seed=${RANDOM_SEED}"

    # Strip the indentation and persist the configuration as a flag file.
    echo "${EXPERIMENT_CONF}" | sed -e 's/^[ \t]*//' > "${EXPERIMENT_DIR}/alibaba_trace_replay.conf"

    echo "[x] Constructed configuration for ${EXPERIMENT_DIR}. Beginning experiment"

    # Run the simulator, capturing its stdout next to the other outputs. A
    # failure aborts the whole sweep.
    if ! python3 main.py --flagfile="${EXPERIMENT_DIR}/alibaba_trace_replay.conf" > "${EXPERIMENT_DIR}/alibaba_trace_replay.output"; then
        echo "[x] Failed in the execution of ${LOG_BASE}. Exiting."
        exit 3
    fi

    echo "[x] Finished execution of ${EXPERIMENT_DIR}."
}

# The medium and hard arrival rates are paired by index below, so both arrays
# must have the same number of entries.
if [ ${#MEDIUM_ARRIVAL_RATES[@]} -ne ${#HARD_ARRIVAL_RATES[@]} ]; then
    echo "[x] ERROR: The number of medium and hard arrival rates must be the same."
    exit 1
fi

# Run every (scheduler, random seed, arrival-rate pair) combination in sequence.
for SCHEDULER in "${SCHEDULERS[@]}"; do
    for RANDOM_SEED in "${RANDOM_SEEDS[@]}"; do
        for ((i=0; i<${#MEDIUM_ARRIVAL_RATES[@]}; i++)); do
            MEDIUM_ARRIVAL_RATE=${MEDIUM_ARRIVAL_RATES[$i]}
            HARD_ARRIVAL_RATE=${HARD_ARRIVAL_RATES[$i]}
            echo "[x] Running ${SCHEDULER} with random seed ${RANDOM_SEED} and arrival rates ${MEDIUM_ARRIVAL_RATE} and ${HARD_ARRIVAL_RATE}"
            # Arguments are quoted so that a LOG_DIR containing whitespace is
            # passed as a single positional argument instead of shifting the
            # arrival rates out of place.
            execute_experiment "${SCHEDULER}" "${RANDOM_SEED}" "${LOG_DIR}" "${MEDIUM_ARRIVAL_RATE}" "${HARD_ARRIVAL_RATE}"
        done
    done
done

0 comments on commit 8fd8302

Please sign in to comment.