diff --git a/README.md b/README.md index 43b3602..a4962e9 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,7 @@ $ python experiments/experiment_max_batchsize_ilp.py --model-name MobileNet --ba ## Troubleshooting +### Gurobi license errors If Gurobi is unable to locate your license file, set its path via an environment variable: ``` export GRB_LICENSE_FILE=/path/to/gurobi.lic @@ -147,5 +148,12 @@ export GRB_LICENSE_FILE=/path/to/gurobi.lic For example, the licence is stored by default at `$HOME/gurobi.lic`. +### Evaluation machine resources +* 2x Intel E5-2670 CPUs - (Haswell 12 Cores / 24 Threads) +* 256GB DDR4 RAM +* 4TB HDD +* Kernel: `Ubuntu 18.04.3 LTS (GNU/Linux 5.3.0-24-generic x86_64)` + ## All supported model architectures -The following architectures are implemented via the `--model-name` argument: DenseNet121,DenseNet169,DenseNet201,InceptionV3,MobileNet,MobileNetV2,NASNetLarge,NASNetMobile,ResNet101,ResNet101V2,ResNet152,ResNet152V2,ResNet50,ResNet50V2,VGG16,VGG19,Xception,fcn_32,fcn_32_mobilenet,fcn_32_resnet50,fcn_32_vgg,fcn_8,fcn_8_mobilenet,fcn_8_resnet50,fcn_8_vgg,linear0,linear1,linear10,linear11,linear12,linear13,linear14,linear15,linear16,linear17,linear18,linear19,linear2,linear20,linear21,linear22,linear23,linear24,linear25,linear26,linear27,linear28,linear29,linear3,linear30,linear31,linear4,linear5,linear6,linear7,linear8,linear9,mobilenet_segnet,mobilenet_unet,pspnet,pspnet_101,pspnet_50,resnet50_pspnet,resnet50_segnet,resnet50_unet,segnet,test,unet,unet_mini,vgg_pspnet,vgg_segnet,vgg_unet +The following architectures are implemented via the `--model-name` argument: 
+```DenseNet121,DenseNet169,DenseNet201,InceptionV3,MobileNet,MobileNetV2,NASNetLarge,NASNetMobile,ResNet101,ResNet101V2,ResNet152,ResNet152V2,ResNet50,ResNet50V2,VGG16,VGG19,Xception,fcn_32,fcn_32_mobilenet,fcn_32_resnet50,fcn_32_vgg,fcn_8,fcn_8_mobilenet,fcn_8_resnet50,fcn_8_vgg,linear0,linear1,linear10,linear11,linear12,linear13,linear14,linear15,linear16,linear17,linear18,linear19,linear2,linear20,linear21,linear22,linear23,linear24,linear25,linear26,linear27,linear28,linear29,linear3,linear30,linear31,linear4,linear5,linear6,linear7,linear8,linear9,mobilenet_segnet,mobilenet_unet,pspnet,pspnet_101,pspnet_50,resnet50_pspnet,resnet50_segnet,resnet50_unet,segnet,test,unet,unet_mini,vgg_pspnet,vgg_segnet,vgg_unet``` diff --git a/experiments/experiment_budget_sweep_with_approximation.py b/experiments/experiment_budget_sweep_with_approximation.py index 334b427..93583d8 100644 --- a/experiments/experiment_budget_sweep_with_approximation.py +++ b/experiments/experiment_budget_sweep_with_approximation.py @@ -36,7 +36,7 @@ from remat.tensorflow2.extraction import dfgraph_from_keras # ILP solve params -NUM_ILP_CORES = os.environ.get("ILP_CORES", 12 if os.cpu_count() > 12 else 4) +NUM_ILP_CORES = int(os.environ.get("ILP_CORES", 12)) # Budget selection parameters NUM_ILP_GLOBAL = 32 diff --git a/reproduce_all.sh b/reproduce_all.sh new file mode 100644 index 0000000..5dc9bd5 --- /dev/null +++ b/reproduce_all.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Thank you to the reproducibility reviewers for MLSys 2020 who provided +# the following scripts to replicate our paper's results. 
# Part 1: baseline / budget-sweep experiments, run sequentially and timed.
commands=(
  'python experiments/experiment_budget_sweep.py --model-name "VGG16" -b 256 --platform p32xlarge'
  'python experiments/experiment_budget_sweep.py --model-name "MobileNet" -b 512 --platform p32xlarge'
  'python experiments/experiment_budget_sweep.py --model-name "vgg_unet" -b 32 --platform p32xlarge'
  'python experiments/experiment_max_batchsize_baseline.py --model-name vgg_unet --batch-size-min 10 --batch-size-max 40 --batch-size-increment 1'
  'python experiments/experiment_max_batchsize_baseline.py --model-name fcn_8_vgg --batch-size-min 10 --batch-size-max 80 --batch-size-increment 1'
  'python experiments/experiment_max_batchsize_baseline.py --model-name segnet --batch-size-min 20 --batch-size-max 50 --batch-size-increment 1'
  'python experiments/experiment_max_batchsize_baseline.py --model-name ResNet50 --batch-size-min 90 --batch-size-max 200 --batch-size-increment 1'
  'python experiments/experiment_max_batchsize_baseline.py --model-name VGG19 --batch-size-min 160 --batch-size-max 300 --batch-size-increment 1'
  'python experiments/experiment_max_batchsize_baseline.py --model-name MobileNet --batch-size-min 200 --batch-size-max 650 --batch-size-increment 1'
)

rm -rf stdout_err
mkdir stdout_err
rm -f results.txt   # -f: don't error on the first run, when results.txt doesn't exist yet
index=1
for cmd in "${commands[@]}"; do
  printf '%s\n' "$cmd"
  # Wall-clock timing in milliseconds (GNU date: %N = nanoseconds).
  start=$(($(date +%s%N) / 1000000))
  # eval is required: the command strings contain quoted arguments.
  eval "$cmd" &>"stdout_err/$index.txt"
  end=$(($(date +%s%N) / 1000000))
  runtime=$((end - start))
  printf '%s\n%s ms\n' "$cmd" "$runtime" >>results.txt
  index=$((index + 1))
done

# Part 2: ILP max-batch-size experiments. Each run is backgrounded and given
# at most $timeout_secs seconds, then interrupted so the solver reports its
# best incumbent solution.
timeout_secs=1800   # change to use a larger or smaller per-experiment timeout
commands=(
  'python experiments/experiment_max_batchsize_ilp.py --model-name vgg_unet --batch-size-min 20 --num-threads 40'
  'python experiments/experiment_max_batchsize_ilp.py --model-name fcn_8_vgg --batch-size-min 20 --num-threads 40'
  'python experiments/experiment_max_batchsize_ilp.py --model-name segnet --batch-size-min 20 --num-threads 40'
  'python experiments/experiment_max_batchsize_ilp.py --model-name ResNet50 --batch-size-min 100 --num-threads 40'
  'python experiments/experiment_max_batchsize_ilp.py --model-name VGG19 --batch-size-min 160 --num-threads 40'
  'python experiments/experiment_max_batchsize_ilp.py --model-name MobileNet --batch-size-min 450 --num-threads 40'
)

rm -rf max_batchsize_ilp
mkdir max_batchsize_ilp
index=1
for cmd in "${commands[@]}"; do
  printf '%s\n' "$cmd"
  eval "$cmd" &>"max_batchsize_ilp/$index.txt" &
  pid=$!
  # Poll instead of sleeping the full timeout, so we move on to the next
  # experiment as soon as this one exits (kill -0 only tests liveness).
  for ((elapsed = 0; elapsed < timeout_secs; elapsed++)); do
    kill -0 "$pid" 2>/dev/null || break
    sleep 1
  done
  # SIGINT (not SIGKILL) lets Gurobi terminate cleanly with its incumbent.
  # Guarded: the process may already have exited on its own.
  kill -INT "$pid" 2>/dev/null || true
  index=$((index + 1))
done