Fix issues as cited in reproducibility reviews
parasj committed Feb 20, 2020
1 parent a91771a commit c0def2e
Showing 3 changed files with 59 additions and 2 deletions.
10 changes: 9 additions & 1 deletion README.md
@@ -140,12 +140,20 @@ $ python experiments/experiment_max_batchsize_ilp.py --model-name MobileNet --ba


## Troubleshooting
### Gurobi license errors
If Gurobi is unable to locate your license file, set its path via an environment variable:
```
export GRB_LICENSE_FILE=/path/to/gurobi.lic
```
By default, the license is stored at `$HOME/gurobi.lic`.
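To confirm that Gurobi actually picks up the license, a quick check (assuming `gurobipy` is installed in the active Python environment):
```
export GRB_LICENSE_FILE=$HOME/gurobi.lic
# Building an empty model forces a license check; this raises a GurobiError if no valid license is found.
python -c "import gurobipy; gurobipy.Model()"
```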


### Evaluation machine resources
* 2x Intel E5-2670 CPUs (Haswell, 12 cores / 24 threads)
* 256GB DDR4 RAM
* 4TB HDD
* Kernel: `Ubuntu 18.04.3 LTS (GNU/Linux 5.3.0-24-generic x86_64)`

## All supported model architectures
The following architectures are implemented via the `--model-name` argument: DenseNet121,DenseNet169,DenseNet201,InceptionV3,MobileNet,MobileNetV2,NASNetLarge,NASNetMobile,ResNet101,ResNet101V2,ResNet152,ResNet152V2,ResNet50,ResNet50V2,VGG16,VGG19,Xception,fcn_32,fcn_32_mobilenet,fcn_32_resnet50,fcn_32_vgg,fcn_8,fcn_8_mobilenet,fcn_8_resnet50,fcn_8_vgg,linear0,linear1,linear10,linear11,linear12,linear13,linear14,linear15,linear16,linear17,linear18,linear19,linear2,linear20,linear21,linear22,linear23,linear24,linear25,linear26,linear27,linear28,linear29,linear3,linear30,linear31,linear4,linear5,linear6,linear7,linear8,linear9,mobilenet_segnet,mobilenet_unet,pspnet,pspnet_101,pspnet_50,resnet50_pspnet,resnet50_segnet,resnet50_unet,segnet,test,unet,unet_mini,vgg_pspnet,vgg_segnet,vgg_unet
The following architectures are implemented via the `--model-name` argument:
```DenseNet121,DenseNet169,DenseNet201,InceptionV3,MobileNet,MobileNetV2,NASNetLarge,NASNetMobile,ResNet101,ResNet101V2,ResNet152,ResNet152V2,ResNet50,ResNet50V2,VGG16,VGG19,Xception,fcn_32,fcn_32_mobilenet,fcn_32_resnet50,fcn_32_vgg,fcn_8,fcn_8_mobilenet,fcn_8_resnet50,fcn_8_vgg,linear0,linear1,linear10,linear11,linear12,linear13,linear14,linear15,linear16,linear17,linear18,linear19,linear2,linear20,linear21,linear22,linear23,linear24,linear25,linear26,linear27,linear28,linear29,linear3,linear30,linear31,linear4,linear5,linear6,linear7,linear8,linear9,mobilenet_segnet,mobilenet_unet,pspnet,pspnet_101,pspnet_50,resnet50_pspnet,resnet50_segnet,resnet50_unet,segnet,test,unet,unet_mini,vgg_pspnet,vgg_segnet,vgg_unet```
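Any of these names can be passed directly to the experiment scripts; for instance, one of the commands from the `reproduce_all.sh` script added in this commit:
```
python experiments/experiment_budget_sweep.py --model-name "MobileNet" -b 512 --platform p32xlarge
```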
2 changes: 1 addition & 1 deletion experiments/experiment_budget_sweep_with_approximation.py
@@ -36,7 +36,7 @@
from remat.tensorflow2.extraction import dfgraph_from_keras

# ILP solve params
NUM_ILP_CORES = os.environ.get("ILP_CORES", 12 if os.cpu_count() > 12 else 4)
NUM_ILP_CORES = os.environ.get("ILP_CORES", 12)

# Budget selection parameters
NUM_ILP_GLOBAL = 32
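With this change the ILP solver defaults to 12 cores rather than choosing between 12 and 4 based on `os.cpu_count()`. The `ILP_CORES` environment variable still overrides the default; a minimal sketch (the value 8 is arbitrary, and the command-line flags are assumed to mirror `experiment_budget_sweep.py`):
```
# Use 8 solver cores for this run only (hypothetical value):
ILP_CORES=8 python experiments/experiment_budget_sweep_with_approximation.py --model-name "VGG16" -b 256 --platform p32xlarge
```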
49 changes: 49 additions & 0 deletions reproduce_all.sh
@@ -0,0 +1,49 @@
#!/bin/bash
# Thank you to the reproducibility reviewers for MLSys 2020 who provided
# the following scripts to replicate our paper's results.

commands=(
'python experiments/experiment_budget_sweep.py --model-name "VGG16" -b 256 --platform p32xlarge'
'python experiments/experiment_budget_sweep.py --model-name "MobileNet" -b 512 --platform p32xlarge'
'python experiments/experiment_budget_sweep.py --model-name "vgg_unet" -b 32 --platform p32xlarge'
'python experiments/experiment_max_batchsize_baseline.py --model-name vgg_unet --batch-size-min 10 --batch-size-max 40 --batch-size-increment 1'
'python experiments/experiment_max_batchsize_baseline.py --model-name fcn_8_vgg --batch-size-min 10 --batch-size-max 80 --batch-size-increment 1'
'python experiments/experiment_max_batchsize_baseline.py --model-name segnet --batch-size-min 20 --batch-size-max 50 --batch-size-increment 1'
'python experiments/experiment_max_batchsize_baseline.py --model-name ResNet50 --batch-size-min 90 --batch-size-max 200 --batch-size-increment 1'
'python experiments/experiment_max_batchsize_baseline.py --model-name VGG19 --batch-size-min 160 --batch-size-max 300 --batch-size-increment 1'
'python experiments/experiment_max_batchsize_baseline.py --model-name MobileNet --batch-size-min 200 --batch-size-max 650 --batch-size-increment 1'
)
rm -rf stdout_err
mkdir stdout_err
rm -f results.txt
index=1
for i in "${commands[@]}"; do
    echo "$i"
    start=$(date +%s%N | cut -b1-13) # current time in milliseconds
    eval "$i" &>stdout_err/$index.txt
    end=$(date +%s%N | cut -b1-13)
    runtime=$((end - start))
    echo "$i" >>results.txt
    echo "$runtime ms" >>results.txt
    ((index = index + 1))
done

commands=(
'python experiments/experiment_max_batchsize_ilp.py --model-name vgg_unet --batch-size-min 20 --num-threads 40'
'python experiments/experiment_max_batchsize_ilp.py --model-name fcn_8_vgg --batch-size-min 20 --num-threads 40'
'python experiments/experiment_max_batchsize_ilp.py --model-name segnet --batch-size-min 20 --num-threads 40'
'python experiments/experiment_max_batchsize_ilp.py --model-name ResNet50 --batch-size-min 100 --num-threads 40'
'python experiments/experiment_max_batchsize_ilp.py --model-name VGG19 --batch-size-min 160 --num-threads 40'
'python experiments/experiment_max_batchsize_ilp.py --model-name MobileNet --batch-size-min 450 --num-threads 40'
)
rm -rf max_batchsize_ilp
mkdir max_batchsize_ilp
index=1
for i in "${commands[@]}"; do
    echo "$i"
    eval "$i &> max_batchsize_ilp/$index.txt &" # run the experiment in the background
    pid=$!
    sleep 1800 # change 1800 to a different number of seconds for a larger or smaller timeout
    kill -SIGINT $pid # interrupt the run once the timeout elapses
    ((index = index + 1))
done
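A usage sketch for the script above, assuming it is run from the repository root with the project's dependencies (including a licensed Gurobi) installed:
```
bash reproduce_all.sh
# Per-command wall-clock timings accumulate in results.txt; logs are written to
# stdout_err/ for the first command set and max_batchsize_ilp/ for the ILP batch-size runs.
```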
