Skip to content

Commit

Permalink
make script idempotent given --dry-run flag
Browse files Browse the repository at this point in the history
  • Loading branch information
KeitaW committed May 8, 2024
1 parent 01eef1b commit 82fd74b
Showing 1 changed file with 22 additions and 24 deletions.
46 changes: 22 additions & 24 deletions 1.architectures/5.sagemaker-hyperpod/easy-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ fi
# Define AWS Region
if [ -z ${AWS_REGION} ]; then
echo "[WARNING] AWS_REGION environment variable is not set, automatically set depending on aws cli default region."
export AWS_REGION=$(aws configure get region)
export AWS_REGION=$(aws "${aws_cli_args[@]}" configure get region)
aws_cli_args+=(--region "${AWS_REGION}")
fi
echo "export AWS_REGION=${AWS_REGION}" >> env_vars
echo "[INFO] AWS_REGION = ${AWS_REGION}"
Expand All @@ -130,10 +131,9 @@ if [ -z ${INSTANCE_COUNT} ]; then
fi

# Retrieve VPC ID
export VPC_ID=`aws cloudformation describe-stacks \
export VPC_ID=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`VPC\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $VPC_ID ]]; then
Expand All @@ -145,10 +145,9 @@ else
fi

# Grab the subnet id
export SUBNET_ID=`aws cloudformation describe-stacks \
export SUBNET_ID=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`PrimaryPrivateSubnet\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $SUBNET_ID ]]; then
Expand All @@ -160,10 +159,9 @@ else
fi

# Grab the subnet id
export PUBLIC_SUBNET_ID=`aws cloudformation describe-stacks \
export PUBLIC_SUBNET_ID=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`PublicSubnet\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $PUBLIC_SUBNET_ID ]]; then
Expand All @@ -175,10 +173,9 @@ else
fi

# Get FSx Filesystem id from CloudFormation
export FSX_ID=`aws cloudformation describe-stacks \
export FSX_ID=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`FSxLustreFilesystemId\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $FSX_ID ]]; then
Expand All @@ -190,10 +187,9 @@ else
fi

# Get FSx Filesystem Mountname from CloudFormation
export FSX_MOUNTNAME=`aws cloudformation describe-stacks \
export FSX_MOUNTNAME=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`FSxLustreFilesystemMountname\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $FSX_MOUNTNAME ]]; then
Expand All @@ -205,10 +201,9 @@ else
fi

# Get FSx Security Group from CloudFormation
export SECURITY_GROUP=`aws cloudformation describe-stacks \
export SECURITY_GROUP=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`SecurityGroup\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $SECURITY_GROUP ]]; then
Expand All @@ -220,10 +215,9 @@ else
fi

# Get sagemaker role ARN
export ROLE=`aws cloudformation describe-stacks \
export ROLE=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`AmazonSagemakerClusterExecutionRoleArn\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $ROLE ]]; then
Expand All @@ -246,10 +240,9 @@ else
fi

# Get s3 bucket name
export BUCKET=`aws cloudformation describe-stacks \
export BUCKET=`aws "${aws_cli_args[@]}" cloudformation describe-stacks \
--stack-name $STACK_ID_VPC \
--query 'Stacks[0].Outputs[?OutputKey==\`AmazonS3BucketName\`].OutputValue' \
--region ${AWS_REGION} \
--output text`

if [[ ! -z $BUCKET ]]; then
Expand All @@ -260,12 +253,17 @@ else
return 1
fi


git clone --depth=1 https://github.com/aws-samples/awsome-distributed-training/
if [ ! -d "awsome-distributed-training" ]
then
echo "Cloning the repository..."
git clone --depth=1 https://github.com/aws-samples/awsome-distributed-training/
else
echo "Repository already exists..."
fi
# Use pushd and popd to navigate directories https://en.wikipedia.org/wiki/Pushd_and_popd
pushd awsome-distributed-training/1.architectures/5.sagemaker-hyperpod/LifecycleScripts/
# upload data
aws s3 cp --recursive base-config/ s3://${BUCKET}/src
aws "${aws_cli_args[@]}" s3 cp --recursive base-config/ s3://${BUCKET}/src
# move back to the previous directory
popd

Expand All @@ -286,7 +284,7 @@ cat > provisioning_parameters.json << EOL
EOL

# copy to the S3 Bucket
aws s3 cp provisioning_parameters.json s3://${BUCKET}/src/
aws "${aws_cli_args[@]}" s3 cp provisioning_parameters.json s3://${BUCKET}/src/

cat > cluster-config.json << EOL
{
Expand Down Expand Up @@ -323,12 +321,12 @@ cat > cluster-config.json << EOL
EOL

# Validate Cluster configuration
wget https://raw.githubusercontent.com/aws-samples/awsome-distributed-training/main/1.architectures/5.sagemaker-hyperpod/validate-config.py
wget --no-clobber https://raw.githubusercontent.com/aws-samples/awsome-distributed-training/main/1.architectures/5.sagemaker-hyperpod/validate-config.py
# install boto3
pip3 install boto3
# check config for known issues
python3 validate-config.py --cluster-config cluster-config.json --provisioning-parameters provisioning_parameters.json

echo "aws sagemaker create-cluster --cli-input-json file://cluster-config.json --region ${REGION}"
echo "aws ${aws_cli_args[@]} sagemaker create-cluster --cli-input-json file://cluster-config.json --region ${REGION}"
[[ DRY_RUN -eq 1 ]] && exit 0
aws sagemaker create-cluster --cli-input-json "file://cluster-config.json" --region ${REGION}
aws ${aws_cli_args[@]} sagemaker create-cluster --cli-input-json "file://cluster-config.json" --region ${REGION}

0 comments on commit 82fd74b

Please sign in to comment.