-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTheJobScraperCFN.yaml
126 lines (118 loc) · 4.41 KB
/
TheJobScraperCFN.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# W.I.P. CloudFormation template
AWSTemplateFormatVersion: '2010-09-09'
Description: Creates all the needed resources to run the job scraper
Outputs:
  SSHPrivateKeyARN:
    # CloudFormation never exposes private key material to the template. When
    # an AWS::EC2::KeyPair is created without PublicKeyMaterial, EC2 itself
    # saves the private key in Parameter Store under
    # /ec2/keypair/<key pair id>, so the output points at that parameter
    # rather than at /jobscraper/privatekey (which cannot contain the key).
    Description: The ARN of the SSM parameter storing the private key
    Value: !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/ec2/keypair/${EC2KeyPair.KeyPairId}'
Parameters:
  # Replaces the 'your-ip-address/32' placeholder. Deliberately has no default:
  # there is no value that is both valid for everyone and safe to open SSH to.
  SSHAllowedCidr:
    Type: String
    Description: 'IPv4 CIDR range allowed to reach the instance over SSH (use x.x.x.x/32 for a single address)'
    AllowedPattern: '^(\d{1,3}\.){3}\d{1,3}/(\d|[12]\d|3[0-2])$'
    ConstraintDescription: 'Must be a valid IPv4 CIDR block such as 203.0.113.7/32'
Resources:
  # IAM Role for Lambda
  PowerCycleLambdaRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      # Grants the CloudWatch Logs permissions the function needs for its
      # print() output to be recorded.
      ManagedPolicyArns:
        - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
      Policies:
        - PolicyName: EC2PowerCyclePolicy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              # Stop/start are limited to the scraper instance. AWS::EC2::Instance
              # has no Arn attribute, so the ARN is assembled from the instance
              # id that Ref (here via Sub) returns.
              - Effect: Allow
                Action:
                  - 'ec2:StopInstances'
                  - 'ec2:StartInstances'
                Resource: !Sub 'arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/${JobScraperInstance}'
              # DescribeInstanceStatus does not support resource-level
              # permissions, so it can only be granted on '*'.
              - Effect: Allow
                Action:
                  - 'ec2:DescribeInstanceStatus'
                Resource: '*'
  # Minimal public network for the instance (one VPC, one subnet, a default
  # route to an internet gateway). The logical id 'VPC' is what the security
  # group references.
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: 10.0.0.0/16
      EnableDnsSupport: true
      EnableDnsHostnames: true
  InternetGateway:
    Type: AWS::EC2::InternetGateway
  InternetGatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref VPC
      InternetGatewayId: !Ref InternetGateway
  PublicSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: 10.0.0.0/24
      # First Availability Zone of the region the stack is deployed in.
      AvailabilityZone: !Select [0, !GetAZs '']
  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC
  PublicRoute:
    Type: AWS::EC2::Route
    # The route can only be created once the gateway is attached to the VPC.
    DependsOn: InternetGatewayAttachment
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway
  PublicSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet
      RouteTableId: !Ref PublicRouteTable
  # EC2 Instance
  JobScraperInstance:
    Type: AWS::EC2::Instance
    # A public IP is requested below, which needs the gateway attached first.
    DependsOn: InternetGatewayAttachment
    Properties:
      # Ubuntu 22.04. NOTE(review): AMI ids are region-specific; this one is
      # presumably for us-east-1 (the region the Lambda used to hard-code).
      ImageId: ami-0149b2da6ceec4bb0
      InstanceType: t2.medium
      KeyName: !Ref EC2KeyPair
      # TO:DO replace this with the curl command that gets the setup.sh. Also
      # modify the setup.sh script to handle environment variables so that
      # http flask is not run as root.
      # The '#!' line is required for cloud-init to run the user data as a
      # shell script.
      UserData:
        Fn::Base64: |
          #!/bin/bash
          echo 'hello world'
      NetworkInterfaces:
        - AssociatePublicIpAddress: true
          DeviceIndex: '0'
          # Must be a subnet id; the original passed an Availability Zone name.
          SubnetId: !Ref PublicSubnet
          GroupSet:
            - !Ref JobScraperSecurityGroup
  # SSH Key Pair
  EC2KeyPair:
    Type: AWS::EC2::KeyPair
    Properties:
      KeyName: JobScraperKeyPair
  # Security Group
  JobScraperSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Allow SSH and HTTP/HTTPS
      VpcId: !Ref VPC
      SecurityGroupIngress:
        # SSH only from the caller-supplied range.
        - IpProtocol: tcp
          FromPort: 22
          ToPort: 22
          CidrIp: !Ref SSHAllowedCidr
        - IpProtocol: tcp
          FromPort: 80
          ToPort: 80
          CidrIp: 0.0.0.0/0
        - IpProtocol: tcp
          FromPort: 443
          ToPort: 443
          CidrIp: 0.0.0.0/0
  # Parameter pointing at the private key.
  # A template cannot read the private key of a key pair: Ref returns only the
  # key name. EC2 stores the key material itself under /ec2/keypair/<key pair
  # id>, so this parameter records that path instead of pretending to hold the
  # key (a plain String parameter would be the wrong place for it anyway).
  PrivateKeyParameter:
    Type: AWS::SSM::Parameter
    Properties:
      Name: /jobscraper/privatekey
      Type: String
      Description: 'Name of the SSM parameter in which EC2 stored the private key of the job scraper key pair'
      Value: !Sub '/ec2/keypair/${EC2KeyPair.KeyPairId}'
  # Lambda Function that power cycles our EC2 instance once a day in order to
  # get a new IP and ensure no zombie processes are on the loose
  PowerCycleLambda:
    Type: AWS::Lambda::Function
    Properties:
      Handler: index.handler
      Runtime: python3.9
      Role: !GetAtt PowerCycleLambdaRole.Arn
      Timeout: 180
      Code:
        ZipFile: |
          import boto3
          import time

          # No explicit region: inside Lambda boto3 uses the function's own
          # region, which is the region the stack (and the instance) live in.
          ec2_client = boto3.client('ec2')

          # Seconds between status checks while waiting for the stop.
          POLL_SECONDS = 5
          # Give up this many milliseconds before the Lambda timeout so the
          # failure is reported as an error instead of a hard kill.
          SAFETY_MARGIN_MS = 15000


          def handler(event, context):
              """Stop the instance named in event['instanceId'], wait until it
              is fully stopped, then start it again.

              Errors are not swallowed: a failed stop/start or a stop that does
              not finish in time fails the invocation so it shows up in the
              function's error metrics.
              """
              instance_id = event['instanceId']

              # Stop the instance
              ec2_client.stop_instances(InstanceIds=[instance_id])
              print('Stopping instance')

              # Wait for the instance to be fully stopped.
              # IncludeAllInstances is needed because, by default, only running
              # instances are reported.
              params = {'InstanceIds': [instance_id], 'IncludeAllInstances': True}
              while True:
                  response = ec2_client.describe_instance_status(**params)
                  statuses = response['InstanceStatuses']
                  if statuses and statuses[0]['InstanceState']['Name'] == 'stopped':
                      break
                  if context.get_remaining_time_in_millis() < SAFETY_MARGIN_MS:
                      raise TimeoutError(
                          'Instance %s did not reach the stopped state in time' % instance_id
                      )
                  print('Instance not yet stopped, waiting...')
                  time.sleep(POLL_SECONDS)
              print('Instance stopped')

              # Start the instance
              ec2_client.start_instances(InstanceIds=[instance_id])
              print('Instance started')
  # Lets the schedule rule below invoke the function; without this
  # resource-based permission EventBridge is not allowed to call it.
  PowerCycleLambdaPermission:
    Type: AWS::Lambda::Permission
    Properties:
      Action: lambda:InvokeFunction
      FunctionName: !Ref PowerCycleLambda
      Principal: events.amazonaws.com
      SourceArn: !GetAtt PowerCycleSchedule.Arn
  # Runs the power cycle every day at 15:00 UTC.
  PowerCycleSchedule:
    Type: AWS::Events::Rule
    Properties:
      ScheduleExpression: cron(0 15 ? * MON-SUN *)
      State: ENABLED
      Targets:
        - Arn: !GetAtt PowerCycleLambda.Arn
          Id: 'PowerCycleTarget'
          # Sub substitutes the instance id (what Ref returns for an EC2
          # instance) into the JSON event handed to the handler.
          Input: !Sub '{"instanceId": "${JobScraperInstance}"}'