-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserverless.yml
65 lines (57 loc) · 1.63 KB
/
serverless.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Name of the Serverless Framework service described by this file.
service: serverless-crawl-embed

# Pin the Serverless Framework major version this configuration is written for.
frameworkVersion: '3'

# Load a .env file so the ${env:...} references in this file can be resolved from it.
useDotenv: true
# Provider-wide settings. The environment map is declared once here, at provider
# level, rather than on individual functions.
provider:
  name: aws
  runtime: python3.9
  # Stage is taken from the --stage CLI option and falls back to 'dev'.
  stage: ${opt:stage, 'dev'}
  environment:
    # Values read from the deploy-time environment (or the .env file).
    BASE_URL: ${env:BASE_URL}
    PINECONE_API_KEY: ${env:PINECONE_API_KEY}
    PINECONE_INDEX_NAME: ${env:PINECONE_INDEX_NAME}
    OPENAI_API_KEY: ${env:OPENAI_API_KEY}
    EMBEDDING_MODEL: ${env:EMBEDDING_MODEL}
    RAW_BUCKET_DIRECTORY: ${env:RAW_BUCKET_DIRECTORY}
    PROCESSED_BUCKET_DIRECTORY: ${env:PROCESSED_BUCKET_DIRECTORY}
    # Values resolved from the resources declared under `constructs`.
    CRAWL_QUEUE_URL: ${construct:CrawlQueue.queueUrl}
    CRAWL_TABLE_NAME: ${construct:CrawlTable.tableName}
    EMBEDDINGS_QUEUE_URL: ${construct:EmbeddingsQueue.queueUrl}
    SOURCES_BUCKET_NAME: ${construct:SourcesBucket.bucketName}
# Resources declared through the serverless-lift plugin (listed under `plugins`).
# Their outputs are referenced elsewhere in this file via ${construct:<Name>.<output>}.
constructs:
  # Queue whose messages are handled by the crawl worker.
  CrawlQueue:
    type: queue
    worker:
      handler: src/handlers/crawl.run
      timeout: 30
    # NOTE(review): placed at queue level, where Lift documents `maxConcurrency`;
    # the original nesting could not be recovered -- confirm.
    maxConcurrency: 10

  # Single DynamoDB table; its name is exported as CRAWL_TABLE_NAME.
  CrawlTable:
    type: database/dynamodb-single-table

  # Bucket whose name is exported as SOURCES_BUCKET_NAME and whose object-created
  # events trigger the `enqueue` function.
  SourcesBucket:
    type: storage

  # Queue whose messages are handled by the embeddings worker.
  EmbeddingsQueue:
    type: queue
    worker:
      handler: src/handlers/generate_embeddings.run
      timeout: 30
    # NOTE(review): same placement assumption as CrawlQueue -- confirm.
    maxConcurrency: 10  # keep db connections low
# Standalone Lambda functions (the queue workers are declared under `constructs`).
functions:
  # No events attached here, so nothing in this file triggers it automatically.
  start:
    handler: src/handlers/start.run

  # Triggered by object-created events on the sources bucket, limited to keys
  # under the raw directory prefix that end in .html.
  enqueue:
    handler: src/handlers/enqueue.run
    events:
      - s3:
          bucket: ${construct:SourcesBucket.bucketName}
          event: s3:ObjectCreated:*
          rules:
            - prefix: ${env:RAW_BUCKET_DIRECTORY}/
            - suffix: .html
          # The bucket is declared by the SourcesBucket construct, so this event
          # attaches to it rather than defining a new bucket.
          existing: true

  # No events attached here either.
  testEmbeddings:
    handler: src/handlers/test_embeddings.run
# Serverless plugins: Python dependency packaging (configured under
# custom.pythonRequirements) and Lift (backs the `constructs` section).
plugins:
  - serverless-python-requirements
  - serverless-lift
# Plugin configuration.
custom:
  # Settings for serverless-python-requirements.
  pythonRequirements:
    # Run pip inside a Docker container when packaging dependencies.
    dockerizePip: true