-
Notifications
You must be signed in to change notification settings - Fork 6
/
docker-compose.yaml
85 lines (76 loc) · 2.08 KB
/
docker-compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Docker Compose stack for a small batch pipeline plus a Spark cluster:
#   spark_task_0 -> spark_task_1 -> spark_task_2 (the last one also needs a
#   healthy postgresql), and an independent spark-master / spark-worker pair.
#
# The database password can be overridden from the host environment or an
# `.env` file via POSTGRESQL_PASSWORD; it defaults to the original value.
#
# NOTE: Compose implementations that follow the Compose Specification ignore
# `version`; it is kept only for older tooling.
version: '3.9'

services:
  # Step 0: fetch the source CSV into the shared ./data directory.
  # wget's -nc (no-clobber) flag skips the download when the file is
  # already present, so re-running the stack does not re-fetch it.
  spark_task_0:
    build: .
    working_dir: /data
    volumes:
      - ./data:/data
    entrypoint: wget
    command:
      - '-nc'
      - 'https://raw.githubusercontent.com/selva86/datasets/master/bank-full.csv'

  # Step 1: runs src/01_csv_to_parquet/main.py once step 0 has exited with
  # status 0. Judging by the argument names it converts the CSV to Parquet;
  # the script itself is not part of this file.
  spark_task_1:
    build: .
    environment:
      # NOTE(review): ${PYTHONPATH} is interpolated by Compose from the HOST
      # environment, not from the container. If it is unset on the host the
      # value becomes ":/app/src" - confirm that this is intended.
      - PYTHONPATH=${PYTHONPATH}:/app/src
    volumes:
      - ./data:/data
      - ./src/01_csv_to_parquet:/app/src
    entrypoint: python3
    command:
      - '/app/src/main.py'
      - '--source_csv_file=/data/bank-full.csv'
      - '--target_parquet_dir=/data/bank.parquet'
    depends_on:
      spark_task_0:
        condition: service_completed_successfully

  # PostgreSQL target database, published on host port 5434.
  postgresql:
    image: 'bitnami/postgresql:13'
    ports:
      # Always quote HOST:CONTAINER mappings so YAML never reads them as
      # base-60 integers.
      - '5434:5432'
    volumes:
      - './services/postgresql:/bitnami/postgresql'
    environment:
      # NOTE(review): ALLOW_EMPTY_PASSWORD looks redundant next to an
      # explicit password - confirm against the image docs before removing.
      - ALLOW_EMPTY_PASSWORD=yes
      - POSTGRESQL_USERNAME=p_user
      - POSTGRESQL_PASSWORD=${POSTGRESQL_PASSWORD:-password123}
      - POSTGRESQL_DATABASE=postgres
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U p_user']
      interval: 10s
      timeout: 5s
      retries: 10

  # Step 2: runs src/02_parquet_to_postgres/main.py after step 1 succeeded
  # and the database reports healthy. The database is reached by its service
  # name on the default Compose network, so no legacy `links` entry is needed.
  spark_task_2:
    build: .
    volumes:
      - ./data:/data
      - ./src/02_parquet_to_postgres:/app/src
    entrypoint: python3
    command:
      - '/app/src/main.py'
      - '--source_parquet_dir=/data/bank.parquet'
      - '--target_tablename=public.bank'
      - '--url=jdbc:postgresql://postgresql:5432/postgres'
      - '--login=p_user'
      # Must stay in sync with POSTGRESQL_PASSWORD of the postgresql service.
      - '--password=${POSTGRESQL_PASSWORD:-password123}'
    depends_on:
      spark_task_1:
        condition: service_completed_successfully
      postgresql:
        condition: service_healthy

  # Spark master; container port 8080 is published on host port 8079
  # (presumably the master web UI - confirm against the image docs).
  spark-master:
    image: 'bitnami/spark:3.2.0'
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_URL=spark://spark-master:7077
    ports:
      - '7077:7077'
      - '8079:8080'

  # Spark worker that registers with the master at spark-master:7077.
  spark-worker:
    image: 'bitnami/spark:3.2.0'
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
    deploy:
      mode: replicated
      replicas: 1