Skip to content

Commit 158ed65

Browse files
Update Docker compose
1 parent 43cd8e4 commit 158ed65

File tree

2 files changed

+54
-74
lines changed

2 files changed

+54
-74
lines changed

kafka-docker-compose.yaml

+42-54
Original file line numberDiff line numberDiff line change
@@ -34,22 +34,17 @@ services:
3434
KAFKA_BROKER_ID: 1
3535
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
3636
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
37-
# Define how clients connect to brokers
3837
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
3938
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
40-
# Schema Registry URL for storing and managing Avro schemas
4139
KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://schema-registry:8081
42-
# Confluent Metrics Reporter for Control Center Cluster Monitoring
4340
KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter
4441
CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: "broker:9092"
4542
CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1
4643
CONFLUENT_METRICS_ENABLE: "true"
47-
# For fixing the bug where replication factor 3 > the number of nodes
4844
KAFKA_CONFLUENT_BALANCER_TOPIC_REPLICATION_FACTOR: 1
4945
KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
5046
KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
5147

52-
# For managing Avro schemas
5348
schema-registry:
5449
image: confluentinc/cp-schema-registry:7.5.0
5550
container_name: streaming-schema-registry
@@ -67,44 +62,42 @@ services:
6762
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: "broker:29092"
6863
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
6964

70-
# For connecting to offline store (Connect Kafka to database)
71-
# connect:
72-
image: confluentinc/cp-kafka-connect:7.5.0
73-
container_name: streaming-connect
74-
depends_on:
75-
broker:
76-
condition: service_healthy
77-
schema-registry:
78-
condition: service_healthy
79-
zookeeper:
80-
condition: service_healthy
81-
ports:
82-
- "8083:8083"
83-
environment:
84-
CONNECT_BOOTSTRAP_SERVERS: "broker:29092"
85-
CONNECT_REST_ADVERTISED_HOST_NAME: connect
86-
CONNECT_REST_PORT: 8083
87-
CONNECT_GROUP_ID: compose-connect-group
88-
CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs
89-
CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
90-
CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000
91-
CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets
92-
CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
93-
CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status
94-
CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
95-
CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter
96-
CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: false
97-
CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
98-
CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: true
99-
# CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
100-
# CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
101-
# CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
102-
# CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
103-
CONNECT_PLUGIN_PATH: "/usr/share/java,/etc/kafka-connect/jars"
104-
volumes:
105-
- $PWD/data_ingestion/kafka_connect/jars/:/etc/kafka-connect/jars
65+
connect:
66+
image: confluentinc/cp-kafka-connect:7.5.0
67+
container_name: streaming-connect
68+
depends_on:
69+
broker:
70+
condition: service_healthy
71+
schema-registry:
72+
condition: service_healthy
73+
zookeeper:
74+
condition: service_healthy
75+
ports:
76+
- "8083:8083"
77+
environment:
78+
CONNECT_BOOTSTRAP_SERVERS: "broker:29092"
79+
CONNECT_REST_ADVERTISED_HOST_NAME: connect
80+
CONNECT_REST_PORT: 8083
81+
CONNECT_GROUP_ID: compose-connect-group
82+
CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs
83+
CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
84+
CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000
85+
CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets
86+
CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
87+
CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status
88+
CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
89+
CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter
90+
CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: false
91+
CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
92+
CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: true
93+
# CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
94+
# CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
95+
# CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
96+
# CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
97+
CONNECT_PLUGIN_PATH: "/usr/share/java,/etc/kafka-connect/jars"
98+
volumes:
99+
- $PWD/data_ingestion/kafka_connect/jars/:/etc/kafka-connect/jars
106100

107-
# Confluent control center to manage Kafka
108101
control-center:
109102
image: confluentinc/cp-enterprise-control-center:7.5.0
110103
container_name: streaming-control-center
@@ -127,7 +120,7 @@ services:
127120
CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1
128121
CONTROL_CENTER_CONNECT_HEALTHCHECK_ENDPOINT: "/connectors"
129122
CONFLUENT_METRICS_TOPIC_REPLICATION: 1
130-
# CDC platform to capture changes in DB and stream them to Kafka
123+
131124
debezium:
132125
image: debezium/connect:1.9
133126
container_name: streaming-debezium
@@ -154,22 +147,19 @@ services:
154147
timeout: 5s
155148
retries: 5
156149
ports:
157-
- "8083:8083"
150+
- "8086:8083"
158151
environment:
159152
BOOTSTRAP_SERVERS: broker:29092
160153
GROUP_ID: 1
161154
CONFIG_STORAGE_TOPIC: connect_configs
162155
OFFSET_STORAGE_TOPIC: connect_offsets
163-
# Set to Avro for higher performance
164-
# KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
165-
# VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
166156
KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter
167157
VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
168158
CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
169159
CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
170160
CONNECT_TOPIC_CREATION_ENABLE: true
171161

172-
# Debezium UI
162+
173163
debezium-ui:
174164
image: debezium/debezium-ui:latest
175165
container_name: debezium-ui
@@ -183,8 +173,6 @@ services:
183173
KAFKA_CONNECT_URIS: http://debezium:8083
184174

185175
postgresql:
186-
# Set wal_level from replica (default) to logical
187-
# so that Debezium can capture changes
188176
image: postgres:latest
189177
command: ["postgres", "-c", "wal_level=logical"]
190178
container_name: streaming-postgresql
@@ -196,11 +184,11 @@ services:
196184
ports:
197185
- "5432:5432"
198186
environment:
199-
- POSTGRES_DB=v9
200-
- POSTGRES_USER=v9
201-
- POSTGRES_PASSWORD=v9
187+
- POSTGRES_DB=my_database
188+
- POSTGRES_USER=postgres
189+
- POSTGRES_PASSWORD=postgres
202190
volumes:
203191
- cdc_postgres_data:/var/lib/postgresql/data
204192

205193
volumes:
206-
cdc_postgres_data:
194+
cdc_postgres_data:

storage-docker-compose.yaml

+12-20
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,33 @@
1-
version: "3.7"
1+
version: "3.8"
22
services:
3-
# We use PostgreSQL to store Hive metadata about
4-
# how the data files are mapped to schemas and tables
53
metastore_db:
64
container_name: metastoredb-datalake
75
image: postgres:11
86
hostname: metastore_db
97
ports:
10-
- "5433:5432" # Access via Thrift protocol
8+
- "5433:5432"
119
environment:
1210
POSTGRES_USER: hive
1311
POSTGRES_PASSWORD: hive
1412
POSTGRES_DB: metastore
1513

16-
# Expose service to get metadata, which is a repository of metadata about the tables,
17-
# such as database names, table names, schema and data location of each table
1814
hive-metastore:
1915
container_name: datalake-hive-metastore
2016
image: "starburstdata/hive:3.1.2-e.18"
2117
hostname: hive-metastore
2218
ports:
23-
- "9083:9083" # Access via Thrift protocol
19+
- "9083:9083"
2420
environment:
2521
HIVE_METASTORE_DRIVER: org.postgresql.Driver
2622
HIVE_METASTORE_JDBC_URL: jdbc:postgresql://metastore_db:5432/metastore
2723
HIVE_METASTORE_USER: hive
2824
HIVE_METASTORE_PASSWORD: hive
29-
HIVE_METASTORE_WAREHOUSE_DIR: s3://datalake/ # HDFS config, we don't need it
30-
HIVE_METASTORE_USERS_IN_ADMIN_ROLE: "admin" # We also don't need it
25+
HIVE_METASTORE_WAREHOUSE_DIR: s3://datalake/
26+
HIVE_METASTORE_USERS_IN_ADMIN_ROLE: "admin"
3127
S3_ENDPOINT: http://minio:9000
3228
S3_ACCESS_KEY: minio_access_key
3329
S3_SECRET_KEY: minio_secret_key
3430
S3_PATH_STYLE_ACCESS: "true"
35-
# The arguments below serve no real purpose here, but
36-
# the image cannot start without them
3731
REGION: ""
3832
GOOGLE_CLOUD_KEY_FILE_PATH: ""
3933
AZURE_ADL_CLIENT_ID: ""
@@ -52,19 +46,19 @@ services:
5246
- metastore_db
5347

5448
trino:
55-
ports:
56-
- "8084:8080"
5749
container_name: trinodb-datalake
5850
image: "trinodb/trino:410"
5951
hostname: trino
52+
ports:
53+
- "8084:8080"
6054
volumes:
61-
- ./trino/etc:usr/lib/trino/rtc:ro
62-
- ./trino/catalog:etc/trino/catalog
55+
- /home/vuphan/stream-data-processing/trino/etc:/usr/lib/trino/etc:ro
56+
- /home/vuphan/stream-data-processing/trino/catalog:/etc/trino/catalog
6357
depends_on:
6458
- hive-metastore
6559

6660
minio:
67-
image: mino/minio
61+
image: minio/minio
6862
container_name: minio-datalake
6963
hostname: minio
7064
ports:
@@ -73,11 +67,9 @@ services:
7367
volumes:
7468
- minio_storage:/data
7569
environment:
76-
- MINIO_ACCESS_KEY: minio_access_key
77-
- MINIO_SECRET_KEY: minio_secret_key
70+
- MINIO_ACCESS_KEY=minio_access_key
71+
- MINIO_SECRET_KEY=minio_secret_key
7872
command: server --console-address ":9001" /data
7973

8074
volumes:
8175
minio_storage:
82-
data:
83-
driver: local

0 commit comments

Comments
 (0)