diff --git a/debezium-e2e-benchmark/Dockerfile b/debezium-e2e-benchmark/Dockerfile index 66d44850b..7faea5df8 100644 --- a/debezium-e2e-benchmark/Dockerfile +++ b/debezium-e2e-benchmark/Dockerfile @@ -7,15 +7,6 @@ RUN dnf -y install gcc gcc-c++ python3-devel python3-requests RUN python3 -m pip install JPype1==0.6.3 RUN python3 -m pip install JayDeBeApi matplotlib kafka-python scipy - -#RUN pip install ibm-db -## SQL SERVER connector -#RUN pip install pyodbc -#RUN pip install mysql-connector-python - -# https://docs.omnisci.com/v3.6.0/mapd-core-guide/jaydebeapi/ - - RUN useradd -ms /bin/bash tpc USER tpc diff --git a/debezium-e2e-benchmark/README.md b/debezium-e2e-benchmark/README.md index 6460136bb..72367aec7 100644 --- a/debezium-e2e-benchmark/README.md +++ b/debezium-e2e-benchmark/README.md @@ -1,8 +1,14 @@ # Debezium End-to-end Benchmark -The E2E benchmark is a Python script which inserts data into a dedicated table in a database. One column is a timestap stating when data is inserted into the table. The test compares this time with the time of the correspondig timestamp of the Kafka massage in the topic. The script creates in the `tpcdata` directory the resulting data in a CSV file and some diagrams in PNG format. +The E2E benchmark is a Python script that inserts data into a dedicated table in a database. +One column is a timestamp stating when data is inserted into the table. +The test compares this time with the time of the corresponding timestamp of the Kafka message in the topic. +The script creates in the `tpcdata` directory the resulting data in a CSV file and some diagrams in PNG format. -All the SQL statements required to run the tests are specified in the [tpc-config.json](py/tpc-config.json) file. The number of commits run and the commit interval of the data is controlled in this part: + + +All the SQL statements required to run the tests are specified in the [tpc-config.json](py/tpc-config.json) file. 
+The number of commits run and the commit interval of the data are controlled in this part: ``` "tpc": { "count": 100000, @@ -15,12 +21,28 @@ All the SQL statements required to run the tests are specified in the [tpc-conf }, ``` -Each entry in the `commit.intervals` array runs one benchmark test. This parameter should not be set to very high values. +Each entry in the `commit.intervals` array runs one benchmark test. +This parameter should not be set to very high values. +Test section jdbc is necessary for the jdbc connection driver information. e.g. +It needs the driver information from "connector.class" in the register.json only + + "jdbc": { + "db2": { + "jdbcdriver": "com.ibm.db2.jcc.DB2Driver", + "jar" : "jcc-11.5.0.0.jar", + .... + +An additional parameter is needed for a test run in a self-contained environment. +Params for db2 are complete; for other database flavors, please fill out the form accordingly. + + "tpctable": "", + "initsql": [ ... ], + "enablecdctablesql": [ ... ] ## Benchmark on existing environment (DB Server / Kafka / Connector) -If you have an existing up und running Debezium environment, you can do the benchmark test by following these steps: +If you have an existing up and running Debezium environment, you can do the benchmark test by following these steps: - Build the benchmark docker image ``` docker build -t debezium-benchmark . 
``` @@ -31,7 +53,7 @@ If you have an existing up und running Debezium environment, you can do the benc ``` CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INTEGER not null GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1), T0 TIMESTAMP NOT NULL GENERATED BY DEFAULT FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP, PRIMARY KEY (ID) ) ORGANIZE BY ROW ``` - SQL create table for SQLServer ``` CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INT IDENTITY(1,1) PRIMARY KEY, T0 TIMESTAMP DATETIME NULL DEFAULT GETDATE() ) ``` - - SQL crete table for MySQL + - SQL create table for MySQL ``` CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INTEGER NOT NULL AUTO_INCREMENT, T0 TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ``` - Whitelist the TPC.TEST table in your Denbezium connector config JSON @@ -62,7 +84,7 @@ If you have an existing up und running Debezium environment, you can do the benc -## Benchmark in self-contained environment +## Benchmark in a self-contained environment You will need the following to run the tests on CentOS: diff --git a/debezium-e2e-benchmark/images/tpc_100000_1-h.png b/debezium-e2e-benchmark/images/tpc_100000_1-h.png new file mode 100644 index 000000000..a6f1fc73e Binary files /dev/null and b/debezium-e2e-benchmark/images/tpc_100000_1-h.png differ diff --git a/debezium-e2e-benchmark/images/tpc_100000_1-t-d.png b/debezium-e2e-benchmark/images/tpc_100000_1-t-d.png new file 
mode 100644 index 000000000..5d6389cdc Binary files /dev/null and b/debezium-e2e-benchmark/images/tpc_100000_1-t-d.png differ diff --git a/debezium-e2e-benchmark/images/tpc_100000_1-t.png b/debezium-e2e-benchmark/images/tpc_100000_1-t.png new file mode 100644 index 000000000..12ce1657b Binary files /dev/null and b/debezium-e2e-benchmark/images/tpc_100000_1-t.png differ diff --git a/debezium-e2e-benchmark/images/tpc_100000_1.png b/debezium-e2e-benchmark/images/tpc_100000_1.png new file mode 100644 index 000000000..05b3aa4b1 Binary files /dev/null and b/debezium-e2e-benchmark/images/tpc_100000_1.png differ diff --git a/debezium-e2e-benchmark/py/tpc-config.json b/debezium-e2e-benchmark/py/tpc-config.json index c94f0b701..0d2a4c72a 100644 --- a/debezium-e2e-benchmark/py/tpc-config.json +++ b/debezium-e2e-benchmark/py/tpc-config.json @@ -31,28 +31,28 @@ ] }, "mysql": { - "jdbcdriver": "com.ibm.db2.jcc.DB2Driver", + "jdbcdriver": "com.mysql.cj.jdbc.Driver", "jar": "mysql-connector-java-8.0.19.jar", "tpctable": "CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INTEGER NOT NULL AUTO_INCREMENT, T0 TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ", "initsql": [], "enablecdctablesql": [] }, "oracle": { - "jdbcdriver": "com.ibm.db2.jcc.DB2Driver", + "jdbcdriver": "com.oracle.ojdbc", "jar": "ojdbc10-19.3.0.0.jar", "tpctable": "", "initsql": [], "enablecdctablesql": [] }, "sqlserver": { - "jdbcdriver": "com.ibm.db2.jcc.DB2Driver", + "jdbcdriver": "com.microsoft.sqlserver", "jar": "mssql-jdbc-8.2.0.jre8.jar", "tpctable": "CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INT IDENTITY(1,1) PRIMARY KEY, T0 
TIMESTAMP DATETIME NULL DEFAULT GETDATE() )", "initsql": [], "enablecdctablesql": [] }, - "postgress": { - "jdbcdriver": "com.ibm.db2.jcc.DB2Driver", + "postgresql": { + "jdbcdriver": "org.postgresql.Driver", "jar": "postgresql-9.1-901.jdbc4.jar", "tpctable": "", "initsql": [], diff --git a/debezium-e2e-benchmark/py/tpc-run-test.py b/debezium-e2e-benchmark/py/tpc-run-test.py index 853318dfe..5241b8627 100755 --- a/debezium-e2e-benchmark/py/tpc-run-test.py +++ b/debezium-e2e-benchmark/py/tpc-run-test.py @@ -13,7 +13,6 @@ from pprint import pprint import requests import datetime import threading - import jpype @@ -154,12 +153,12 @@ def main(argv): print('tpc-connector deleted') pass - dockerbootstrapserver = config['config']['database.history.kafka.bootstrap.servers'] - bootstrapserver = config['config']['database.history.kafka.bootstrap.servers'].split(",") + bootstrapserver = config['config']['database.history.kafka.bootstrap.servers'].split( + ",") # check integrated test ( all in one docker) - if dockerbootstrapserver == 'kafka:9092' : - + if dockerbootstrapserver == 'kafka:9092': + print(bootstrapserver) kafkaadmin = KafkaAdminClient(bootstrap_servers=bootstrapserver)