DBZ-1812 Docs fixes; added Postgres

This commit is contained in:
Jiri Pechanec 2020-06-16 08:58:24 +02:00
parent 6b0651f2bc
commit 3fc4312ff5
8 changed files with 70 additions and 61 deletions

View File

@ -15,11 +15,11 @@ WORKDIR /home/tpc
ADD py/ /home/tpc
RUN mkdir /home/tpc/jdbcdriver
run curl https://repo1.maven.org/maven2/com/ibm/db2/jcc/11.5.0.0/jcc-11.5.0.0.jar --output /home/tpc/jdbcdriver/jcc-11.5.0.0.jar
run curl https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.19/mysql-connector-java-8.0.19.jar --output /home/tpc/jdbcdriver/mysql-connector-java-8.0.19.jar
run curl https://repo1.maven.org/maven2/com/microsoft/sqlserver/mssql-jdbc/8.2.0.jre8/mssql-jdbc-8.2.0.jre8.jar --output /home/tpc/jdbcdriver/mssql-jdbc-8.2.0.jre8.jar
run curl https://repo1.maven.org/maven2/postgresql/postgresql/9.1-901.jdbc4/postgresql-9.1-901.jdbc4.jar --output /home/tpc/jdbcdriver/postgresql-9.1-901.jdbc4.jar
run curl https://repo1.maven.org/maven2/com/oracle/ojdbc/ojdbc10/19.3.0.0/ojdbc10-19.3.0.0.jar --output /home/tpc/jdbcdriver/ojdbc10-19.3.0.0.jar
run curl https://repo1.maven.org/maven2/com/ibm/db2/jcc/11.5.0.0/jcc-11.5.0.0.jar --output /home/tpc/jdbcdriver/jcc.jar
run curl https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.19/mysql-connector-java-8.0.19.jar --output /home/tpc/jdbcdriver/mysql.jar
run curl https://repo1.maven.org/maven2/com/microsoft/sqlserver/mssql-jdbc/8.2.0.jre8/mssql-jdbc-8.2.0.jre8.jar --output /home/tpc/jdbcdriver/mssql.jar
run curl https://repo1.maven.org/maven2/postgresql/postgresql/9.1-901.jdbc4/postgresql-9.1-901.jdbc4.jar --output /home/tpc/jdbcdriver/postgresql.jar
run curl https://repo1.maven.org/maven2/com/oracle/ojdbc/ojdbc10/19.3.0.0/ojdbc10-19.3.0.0.jar --output /home/tpc/jdbcdriver/ojdbc10.jar
CMD ["/usr/bin/tail","-f","/dev/null"]

View File

@ -5,7 +5,10 @@ One column is a timestamp stating when data is inserted into the table.
The test compares this insertion time with the timestamp of the corresponding Kafka message in the topic.
The script writes the resulting data as a CSV file, along with some diagrams in PNG format, into the `tpcdata` directory.
<img src="./images/tpc_100000_1.png" width="20%"><img src="./images/tpc_100000_1-t.png" width="20%"><img src="./images/tpc_100000_1-t-d.png" width="20%"><img src="./images/tpc_100000_1-h.png" width="20%">
![](images/tpc_100000_1.png)
![](images/tpc_100000_1-t.png)
![](images/tpc_100000_1-t-d.png)
![](images/tpc_100000_1-h.png)
All the SQL statements required to run the tests are specified in the [tpc-config.json](py/tpc-config.json) file.
The number of commits run and the commit interval of the data are controlled in this part:
@ -30,7 +33,7 @@ It need the driver information form "connector.class" in the register.json only
"jdbc": {
"db2": {
"jdbcdriver": "com.ibm.db2.jcc.DB2Driver",
"jar" : "jcc-11.5.0.0.jar",
"jar" : "jcc.jar",
....
An additional parameter is needed for a test run in a self-contained environment.
@ -56,7 +59,7 @@ If you have an existing up and running Debezium environment, you can do the benc
- SQL create table for MySQL
``` CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INTEGER NOT NULL AUTO_INCREMENT, T0 TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ```
- Whitelist the TPC.TEST table in your Denbezium connector config JSON
- Whitelist the TPC.TEST table in your Debezium connector config JSON
``` "database.whitelist" : "TPC.TEST" ```
- Enable the table for CDC on the database
@ -66,16 +69,16 @@ If you have an existing up and running Debezium environment, you can do the benc
- ``` VALUES ASNCDC.ASNCDCSERVICES('reinit','asncdc') ```
- ``` VALUES ASNCDC.ASNCDCSERVICES('status','asncdc') ```
- SQL for SQLServer
``` for detail see debezium-sqlserver-connector ```
- for details see [SQL Server Connector](https://debezium.io/documentation/reference/connectors/sqlserver.html)
- SQL for MySQL
``` for detail see debezium-mysql-connector ```
- for details see [MySQL Connector](https://debezium.io/documentation/reference/connectors/mysql.html)
- Log in to the docker container
``` docker exec -it benchmark /bin/bash ```
- Copy the Debezium connector configuration JSON in the home directory as /home/tpc/register.json
- Copy the Debezium connector configuration JSON in the home directory as `$HOME/register.json`
- Go to the directory where the Python code is
``` cd /home/tpc/py ```
- Edit the tpc-config.json to add the correct debezium.connect.server FQDN:port
``` cd $HOME/py ```
- Edit the `tpc-config.json` to add the correct debezium.connect.server FQDN:port
- Now run the tests
``` python3 tpc-run-tes.py ```
- Create plots

View File

@ -0,0 +1,48 @@
version: '2'
services:
zookeeper:
image: debezium/zookeeper:${DEBEZIUM_VERSION}
ports:
- 2181:2181
- 2888:2888
- 3888:3888
kafka:
image: debezium/kafka:${DEBEZIUM_VERSION}
ports:
- 9092:9092
links:
- zookeeper
environment:
- ZOOKEEPER_CONNECT=zookeeper:2181
postgres:
image: debezium/example-postgres:${DEBEZIUM_VERSION}
ports:
- 5432:5432
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=postgres
connect:
image: debezium/connect:${DEBEZIUM_VERSION}
ports:
- 8083:8083
links:
- kafka
- postgres
environment:
- BOOTSTRAP_SERVERS=kafka:9092
- GROUP_ID=1
- CONFIG_STORAGE_TOPIC=tpc_connect_configs
- OFFSET_STORAGE_TOPIC=tpc_connect_offsets
- STATUS_STORAGE_TOPIC=tpc_connect_statuses
tpc:
image: debezium-tpc
build:
context: .
privileged: True
links:
- kafka
- postgres
- connect
volumes:
- ../../debezium-examples/tutorial/register-postgres.json:/home/tpc/register.json
- ${DEBEZIUM_TPC_VOLUME}:/home/tpc/tpcdata

View File

@ -34,8 +34,6 @@ with open(csvfile) as csvfile:
xmin=0
xmax=len(id)
#xmin=19000 #0
xmax=firstenrties #max(id)
ymin=0
@ -50,9 +48,7 @@ del kafka[xmax:len(id)]
x = []
y = []
#for idx, e in enumerate(db):
for i in range(xmin,xmax):
#x.append((kafka2[i]-db2[i]) / 1000)
x.append((kafka[i]) / 1000)
y.append(id[i])
plt.scatter(x,y,s=0.01,c='lightblue')
@ -61,14 +57,10 @@ plt.scatter(x,y,s=0.01,c='lightblue')
x = []
y = []
for i in range(xmin,xmax):
#x.append((kafka2[i]-db2[i]) / 1000)
x.append((db[i]) / 1000)
y.append(id[i])
axes = plt.gca()
#axes.set_xlim([0,150000])
#axes.set_ylim([0,100000])
#axes.set_xlim([0,max(y)])
axes.set_ylim([0,firstenrties])
plt.xlabel('millisecond')
plt.ylabel('entries ')
@ -80,5 +72,3 @@ plt.scatter(x,y,s=0.01,c='red')
plt.savefig(Plotfilename)

View File

@ -33,8 +33,6 @@ with open(csvfile) as csvfile:
xmin=0
xmax=len(id)
#xmin=19000 #0
##xmax=9000 #max(id)
ymin=0
del db[0:xmin]
@ -48,9 +46,7 @@ del kafka[xmax:len(id)]
x = []
y = []
#for idx, e in enumerate(db):
for i in range(xmin,xmax):
#x.append((kafka2[i]-db2[i]) / 1000)
x.append((kafka[i]) / 1000)
y.append(id[i])
plt.scatter(x,y,s=0.01,c='lightblue')
@ -59,7 +55,6 @@ plt.scatter(x,y,s=0.01,c='lightblue')
x = []
y = []
for i in range(xmin,xmax):
#x.append((kafka2[i]-db2[i]) / 1000)
x.append((db[i]) / 1000)
y.append(id[i])
@ -76,5 +71,3 @@ plt.scatter(x,y,s=0.01,c='red')
plt.savefig(Plotfilename)

View File

@ -12,7 +12,7 @@
"jdbc": {
"db2": {
"jdbcdriver": "com.ibm.db2.jcc.DB2Driver",
"jar": "jcc-11.5.0.0.jar",
"jar": "jcc.jar",
"tpctable": "CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INTEGER not null GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1), T0 TIMESTAMP NOT NULL GENERATED BY DEFAULT FOR EACH ROW ON UPDATE AS ROW CHANGE TIMESTAMP, PRIMARY KEY (ID) ) ORGANIZE BY ROW",
"initsql": [
"CALL ASNCDC.REMOVETABLE('TPC','TEST')",
@ -32,14 +32,14 @@
},
"mysql": {
"jdbcdriver": "com.mysql.cj.jdbc.Driver",
"jar": "mysql-connector-java-8.0.19.jar",
"jar": "mysql.jar",
"tpctable": "CREATE TABLE TPC.TEST ( USERNAME VARCHAR(32) NOT NULL, NAME VARCHAR(64), BLOOD_GROUP CHAR(3), RESIDENCE VARCHAR(200), COMPANY VARCHAR(128), ADDRESS VARCHAR(200), BIRTHDATE DATE, SEX CHAR(1), JOB VARCHAR(128), SSN CHAR(11), MAIL VARCHAR(128), ID INTEGER NOT NULL AUTO_INCREMENT, T0 TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ",
"initsql": [],
"enablecdctablesql": []
},
"oracle": {
"jdbcdriver": "com.oracle.ojdbc",
"jar": "ojdbc10-19.3.0.0.jar",
"jar": "ojdbc10.jar",
"tpctable": "",
"initsql": [],
"enablecdctablesql": []
@ -53,7 +53,7 @@
},
"postgresql": {
"jdbcdriver": "org.postgresql.Driver",
"jar": "postgresql-9.1-901.jdbc4.jar",
"jar": "postgresql.jar",
"tpctable": "",
"initsql": [],
"enablecdctablesql": []

View File

@ -28,8 +28,6 @@ with open(csvfile) as csvfile:
xmin=0
xmax=len(id)
#xmin=20000 #0
#xmax=2100 #max(id)
ymin=0
@ -37,7 +35,6 @@ ymin=0
x = []
y = []
#for idx, e in enumerate(db):
for i in range(xmin,xmax):
y.append((kafka[i] - db[i]) / 1000)
x.append(id[i])
@ -77,10 +74,8 @@ plt.xlabel('entires')
plt.ylabel('delta ')
plt.title('')
plt.legend()
#plt.yscale('log')
axes = plt.gca()
axes.set_xlim([xmin,xmax])
#axes.set_ylim([ymin,ymax])
axes.set_ylim([ymin,40])
print('db2 in ')
@ -107,11 +102,8 @@ plt.xlabel('entires')
plt.ylabel('delta ')
plt.title('')
plt.legend()
#plt.yscale('log')
axes = plt.gca()
axes.set_xlim([xmin,xmax])
#axes.set_ylim([ymin,ymax])
#axes.set_ylim([ymin,10000])
print('kafka in')
print('min :',min(y))
@ -121,5 +113,3 @@ print('std :',std(y))
plt.savefig(Plotfilename)

View File

@ -1,15 +0,0 @@
{
"name": "inventory-connector",
"config": {
"connector.class" : "io.debezium.connector.db2.Db2Connector",
"tasks.max" : "1",
"database.server.name" : "db2server",
"database.hostname" : "db2server",
"database.port" : "50000",
"database.user" : "db2inst1",
"database.password" : "######",
"database.dbname" : "TESTDB",
"database.history.kafka.bootstrap.servers" : "kafka:9092",
"database.history.kafka.topic": "schema-changes.inventory"
}
}