DBZ-2531 Implement connection retry for Oracle

This commit is contained in:
Chris Cranford 2020-09-17 15:10:17 -04:00 committed by Gunnar Morling
parent dda9ba96e3
commit aedb0b9a57
4 changed files with 121 additions and 105 deletions

View File

@ -75,7 +75,7 @@ public ChangeEventSourceCoordinator start(Configuration config) {
.loggingContextSupplier(() -> taskContext.configureLoggingContext(CONTEXT_NAME))
.build();
errorHandler = new ErrorHandler(OracleConnector.class, connectorConfig.getLogicalName(), queue);
errorHandler = new OracleErrorHandler(connectorConfig.getLogicalName(), queue);
final OracleEventMetadataProvider metadataProvider = new OracleEventMetadataProvider();

View File

@ -0,0 +1,42 @@
/*
* Copyright Debezium Authors.
*
* Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
*/
package io.debezium.connector.oracle;
import java.io.IOException;
import java.sql.SQLRecoverableException;
import io.debezium.connector.base.ChangeEventQueue;
import io.debezium.pipeline.ErrorHandler;
import oracle.net.ns.NetException;
/**
 * Error handler for Oracle.
 *
 * <p>Overrides {@link #isRetriable(Throwable)} so that failures which look like a lost or
 * unavailable database connection are classified as retriable.
 *
 * @author Chris Cranford
 */
public class OracleErrorHandler extends ErrorHandler {

    /** Upper-case prefix of the driver message reported when the socket is closed mid-read. */
    private static final String SOCKET_EOF_MESSAGE_PREFIX = "NO MORE DATA TO READ FROM SOCKET";

    /**
     * Creates the handler for the Oracle connector.
     *
     * @param logicalName the connector's logical name, passed through to the base handler
     * @param queue the change event queue, passed through to the base handler
     */
    public OracleErrorHandler(String logicalName, ChangeEventQueue<?> queue) {
        super(OracleConnector.class, logicalName, queue);
    }

    /**
     * Decides whether the given failure is a connection-level problem worth retrying.
     *
     * <p>Each criterion is evaluated independently: the message-based checks only require a
     * message, and the cause-based checks only require a cause. A missing message or a missing
     * cause therefore no longer hides an otherwise retriable failure such as a bare
     * {@link SQLRecoverableException}.
     *
     * @param throwable the failure to classify; {@code null} is treated as not retriable
     * @return {@code true} if the failure matches one of the known connection-related
     *         conditions, {@code false} otherwise
     */
    @Override
    protected boolean isRetriable(Throwable throwable) {
        if (throwable == null) {
            return false;
        }

        // The exception type alone is sufficient; it needs neither a message nor a cause.
        if (throwable instanceof SQLRecoverableException) {
            return true;
        }

        final String message = throwable.getMessage();
        if (message != null && isRetriableMessage(message)) {
            return true;
        }

        final Throwable cause = throwable.getCause();
        if (cause == null) {
            return false;
        }
        // instanceof is null-safe, so a missing nested cause simply evaluates to false.
        return cause instanceof IOException || cause.getCause() instanceof NetException;
    }

    /**
     * Checks whether an error message starts with one of the known retriable prefixes.
     *
     * @param message the non-null exception message
     * @return {@code true} if the message starts with a retriable Oracle error code or with the
     *         socket end-of-data text (compared case-insensitively)
     */
    private static boolean isRetriableMessage(String message) {
        return message.startsWith("ORA-03135") || // connection lost
                message.startsWith("ORA-12543") || // TNS:destination host unreachable
                message.startsWith("ORA-00604") || // error occurred at recursive SQL level 1
                message.startsWith("ORA-01089") || // Oracle immediate shutdown in progress
                // regionMatches(ignoreCase=true) compares per character and does not depend on
                // the default locale, unlike toUpperCase().
                message.regionMatches(true, 0, SOCKET_EOF_MESSAGE_PREFIX, 0, SOCKET_EOF_MESSAGE_PREFIX.length());
    }
}

View File

@ -20,13 +20,11 @@
import static io.debezium.connector.oracle.logminer.LogMinerHelper.setRedoLogFilesForMining;
import static io.debezium.connector.oracle.logminer.LogMinerHelper.startOnlineMining;
import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.SQLRecoverableException;
import java.time.Duration;
import java.time.Instant;
import java.util.Map;
@ -50,8 +48,6 @@
import io.debezium.util.Clock;
import io.debezium.util.Metronome;
import oracle.net.ns.NetException;
/**
* A {@link StreamingChangeEventSource} based on Oracle's LogMiner utility.
* The event handler loop is executed in a separate executor.
@ -114,8 +110,6 @@ public void execute(ChangeEventSourceContext context) {
this.logMinerMetrics = new LogMinerMetrics(taskContext);
this.logMinerMetrics.register(LOGGER);
// The outer loop provides resiliency against network disconnections, which is critical for cloud deployments.
while (context.isRunning()) {
try (Connection connection = jdbcConnection.connection(false);
PreparedStatement fetchFromMiningView = connection
.prepareStatement(SqlUtils.queryLogMinerContents(connectorConfig.getSchemaName(), jdbcConnection.username(), schema))) {
@ -130,7 +124,6 @@ public void execute(ChangeEventSourceContext context) {
}
// 1. Configure Log Miner to mine online redo logs
// todo: DBZ-137 this step can actually be executed outside the loop at start-up.
setNlsSessionParameters(jdbcConnection);
checkSupplementalLogging(jdbcConnection, connectorConfig.getPdbName());
@ -205,12 +198,8 @@ public void execute(ChangeEventSourceContext context) {
}
}
catch (Throwable e) {
if (connectionProblem(e)) {
logWarn(transactionalBufferMetrics, "Disconnection occurred. {} ", e.toString());
continue;
}
logError(transactionalBufferMetrics, "Mining session was stopped due to the {} ", e.toString());
throw new RuntimeException(e);
logError(transactionalBufferMetrics, "Mining session stopped due to the {} ", e.toString());
errorHandler.setProducerThrowable(e);
}
finally {
LOGGER.info("startScn={}, endScn={}, offsetContext.getScn()={}", startScn, endScn, offsetContext.getScn());
@ -218,7 +207,6 @@ public void execute(ChangeEventSourceContext context) {
LOGGER.info("Transactional buffer dump: {}", transactionalBuffer.toString());
LOGGER.info("LogMiner metrics dump: {}", logMinerMetrics.toString());
}
}
logMinerMetrics.unregister(LOGGER);
transactionalBufferMetrics.unregister(LOGGER);
@ -247,18 +235,4 @@ private void updateStartScn() {
public void commitOffset(Map<String, ?> offset) {
// nothing to do
}
/**
 * Determines whether the given failure looks like a database connection problem.
 *
 * <p>Each criterion is evaluated independently: the message-based checks only require a
 * message, and the cause-based checks only require a cause. A missing message or a missing
 * cause therefore does not hide an otherwise matching failure such as a bare
 * {@link SQLRecoverableException}.
 *
 * @param e the failure to classify; {@code null} is treated as not a connection problem
 * @return {@code true} if the failure matches one of the known connection-related conditions
 */
private boolean connectionProblem(Throwable e) {
    if (e == null) {
        return false;
    }

    // The exception type alone is sufficient; it needs neither a message nor a cause.
    if (e instanceof SQLRecoverableException) {
        return true;
    }

    final String message = e.getMessage();
    if (message != null) {
        final String socketEofPrefix = "NO MORE DATA TO READ FROM SOCKET";
        if (message.startsWith("ORA-03135") || // connection lost contact
                message.startsWith("ORA-12543") || // TNS:destination host unreachable
                message.startsWith("ORA-00604") || // error occurred at recursive SQL level 1
                message.startsWith("ORA-01089") || // Oracle immediate shutdown in progress
                // Case-insensitive prefix match that does not depend on the default locale.
                message.regionMatches(true, 0, socketEofPrefix, 0, socketEofPrefix.length())) {
            return true;
        }
    }

    final Throwable cause = e.getCause();
    if (cause == null) {
        return false;
    }
    // instanceof is null-safe, so a missing nested cause simply evaluates to false.
    return cause instanceof IOException || cause.getCause() instanceof NetException;
}
}

View File

@ -79,7 +79,7 @@ public void execute(ChangeEventSourceContext context) throws InterruptedExceptio
}
}
catch (Throwable e) {
throw new RuntimeException(e);
errorHandler.setProducerThrowable(e);
}
finally {
// 3. disconnect