DBZ-188 More efficient GTID source filters for MySQL Connector

Changed the GTID source filters in the MySQL connector to be far more efficient when the filters specify literal UUIDs rather than regex patterns. In these cases, the predicate just checks whether a supplied value is in a hash set, and no regular expression patterns are used.

The GTID source filters can still be a combination of UUID literals and regular expressions, and the predicate will use the best implementation for each. For example, if the filters consist entirely of UUID literals, then no regular expressions are compiled or evaluated at all.
This commit is contained in:
Randall Hauch 2017-02-10 11:34:24 -06:00
parent 8c60c29883
commit d2986710a5
4 changed files with 203 additions and 6 deletions

View File

@ -47,8 +47,8 @@ public MySqlTaskContext(Configuration config) {
// Set up the GTID filter ... // Set up the GTID filter ...
String gtidSetIncludes = config.getString(MySqlConnectorConfig.GTID_SOURCE_INCLUDES); String gtidSetIncludes = config.getString(MySqlConnectorConfig.GTID_SOURCE_INCLUDES);
String gtidSetExcludes = config.getString(MySqlConnectorConfig.GTID_SOURCE_EXCLUDES); String gtidSetExcludes = config.getString(MySqlConnectorConfig.GTID_SOURCE_EXCLUDES);
this.gtidSourceFilter = gtidSetIncludes != null ? Predicates.includes(gtidSetIncludes) this.gtidSourceFilter = gtidSetIncludes != null ? Predicates.includesUuids(gtidSetIncludes)
: (gtidSetExcludes != null ? Predicates.excludes(gtidSetExcludes) : null); : (gtidSetExcludes != null ? Predicates.excludesUuids(gtidSetExcludes) : null);
// Set up the MySQL schema ... // Set up the MySQL schema ...
this.dbSchema = new MySqlSchema(config, serverName(), this.gtidSourceFilter); this.dbSchema = new MySqlSchema(config, serverName(), this.gtidSourceFilter);

View File

@ -5,6 +5,11 @@
*/ */
package io.debezium.function; package io.debezium.function;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.function.Function; import java.util.function.Function;
import java.util.function.Predicate; import java.util.function.Predicate;
@ -15,11 +20,142 @@
/** /**
* Utilities for constructing various predicates. * Utilities for constructing various predicates.
*
* @author Randall Hauch * @author Randall Hauch
* *
*/ */
public class Predicates { public class Predicates {
/**
 * Build a predicate over UUID strings that evaluates to {@code true} when the tested value matches <i>any</i> entry of
 * the supplied comma-separated list. The entries may be a mixture of UUID literals and regular expressions; each entry
 * is handled with the most efficient mechanism available for its kind.
 *
 * @param uuidPatterns the comma-separated UUID literals or regular expression patterns; may not be null
 * @return the predicate function that performs the matching
 * @throws PatternSyntaxException if the string includes an invalid regular expression
 */
public static Predicate<String> includesUuids(String uuidPatterns) {
    // Entries that parse as UUIDs are treated as literals; everything else is treated as a regular expression
    Predicate<String> isUuidLiteral = Strings::isUuid;
    // The tested values are already strings, so no conversion is required
    Function<String, String> unchanged = Function.identity();
    return includesLiteralsOrPatterns(uuidPatterns, isUuidLiteral, unchanged);
}
/**
 * Build a predicate over UUID strings that evaluates to {@code true} when the tested value matches <i>none</i> of the
 * entries of the supplied comma-separated list. The entries may be a mixture of UUID literals and regular expressions;
 * each entry is handled with the most efficient mechanism available for its kind.
 *
 * @param uuidPatterns the comma-separated UUID literals or regular expression patterns; may not be null
 * @return the predicate function that performs the matching
 * @throws PatternSyntaxException if the string includes an invalid regular expression
 */
public static Predicate<String> excludesUuids(String uuidPatterns) {
    // Exclusion is simply the logical complement of inclusion
    Predicate<String> included = includesUuids(uuidPatterns);
    return included.negate();
}
/**
 * Generate a predicate function that for any supplied value returns {@code true} if <i>any</i> of the regular expressions
 * or literals in the supplied comma-separated list matches the (converted) predicate parameter. The supplied list can be
 * a mixture of regular expressions and literals, and the most efficient method will be used for each: literals are
 * looked up in a hash set, and only the remaining entries are compiled into case-insensitive regular expressions.
 * <p>
 * Whitespace surrounding each comma-separated entry is removed before the entry is classified, so that a list such as
 * {@code "a, b"} behaves the same as {@code "a,b"}. Entries that are empty after trimming are ignored.
 *
 * @param literalsOrPatterns the comma-separated regular expression pattern (or literal) strings; may not be null
 * @param isLiteral function that determines if a given entry is a literal string; may not be null
 * @param conversion the function that converts each predicate-supplied value to a string that can be matched against the
 *            literals and regular expressions; may not be null
 * @return the predicate function that performs the matching
 * @throws PatternSyntaxException if the string includes an invalid regular expression
 */
public static <T> Predicate<T> includesLiteralsOrPatterns(String literalsOrPatterns, Predicate<String> isLiteral,
                                                          Function<T, String> conversion) {
    // First sort each entry into either the literal set or the list of compiled patterns ...
    Set<String> literals = new HashSet<>();
    List<Pattern> patterns = new ArrayList<>();
    for (String entry : literalsOrPatterns.split(",")) {
        // Without trimming, " <uuid>" is not recognized as a literal and becomes a regex that can never match
        String literalOrPattern = entry.trim();
        if (literalOrPattern.isEmpty()) {
            continue;
        }
        if (isLiteral.test(literalOrPattern)) {
            // Literals are stored lower-cased; the literal predicate lower-cases the tested value to match
            literals.add(literalOrPattern.toLowerCase());
        } else {
            patterns.add(Pattern.compile(literalOrPattern, Pattern.CASE_INSENSITIVE));
        }
    }

    // Now build only the predicate(s) that are actually needed ...
    if (patterns.isEmpty()) {
        return includedInLiterals(literals, conversion);
    }
    if (literals.isEmpty()) {
        return includedInPatterns(patterns, conversion);
    }
    // Check the cheap hash lookup first and fall back to the regular expressions only when it misses
    Predicate<T> literalsPredicate = includedInLiterals(literals, conversion);
    Predicate<T> patternsPredicate = includedInPatterns(patterns, conversion);
    return literalsPredicate.or(patternsPredicate);
}
/**
 * Build a predicate that evaluates to {@code true} when the (converted) tested value matches <i>none</i> of the entries
 * of the supplied comma-separated list. The entries may be a mixture of regular expressions and literals; each entry is
 * handled with the most efficient mechanism available for its kind.
 *
 * @param patterns the comma-separated regular expression pattern (or literal) strings; may not be null
 * @param isLiteral function that determines if a given entry is a literal string; may not be null
 * @param conversion the function that converts each predicate-supplied value to a string that can be matched against the
 *            entries; may not be null
 * @return the predicate function that performs the matching
 * @throws PatternSyntaxException if the string includes an invalid regular expression
 */
public static <T> Predicate<T> excludesLiteralsOrPatterns(String patterns, Predicate<String> isLiteral,
                                                          Function<T, String> conversion) {
    // Exclusion is simply the logical complement of inclusion
    Predicate<T> included = includesLiteralsOrPatterns(patterns, isLiteral, conversion);
    return included.negate();
}
/**
 * Build a predicate over strings that evaluates to {@code true} when the tested value equals, ignoring case, <i>any</i>
 * of the literals in the supplied comma-separated list.
 *
 * @param literals the comma-separated literal strings; may not be null
 * @return the predicate function that performs the matching
 */
public static Predicate<String> includesLiterals(String literals) {
    // The tested values are already strings, so they are passed through unchanged
    Function<String, String> unchanged = Function.identity();
    return includesLiterals(literals, unchanged);
}
/**
 * Build a predicate over strings that evaluates to {@code true} when the tested value equals, ignoring case,
 * <i>none</i> of the literals in the supplied comma-separated list.
 *
 * @param literals the comma-separated literal strings; may not be null
 * @return the predicate function that performs the matching
 */
public static Predicate<String> excludesLiterals(String literals) {
    // Exclusion is simply the logical complement of inclusion
    Predicate<String> included = includesLiterals(literals);
    return included.negate();
}
/**
 * Build a predicate that evaluates to {@code true} when the (converted) tested value equals, ignoring case, <i>any</i>
 * of the literals in the supplied comma-separated list.
 *
 * @param literals the comma-separated literal strings; may not be null
 * @param conversion the function that converts each predicate-supplied value to a string that can be compared against
 *            the literals; may not be null
 * @return the predicate function that performs the matching
 */
public static <T> Predicate<T> includesLiterals(String literals, Function<T, String> conversion) {
    // Lower-case the whole list once, then collect the individual entries for constant-time lookup
    String lowered = literals.toLowerCase();
    Set<String> literalSet = new HashSet<>();
    for (String literal : lowered.split(",")) {
        literalSet.add(literal);
    }
    return includedInLiterals(literalSet, conversion);
}
/**
 * Build a predicate that evaluates to {@code true} when the (converted) tested value equals, ignoring case,
 * <i>none</i> of the literals in the supplied comma-separated list.
 *
 * @param literals the comma-separated literal strings; may not be null
 * @param conversion the function that converts each predicate-supplied value to a string that can be compared against
 *            the literals; may not be null
 * @return the predicate function that performs the matching
 */
public static <T> Predicate<T> excludesLiterals(String literals, Function<T, String> conversion) {
    // Exclusion is simply the logical complement of inclusion
    Predicate<T> included = includesLiterals(literals, conversion);
    return included.negate();
}
/** /**
* Generate a predicate function that for any supplied string returns {@code true} if <i>any</i> of the regular expressions in * Generate a predicate function that for any supplied string returns {@code true} if <i>any</i> of the regular expressions in
* the supplied comma-separated list matches the predicate parameter. * the supplied comma-separated list matches the predicate parameter.
@ -55,18 +191,29 @@ public static Predicate<String> excludes(String regexPatterns) {
* @throws PatternSyntaxException if the string includes an invalid regular expression * @throws PatternSyntaxException if the string includes an invalid regular expression
*/ */
public static <T> Predicate<T> includes(String regexPatterns, Function<T, String> conversion) { public static <T> Predicate<T> includes(String regexPatterns, Function<T, String> conversion) {
Set<Pattern> patterns = Strings.listOfRegex(regexPatterns,Pattern.CASE_INSENSITIVE); Set<Pattern> patterns = Strings.listOfRegex(regexPatterns, Pattern.CASE_INSENSITIVE);
return includedInPatterns(patterns, conversion);
}
protected static <T> Predicate<T> includedInPatterns(Collection<Pattern> patterns, Function<T, String> conversion) {
return (t) -> { return (t) -> {
String str = conversion.apply(t); String str = conversion.apply(t);
if ( str != null ) { if (str != null) {
for ( Pattern p : patterns ) { for (Pattern p : patterns) {
if ( p.matcher(str).matches()) return true; if (p.matcher(str).matches()) return true;
} }
} }
return false; return false;
}; };
} }
/**
 * Create a predicate that converts each tested value to a string, lower-cases it, and returns {@code true} if the
 * result is contained in the supplied collection of literals. The literals are expected to already be lower-cased by
 * the caller. A value whose conversion yields {@code null} never matches, mirroring the behavior of the pattern-based
 * predicate.
 *
 * @param literals the lower-cased literal values to look up; may not be null
 * @param conversion the function that converts each predicate-supplied value to the string to be looked up; may not be
 *            null
 * @return the predicate function that performs the matching
 */
protected static <T> Predicate<T> includedInLiterals(Collection<String> literals, Function<T, String> conversion) {
    return (value) -> {
        String str = conversion.apply(value);
        // Guard against a null conversion result rather than failing with a NullPointerException
        return str != null && literals.contains(str.toLowerCase());
    };
}
/** /**
* Generate a predicate function that for any supplied parameter returns {@code true} if <i>none</i> of the regular * Generate a predicate function that for any supplied parameter returns {@code true} if <i>none</i> of the regular
* expressions in the supplied comma-separated list matches the predicate parameter. * expressions in the supplied comma-separated list matches the predicate parameter.

View File

@ -19,6 +19,7 @@
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import java.util.UUID;
import java.util.function.Function; import java.util.function.Function;
import java.util.function.Supplier; import java.util.function.Supplier;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -782,6 +783,21 @@ private static List<String> split(String str,
return l; return l;
} }
/**
 * Check whether the supplied string can be parsed as a {@link UUID} by {@link UUID#fromString(String)}.
 *
 * @param str the string to evaluate; may be null
 * @return {@code true} if the string is accepted by {@link UUID#fromString(String)}, or {@code false} if it is null or
 *         rejected
 */
public static boolean isUuid(String str) {
    boolean parseable = false;
    if (str != null) {
        try {
            UUID.fromString(str);
            parseable = true;
        } catch (IllegalArgumentException notAUuid) {
            // Parsing failed, so the string is not a UUID; leave the result as false
        }
    }
    return parseable;
}
private Strings() { private Strings() {
} }
} }

View File

@ -5,6 +5,7 @@
*/ */
package io.debezium.function; package io.debezium.function;
import java.util.UUID;
import java.util.function.Predicate; import java.util.function.Predicate;
import org.junit.Test; import org.junit.Test;
@ -55,4 +56,37 @@ public void shouldMatchCommaSeparatedLiteralExcludes() {
assertThat(p.test(-1)).isTrue(); assertThat(p.test(-1)).isTrue();
} }
@Test
public void shouldMatchCommaSeparatedUuidLiterals() {
    // Four random UUIDs: the first two are listed literally, the third is never listed,
    // and the fourth is only ever reachable through a prefix regular expression
    final String listedA = UUID.randomUUID().toString();
    final String listedB = UUID.randomUUID().toString();
    final String unlisted = UUID.randomUUID().toString();
    final String viaRegex = UUID.randomUUID().toString();
    final String regexForFourth = viaRegex.substring(0, 10) + ".*";

    final String literalsOnly = String.join(",", listedA, listedB);
    final String literalsAndRegex = String.join(",", listedA, listedB, regexForFourth);

    // Includes with only literals
    Predicate<String> includeLiterals = Predicates.includesUuids(literalsOnly);
    assertThat(includeLiterals.test(listedA)).isTrue();
    assertThat(includeLiterals.test(listedB)).isTrue();
    assertThat(includeLiterals.test(unlisted)).isFalse();
    assertThat(includeLiterals.test(viaRegex)).isFalse();

    // Excludes with only literals
    Predicate<String> excludeLiterals = Predicates.excludesUuids(literalsOnly);
    assertThat(excludeLiterals.test(listedA)).isFalse();
    assertThat(excludeLiterals.test(listedB)).isFalse();
    assertThat(excludeLiterals.test(unlisted)).isTrue();
    assertThat(excludeLiterals.test(viaRegex)).isTrue();

    // Includes with a mixture of literals and a regular expression
    Predicate<String> includeMixed = Predicates.includesUuids(literalsAndRegex);
    assertThat(includeMixed.test(listedA)).isTrue();
    assertThat(includeMixed.test(listedB)).isTrue();
    assertThat(includeMixed.test(unlisted)).isFalse();
    assertThat(includeMixed.test(viaRegex)).isTrue();

    // Excludes with a mixture of literals and a regular expression
    Predicate<String> excludeMixed = Predicates.excludesUuids(literalsAndRegex);
    assertThat(excludeMixed.test(listedA)).isFalse();
    assertThat(excludeMixed.test(listedB)).isFalse();
    assertThat(excludeMixed.test(unlisted)).isTrue();
    assertThat(excludeMixed.test(viaRegex)).isFalse();
}
} }