DBZ-6201: Add new HeaderToValue SMT to copy headers to value
This commit is contained in:
parent
4e544debdc
commit
c45bf39eb0
@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Copyright Debezium Authors.
|
||||
*
|
||||
* Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
|
||||
*/
|
||||
package io.debezium.transforms;
|
||||
|
||||
import io.debezium.config.Configuration;
|
||||
import io.debezium.config.Field;
|
||||
import io.debezium.util.BoundedConcurrentHashMap;
|
||||
import org.apache.kafka.common.config.ConfigDef;
|
||||
import org.apache.kafka.common.config.ConfigException;
|
||||
import org.apache.kafka.connect.connector.ConnectRecord;
|
||||
import org.apache.kafka.connect.data.Schema;
|
||||
import org.apache.kafka.connect.data.SchemaBuilder;
|
||||
import org.apache.kafka.connect.data.Struct;
|
||||
import org.apache.kafka.connect.header.Header;
|
||||
import org.apache.kafka.connect.transforms.Transformation;
|
||||
import org.apache.kafka.connect.transforms.util.SchemaUtil;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import static java.lang.String.format;
|
||||
import static org.apache.kafka.connect.transforms.util.Requirements.requireStruct;
|
||||
|
||||
public class HeaderToValue<R extends ConnectRecord<R>> implements Transformation<R> {
|
||||
|
||||
public static final String FIELDS_CONF = "fields";
|
||||
public static final String HEADERS_CONF = "headers";
|
||||
public static final String OPERATION_CONF = "operation";
|
||||
private static final String MOVE_OPERATION = "move";
|
||||
private static final String COPY_OPERATION = "copy";
|
||||
private static final int SCHEMA_CACHE_SIZE = 64;
|
||||
|
||||
enum Operation {
|
||||
MOVE(MOVE_OPERATION),
|
||||
COPY(COPY_OPERATION);
|
||||
|
||||
private final String name;
|
||||
|
||||
Operation(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
static Operation fromName(String name) {
|
||||
switch (name) {
|
||||
case MOVE_OPERATION:
|
||||
return MOVE;
|
||||
case COPY_OPERATION:
|
||||
return COPY;
|
||||
default:
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
|
||||
public static final Field HEADERS_FIELD = Field.create(HEADERS_CONF)
|
||||
.withDisplayName("Header names list")
|
||||
.withType(ConfigDef.Type.LIST)
|
||||
.withImportance(ConfigDef.Importance.HIGH)
|
||||
.withDescription("Header names in the record whose values are to be copied or moved to value.")
|
||||
.required();
|
||||
|
||||
public static final Field FIELDS_FIELD = Field.create(FIELDS_CONF)
|
||||
.withDisplayName("Field names list")
|
||||
.withType(ConfigDef.Type.LIST)
|
||||
.withImportance(ConfigDef.Importance.HIGH)
|
||||
.withDescription("Field names, in the same order as the header names listed in the headers configuration property. Supports Struct nesting using dot notation.")
|
||||
.required();
|
||||
|
||||
public static final Field OPERATION_FIELD = Field.create(OPERATION_CONF)
|
||||
.withDisplayName("Operation: mover or copy")
|
||||
.withType(ConfigDef.Type.STRING)
|
||||
.withEnum(Operation.class)
|
||||
.withImportance(ConfigDef.Importance.HIGH)
|
||||
.withDescription("Either <code>move</code> if the fields are to be moved to the value (removed from the headers), " +
|
||||
"or <code>copy</code> if the fields are to be copied to the value (retained in the headers).")
|
||||
.required();
|
||||
|
||||
private List<String> fields;
|
||||
|
||||
private List<String> headers;
|
||||
|
||||
private Operation operation;
|
||||
|
||||
private SmtManager<R> smtManager;
|
||||
|
||||
private BoundedConcurrentHashMap<Schema, Schema> schemaUpdateCache = new BoundedConcurrentHashMap<>(SCHEMA_CACHE_SIZE);
|
||||
@Override
|
||||
public ConfigDef config() {
|
||||
|
||||
final ConfigDef config = new ConfigDef();
|
||||
Field.group(config, null, HEADERS_FIELD, FIELDS_FIELD, OPERATION_FIELD);
|
||||
return config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(Map<String, ?> props) {
|
||||
|
||||
final Configuration config = Configuration.from(props);
|
||||
smtManager = new SmtManager<>(config);
|
||||
smtManager.validate(config, Field.setOf(FIELDS_FIELD, HEADERS_FIELD, OPERATION_FIELD));
|
||||
|
||||
fields = config.getList(FIELDS_FIELD);
|
||||
headers = config.getList(HEADERS_FIELD);
|
||||
if (headers.size() != fields.size()) {
|
||||
throw new ConfigException(format("'%s' config must have the same number of elements as '%s' config.",
|
||||
FIELDS_FIELD, HEADERS_FIELD));
|
||||
}
|
||||
operation = Operation.fromName(config.getString(OPERATION_FIELD));
|
||||
}
|
||||
|
||||
@Override
|
||||
public R apply(R record) {
|
||||
|
||||
final Struct value = requireStruct(record.value(), "");
|
||||
|
||||
Map<String, Header> headerToProcess = StreamSupport.stream(record.headers().spliterator(), false)
|
||||
.filter(header -> headers.contains(header.key()))
|
||||
.collect(Collectors.toMap(Header::key, Function.identity()));
|
||||
|
||||
Schema updatedSchema = schemaUpdateCache.computeIfAbsent(value.schema(),
|
||||
s -> makeNewSchema(value.schema(), headerToProcess));
|
||||
|
||||
// Update the value with the new fields
|
||||
Struct updatedValue = new Struct(updatedSchema);
|
||||
for (org.apache.kafka.connect.data.Field field : value.schema().fields()) {
|
||||
if (value.get(field) != null) {
|
||||
updatedValue.put(field.name(), value.get(field));
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
|
||||
Header currentHeader = headerToProcess.get(headers.get(i));
|
||||
|
||||
updatedValue.put(fields.get(i), currentHeader.value());
|
||||
}
|
||||
|
||||
|
||||
return record.newRecord(record.topic(), record.kafkaPartition(),
|
||||
record.keySchema(),
|
||||
record.key(),
|
||||
updatedSchema,
|
||||
updatedValue,
|
||||
record.timestamp(),
|
||||
record.headers());
|
||||
}
|
||||
|
||||
private Schema makeNewSchema(Schema oldSchema, Map<String, Header> headerToProcess) {
|
||||
|
||||
// Get fields from original schema
|
||||
SchemaBuilder newSchemabuilder = SchemaUtil.copySchemaBasics(oldSchema, SchemaBuilder.struct());
|
||||
for (org.apache.kafka.connect.data.Field field : oldSchema.fields()) {
|
||||
newSchemabuilder.field(field.name(), field.schema());
|
||||
}
|
||||
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
|
||||
Header currentHeader = headerToProcess.get(headers.get(i));
|
||||
String destinationFieldName = fields.get(i);
|
||||
newSchemabuilder = newSchemabuilder.field(destinationFieldName, currentHeader.schema());
|
||||
}
|
||||
|
||||
return newSchemabuilder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright Debezium Authors.
|
||||
*
|
||||
* Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
|
||||
*/
|
||||
|
||||
package io.debezium.transforms;
|
||||
|
||||
import io.debezium.data.Envelope;
|
||||
import org.apache.kafka.common.config.ConfigException;
|
||||
import org.apache.kafka.connect.data.Schema;
|
||||
import org.apache.kafka.connect.data.SchemaBuilder;
|
||||
import org.apache.kafka.connect.data.Struct;
|
||||
import org.apache.kafka.connect.source.SourceRecord;
|
||||
import org.apache.kafka.connect.transforms.util.Requirements;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
public class HeaderToValueTest {
|
||||
|
||||
public static final Schema VALUE_SCHEMA = SchemaBuilder.struct()
|
||||
.name("mysql-server-1.inventory.products.Value")
|
||||
.field("id", Schema.INT64_SCHEMA)
|
||||
.field("price", Schema.FLOAT32_SCHEMA)
|
||||
.field("product", Schema.STRING_SCHEMA)
|
||||
.build();
|
||||
private final HeaderToValue<SourceRecord> headerToValue = new HeaderToValue<>();
|
||||
|
||||
@Test
|
||||
public void whenOperationIsNotMoveOrCopyAConfigExceptionIsThrew() {
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
headerToValue.configure(Map.of(
|
||||
"headers", "h1",
|
||||
"fields", "f1",
|
||||
"operation", "invalidOp")))
|
||||
.isInstanceOf(ConfigException.class)
|
||||
.hasMessageContaining(
|
||||
"Invalid value invalidOp for configuration operation: The 'operation' value is invalid: Value must be one of move, copy");;
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenNoFieldsDeclaredAConfigExceptionIsThrew() {
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
headerToValue.configure(Map.of(
|
||||
"headers", "h1",
|
||||
"operation", "copy")))
|
||||
.isInstanceOf(ConfigException.class)
|
||||
.hasMessageContaining(
|
||||
"Invalid value null for configuration fields: The 'fields' value is invalid: A value is required");;
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenNoHeadersDeclaredAConfigExceptionIsThrew() {
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
headerToValue.configure(Map.of(
|
||||
"fields", "f1",
|
||||
"operation", "copy")))
|
||||
.isInstanceOf(ConfigException.class)
|
||||
.hasMessageContaining(
|
||||
"Invalid value null for configuration headers: The 'headers' value is invalid: A value is required");;
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenHeadersAndFieldsHaveDifferentSizeAConfigExceptionIsThrew() {
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
headerToValue.configure(Map.of(
|
||||
"headers", "h1,h2",
|
||||
"fields", "f1",
|
||||
"operation", "copy")))
|
||||
.isInstanceOf(ConfigException.class)
|
||||
.hasMessageContaining(
|
||||
"'fields' config must have the same number of elements as 'headers' config.");;
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenARecordThatContainsADefinedHeaderItWillBeCopiedInTheDefinedField() {
|
||||
|
||||
headerToValue.configure(Map.of(
|
||||
"headers", "h1,h2",
|
||||
"fields", "f1, f2",
|
||||
"operation", "copy"));
|
||||
|
||||
Struct row = new Struct(VALUE_SCHEMA)
|
||||
.put("id", 101L)
|
||||
.put("price", 20.0F)
|
||||
.put("product", "a product");
|
||||
|
||||
Envelope createEnvelope = Envelope.defineSchema()
|
||||
.withName("mysql-server-1.inventory.product.Envelope")
|
||||
.withRecord(VALUE_SCHEMA)
|
||||
.withSource(Schema.STRING_SCHEMA)
|
||||
.build();
|
||||
|
||||
Struct payload = createEnvelope.create(row, null , Instant.now());
|
||||
SourceRecord sourceRecord = new SourceRecord(new HashMap<>(), new HashMap<>(), "topic", createEnvelope.schema(), payload);
|
||||
sourceRecord.headers().add("h1", "this is a value from h1 header", Schema.STRING_SCHEMA);
|
||||
sourceRecord.headers().add("h2", "this is a value from h2 header", Schema.STRING_SCHEMA);
|
||||
|
||||
SourceRecord transformedRecord = headerToValue.apply(sourceRecord);
|
||||
|
||||
Struct payloadStruct = Requirements.requireStruct(transformedRecord.value(), "");
|
||||
assertThat(payloadStruct.get("f1")).isEqualTo("this is a value from h1 header");
|
||||
assertThat(payloadStruct.get("f2")).isEqualTo("this is a value from h2 header");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenARecordThatContainsADefinedStructHeaderItWillBeCopiedInTheDefinedField() {
|
||||
|
||||
Schema headerSchema = SchemaBuilder.array(SchemaBuilder.OPTIONAL_STRING_SCHEMA).optional().name("h1").build();
|
||||
headerToValue.configure(Map.of(
|
||||
"headers", "h1",
|
||||
"fields", "f1",
|
||||
"operation", "copy"));
|
||||
|
||||
Struct row = new Struct(VALUE_SCHEMA)
|
||||
.put("id", 101L)
|
||||
.put("price", 20.0F)
|
||||
.put("product", "a product");
|
||||
|
||||
Envelope createEnvelope = Envelope.defineSchema()
|
||||
.withName("mysql-server-1.inventory.product.Envelope")
|
||||
.withRecord(VALUE_SCHEMA)
|
||||
.withSource(Schema.STRING_SCHEMA)
|
||||
.build();
|
||||
Struct payload = createEnvelope.create(row, null , Instant.now());
|
||||
SourceRecord sourceRecord = new SourceRecord(new HashMap<>(), new HashMap<>(), "topic", createEnvelope.schema(), payload);
|
||||
sourceRecord.headers().add("h1", List.of("v1", "v2"), headerSchema);
|
||||
|
||||
SourceRecord transformedRecord = headerToValue.apply(sourceRecord);
|
||||
|
||||
Struct payloadStruct = Requirements.requireStruct(transformedRecord.value(), "");
|
||||
assertThat(payloadStruct.getArray("f1")).contains("v1", "v2");
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user