DBZ-4033 Add a new hash version (V2) so that the same value produces the same hash in different places

In V2, the hashed byte array contains only the actual value, unlike the serialization-based V1 implementation, which exposed Java's internal serialization format.
This commit is contained in:
unalsurmeli 2021-09-23 15:57:27 +03:00 committed by Gunnar Morling
parent 5c8699ae76
commit 9a13d80074
6 changed files with 96 additions and 15 deletions

View File

@ -340,3 +340,4 @@ Zoran Regvart
志飞 张
李宗文
민규 김
Ünal Sürmeli

View File

@ -67,6 +67,14 @@ public static ColumnMappers create(RelationalDatabaseConnectorConfig connectorCo
}
});
config.forEachMatchingFieldNameWithString("column\\.mask\\.hash.v2.\\." + hashAlgorithmAndSaltExtractPattern.pattern(),
(fullyQualifiedColumnNames, hashAlgorithmAndSalt) -> {
Matcher matcher = hashAlgorithmAndSaltExtractPattern.matcher(hashAlgorithmAndSalt);
if (matcher.matches()) {
builder.maskStringsByHashingV2(fullyQualifiedColumnNames, matcher.group("hashAlgorithm"), matcher.group("salt"));
}
});
return builder.build();
}
@ -208,7 +216,11 @@ public Builder maskStrings(String fullyQualifiedColumnNames, String maskValue) {
}
public Builder maskStringsByHashing(String fullyQualifiedColumnNames, String hashAlgorithm, String salt) {
return map(fullyQualifiedColumnNames, new MaskStrings(salt.getBytes(), hashAlgorithm));
return map(fullyQualifiedColumnNames, new MaskStrings(salt.getBytes(), hashAlgorithm, MaskStrings.HashingByteArrayStrategy.V1));
}
/**
 * Registers a mapper that replaces the values of the matching columns with a salted hash computed
 * with the {@link MaskStrings.HashingByteArrayStrategy#V2} strategy.
 *
 * @param fullyQualifiedColumnNames the column name expression(s) passed through to {@code map}
 * @param hashAlgorithm the name of the hash algorithm handed to {@link MaskStrings}
 * @param salt the salt that is mixed into every hash; must not be null
 * @return the builder returned by {@code map}
 */
public Builder maskStringsByHashingV2(String fullyQualifiedColumnNames, String hashAlgorithm, String salt) {
    // V2 exists so that the same value hashes identically everywhere; encoding the salt with a fixed
    // charset keeps the result independent of the JVM's platform default charset.
    final byte[] saltBytes = salt.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    return map(fullyQualifiedColumnNames, new MaskStrings(saltBytes, hashAlgorithm, MaskStrings.HashingByteArrayStrategy.V2));
}
public Builder propagateSourceTypeToSchemaParameter(String fullyQualifiedColumnNames, String value) {

View File

@ -55,11 +55,11 @@ public MaskStrings(String maskValue) {
* must be one of the Java Cryptography Architecture Standard Algorithm names for {@link MessageDigest}.
* @throws NullPointerException if {@code salt} or {@code hashAlgorithm} is null
*/
public MaskStrings(byte[] salt, String hashAlgorithm) {
public MaskStrings(byte[] salt, String hashAlgorithm, HashingByteArrayStrategy hashingByteArrayStrategy) {
Objects.requireNonNull(salt);
Objects.requireNonNull(hashAlgorithm);
this.converterFromColumn = column -> {
final HashValueConverter hashValueConverter = new HashValueConverter(salt, hashAlgorithm);
final HashValueConverter hashValueConverter = new HashValueConverter(salt, hashAlgorithm, hashingByteArrayStrategy);
if (column.length() > 0) {
return hashValueConverter.and(new TruncateStrings.TruncatingValueConverter(column.length()));
}
@ -113,9 +113,11 @@ protected static final class HashValueConverter implements ValueConverter {
private static final Logger LOGGER = LoggerFactory.getLogger(HashValueConverter.class);
private final byte[] salt;
private final MessageDigest hashAlgorithm;
private final HashingByteArrayStrategy hashingByteArrayStrategy;
public HashValueConverter(byte[] salt, String hashAlgorithm) {
public HashValueConverter(byte[] salt, String hashAlgorithm, HashingByteArrayStrategy hashingByteArrayStrategy) {
this.salt = salt;
this.hashingByteArrayStrategy = hashingByteArrayStrategy;
try {
this.hashAlgorithm = MessageDigest.getInstance(hashAlgorithm);
}
@ -142,12 +144,8 @@ public Object convert(Object value) {
private String toHash(Serializable value) throws IOException {
hashAlgorithm.reset();
hashAlgorithm.update(salt);
try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
ObjectOutput out = new ObjectOutputStream(bos)) {
out.writeObject(value);
return convertToHexadecimalFormat(hashAlgorithm.digest(bos.toByteArray()));
}
byte[] valueToByteArray = hashingByteArrayStrategy.toByteArray(value);
return convertToHexadecimalFormat(hashAlgorithm.digest(valueToByteArray));
}
private String convertToHexadecimalFormat(byte[] bytes) {
@ -158,4 +156,29 @@ private String convertToHexadecimalFormat(byte[] bytes) {
return hashString.toString();
}
}
}
/**
 * Strategy that turns a column value into the byte array that is fed to the hash function.
 * <ul>
 * <li>{@link #V1} is the default and the previous behavior. Because the value is written through an
 * {@link ObjectOutputStream}, Java serialization data precedes the bytes of the actual value.</li>
 * <li>{@link #V2} produces only the bytes of the value's string representation, so that the same
 * value yields the same hash wherever it is computed.</li>
 * </ul>
 */
public enum HashingByteArrayStrategy {
    V1 {
        @Override
        byte[] toByteArray(Serializable value) throws IOException {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            // Closing the object stream flushes everything into bos before the bytes are read.
            try (ObjectOutput out = new ObjectOutputStream(bos)) {
                out.writeObject(value);
            }
            return bos.toByteArray();
        }
    },
    V2 {
        @Override
        byte[] toByteArray(Serializable value) {
            // Use a fixed charset: with the platform default, non-ASCII values would be encoded
            // (and therefore hashed) differently on JVMs configured with another charset.
            return value.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        }
    };

    /**
     * Converts the given value into the bytes that are hashed.
     *
     * @param value the value to convert; must not be null
     * @return the byte representation of the value for this strategy
     * @throws IOException if the value cannot be written to the underlying stream
     */
    abstract byte[] toByteArray(Serializable value) throws IOException;
}
}

View File

@ -16,7 +16,6 @@
/**
* @author Randall Hauch
*
*/
public class MaskStringsTest {
@ -39,11 +38,33 @@ public void shouldMaskStringsWithAsterisks() {
@Test
public void shouldTransformSameInputsToSameResultsForCharsetType() {
converter = new MaskStrings("salt".getBytes(), "SHA-256").create(column);
converter = new MaskStrings("salt".getBytes(), "SHA-256", MaskStrings.HashingByteArrayStrategy.V1).create(column);
assertThat(converter.convert("hello")).isEqualTo("af5843a0f0e728ab0332c8888b6e1190bfb79e584f0d40538de8f10df6ef29c6");
assertThat(converter.convert("hello")).isEqualTo("af5843a0f0e728ab0332c8888b6e1190bfb79e584f0d40538de8f10df6ef29c6");
assertThat(converter.convert("world")).isEqualTo("4588e1f2dcdc7fefc1515d3acd5acb9033478eace68286f383c337b9ff4464a3");
assertThat(converter.convert("world")).isEqualTo("4588e1f2dcdc7fefc1515d3acd5acb9033478eace68286f383c337b9ff4464a3");
}
}
@Test
public void shouldTransformSameInputsToSameResultsForCharsetTypeWithMD5() {
    // MD5 hashing with the V1 (serialization-based) byte array strategy.
    final byte[] saltBytes = "salt".getBytes();
    final MaskStrings md5MaskV1 = new MaskStrings(saltBytes, "MD5", MaskStrings.HashingByteArrayStrategy.V1);
    converter = md5MaskV1.create(column);

    final String expectedHello = "46f48a7a5ea32efba87cbff49d46d35a";
    final String expectedWorld = "030f632140d9c9591b0d8c21098024b8";

    assertThat(converter.convert("hello")).isEqualTo(expectedHello);
    assertThat(converter.convert("world")).isEqualTo(expectedWorld);
}
@Test
public void shouldTransformSameInputsToSameResultsForCharsetTypeWithV2() {
    // SHA-256 hashing with the V2 byte array strategy.
    final byte[] saltBytes = "salt".getBytes();
    final MaskStrings sha256MaskV2 = new MaskStrings(saltBytes, "SHA-256", MaskStrings.HashingByteArrayStrategy.V2);
    converter = sha256MaskV2.create(column);

    final String expectedHello = "cd31b3b98ece60cb739c0bf770b2de892ae0ad133f645513c3d83f08757a843a";
    final String expectedWorld = "e84ac3142870113ddc6710c06f76421befc8e8ca6de64e98d2993ed8d41f4085";

    // Each input is converted twice to check that repeated conversions give the same hash.
    assertThat(converter.convert("hello")).isEqualTo(expectedHello);
    assertThat(converter.convert("hello")).isEqualTo(expectedHello);
    assertThat(converter.convert("world")).isEqualTo(expectedWorld);
    assertThat(converter.convert("world")).isEqualTo(expectedWorld);
}
@Test
public void shouldTransformSameInputsToSameResultsForCharsetTypeWithMD5AndV2() {
    // MD5 hashing with the V2 byte array strategy.
    final byte[] saltBytes = "salt".getBytes();
    final MaskStrings md5MaskV2 = new MaskStrings(saltBytes, "MD5", MaskStrings.HashingByteArrayStrategy.V2);
    converter = md5MaskV2.create(column);

    final String expectedHello = "06decc8b095724f80103712c235586be";
    final String expectedWorld = "172c8e95398cc72ab5358ead6981e7e5";

    assertThat(converter.convert("hello")).isEqualTo(expectedHello);
    assertThat(converter.convert("world")).isEqualTo(expectedWorld);
}
}

View File

@ -2675,6 +2675,29 @@ The connector configuration can include multiple properties that specify differe
+
Depending on the _hashAlgorithm_ used, the _salt_ selected, and the actual data set, the resulting data set might not be completely masked.
|[[postgresql-property-column-mask-hash-v2]]<<postgresql-property-column-mask-hash-v2, `column.mask.hash.v2._hashAlgorithm_.with.salt._salt_`>>
|_n/a_
|An optional, comma-separated list of regular expressions that match the fully-qualified names of character-based columns.
Fully-qualified names for columns are of the form _<schemaName>_._<tableName>_._<columnName>_.
In the resulting change event record, the values for the specified columns are replaced with pseudonyms. +
A pseudonym consists of the hashed value that results from applying the specified _hashAlgorithm_ and _salt_.
Based on the hash function that is used, referential integrity is maintained, while column values are replaced with pseudonyms.
Supported hash functions are described in the {link-java7-standard-names}[MessageDigest section] of the Java Cryptography Architecture Standard Algorithm Name Documentation. +
+
In the following example, `CzQMA0cB5K` is a randomly selected salt. +
----
column.mask.hash.v2.SHA-256.with.salt.CzQMA0cB5K = inventory.orders.customerName, inventory.shipment.customerName
----
If necessary, the pseudonym is automatically shortened to the length of the column.
The connector configuration can include multiple properties that specify different hash algorithms and salts. +
+
Depending on the _hashAlgorithm_ used, the _salt_ selected, and the actual data set, the resulting data set might not be completely masked.
* Hashing strategy version V2: Use V2 when the same value must produce the same hash in different places or systems.
|[[postgresql-property-column-propagate-source-type]]<<postgresql-property-column-propagate-source-type, `+column.propagate.source.type+`>>
|_n/a_
|An optional, comma-separated list of regular expressions that match the fully-qualified names of columns. Fully-qualified names for columns are of the form _databaseName_._tableName_._columnName_, or _databaseName_._schemaName_._tableName_._columnName_. +

View File

@ -93,4 +93,5 @@ pkgonan,민규 김
jiabao.sun,Jiabao Sun
indraraj,Indra Shukla
judahrand,Judah Rand
zxpzlp,Xiaopu Zhu
zxpzlp,Xiaopu Zhu
unalsurmeli,Ünal Sürmeli