DBZ-4033 Add a new hash version (V2) so that the same value produces the same hash in different places;
In V2, the byte array contains only the actual value, unlike the serialization-based V1 implementation, which exposed Java's internal serialization format.
This commit is contained in:
parent
5c8699ae76
commit
9a13d80074
@ -340,3 +340,4 @@ Zoran Regvart
|
||||
志飞 张
|
||||
李宗文
|
||||
민규 김
|
||||
Ünal Sürmeli
|
@ -67,6 +67,14 @@ public static ColumnMappers create(RelationalDatabaseConnectorConfig connectorCo
|
||||
}
|
||||
});
|
||||
|
||||
config.forEachMatchingFieldNameWithString("column\\.mask\\.hash.v2.\\." + hashAlgorithmAndSaltExtractPattern.pattern(),
|
||||
(fullyQualifiedColumnNames, hashAlgorithmAndSalt) -> {
|
||||
Matcher matcher = hashAlgorithmAndSaltExtractPattern.matcher(hashAlgorithmAndSalt);
|
||||
if (matcher.matches()) {
|
||||
builder.maskStringsByHashingV2(fullyQualifiedColumnNames, matcher.group("hashAlgorithm"), matcher.group("salt"));
|
||||
}
|
||||
});
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@ -208,7 +216,11 @@ public Builder maskStrings(String fullyQualifiedColumnNames, String maskValue) {
|
||||
}
|
||||
|
||||
public Builder maskStringsByHashing(String fullyQualifiedColumnNames, String hashAlgorithm, String salt) {
|
||||
return map(fullyQualifiedColumnNames, new MaskStrings(salt.getBytes(), hashAlgorithm));
|
||||
return map(fullyQualifiedColumnNames, new MaskStrings(salt.getBytes(), hashAlgorithm, MaskStrings.HashingByteArrayStrategy.V1));
|
||||
}
|
||||
|
||||
public Builder maskStringsByHashingV2(String fullyQualifiedColumnNames, String hashAlgorithm, String salt) {
|
||||
return map(fullyQualifiedColumnNames, new MaskStrings(salt.getBytes(), hashAlgorithm, MaskStrings.HashingByteArrayStrategy.V2));
|
||||
}
|
||||
|
||||
public Builder propagateSourceTypeToSchemaParameter(String fullyQualifiedColumnNames, String value) {
|
||||
|
@ -55,11 +55,11 @@ public MaskStrings(String maskValue) {
|
||||
* must be one of the Java Cryptography Architecture standard algorithm names accepted by {@link MessageDigest}.
|
||||
* @throws NullPointerException if the {@code salt} or {@code hashAlgorithm} is null
|
||||
*/
|
||||
public MaskStrings(byte[] salt, String hashAlgorithm) {
|
||||
public MaskStrings(byte[] salt, String hashAlgorithm, HashingByteArrayStrategy hashingByteArrayStrategy) {
|
||||
Objects.requireNonNull(salt);
|
||||
Objects.requireNonNull(hashAlgorithm);
|
||||
this.converterFromColumn = column -> {
|
||||
final HashValueConverter hashValueConverter = new HashValueConverter(salt, hashAlgorithm);
|
||||
final HashValueConverter hashValueConverter = new HashValueConverter(salt, hashAlgorithm, hashingByteArrayStrategy);
|
||||
if (column.length() > 0) {
|
||||
return hashValueConverter.and(new TruncateStrings.TruncatingValueConverter(column.length()));
|
||||
}
|
||||
@ -113,9 +113,11 @@ protected static final class HashValueConverter implements ValueConverter {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(HashValueConverter.class);
|
||||
private final byte[] salt;
|
||||
private final MessageDigest hashAlgorithm;
|
||||
private final HashingByteArrayStrategy hashingByteArrayStrategy;
|
||||
|
||||
public HashValueConverter(byte[] salt, String hashAlgorithm) {
|
||||
public HashValueConverter(byte[] salt, String hashAlgorithm, HashingByteArrayStrategy hashingByteArrayStrategy) {
|
||||
this.salt = salt;
|
||||
this.hashingByteArrayStrategy = hashingByteArrayStrategy;
|
||||
try {
|
||||
this.hashAlgorithm = MessageDigest.getInstance(hashAlgorithm);
|
||||
}
|
||||
@ -142,12 +144,8 @@ public Object convert(Object value) {
|
||||
private String toHash(Serializable value) throws IOException {
|
||||
hashAlgorithm.reset();
|
||||
hashAlgorithm.update(salt);
|
||||
|
||||
try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
ObjectOutput out = new ObjectOutputStream(bos)) {
|
||||
out.writeObject(value);
|
||||
return convertToHexadecimalFormat(hashAlgorithm.digest(bos.toByteArray()));
|
||||
}
|
||||
byte[] valueToByteArray = hashingByteArrayStrategy.toByteArray(value);
|
||||
return convertToHexadecimalFormat(hashAlgorithm.digest(valueToByteArray));
|
||||
}
|
||||
|
||||
private String convertToHexadecimalFormat(byte[] bytes) {
|
||||
@ -158,4 +156,29 @@ private String convertToHexadecimalFormat(byte[] bytes) {
|
||||
return hashString.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Strategy that turns a value into the byte array that is fed to the hash function.
 * <ul>
 * <li>{@link #V1} is the default and previous behavior: the value is written through an
 * {@link ObjectOutputStream}, so extra serialization bytes precede the actual value.</li>
 * <li>{@link #V2} uses only the UTF-8 bytes of {@code value.toString()}. Use it when the same
 * value must be hashed to the same result in different places.</li>
 * </ul>
 */
public enum HashingByteArrayStrategy {
    V1 {
        @Override
        byte[] toByteArray(Serializable value) throws IOException {
            // Read the bytes while the streams are still open, then let try-with-resources
            // close them so the object stream is not left dangling.
            try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
                    ObjectOutput out = new ObjectOutputStream(bos)) {
                out.writeObject(value);
                return bos.toByteArray();
            }
        }
    },
    V2 {
        @Override
        byte[] toByteArray(Serializable value) {
            // Pin the charset: the no-argument getBytes() uses the platform default encoding,
            // which would make hashes of non-ASCII values differ between environments.
            return value.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        }
    };

    /**
     * Converts the value into the bytes that will be hashed.
     *
     * @param value the value to convert
     * @return the byte representation defined by this strategy
     * @throws IOException if the strategy writes to a stream and that write fails
     */
    abstract byte[] toByteArray(Serializable value) throws IOException;
}
|
||||
}
|
@ -16,7 +16,6 @@
|
||||
|
||||
/**
|
||||
* @author Randall Hauch
|
||||
*
|
||||
*/
|
||||
public class MaskStringsTest {
|
||||
|
||||
@ -39,11 +38,33 @@ public void shouldMaskStringsWithAsterisks() {
|
||||
|
||||
@Test
|
||||
public void shouldTransformSameInputsToSameResultsForCharsetType() {
|
||||
converter = new MaskStrings("salt".getBytes(), "SHA-256").create(column);
|
||||
converter = new MaskStrings("salt".getBytes(), "SHA-256", MaskStrings.HashingByteArrayStrategy.V1).create(column);
|
||||
assertThat(converter.convert("hello")).isEqualTo("af5843a0f0e728ab0332c8888b6e1190bfb79e584f0d40538de8f10df6ef29c6");
|
||||
assertThat(converter.convert("hello")).isEqualTo("af5843a0f0e728ab0332c8888b6e1190bfb79e584f0d40538de8f10df6ef29c6");
|
||||
assertThat(converter.convert("world")).isEqualTo("4588e1f2dcdc7fefc1515d3acd5acb9033478eace68286f383c337b9ff4464a3");
|
||||
assertThat(converter.convert("world")).isEqualTo("4588e1f2dcdc7fefc1515d3acd5acb9033478eace68286f383c337b9ff4464a3");
|
||||
}
|
||||
|
||||
}
|
||||
@Test
|
||||
public void shouldTransformSameInputsToSameResultsForCharsetTypeWithMD5() {
|
||||
converter = new MaskStrings("salt".getBytes(), "MD5", MaskStrings.HashingByteArrayStrategy.V1).create(column);
|
||||
assertThat(converter.convert("hello")).isEqualTo("46f48a7a5ea32efba87cbff49d46d35a");
|
||||
assertThat(converter.convert("world")).isEqualTo("030f632140d9c9591b0d8c21098024b8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldTransformSameInputsToSameResultsForCharsetTypeWithV2() {
|
||||
converter = new MaskStrings("salt".getBytes(), "SHA-256", MaskStrings.HashingByteArrayStrategy.V2).create(column);
|
||||
assertThat(converter.convert("hello")).isEqualTo("cd31b3b98ece60cb739c0bf770b2de892ae0ad133f645513c3d83f08757a843a");
|
||||
assertThat(converter.convert("hello")).isEqualTo("cd31b3b98ece60cb739c0bf770b2de892ae0ad133f645513c3d83f08757a843a");
|
||||
assertThat(converter.convert("world")).isEqualTo("e84ac3142870113ddc6710c06f76421befc8e8ca6de64e98d2993ed8d41f4085");
|
||||
assertThat(converter.convert("world")).isEqualTo("e84ac3142870113ddc6710c06f76421befc8e8ca6de64e98d2993ed8d41f4085");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldTransformSameInputsToSameResultsForCharsetTypeWithMD5AndV2() {
|
||||
converter = new MaskStrings("salt".getBytes(), "MD5", MaskStrings.HashingByteArrayStrategy.V2).create(column);
|
||||
assertThat(converter.convert("hello")).isEqualTo("06decc8b095724f80103712c235586be");
|
||||
assertThat(converter.convert("world")).isEqualTo("172c8e95398cc72ab5358ead6981e7e5");
|
||||
}
|
||||
}
|
@ -2675,6 +2675,29 @@ The connector configuration can include multiple properties that specify differe
|
||||
+
|
||||
Depending on the _hashAlgorithm_ used, the _salt_ selected, and the actual data set, the resulting data set might not be completely masked.
|
||||
|
||||
|[[postgresql-property-column-mask-hash-v2]]<<postgresql-property-column-mask-hash-v2, `column.mask.hash.v2._hashAlgorithm_.with.salt._salt_`>>
|
||||
|_n/a_
|
||||
|An optional, comma-separated list of regular expressions that match the fully-qualified names of character-based columns.
|
||||
Fully-qualified names for columns are of the form _<schemaName>_._<tableName>_._<columnName>_.
|
||||
In the resulting change event record, the values for the specified columns are replaced with pseudonyms. +
|
||||
|
||||
A pseudonym consists of the hashed value that results from applying the specified _hashAlgorithm_ and _salt_.
|
||||
Based on the hash function that is used, referential integrity is maintained, while column values are replaced with pseudonyms.
|
||||
Supported hash functions are described in the {link-java7-standard-names}[MessageDigest section] of the Java Cryptography Architecture Standard Algorithm Name Documentation. +
|
||||
+
|
||||
In the following example, `CzQMA0cB5K` is a randomly selected salt. +
|
||||
|
||||
----
|
||||
column.mask.hash.v2.SHA-256.with.salt.CzQMA0cB5K = inventory.orders.customerName, inventory.shipment.customerName
|
||||
----
|
||||
|
||||
If necessary, the pseudonym is automatically shortened to the length of the column.
|
||||
The connector configuration can include multiple properties that specify different hash algorithms and salts. +
|
||||
+
|
||||
Depending on the _hashAlgorithm_ used, the _salt_ selected, and the actual data set, the resulting data set might not be completely masked.
|
||||
|
||||
* Hashing strategy version 2: Use V2 to ensure that a value is hashed to the same result in different places or systems.
|
||||
|
||||
|[[postgresql-property-column-propagate-source-type]]<<postgresql-property-column-propagate-source-type, `+column.propagate.source.type+`>>
|
||||
|_n/a_
|
||||
|An optional, comma-separated list of regular expressions that match the fully-qualified names of columns. Fully-qualified names for columns are of the form _databaseName_._tableName_._columnName_, or _databaseName_._schemaName_._tableName_._columnName_. +
|
||||
|
@ -93,4 +93,5 @@ pkgonan,민규 김
|
||||
jiabao.sun,Jiabao Sun
|
||||
indraraj,Indra Shukla
|
||||
judahrand,Judah Rand
|
||||
zxpzlp,Xiaopu Zhu
|
||||
zxpzlp,Xiaopu Zhu
|
||||
unalsurmeli,Ünal Sürmeli
|
Loading…
Reference in New Issue
Block a user