Fixed a thinko in pseudonymizr.pl that disabled reuse of a registry file.

This commit is contained in:
Florian "flowdy" Heß 2016-09-03 15:25:39 +02:00
parent dc4a890a00
commit 15f9ed2a98
1 changed files with 17 additions and 12 deletions

View File

@ -22,11 +22,16 @@ of course.
This script pseudonymizes lines from standard input and writes them to This script pseudonymizes lines from standard input and writes them to
standard output. All strings that need to be replaced must be wrapped into standard output. All strings that need to be replaced must be wrapped into
a special marker, namely X{...}, where X can be any upper- or lowercase letter of the alphabet, denoting a certain category of information, e.g. "M" for a special marker, namely X{...}, where X can be any upper- or lowercase
member names. letter of the alphabet, denoting a certain category of information, e.g.
"M" for member names.
Caution with natural member names! Slightly differing content of M{...} clauses lead to completely different pseudonyms. In order not to render the tests Caution with natural member names! Slightly differing content of M{...}
irreversible and in disaccord with actual states, because the association of members and accounts is wrong, you should prefer using it for the unique and standard member ID, say the number in the member table. Where proper names appear in financial transfer information, use a different letter. clauses lead to completely different pseudonyms. In order not to render
the tests irreversible and in disaccord with actual states, because the
association of members and accounts is wrong, you should prefer using it
for the unique and standard member ID, say the number in the member table.
Where proper names appear in financial transfer information, use a different letter.
=head1 COMMAND =head1 COMMAND
@ -61,13 +66,13 @@ if ( $registry_fh ) {
my ($orig, $random); my ($orig, $random);
my ($h1, $h2, $assign) = $reverse_mode my ($h1, $h2, $assign) = $reverse_mode
? (\%subst => \%known, sub { ($random, $orig) = @_ }) ? (\%subst => \%known)
: (\%known => \%subst, sub { ($orig, $random) = @_ }) : (\%known => \%subst)
; ;
while ( $_ = <$registry_fh> ) { while ( $_ = <$registry_fh> ) {
chomp; chomp;
$assign->( split /\t/ ); my ($random, $orig) = split /\t/;
$h1->{ $random } = $orig; $h1->{ $random } = $orig;
} }
%$h2 = reverse %$h1; %$h2 = reverse %$h1;
@ -95,13 +100,13 @@ unless ( $reverse_mode ) {
sub pseudonymize { my $orig = shift; $subst{ $orig } //= do {{ sub pseudonymize { my $orig = shift; $subst{ $orig } //= do {{
# Zufallsstring der Länge $LENGTH erzeugen # Make random string with $LENGTH characters from @CHARS
my $random = join q{}, map { $CHARS[ rand 62 ] } 1 .. $LENGTH; my $random = join q{}, map { $CHARS[ rand @CHARS ] } 1 .. $LENGTH;
# If known, try anew (p = 1 : 62 ^ $LENGTH, i.e. p > 0) # If known, try anew (p = 1 : @CHARS ^ $LENGTH, i.e. p > 0)
$_ = $_ ? redo : $orig for $known{ $random }; $_ = defined($_) ? redo : $orig for $known{ $random };
# in $subst speichern # return to %subst cache
$random; $random;
}}; } }}; }