1123 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			1123 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/perl -w
 | 
						|
my $Id = '';
 | 
						|
#
 | 
						|
# findimagedupes - Finds visually similar or duplicate images
 | 
						|
#
 | 
						|
# Copyright © 2006-2022 by Jonathan H N Chin <code@jhnc.org>.
 | 
						|
#
 | 
						|
# This program is free software; you may redistribute it and/or modify
 | 
						|
# it under the terms of the GNU General Public License as published by
 | 
						|
# the Free Software Foundation; either version 3 of the License, or
 | 
						|
# (at your option) any later version.
 | 
						|
#
 | 
						|
# This program is distributed in the hope that it will be useful,
 | 
						|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
# GNU General Public License for more details.
 | 
						|
#
 | 
						|
# You should have received a copy of the GNU General Public License
 | 
						|
# along with this program; if not, write to the Free Software
 | 
						|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
#
 | 
						|
 | 
						|
use strict;
 | 
						|
 | 
						|
require 5.006_001;
 | 
						|
 | 
						|
use Cwd qw(realpath);
 | 
						|
use DB_File;
 | 
						|
use Digest::MD5 qw(md5_hex);
 | 
						|
use Getopt::Long qw(:config no_ignore_case require_order);
 | 
						|
use File::MimeInfo::Magic;
 | 
						|
use File::Temp qw(tempdir tempfile);
 | 
						|
use Graphics::Magick;
 | 
						|
use MIME::Base64;
 | 
						|
use Pod::Usage;
 | 
						|
 | 
						|
use Inline
 | 
						|
	C => 'DATA',
 | 
						|
	NAME => 'findimagedupes',
 | 
						|
	DIRECTORY => '/tmp';
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
#
 | 
						|
# option parsing
 | 
						|
#
 | 
						|
 | 
						|
# Package globals filled in by GetOptions() below and consulted by the
# rest of the program.
use vars qw(
	$null $add $collection
	@debug %debug
	@fpdb
	$merge $mergeFile
	$nocompare
	$program
	$quiet
	$rescan $recurse
	$script @scriptCode $scriptFile
	$threshold
	@verbosity
	$prune
);

# Defaults for options that are later used as numbers or parsed.
($add, $quiet, $threshold) = (0, 0, '90%');

my %opt;

# Callbacks for options needing more than a plain assignment.
# Getopt::Long invokes each one as (option-name, option-value).
my $setCollection = sub {
	# make the collection filename end in ".gqv" exactly once
	$collection = $_[1];
	$collection =~ s/([.]gqv)?$/.gqv/;
};
my $setMerge = sub {
	$merge = 1;
	$mergeFile = $_[1];
};
my $showVersion = sub {
	print "findimagedupes $Id\n";
	exit(0);
};

my $parsedOk = GetOptions(
	'0|null'                 => \$null,
	'a|add'                  => \$add,
	'c|collection=s'         => $setCollection,
	'd|debug=s'              => \@debug,
	'f|fingerprints|fp|db=s' => \@fpdb,
	'h|?|help'               => sub { pod2usage(-verbose => 1) },
	'man'                    => sub { pod2usage(-verbose => 2) },
	'M|merge=s'              => $setMerge,
	'n|no-compare'           => \$nocompare,
	'P|prune'                => \$prune,
	'p|program=s'            => \$program,
	'q|quiet+'               => \$quiet,
	'R|recurse'              => \$recurse,
	'r|rescan'               => \$rescan,
	's|script=s'             => \$script,
	't|threshold=s'          => \$threshold,
	'v|verbosity=s'          => \@verbosity,
	'version'                => $showVersion,

	'i|include=s'            => \@scriptCode,
	'I|include-file=s'       => \$scriptFile,
);

# unparseable command line: brief usage, then exit
pod2usage(-verbose => 0) unless $parsedOk;
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
 | 
						|
# Diagnostics gathered while validating the command line. They are
# collected first and reported together further down.
my @errors = ();
my @warnings = ();

# Exit status implied by the diagnostics collected so far:
# 2 if any error was recorded, else 1 if any warning was, else 0.
sub exitvalue {
	if (@errors) {
		return(2);
	}
	elsif (@warnings) {
		return(1);
	}
	return(0);
}

# Record an error; the arguments are concatenated into one message.
sub mkerr {
	my $message = join("", @_);
	push(@errors, $message);
}

# Record a warning; the arguments are concatenated into one message.
sub mkwarn {
	my $message = join("", @_);
	push(@warnings, $message);
}
 | 
						|
 | 
						|
# Output helpers honouring --quiet. $quiet counts how many times the
# option was given: 1 silences warnings, 2 or more silences errors too.

# print the arguments unless --quiet was given
sub nqprint {
	unless ($quiet) {
		print(@_);
	}
}

# emit a "Warning: ..." message unless --quiet was given
sub nqwarn {
	unless ($quiet) {
		warn("Warning: ", @_);
	}
}

# warn (subject to --quiet), then die
sub nqdie {
	nqwarn(@_);
	die;
}

# emit an "Error: ..." message (unless doubly quiet), then die
sub nqdie2 {
	if ($quiet < 2) {
		warn("Error: ", @_);
	}
	die;
}

# emit an "Error: ..." message (unless doubly quiet), then exit with
# status 3
sub nqexit {
	if ($quiet < 2) {
		warn("Error: ", @_);
	}
	exit(3);
}

# True only while fingerprint() is running, so the handler below can
# say where a segfault happened.
my $inFP = 0;
$SIG{SEGV} = sub {
	die("caught segfault") if $inFP;
	die();
};
 | 
						|
 | 
						|
# Copy the file named by --include-file ($scriptFile) to the currently
# selected filehandle, bracketed by BEGIN/END marker comments.
#
# Every line is re-terminated with "\n", so a file lacking a final
# newline still produces well-formed output.
# Dies with the system error text if the file cannot be opened.
sub printScriptFile {
	# --null sets $/ to "\0" for the whole program; the include file
	# is ordinary newline-delimited text, so read it with the default
	# record separator regardless.
	local $/ = "\n";

	print("# BEGIN USER FILE INCLUDE\n");
	open(my $userScript, '<', $scriptFile) or die("$!\n");
	while (defined(my $line = <$userScript>)) {
		chomp($line);
		print "$line\n";
	}
	close($userScript);
	print("# END USER FILE INCLUDE\n\n");
}
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
#
 | 
						|
# setup
 | 
						|
#
 | 
						|
 | 
						|
# Set from --verbosity: print fingerprints and/or md5 checksums.
my ($verb_fp, $verb_md5);

# How many "-" arguments were given; non-zero means the list of files
# is to be read from stdin.
my $read_input = scalar(grep { /^-$/ } @ARGV);

# XXX: can we tie these to save memory without breaking hv_iterinit() ?
my (%fpcache, %filelist);

# A single Graphics::Magick object, reused for every fingerprint.
my $image = Graphics::Magick->new;

# turn the list of --debug options into a lookup table
$debug{$_} = 1 for @debug;

# give every --include snippet exactly one trailing newline
for my $snippet (@scriptCode) {
	chomp($snippet);
	$snippet .= "\n";
}

# bracket the snippets with marker comments, if there are any
if (@scriptCode) {
	@scriptCode = (
		"# BEGIN USER CODE INCLUDE\n",
		@scriptCode,
		"# END USER CODE INCLUDE\n\n",
	);
}

# Preamble of the generated shell script: default VIEW and END
# functions, which user-included code may redefine.
my $scriptHeader = <<'EOD';
#!/bin/sh

VIEW(){
	echo "$@"
}
END(){
	:;
}

EOD

# Leading and trailing lines of a GQview collection file.
my $collectionHeader = <<'EOD';
#GQview collection
#created with findimagedupes
# sort: manual
EOD

my $collectionFooter = <<'EOD';
#end
EOD
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
#
 | 
						|
# sanity checks
 | 
						|
#
 | 
						|
 | 
						|
# +----------+
 | 
						|
# | warnings |
 | 
						|
# +----------+
 | 
						|
 | 
						|
# Option combinations that are harmless but probably not what the user
# meant. Each is recorded with mkwarn() and reported in one go below.

mkwarn("extra occurrences of \"-\" will be ignored")
	if $read_input > 1;

mkwarn("--null has no effect in this context")
	if $null and !$read_input;

mkwarn("--prune has no effect in this context")
	if $prune and !@fpdb;

if ($nocompare) {
	for my $ignored (
		[ $program, "--program" ],
		[ $script,  "--script"  ],
	) {
		my ($value, $option) = @$ignored;
		mkwarn("$option ignored because --no-compare given") if $value;
	}
}

if ($merge and $mergeFile eq '/dev/null') {
	# use DB_File memory database
	$mergeFile = undef;
	mkwarn("merge database is temporary");
}

mkwarn("--prune is pointless when --merge database is temporary")
	if $prune and $merge and !defined($mergeFile);

# report everything collected above, unless silenced
if (@warnings and !$quiet) {
	my @lines = map {"Warning: $_"} @warnings;
	warn( join("\n", @lines), "\n" );
}
 | 
						|
 | 
						|
# +--------+
 | 
						|
# | errors |
 | 
						|
# +--------+
 | 
						|
 | 
						|
# Fatal problems with the command line. Each is recorded with mkerr();
# if any were found, usage is shown and the program exits before any
# file has been touched.

# output files must not exist yet
mkerr("Output file for --collection exists: $collection")
	if $collection and -e($collection);

# A lone database without --merge may be created on demand; in every
# other case each database has to exist already.
for my $db (@fpdb) {
	next if -f($db);
	next if @fpdb==1 and !$merge;
	mkerr("File for --fingerprints does not exist: $db");
}

mkerr("Require --merge if using multiple fingerprint databases")
	if @fpdb>1 and !$merge;

mkerr("Output file for --merge exists: $mergeFile")
	if $merge and defined($mergeFile) and -e($mergeFile);

if ($program) {
	if (! -e($program)) {
		mkerr("File for --program does not exist: $program");
	}
	elsif (! -x($program)) {
		mkerr("File for --program not executable: $program");
	}
}

mkerr("Output file for --script exists: $script")
	if $script and -e($script);

if ($scriptFile) {
	if (! -f($scriptFile)) {
		mkerr("File for --include-file does not exist: $scriptFile");
	}
	elsif (! -r($scriptFile)) {
		mkerr("File for --include-file is not readable: $scriptFile");
	}
}

# Convert --threshold to a number of bits. The pattern only admits
# the units "", "%" and "b"; anything unparseable becomes -1 so that
# the range check below rejects it.
if ($threshold =~ m/^\s*(\d+(?:[.]\d+)?)\s*([%b]?)\s*$/) {
	my ($thres_val, $thres_unit) = ($1, $2);
	if ($thres_unit =~ m/^b/i) {
		# already in bits
		$threshold = int($thres_val);
	}
	else {
		# percentage (explicit "%" or no unit) to bits
		$threshold = int(2.56 * (100 - $thres_val));
	}
}
else {
	# error
	$threshold = -1;
}
mkerr("--threshold takes values between 0.0% .. 100.0% or 0b .. 256b")
	if $threshold>256 or $threshold<0;

# --verbosity may be repeated and/or given comma-separated lists
for my $item (split(",", join(",", @verbosity))) {
	if ($item =~ /^(fingerprint|fp)$/) {
		$verb_fp = 1;
	}
	elsif ($item =~ /^md5$/) {
		$verb_md5 = 1;
	}
	else {
		mkerr("unknown option to --verbosity: $item");
	}
}

if (@errors) {
	# doubly quiet: exit without printing anything
	exit(exitvalue()) if $quiet >= 2;

	my @lines = map {"Error: $_"} @errors;
	pod2usage(
		-verbose => 0,
		-exitval => exitvalue(),
		-msg => join("\n", @lines),
	);
}
 | 
						|
 | 
						|
# +-------------------------------------------------+
 | 
						|
# | last chance to abort without altering any files |
 | 
						|
# +-------------------------------------------------+
 | 
						|
 | 
						|
# No files, no databases and no merge requested: there is no work.
if (!@ARGV and !@fpdb and !$merge) {
	# doubly quiet: exit without printing anything
	exit(exitvalue()) if $quiet >= 2;

	# stay silent if warnings were raised or --quiet was given
	unless (@warnings or $quiet) {
		warn("Nothing to do!\n");
	}
	pod2usage(
		-verbose => 0,
		-exitval => exitvalue(),
	);
}
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
#
 | 
						|
# load fingerprint cache
 | 
						|
#
 | 
						|
 | 
						|
# Files whose stored fingerprints disagree between databases; they are
# fingerprinted afresh when no files are named on the command line.
my @regen = ();

# Read every --fingerprints database into %fpcache.
for my $db (@fpdb) {
	my %data;
	tie(%data, 'DB_File', $db) or nqexit("tie($db): $!\n");

	while (my ($file, $fp) = each %data) {
		# with --prune, forget files that no longer exist
		next if ($prune && !-f($file));

		if (!exists $fpcache{$file}) {
			# first sighting of this file
			$fpcache{$file} = $fp;
			next;
		}

		# seen before with the same fingerprint: nothing to do
		next if $fpcache{$file} eq $fp;

		# fingerprint mismatch, force regeneration
		push @regen, $file;
		delete $fpcache{$file};
	}

	untie(%data);
}
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
#
 | 
						|
# build file list
 | 
						|
#
 | 
						|
 | 
						|
# Output database. When $rw is set, process_file() stores every
# fingerprint it obtains in %mergelist.
my %mergelist;
my $rw = 0;

if ($merge) {
	# $mergeFile is undef when --merge=/dev/null was given, which
	# makes DB_File use an in-memory database.
	my $mergeName = defined($mergeFile) ? $mergeFile : '<memory>';
	tie(%mergelist, 'DB_File', $mergeFile)
		or nqexit("tie($mergeName): $!\n");
	%mergelist = %fpcache;
	$rw = 1;
}
elsif (@fpdb==1) {
	# a single database without --merge is updated in place
	tie(%mergelist, 'DB_File', $fpdb[0]) or nqexit("tie($fpdb[0]): $!\n");
	# rewrite the contents only if --prune may have dropped entries
	%mergelist = %fpcache if $prune;
	$rw = 1;
}

# unbuffered output
$| = 1;
# with --null, filenames on stdin are NUL-delimited
$/ = "\0" if $null;

# Process the files named on the command line or, failing that, the
# files whose stored fingerprints conflicted.
for my $arg (@ARGV ? @ARGV : @regen) {
	classify($arg);
}

untie(%mergelist);

finddupes() unless $nocompare;

exit 0;
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
 | 
						|
# Handle one regular file: obtain its fingerprint (from the cache or by
# computing it), record it, and print any --verbosity output.
#
#   $path - path of the file, as given or as found in a directory
#
# Side effects: moves the file's entry from %fpcache to %filelist when
# a fingerprint is available, and stores it in %mergelist if $rw is
# set. Dies (via nqdie2) if --verbosity=md5 is in effect and the file
# cannot be opened.
sub process_file {
	my ($path) = @_;

	# normalize to absolute canonical path
	my $file = realpath($path);
	if (!$file) {
		nqwarn("skipping bogus file: $path\n");
		return;
	}

	my $fp;
	if ($rescan or !exists $fpcache{$file}) {
		# unknown file, or --rescan: compute the fingerprint
		$fp = fingerprint($file);
	}
	elsif ($add) {
		# known file (the branch above failed, so it is cached)
		$fp = $fpcache{$file};
	}

	if ($fp) {
		$filelist{$file} = $fp;
		delete $fpcache{$file};
		$mergelist{$file} = $fp if $rw;
	}

	if ($verb_fp) {
		my $known = ( $filelist{$file} || $fpcache{$file} );
		if ($known) {
			print(encode_base64($known, ""), "  $file\n");
		}
		else {
			nqwarn("can't get fingerprint: $file\n");
		}
	}

	if ($verb_md5) {
		# Three-argument open: the name comes from the filesystem or
		# stdin and must never be interpreted as a mode or a command.
		open(my $in, '<', $file) or nqdie2("open($file): $!\n");
		binmode($in);
		my $digest = Digest::MD5->new->addfile($in)->hexdigest;
		if ($digest) {
			print("$digest  $file\n");
		}
		else {
			nqwarn("can't get md5sum: $file\n");
		}
		close($in);
	}
}
 | 
						|
 | 
						|
# XXX: This function is complicated by two things:
#	- Historically we didn't recurse and it would be nice to
#		retain nonrecursion as an option.
#	- We need to process "." and ".." when they are given
#		explicitly on the command line.
#	Perhaps we could cleanup the code by rewriting using
#		something like the "-" code?
#
# Dispatch one command-line argument (or one name read from stdin, or
# one directory entry):
#	"-"        read further names from stdin (first occurrence only)
#	directory  process the regular files inside it, descending into
#	           subdirectories only with --recurse
#	file       hand over to process_file()
#	other      warn and skip
sub classify {
	my ($file) = @_;

	if ($file eq "-") {
		# silently ignore any extra occurrences of "-"
		# (we already reported them at startup)
		return unless $read_input;
		$read_input = 0;

		# a named lexical keeps the caller's $_ intact
		while (defined(my $name = <STDIN>)) {
			chomp($name);
			classify($name);
		}
	}
	elsif (-d($file) and !-l($file)) {
		# don't follow directory symlinks, to prevent looping
		my $dir;
		if (!opendir($dir, $file)) {
			# opendir reports its failure in $!, not $@
			nqwarn("can't process directory $file: $!\n");
			return;
		}

		# Read the whole listing and release the handle before
		# recursing, so deep trees do not hold directories open.
		my @entries = readdir($dir);
		closedir($dir);

		for my $entry (@entries) {
			my $path = "$file/$entry";
			if (-d($path) and !-l($path)) {
				# skip . and ..
				next if $entry eq '.' or $entry eq '..';
				classify($path) if $recurse;
			}
			elsif (-f($path)) {
				process_file($path);
			}
		}
	}
	elsif (-f($file)) {
		# symlinks are okay for normal files
		process_file($file);
	}
	else {
		# skip anything else (devices, etc)
		nqwarn("skipping file: $file\n");
	}
}
 | 
						|
 | 
						|
# ----------------------------------------------------------------------
 | 
						|
 | 
						|
# Check the status returned by a Graphics::Magick method call.
# A false status means success. Statuses mentioning "Warning 315:" or
# "Warning 330:" are tolerated; anything else is fatal.
sub try {
	my ($err) = @_;

	return unless $err;
	return if $err =~ /Warning (315|330):/;

	die("imagemagick problem: $err\n");
}
 | 
						|
 | 
						|
# Compute the visual fingerprint of an image file.
#
# The file is read through the shared Graphics::Magick object $image,
# reduced step by step to a 16x16 monochrome picture, and the
# resulting image data is returned as a blob.
#
#   $file   - path of the file to fingerprint
#   returns - the fingerprint blob, or undef (after a warning via
#             nqwarn) if the file could not be fingerprinted
#
# Dies via nqdie2 if the standard streams cannot be saved or restored.
sub fingerprint {
	my ($file) = @_;
	my $blob;

	# imagemagick doesn't always catch output from the programs
	# it spawns, so we have to clean up for it...
	open(my $savedOut, ">&", \*STDOUT) or nqdie2("dup(STDOUT): $!");
	open(my $savedErr, ">&", \*STDERR) or nqdie2("dup(STDERR): $!");
	open(STDOUT, ">", "/dev/null");
	open(STDERR, ">", "/dev/null");

	# tell the SEGV handler that a crash now is a fingerprinting crash
	$inFP = 1;
	my $ok = eval {
		if ((mimetype($file)||'') =~ /^(audio|video)/) {
			die("not fingerprinting A/V file: $file\n");
		}

		if (!$image->Ping($file)) {
			die("not fingerprinting unknown-type file: $file\n");
		}

		try($image->Read($file));

		if ($#$image<0) {
			die("fingerprint: not enough image data for $file");
		}
		else {
			# keep only the first image that was read
			$#$image = 0;
		}

		try($image->Sample("160x160!"));
		try($image->Modulate(saturation=>-100));
		try($image->Blur(radius=>3,sigma=>99));
		try($image->Normalize());
		try($image->Equalize());
		try($image->Sample("16x16"));
		try($image->Threshold());
		try($image->Set(magick=>'mono'));

		($blob) = $image->ImageToBlob();
		if (!defined($blob)) {
			die("This can't happen! undefined blob for: $file\n");
		}

		# explicit success marker, rather than relying on the value
		# of the preceding "if" statement
		1;
	};
	# capture the failure reason before any later code can reset $@
	my $problem = $@;

	$inFP = 0;
	# empty the shared image object for the next file
	@$image = ();

	open(STDOUT, ">&", $savedOut) or nqdie2("restore(STDOUT): $!");
	open(STDERR, ">&", $savedErr) or nqdie2("restore(STDERR): $!");
	close($savedOut);
	close($savedErr);

	if ($ok) {
		return $blob;
	}
	else {
		nqwarn($problem);
		return undef;
	}
}
 | 
						|
 | 
						|
# Compare all fingerprints, group matching files into sets, and report
# each set: as a GQview collection (--collection) and/or as a shell
# script that is either saved (--script) or piped straight to /bin/sh.
#
# Reads %fpcache, %filelist, $threshold, $add and the output options;
# sets $program to 'VIEW' if no --program was given.
# Dies (via nqdie2) if an output file or the shell cannot be opened.
sub finddupes {
	# NOTE(review): diffbits() is defined outside this section
	# (presumably in the Inline C code). From its use here it returns
	# a flat list of triples; the first two items of each triple are
	# positions in the key order of (%filelist, %fpcache). The third
	# is not used here -- presumably the bit difference; confirm.
	my @matches = diffbits(\%fpcache, \%filelist, $threshold, $add);

	# %set: position -> 1, later replaced by the file name
	# %ptr: position -> position of the representative of its set
	# %val: representative -> list of all members of its set
	my (%set, %ptr, %val);

	while (@matches) {
		my ($first, $second) = splice(@matches, 0, 3);
		$set{$first} = 1;
		$set{$second} = 1;

		# cf. debian bug #87013

		if (!defined($ptr{$first}) and !defined($ptr{$second})) {
			# neither seen before: start a new set led by $first
			$ptr{$first} = $first;
			push @{$val{$first}}, $first, $second;
			$ptr{$second} = $first;
			$#{$val{$second}} = 0;
		}
		elsif (defined($ptr{$first}) and !defined($ptr{$second})) {
			# $second joins the set $first already belongs to
			push @{$val{$ptr{$first}}}, $second;
			$ptr{$second} = $ptr{$first};
			$#{$val{$second}} = 0;
		}
		elsif (!defined($ptr{$first}) and defined($ptr{$second})) {
			# $first joins the set $second already belongs to
			push @{$val{$ptr{$second}}}, $first;
			$ptr{$first} = $ptr{$second};
			$#{$val{$first}} = 0;
		}
		elsif ($ptr{$first} ne $ptr{$second}) {
			# two distinct sets: fold $second's set into $first's
			my $absorbed = $val{$ptr{$second}};
			push @{$val{$ptr{$first}}}, @{$absorbed};
			for my $member (@{$absorbed}) {
				$ptr{$member} = $ptr{$first};
			}
			$#$absorbed = 0;
		}
		# else both are in the same set already
	}

	# Replace the markers in %set by file names. This relies on
	# keys() yielding the same order that diffbits() saw.
	my $cnt = 0;
	for my $k (keys %filelist, keys %fpcache) {
		$set{$cnt} = $k if defined $set{$cnt};
		$cnt++;
	}

	# only representatives (entries pointing at themselves) own a list
	my @roots = grep { $ptr{$_} eq $_ } keys %ptr;

	# FIXME: What is the proper format of collection files?
	#	It seems to be poorly defined, and gthumb parses them
	#	differently from gqview itself. In particular, gthumb
	#	seems to misparse comment lines, and the sort pragma
	#	it defines seems to be fragile wrt whitespace.
	if ($collection) {
		# three-argument open: the name is used exactly as given
		open(my $out, '>', $collection)
			or nqdie2("open(> $collection): $!\n");

		print {$out} $collectionHeader;
		for my $root (@roots) {
			for my $member ( @{$val{$root}} ) {
				my $name = $set{$member};

				# such names cannot be represented in the file
				if ( $name =~ /[\n"]/s ) {
					nqwarn("excluded from $collection: $name\n");
				}
				else {
					print {$out} qq{"$name"\n};
				}
			}
		}
		print {$out} $collectionFooter;
		close($out);
	}

	my $commands;
	if (!$script) {
		# no --script: run the generated commands immediately
		open($commands, '|-', '/bin/sh')
			or nqdie2("open(| /bin/sh): $!\n");
	}
	elsif ($script eq '-') {
		# documented special case: "-" writes the script to stdout
		open($commands, '>&', \*STDOUT)
			or nqdie2("open(> $script): $!\n");
	}
	else {
		open($commands, '>', $script)
			or nqdie2("open(> $script): $!\n");
	}

	# printScriptFile() and the plain prints below write to the
	# selected handle; remember the old one so it can be restored.
	my $previous = select($commands);

	if ($script or $program or !$collection) {
		$program = 'VIEW' unless $program;
		print $scriptHeader;
		printScriptFile() if $scriptFile;
		print @scriptCode if @scriptCode;
		for my $root (@roots) {
			# one command per set, one shell-quoted file per
			# continuation line
			print join(" \\\n\t",
				$program,
				( map { quotemeta $set{$_} } @{$val{$root}} ),
				";\n"
			);
		}
		print "\nEND;\n\n";
	}

	select($previous);
	close($commands);
}
 | 
						|
 | 
						|
# ======================================================================
 | 
						|
 | 
						|
__DATA__
 | 
						|
 | 
						|
=encoding UTF-8
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
findimagedupes - Finds visually similar or duplicate images
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
findimagedupes [option ...] [--] [ - | [file ...] ]
 | 
						|
 | 
						|
   Options:
 | 
						|
      -f, --fingerprints=FILE    -c, --collection=FILE
 | 
						|
      -M, --merge=FILE           -p, --program=PROGRAM
 | 
						|
      -P, --prune                -s, --script=FILE
 | 
						|
      -a, --add                  -i, --include=TEXT
 | 
						|
      -r, --rescan               -I, --include-file=FILE
 | 
						|
      -n, --no-compare
 | 
						|
                                 -q, --quiet
 | 
						|
      -t, --threshold=AMOUNT     -v, --verbosity=LIST
 | 
						|
 | 
						|
      -0, --null                 -h, --help
 | 
						|
      -R, --recurse                  --man
 | 
						|
                                     --version
 | 
						|
 | 
						|
With no options, compares the specified files and does not use nor
 | 
						|
update any fingerprint database.
 | 
						|
 | 
						|
Directories of images may be specified instead of individual files;
 | 
						|
Sub-directories of these are not searched unless --recurse is used.
 | 
						|
 | 
						|
=head1 INSTALLATION
 | 
						|
 | 
						|
If you use linux, your distribution may include a prepackaged version.
 | 
						|
For example, Debian and Ubuntu do.
 | 
						|
 | 
						|
Otherwise, at a minimum you'll need Perl with the modules listed at the
 | 
						|
top of the findimagedupes script. Also the GraphicsMagick package.
 | 
						|
 | 
						|
You may need to change Inline's C<DIRECTORY> to point somewhere else.
 | 
						|
Read the Inline module documentation for details.
 | 
						|
 | 
						|
=head1 OPTIONS
 | 
						|
 | 
						|
=over 8
 | 
						|
 | 
						|
=item B<-0>, B<--null>
 | 
						|
 | 
						|
If a file C<-> is given, a list of files is read from stdin.
 | 
						|
 | 
						|
Without B<-0>, the list is specified one file per line, such as
 | 
						|
produced by find(1) with its C<-print> option.
 | 
						|
 | 
						|
With B<-0>, the list is expected to be null-delimited, such as
 | 
						|
produced by find(1) with its C<-print0> option.
 | 
						|
 | 
						|
=item B<-a>, B<--add>
 | 
						|
 | 
						|
Only look for duplicates of files specified on the commandline.
 | 
						|
 | 
						|
Matches are also sought in any fingerprint databases specified.
 | 
						|
 | 
						|
=item B<-c>, B<--collection>=I<FILE>
 | 
						|
 | 
						|
Create GQView collection I<FILE>.gqv of duplicates.
 | 
						|
 | 
						|
The program attempts to produce well-formed collections.
 | 
						|
In particular, it will print a warning and exclude any file
 | 
						|
whose name contains newline or doublequote. (In this situation,
 | 
						|
gqview(1) seems to create a .gqv collection file that it
 | 
						|
silently fails to read back in properly.)
 | 
						|
 | 
						|
=item B<-d>, B<--debug>=I<OPTS>
 | 
						|
 | 
						|
Enable debugging output. Options I<OPTS> are subject to change.
 | 
						|
See the program source for details.
 | 
						|
 | 
						|
=item B<-f>, B<--fingerprints>=I<FILE>
 | 
						|
 | 
						|
Use I<FILE> as fingerprint database.
 | 
						|
 | 
						|
May be abbreviated as B<--fp> or B<--db>.
 | 
						|
 | 
						|
This option may be given multiple times when B<--merge> is used.
 | 
						|
(Note: I<FILE> could contain commas, so multiple databases may
 | 
						|
not be specified as a single comma-delimited list.)
 | 
						|
 | 
						|
=item B<-h>, B<--help>
 | 
						|
 | 
						|
Print usage and option sections of this manual, then exit.
 | 
						|
 | 
						|
=item B<-i>, B<--include>=I<TEXT>
 | 
						|
 | 
						|
I<TEXT> is Bourne-shell code to customise B<--script>.
 | 
						|
 | 
						|
It is executed after any code included using B<--include-file>.
 | 
						|
 | 
						|
May be given multiple times. Code will be concatenated.
 | 
						|
 | 
						|
=item B<-I>, B<--include-file>=I<FILE>
 | 
						|
 | 
						|
I<FILE> is a file containing Bourne-shell code to customise 
 | 
						|
B<--script>. 
 | 
						|
 | 
						|
It is executed before any code included using B<--include>.
 | 
						|
 | 
						|
=item B<--man>
 | 
						|
 | 
						|
Display the full documentation, using default pager, then exit.
 | 
						|
 | 
						|
=item B<-M>, B<--merge>=I<FILE>
 | 
						|
 | 
						|
Takes any databases specified with B<--fingerprints>
 | 
						|
and merges them into a new database called I<FILE>.
 | 
						|
Conflicting fingerprints for an image will cause one of two actions to occur:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item 1.
 | 
						|
 | 
						|
If the image does not exist, then the entry is elided.
 | 
						|
 | 
						|
=item 2.
 | 
						|
 | 
						|
If the image does exist, then the old information is ignored
 | 
						|
and a new fingerprint is generated from scratch.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
By default, image existence is not checked unless there is a conflict.
 | 
						|
To force removal of defunct data, use B<--prune> as well.
 | 
						|
 | 
						|
A list of image files is not required if this option is used.
 | 
						|
However, if a list is provided, fingerprint data for the files
 | 
						|
will be copied or (re)generated as appropriate.
 | 
						|
 | 
						|
When B<--merge> is used, the original fingerprint databases are not modified,
 | 
						|
even if B<--prune> is used.
 | 
						|
 | 
						|
If multiple fingerprint databases are to be used but the merge output is
 | 
						|
not required, specify: B<--merge>=I</dev/null>
 | 
						|
 | 
						|
See also: B<--rescan>
 | 
						|
 | 
						|
=item B<-n>, B<--no-compare>
 | 
						|
 | 
						|
Don't look for duplicates.
 | 
						|
 | 
						|
=item B<-p>, B<--program>=I<PROGRAM>
 | 
						|
 | 
						|
Launch I<PROGRAM> (in foreground) to view each set of dupes.
 | 
						|
 | 
						|
I<PROGRAM> must be the full path to an existing executable file.
 | 
						|
For more flexibility, see the B<--include> and B<--include-file>
 | 
						|
options.
 | 
						|
 | 
						|
See also: B<--script>
 | 
						|
 | 
						|
=item B<-P>, B<--prune>
 | 
						|
 | 
						|
Remove fingerprint data for images that do not exist any more.
 | 
						|
Has no effect unless B<--fingerprints> or B<--merge> is also used.
 | 
						|
 | 
						|
Databases specified by B<--fingerprints> are only modified if
 | 
						|
B<--merge> is not used.
 | 
						|
 | 
						|
=item B<-q>, B<--quiet>
 | 
						|
 | 
						|
This option may be given multiple times.
 | 
						|
 | 
						|
Usually, progress, warning and error messages are printed on stderr.
 | 
						|
If this option is given, warnings are not displayed.
 | 
						|
If it is given twice or more, errors are not displayed either.
 | 
						|
 | 
						|
Information requested with B<--verbosity> is still displayed.
 | 
						|
 | 
						|
=item B<-R>, B<--recurse>
 | 
						|
 | 
						|
Use B<--recurse> to search recursively for images inside
 | 
						|
subdirectories. For historical reasons, the default is to not do so.
 | 
						|
To avoid looping, symbolic links to directories are never followed.
 | 
						|
 | 
						|
=item B<-r>, B<--rescan>
 | 
						|
 | 
						|
(Re)generate all fingerprints, not just any that are unknown.
 | 
						|
 | 
						|
If used with B<--add>, only the fingerprints of files specified
 | 
						|
on the commandline are (re)generated.
 | 
						|
 | 
						|
Implies B<--prune>.
 | 
						|
 | 
						|
=item B<-s>, B<--script>=I<FILE>
 | 
						|
 | 
						|
When used with B<--program>, I<PROGRAM> is not launched immediately.
 | 
						|
Instead sh(1)-style commands are saved to I<FILE>.
 | 
						|
This script may be edited (if desired) and then executed manually.
 | 
						|
 | 
						|
When used without B<--program>, two skeletal shell functions
 | 
						|
are generated: C<VIEW> simply echo(1)s its arguments; 
 | 
						|
the empty function C<END> runs after files-processing is finished.
 | 
						|
 | 
						|
To display to terminal (or feed into a pipe), use C<-> as I<FILE>.
 | 
						|
 | 
						|
If B<--script> is not given, the script is still created in memory and
 | 
						|
is executed immediately. So, with the default VIEW and END functions,
 | 
						|
lines containing sets of duplicates are displayed. See: B<EXAMPLES>
 | 
						|
 | 
						|
See also: B<--include>, B<--include-file>
 | 
						|
 | 
						|
=item B<-t>, B<--threshold>=I<AMOUNT>
 | 
						|
 | 
						|
Use I<AMOUNT> as threshold of similarity.
 | 
						|
Append C<%> to give a percentage or C<b> for bits.
 | 
						|
For backwards compatibility, a number with no unit is treated as
 | 
						|
a percentage. Percentage is the minimum required for a match;
 | 
						|
bits is the maximum that may differ: bits=floor(2.56(100-percent))
 | 
						|
 | 
						|
A fractional part may be given but it is only accurate to 100/256
 | 
						|
(0.390625) for percentage and it is meaningless for C<bits>.
 | 
						|
Default is C<90%> (C<25b>) if not specified.
 | 
						|
 | 
						|
=item B<-v>, B<--verbosity>=I<LIST>
 | 
						|
 | 
						|
Enable display of informational messages to stdout,
 | 
						|
where I<LIST> is a comma-delimited list of:
 | 
						|
 | 
						|
=over 8
 | 
						|
 | 
						|
=item B<md5>
 | 
						|
 | 
						|
Display the checksum for each file, as per md5sum(1).
 | 
						|
 | 
						|
=item B<fingerprint> | B<fp>
 | 
						|
 | 
						|
Display the base64-encoded fingerprint of each file.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
Alternatively, B<--verbosity> may be given multiple times, and accumulates.
 | 
						|
Note that this may not be sensible. For example, to be useful,
 | 
						|
B<md5> output probably should not be merged with B<fingerprint> data.
 | 
						|
 | 
						|
=item B<--version>
 | 
						|
 | 
						|
Display the program version, then exit.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
B<findimagedupes> compares a list of files for visual similarity.
 | 
						|
 | 
						|
=over 1
 | 
						|
 | 
						|
=item To calculate an image fingerprint:
 | 
						|
 | 
						|
 1) Read image.
 | 
						|
 2) Resample to 160x160 to standardize size.
 | 
						|
 3) Grayscale by reducing saturation.
 | 
						|
 4) Blur a lot to get rid of noise.
 | 
						|
 5) Normalize to spread out intensity as much as possible.
 | 
						|
 6) Equalize to make image as contrasty as possible.
 | 
						|
 7) Resample again down to 16x16.
 | 
						|
 8) Reduce to 1bpp.
 | 
						|
 9) The fingerprint is this raw image data.
 | 
						|
 | 
						|
=item To compare two images for similarity:
 | 
						|
 | 
						|
 1) Take fingerprint pairs and xor them.
 | 
						|
 2) Compute the percentage of 0 bits (bits that agree) in the result.
 | 
						|
 3) If percentage exceeds threshold, declare files to be similar.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
 | 
						|
=head1 RETURN VALUE
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item B<0>
 | 
						|
 | 
						|
Success.
 | 
						|
 | 
						|
=item B<1>
 | 
						|
 | 
						|
Usage information was requested (B<--help> or B<--man>), or there
 | 
						|
were warnings.
 | 
						|
 | 
						|
=item B<2>
 | 
						|
 | 
						|
Invalid options or arguments were provided.
 | 
						|
 | 
						|
=item B<3>
 | 
						|
 | 
						|
Runtime error.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
Any other return values indicate an internal error of some sort.
 | 
						|
 | 
						|
=head1 DIAGNOSTICS
 | 
						|
 | 
						|
To be written.
 | 
						|
 | 
						|
=head1 EXAMPLES
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item C<<<< findimagedupes -R -- . >>>>
 | 
						|
 | 
						|
Look for and compare images in all subdirectories of the current directory.
 | 
						|
 | 
						|
=item C<<<< find . -type f -print0 | findimagedupes -0 -- - >>>>
 | 
						|
 | 
						|
Same as above.
 | 
						|
 | 
						|
=item C<<<< findimagedupes -i 'echo "# sort: manual"' -i 'VIEW(){ for f in "$@"; do echo \"file://$f\"; done; }' -- *.jpg > dupes.gqv >>>>
 | 
						|
 | 
						|
Use script hooks to produce collection-style output
 | 
						|
suitable for use with gthumb(1).
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 FILES
 | 
						|
 | 
						|
To be written.
 | 
						|
 | 
						|
=head1 BUGS
 | 
						|
 | 
						|
There is a memory leak somewhere.
 | 
						|
 | 
						|
Killing the program may corrupt the fingerprint database(s).
 | 
						|
 | 
						|
The program does not lock the fingerprint database although concurrent
 | 
						|
write access to it is unsafe.
 | 
						|
 | 
						|
GraphicsMagick does not expose its auto-orient functionality to Perl.
 | 
						|
 | 
						|
Changing the version of GraphicsMagick invalidates fingerprint databases.
 | 
						|
 | 
						|
 | 
						|
=head1 NOTES
 | 
						|
 | 
						|
Apart from B<-R>, directory recursion is deliberately not implemented:
 | 
						|
Composing a file-list and using it with C<-> is a more flexible approach.
 | 
						|
 | 
						|
Repetitions are culled before comparisons take place, so a commandline
 | 
						|
like C<findimagedupes a.jpg a.jpg> will not produce a match.
 | 
						|
 | 
						|
The program needs a lot of memory. Probably not an issue, unless your
 | 
						|
machine has less than 128MB of free RAM and you try to compare more than
 | 
						|
a hundred-thousand files at once (and the program will run quite slowly
 | 
						|
with that many files anyway---about eight hours initially to generate
 | 
						|
fingerprints and another ten minutes to do the actual comparing).
 | 
						|
 | 
						|
Fingerprinting images is a bottleneck but unfortunately the program was
 | 
						|
not written with parallel processing in mind. For a workaround, see:
 | 
						|
https://github.com/jhnc/findimagedupes/issues/9
 | 
						|
 | 
						|
=head1 SEE ALSO
 | 
						|
 | 
						|
find(1), md5sum(1)
 | 
						|
 | 
						|
B<gqview> - GTK based multiformat image viewer
 | 
						|
 | 
						|
B<gthumb> - an image viewer and browser for GNOME
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Jonathan H N Chin <code@jhnc.org>
 | 
						|
 | 
						|
=head1 COPYRIGHT AND LICENSE
 | 
						|
 | 
						|
 Copyright © 2006-2022 by Jonathan H N Chin <code@jhnc.org>.
 | 
						|
 | 
						|
 This program is free software; you may redistribute it and/or modify
 | 
						|
 it under the terms of the GNU General Public License as published by
 | 
						|
 the Free Software Foundation; either version 3 of the License, or
 | 
						|
 (at your option) any later version.
 | 
						|
 | 
						|
 This program is distributed in the hope that it will be useful,
 | 
						|
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
 GNU General Public License for more details.
 | 
						|
 | 
						|
 You should have received a copy of the GNU General Public License
 | 
						|
 along with this program; if not, write to the Free Software
 | 
						|
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
 | 
						|
=head1 HISTORY
 | 
						|
 | 
						|
This code has been written from scratch. However it owes its existence
 | 
						|
to B<findimagedupes> by Rob Kudla and uses the same duplicate-detection
 | 
						|
algorithm.
 | 
						|
 | 
						|
=cut
 | 
						|
 | 
						|
__C__
 | 
						|
 | 
						|
/* efficient bit-comparison */
 | 
						|
 | 
						|
#include <stdint.h>
 | 
						|
#include <string.h>
 | 
						|
 | 
						|
/* Number of entries in the bit-count lookup table: one per possible 16-bit value (2^16). */
#define LOOKUP_SIZE 65536
 | 
						|
/* Number of 16-bit words that make up one fingerprint (16 x 16 bits = 256 bits). */
#define FP_CHUNKS 16
 | 
						|
 | 
						|
/* A single fingerprint, stored as FP_CHUNKS 16-bit words so it can be XORed one word at a time. */
typedef uint16_t FP[FP_CHUNKS];
 | 
						|
 | 
						|
/*
 * diffbits(oldfiles, newfiles, threshold, limit)
 *
 * Compare fingerprints pairwise and report every pair that differs in at
 * most `threshold` bits.
 *
 * Parameters:
 *   oldfiles, newfiles - references to Perl hashes. Only the hash VALUES
 *                        are used; each is read as a packed fingerprint of
 *                        sizeof(FP) bytes. A value shorter than that is
 *                        zero-padded instead of being read past its end.
 *   threshold          - maximum number of differing bits allowed for a
 *                        match; must not exceed the number of bits in an FP.
 *   limit              - when non-zero, only pairs whose first member comes
 *                        from newfiles are examined (old-vs-old pairs are
 *                        skipped); when zero, all pairs are examined.
 *
 * Returns (on the Perl stack) a flat list of (i, j, bits) triples with
 * i < j. Indices refer to the combined array built here: the newfiles
 * values in hash-iteration order first, followed by the oldfiles values
 * in hash-iteration order. `bits` is the number of differing bits.
 * Returns an empty list when there are fewer than two fingerprints.
 *
 * Croaks when the threshold is out of range, when an argument is not a
 * hash reference, when memory cannot be allocated, or when a hash yields
 * fewer entries than hv_iterinit() announced.
 */
void diffbits (SV* oldfiles, SV* newfiles, unsigned int threshold, unsigned limit) {
	FP *the_data, *a, *b;
	unsigned int lookup[LOOKUP_SIZE];
	unsigned int i, j, k, m, bits, n_old, n_new;
	unsigned int numkeys = 0;
	HV *oldhash;
	HV *newhash;
	HV *src;
	HE *entry;
	SV *sv_val;
	const char *fp_bytes;
	STRLEN fp_len;
	Inline_Stack_Vars;

	/* threshold is unsigned, so only the upper bound can be violated */
	if (threshold > 8*sizeof(FP)) {
		croak("ridiculous threshold specified");
	}

	/* pack fingerprints into C array */
	/* partly lifted from Inline::C-Cookbook */

	if (! SvROK(newfiles)) {
		croak("newfiles is not a reference");
	}
	if (SvTYPE(SvRV(newfiles)) != SVt_PVHV) {
		croak("newfiles is not a hash reference");
	}
	newhash = (HV *)SvRV(newfiles);
	n_new = hv_iterinit(newhash);

	if (! SvROK(oldfiles)) {
		croak("oldfiles is not a reference");
	}
	if (SvTYPE(SvRV(oldfiles)) != SVt_PVHV) {
		croak("oldfiles is not a hash reference");
	}
	oldhash = (HV *)SvRV(oldfiles);
	n_old = hv_iterinit(oldhash);

	numkeys = n_new+n_old;
	if (numkeys<2) {
		/* nothing to compare: return an empty list */
		/* (this also avoids a zero-sized malloc) */
		Inline_Stack_Void;
	}
	the_data = (FP *)malloc(numkeys*sizeof(FP));
	if (!the_data) {
		croak("malloc failed");
	}

	/* new fingerprints occupy slots 0..n_new-1, old ones the remainder */
	for (i = 0; i<numkeys; i++) {
		src = (i<n_new) ? newhash : oldhash;
		entry = hv_iternext(src);
		if (!entry) {
			/* croak() does not return, so release the buffer first */
			free(the_data);
			croak("hash iteration ended early");
		}
		sv_val = hv_iterval(src, entry);
		fp_bytes = SvPV(sv_val, fp_len);
		if (fp_len >= sizeof(FP)) {
			memcpy(the_data+i, fp_bytes, sizeof(FP));
		}
		else {
			/* short value: never read beyond the Perl string */
			memset(the_data+i, 0, sizeof(FP));
			memcpy(the_data+i, fp_bytes, fp_len);
		}
	}

	/* initialise lookup table: lookup[v] = number of 1 bits in v */
	/* cf. https://graphics.stanford.edu/~seander/bithacks.html */
	/* the recurrence needs an explicit base case; entry 0 must be seeded */
	lookup[0] = 0;
	for (i=1; i<LOOKUP_SIZE; i++) {
		lookup[i] = lookup[i/2] + (i&1);
	}

	/* look for matches */
	Inline_Stack_Reset;
	m = (limit>0) ? n_new : numkeys-1;
	for (a=the_data, i=0; i<m; a++, i++) {
		for (b=a+1, j=i+1; j<numkeys; b++, j++) {
			/* stop counting as soon as the pair can no longer match */
			for (bits=0, k=0; k<FP_CHUNKS && bits<=threshold; k++) {
				bits += lookup[(*a)[k]^(*b)[k]];
			}
			if (bits <= threshold) {
				Inline_Stack_Push(sv_2mortal(newSVuv(i)));
				Inline_Stack_Push(sv_2mortal(newSVuv(j)));
				Inline_Stack_Push(sv_2mortal(newSVuv(bits)));
			}
		}
	}
	Inline_Stack_Done;

	/* clean up */
	free(the_data);
}
 | 
						|
 |