#!/usr/bin/perl -w
# Example 8-1: superstrings.pl
# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)
die "Usage: $0 <word doc>\n" unless @ARGV == 1;
my %hash = ();
foreach my $encoding ('s', 'b', 'B', 'l', 'L') {
my $text = `strings -td -e$encoding $ARGV[0]`;
foreach my $line (split /\n/, $text) {
if($line =~ /^\s*(\d+)\s+(.*)$/) {
$hash{$1} = $2;
}
}
}
foreach my $offset (sort { $a <=> $b } keys %hash) {
printf "%s\n", $hash{$offset};
}
|
#!/usr/bin/perl -w
# Example 8-2: compare_text.pl
# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)
die "Usage: $0 <file1> <file2>\n" unless @ARGV == 2;
my $minscore = 5;
my @words0 = ();
my @words1 = ();
loadWords($ARGV[0], \@words0);
loadWords($ARGV[1], \@words1);
my %segment = ();
my $score = 0;
my $maxscore = 0;
my $maxi0 = 0;
my $maxi1 = 0;
for(my $i0 = 0; $i0 < @words0; $i0++) {
my $word0 = $words0[$i0];
for(my $i1 = 0; $i1 < @words1; $i1++) {
if(lc $words1[$i1] eq lc $word0) {
($maxscore, $maxi0, $maxi1) =
matchDiagonal(\@words0, \@words1, $i0, $i1);
if(exists $segment{$maxi0}{$maxi1}) {
if($maxscore > $segment{$maxi0}{$maxi1}){
$segment{$maxi0}{$maxi1} = $maxscore;
}
} else {
$segment{$maxi0}{$maxi1} = $maxscore;
}
}
}
}
foreach my $maxi0 (sort keys %segment) {
foreach my $maxi1(sort keys %{$segment{$maxi0}}) {
$maxscore = $segment{$maxi0}{$maxi1};
if($maxscore >= $minscore) {
printf "%s\n\n",
traceBack(\@words0, \@words1, $maxi0, $maxi1, $maxscore);
}
}
}
sub matchDiagonal {
# Extend an initial word match along both word arrays
my ($words0, $words1, $i0, $i1) = @_;
my $maxscore = 0;
my $maxi0 = $i0;
my $maxi1 = $i1;
my $score = 0;
my $j1 = $i1;
for(my $j0 = $i0; $j0 < @$words0; $j0++) {
if(lc $words0->[$j0] eq lc $words1->[$j1]) {
$score++;
if($score > $maxscore) {
$maxscore = $score;
$maxi0 = $j0;
$maxi1 = $j1;
}
} else {
$score--;
}
if($score < 0) {
$score = 0;
last;
}
$j1++;
last if($j1 >= @$words1);
}
($maxscore, $maxi0, $maxi1);
}
sub traceBack {
# Trace back from the maximum score to reconstruct the matching string
my ($words0, $words1, $maxi0, $maxi1, $score) = @_;
my @array0 = ();
my @array1 = ();
my $i1 = $maxi1;
for(my $i0 = $maxi0; $i0 >= 0; $i0--) {
push @array0, $words0[$i0];
push @array1, $words1[$i1];
if(lc $words0[$i0] eq lc $words1[$i1]) {
$score--;
}
last if($score == 0);
$i1--;
last if($i1 < 0);
}
my @array = ();
for(my $i=0; $i<@array0; $i++) {
if(lc $array0[$i] eq lc $array1[$i]) {
push @array, $array0[$i];
} else {
push @array, sprintf "((%s/%s))", $array0[$i], $array1[$i];
}
}
join ' ', reverse @array;
}
sub loadWords {
# Read in the text word by word - skip short words
my ($filename, $words) = @_;
my $minsize = 4;
open INPUT, "< $filename" or die "Unable to open file: $filename\n";
while(<INPUT>) {
$_ =~ s/[^a-zA-Z0-9]+/ /g;
$_ =~ s/^\s+//;
foreach my $word (split /\s+/, $_) {
if(length $word >= $minsize) {
push @$words, $word;
}
}
}
close INPUT;
}
|