Code examples from 'Internet Forensics'
Chapter 8 (File Contents)
Example 8-1: superstrings.pl |
#!/usr/bin/perl -w # Example 8-1: superstrings.pl # Excerpted from 'Internet Forensics' by Robert Jones # Published 2005 by O'Reilly Media (ISBN 0-596-10006-X) die "Usage: $0 <word doc>\n" unless @ARGV == 1; my %hash = (); foreach my $encoding ('s', 'b', 'B', 'l', 'L') { my $text = `strings -td -e$encoding $ARGV[0]`; foreach my $line (split /\n/, $text) { if($line =~ /^\s*(\d+)\s+(.*)$/) { $hash{$1} = $2; } } } foreach my $offset (sort { $a <=> $b } keys %hash) { printf "%s\n", $hash{$offset}; } |
Example 8-2: compare_text.pl |
#!/usr/bin/perl -w # Example 8-2: compare_text.pl # Excerpted from 'Internet Forensics' by Robert Jones # Published 2005 by O'Reilly Media (ISBN 0-596-10006-X) die "Usage: $0 <file1> <file2>\n" unless @ARGV == 2; my $minscore = 5; my @words0 = (); my @words1 = (); loadWords($ARGV[0], \@words0); loadWords($ARGV[1], \@words1); my %segment = (); my $score = 0; my $maxscore = 0; my $maxi0 = 0; my $maxi1 = 0; for(my $i0 = 0; $i0 < @words0; $i0++) { my $word0 = $words0[$i0]; for(my $i1 = 0; $i1 < @words1; $i1++) { if(lc $words1[$i1] eq lc $word0) { ($maxscore, $maxi0, $maxi1) = matchDiagonal(\@words0, \@words1, $i0, $i1); if(exists $segment{$maxi0}{$maxi1}) { if($maxscore > $segment{$maxi0}{$maxi1}){ $segment{$maxi0}{$maxi1} = $maxscore; } } else { $segment{$maxi0}{$maxi1} = $maxscore; } } } } foreach my $maxi0 (sort keys %segment) { foreach my $maxi1(sort keys %{$segment{$maxi0}}) { $maxscore = $segment{$maxi0}{$maxi1}; if($maxscore >= $minscore) { printf "%s\n\n", traceBack(\@words0, \@words1, $maxi0, $maxi1, $maxscore); } } } sub matchDiagonal { # Extend an initial word match along both word arrays my ($words0, $words1, $i0, $i1) = @_; my $maxscore = 0; my $maxi0 = $i0; my $maxi1 = $i1; my $score = 0; my $j1 = $i1; for(my $j0 = $i0; $j0 < @$words0; $j0++) { if(lc $words0->[$j0] eq lc $words1->[$j1]) { $score++; if($score > $maxscore) { $maxscore = $score; $maxi0 = $j0; $maxi1 = $j1; } } else { $score--; } if($score < 0) { $score = 0; last; } $j1++; last if($j1 >= @$words1); } ($maxscore, $maxi0, $maxi1); } sub traceBack { # Trace back from the maximum score to reconstruct the matching string my ($words0, $words1, $maxi0, $maxi1, $score) = @_; my @array0 = (); my @array1 = (); my $i1 = $maxi1; for(my $i0 = $maxi0; $i0 >= 0; $i0--) { push @array0, $words0[$i0]; push @array1, $words1[$i1]; if(lc $words0[$i0] eq lc $words1[$i1]) { $score--; } last if($score == 0); $i1--; last if($i1 < 0); } my @array = (); for(my $i=0; $i<@array0; $i++) { if(lc $array0[$i] eq lc $array1[$i]) { push @array, $array0[$i]; } else { push @array, sprintf "((%s/%s))", $array0[$i], $array1[$i]; } } join ' ', reverse @array; } sub loadWords { # Read in the text word by word - skip short words my ($filename, $words) = @_; my $minsize = 4; open INPUT, "< $filename" or die "Unable to open file: $filename\n"; while(<INPUT>) { $_ =~ s/[^a-zA-Z0-9]+/ /g; $_ =~ s/^\s+//; foreach my $word (split /\s+/, $_) { if(length $word >= $minsize) { push @$words, $word; } } } close INPUT; } |