Example 5-1: readable_html.pl
|
#!/usr/bin/perl -w
# Example 5-1: readable_html.pl
# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)
#
# Copies an HTML document to standard output, inserting a newline after
# every closing tag (</...>) so that densely packed markup becomes easier
# to read. Input is taken from the file named on the command line, or from
# standard input when no argument (or the argument '-') is given.
use strict;
use warnings;

die "Usage: $0 <html file>\n" unless @ARGV < 2;

# '-' (explicit or implied by a missing argument) selects standard input.
my $path = @ARGV ? $ARGV[0] : '-';

my $input;
if ($path eq '-') {
    $input = \*STDIN;
}
else {
    # Three-argument open: the file name is used verbatim, so mode
    # characters or surrounding whitespace in it cannot change how the
    # file is opened.
    open $input, '<', $path
        or die "$0: Unable to open html file $path: $!\n";
}

while (my $line = <$input>) {
    # Non-greedy match so that each closing tag on the line gets its own
    # trailing newline rather than only the last one.
    $line =~ s{(</.*?>)}{$1\n}g;
    print $line;
}

# Only close what this script opened; STDIN is left alone.
close $input unless $path eq '-';
|
Example 5-2: extract_links.pl
|
#!/usr/bin/perl -w
# Example 5-2: extract_links.pl
# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)
#
# Fetches the page at the given URL and prints every distinct link found
# in it, one per line, as "<tag> <url>", sorted by tag name and then by URL.
use strict;
use warnings;
use HTML::LinkExtor;
use LWP::Simple;

die "Usage: $0 <url>\n" unless @ARGV == 1;
my $page_url = $ARGV[0];

# get() returns undef on failure. Test definedness rather than truth so
# that a successfully fetched but empty (or "0") document is not reported
# as a download error.
my $doc = get($page_url);
die "$0: Unable to get url: $page_url\n" unless defined $doc;

# No callback is supplied, so links are accumulated inside the parser; the
# page URL is passed as the base so relative links are made absolute.
my $parser = HTML::LinkExtor->new(undef, $page_url);
$parser->parse($doc)->eof;

# Map each link URL to the tag it was found in. Each entry returned by
# links() has the form [$tag, $attr => $url, $attr2 => $url2, ...], so a
# single tag may carry several URLs; walk every attribute/URL pair rather
# than only the first one.
my %tag_of = ();
foreach my $link ($parser->links) {
    my $tag = $link->[0];
    for (my $i = 2; $i < @$link; $i += 2) {
        $tag_of{ $link->[$i] } = $tag;
    }
}

foreach my $link_url (sort { $tag_of{$a} cmp $tag_of{$b} or $a cmp $b }
                      keys %tag_of) {
    printf qq[%-6s %s\n], $tag_of{$link_url}, $link_url;
}
|
Example 5-3: extract_form_elements.pl
|
#!/usr/bin/perl -w
# Example 5-3: extract_form_elements.pl
# Excerpted from 'Internet Forensics' by Robert Jones
# Published 2005 by O'Reilly Media (ISBN 0-596-10006-X)
#
# Scans an HTML file and prints the source text of every form-related
# start tag, followed by the closing </form> tag and a blank line at the
# end of each form.
use HTML::TokeParser;

# Exactly one argument is required: the HTML file to scan.
@ARGV == 1 or die "Usage: $0 <html file>\n";

# Start tags whose source text is echoed.
my %is_form_tag = map { $_ => 1 }
    qw(form button input select option textarea);

my $parser = HTML::TokeParser->new($ARGV[0]) or die "Can't open: $!";

while (my $tok = $parser->get_token) {
    # The first two fields of a token are its type and, for start ('S')
    # and end ('E') tokens, the tag name.
    my ($type, $tag) = @$tok;

    if ($type eq 'S') {
        # Field 4 of a start-tag token is printed as the tag's text.
        print $tok->[4] . "\n" if $is_form_tag{$tag};
    }
    elsif ($type eq 'E' and $tag eq 'form') {
        # Field 2 of an end-tag token is printed; the extra newline
        # separates one form's output from the next.
        print $tok->[2] . "\n\n";
    }
}
|