Show html_purify.pl syntax highlighted
#!/usr/bin/perl -w
#
use strict;
use HTML::Parser;
# Tag policy, keyed by lower-case tag name:
#   1 => drop the tag itself but keep whatever it encloses
#   2 => drop the tag AND everything up to its matching end tag
# Only elements that carry an end tag may use level 2.  link, meta and frame
# are void elements (normally no end tag in the source), so nothing would
# ever switch skip mode off again.  html and body wrap the whole document,
# so they must be level 1 or no body text would survive.
our %skip = (
    head   => 2,
    title  => 2,
    script => 2,
    "link" => 1,
    meta   => 1,
    frame  => 1,
    html   => 1,
    body   => 1,
    object => 1,
    iframe => 1,
    embed  => 1,
);

# When true, the src of every <img> is replaced by a local placeholder.
our $imageskip = 1;

# Number of level-2 elements currently open; all output is suppressed
# while this is greater than zero.
our $skipping = 0;

# Attribute names (lower case) that are removed from every tag.
our %attrskip = (
    target => 1,
);

# _escape_attr($value)
# Return $value encoded so it can sit inside a double-quoted attribute.
# An undefined value becomes the empty string.
sub _escape_attr {
    my ($value) = @_;
    return '' unless defined $value;
    $value =~ s/&/&amp;/g;    # must run first so later entities stay intact
    $value =~ s/"/&quot;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    return $value;
}

# start($tagname, \%attr)
# Start-tag handler.  Applies the %skip policy, then re-emits the tag with
# its attributes in sorted order, dropping on* event handlers, anything
# listed in %attrskip and attribute names that are not plain identifiers.
# NOTE(review): URL schemes such as javascript: in href/src are not filtered.
sub start {
    my $tag  = lc(shift);
    my $attr = shift;

    if (my $level = $skip{$tag}) {
        $skipping++ if $level == 2;
        # </head> may be omitted in HTML; a <body> start tag implies that
        # any still-open head section has ended.
        $skipping = 0 if $tag eq 'body';
        return;
    }

    # Inside a skipped section nothing is emitted, not even nested tags.
    return if $skipping;

    print "<$tag ";
    for my $name (sort keys %{ $attr || {} }) {
        my $lc_name = lc $name;
        next if $lc_name =~ m/\Aon/;
        next if $attrskip{$lc_name};
        # A name containing quotes or angle brackets would corrupt the tag.
        next unless $name =~ m/\A[A-Za-z_:][-A-Za-z0-9_:.]*\z/;

        if ($imageskip && $tag eq 'img' && $lc_name eq 'src') {
            print qq(src="broken.gif" );
        }
        else {
            # Values are re-encoded before being quoted so that a literal
            # double quote cannot terminate the attribute early.
            print qq($name="), _escape_attr($attr->{$name}), qq(" );
        }
    }
    print ">";
    return;
}

# end($tagname)
# End-tag handler.  Leaves one level of skip mode for level-2 tags and
# re-emits every other end tag unless output is currently suppressed.
sub end {
    my $tag = lc(shift);

    if (my $level = $skip{$tag}) {
        # Never go below zero on a stray end tag without a start tag.
        $skipping-- if $level == 2 && $skipping > 0;
        return;
    }

    return if $skipping;
    print "</$tag>";
    return;
}

# text($text)
# Text handler.  Passes the text through unless inside a skipped section.
sub text {
    my ($chunk) = @_;
    print $chunk unless $skipping;
    return;
}
# Run the sanitizer over the file named on the command line (default:
# test.html, the original hard-coded input) and write the cleaned markup
# to the currently selected output handle via the start/end/text handlers.
my $file = @ARGV ? $ARGV[0] : "test.html";

# Direct method call instead of indirect-object "new HTML::Parser(...)".
my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&start, "tagname, attr" ],
    end_h       => [ \&end,   "tagname" ],
    text_h      => [ \&text,  "text" ],
);

# parse_file returns undef (with $! set) when the file cannot be opened.
defined $p->parse_file($file)
    or die "Cannot parse $file: $!\n";
See more files for this project here