Code Search for Developers
 
 
  

html_purify.pl from AlphaMail at Krugle


Show html_purify.pl syntax highlighted

#!/usr/bin/perl -w
#
use strict;
use HTML::Parser;

# Tags that are never written to the output, keyed by lower-case tag name.
# Any true value makes start() and end() drop the tag itself; the value 2
# additionally toggles $skipping (set by start(), cleared by end()), which
# makes text() discard the text that follows.
our %skip = (
   (map { $_ => 2 } qw(head link meta title body frame html script)),
   (map { $_ => 1 } qw(object iframe embed)),
   );

# When true, start() replaces the src attribute of every <img> with
# "broken.gif".
our $imageskip = 1;

# True while text() should suppress its output; see %skip above.
our $skipping = 0;

# Attribute names (lower-case) that start() drops from every tag.
our %attrskip = map { $_ => 1 } qw(target);

# HTML::Parser start-tag handler: re-emits an opening tag on the currently
# selected output handle with the unsafe parts removed.
#
# Arguments (matching the "tagname, attr" argspec used when registering it):
#   tag name   - lower-cased here before any lookup
#   attributes - hash reference of attribute name => value (may be omitted)
#
# A tag listed in %skip is dropped entirely; a %skip value of 2 also sets
# $skipping so that text() stays silent until end() clears the flag.
#
# For every other tag the attributes are written in sorted name order, except
#   - names starting with "on" (event handlers),
#   - names listed in %attrskip,
#   - names that are not plain attribute identifiers, e.g. the "/"
#     pseudo-attribute the parser reports for XHTML-style <br />.
# When $imageskip is true, the src of an <img> is replaced by "broken.gif".
#
# NOTE(review): URL-valued attributes such as href are not checked for
# script-bearing schemes (javascript:, data:); consider filtering them.
sub start
{
   my $tag = lc(shift);
   my $attr = shift || {};

   if($skip{$tag}) {
      $skipping = 1 if($skip{$tag} == 2);
      return;
   }

   print "<$tag ";
   for my $name (sort keys %$attr) {
      next if $name =~ m/^on/i;
      next if $attrskip{lc($name)};
      # Refuse names that could not be written back as valid markup.
      next if $name !~ m/\A[A-Za-z_:][-A-Za-z0-9_:.]*\z/;

      if($imageskip && $tag eq 'img' && lc($name) eq 'src') {
         print qq(src="broken.gif" );
         next;
      }

      # The parser hands over attribute values with entities already
      # decoded, so they must be re-encoded before being placed back inside
      # double quotes; otherwise a decoded '"' terminates the attribute.
      my $value = defined $attr->{$name} ? $attr->{$name} : '';
      $value =~ s/&/&amp;/g;
      $value =~ s/"/&quot;/g;
      $value =~ s/</&lt;/g;
      $value =~ s/>/&gt;/g;
      print qq($name="$value" );
   }
   print ">";
}

# HTML::Parser end-tag handler: writes the closing tag unless the element is
# listed in %skip.
#
# Argument: the tag name (lower-cased here before the lookup).
#
# For a %skip entry nothing is printed; when the entry's value is 2 the
# $skipping flag is cleared so that text() starts printing again.
#
# NOTE(review): $skipping is a single flag, not a depth counter, so the first
# closing skip-2 tag clears it even if an enclosing skip-2 element is still
# open - confirm that this is intended.
sub end
{
   my ($name) = @_;
   $name = lc $name;

   my $mode = $skip{$name};
   if(!$mode) {
      print "</$name>";
      return;
   }

   $skipping = 0 if $mode == 2;
   return;
}

# HTML::Parser text handler: prints the text chunk it is given, unless the
# $skipping flag is currently set, in which case the chunk is discarded.
sub text
{
   my ($chunk) = @_;

   unless($skipping) {
      print $chunk;
   }
}

# Build an event-driven parser (API version 3) that routes start tags, end
# tags and text to the filter handlers defined above. No other event types
# have a handler registered here.
my $p = HTML::Parser->new(api_version => 3,
   start_h => [ \&start, "tagname, attr" ],
   end_h => [ \&end, "tagname" ],
   text_h => [ \&text, "text" ]
   );

# The file to purify may be given as the first command-line argument; it
# defaults to the bundled test.html. parse_file() returns undef and sets $!
# when the file cannot be opened, so report that instead of silently
# producing no output.
my $file = @ARGV ? $ARGV[0] : "test.html";
defined $p->parse_file($file)
   or die "Can't open $file: $!\n";




See more files for this project here

AlphaMail

AlphaMail is an accelerated web mail interface with a C++ middleware layer that is more effective than an IMAP proxy and is highly scalable (10k+ users). The interface includes modern features, Section 508 compliance, and universal browser support.

Project homepage: http://sourceforge.net/projects/alphamail
Programming language(s): C++,Java,JavaScript,Perl
License: other

  html_purify.pl
  imap.pl
  imap_cache_test.pl
  ispell.pl
  middleware_test.pl
  mime.pl
  subexp.pl
  test.html