Code Search for Developers
 
 
  

parser.cc from AlphaMail at Krugle


Show parser.cc syntax highlighted

#include <boost/bind.hpp>
#include <boost/spirit.hpp>
#include <boost/spirit/dynamic.hpp>
#include <boost/spirit/phoenix.hpp>

using namespace boost::spirit;

int main()
{
chset<> CTL("\x00-\x1F\x7F"), specials("()<>[]:;@\\,.\""),
                       NO_WS_CTL("\x01-\x08\x0b\x0c\x0e-\x1F\x7F"),
                       WSP(" \t"),
                       text("\x00-\x09\x0b\x0c\x0e-\x7f"),
                       ctext("\x00-\x08\x0b\x21-\x27\x2a-\x5b\x5d-\x7e"),
                       atext("0-9a-zA-Z!#$%&'*+-/=?^_`{|}~"),
                       qtext("\x01-\x08\x0b\x0c\x0e-\x1F\x7F\x21\x23-\x5b\x5d-\x7e"),
                       dtext("\x01-\x08\x0b\x0c\x0e-\x1F\x7F\x21-\x5b\x5e-\x7e"),
                       ;

rule<> quoted_pair, ccontent, comment, CRLF, CFWS, FWS,
   atom, dot_atom_text, dot_atom, qcontent, quoted_string, word,
   phrase, local_part, dcontent, domain_literal, domain, display_name,
   addr_spec, angle_addr, name_addr, mailbox, obs_phrase, obs_angle_addr,
   obs_route, obs_domain_list, mailbox_list, obs_mbox_list;

   quoted_pair    = ch_p('\\') >> text
      ;
   CRLF           = ch_p('\r') >> ch_p('\n');

   ccontent       = ctext | quoted_pair | comment;

   comment        = ch_p('(') >> *(!FWS >> ccontent) >> !FWS >> ch_p(')');

   FWS            = !(*WSP >> CRLF) >> +WSP;

   CFWS           = *(!FWS >> comment) >> ((!FWS >> comment) | FWS);

   atom           = !CFWS >> +atext >> !CFWS;

   dot_atom_text  = +atext >> *(ch_p('.') >> +atext);

   dot_atom       = !CFWS >> +dot_atom_text >> !CFWS;

   qcontent       = qtext | quoted_pair;

   quoted_string  = !CFWS >> ch_p('"') >> *(!FWS >> qcontent) >> 
      !FWS >> ch_p('"') >> !CFWS;

   word           = atom | quoted_string;

   phrase         = +word | obs_phrase;

   local_part     = dot_atom|quoted_string;
   dcontent       = dtext|quoted_pair;
   domain_literal = !CFWS >> ch_p('[') >> *(!FWS >> dcontent) >> 
      !FWS >> ch_p(']') >> !CFWS;

   domain         = dot_atom|domain_literal;

   display_name   = phrase;

   addr_spec      = local_part >> ch_p('@') >> domain;

   angle_addr     = !CFWS >> ch_p('<') >> addr_spec >> 
      ch_p('>') >> !CFWS | obs_angle_addr;

   name_addr      = !display_name >> angle_addr;

   mailbox        = name_addr | addr_spec;

   mailbox_list   = (mailbox >> *(ch_p(',') >> mailbox)) | obs_mbox_list;

   obs_mbox_list  = +(!mailbox >> !CFWS >> ch_p(',') >> !CFWS) >> !mailbox;

   obs_angle_addr = !CFWS >> ch_p('<') >> !obs_route >> addr_spec >> ch_p('>')
      >> !CFWS;

   obs_phrase     = word >> *(word | ch_p('.') | CFWS);

   obs_route       = !CFWS >> obs_domain_list >> ch_p(':') >> !CFWS;

   obs_domain_list = ch_p('@') >> domain >> *(*(CFWS | ch_p(',')) >> !CFWS >> ch_p('@') >> domain);
}

/*
sub parse {
    return @{$PARSE_CACHE{$_[1]}} if exists $PARSE_CACHE{$_[1]};
    my ($class, $line) = @_;
    my (@mailboxes) = ($line =~ /$mailbox/go);
    my @addrs;
    foreach (@mailboxes) {
      my $original = $_;

      my @comments = /($comment)/go;
      s/$comment//go if @comments;

      my ($user, $host, $com);
      ($user, $host) = ($1, $2) if s/<($local_part)\@($domain)>//o;
      if (! defined($user) || ! defined($host)) {
          s/($local_part)\@($domain)//o;
          ($user, $host) = ($1, $2);
      }

      my ($phrase)       = /($display_name)/o;

      for ( $phrase, $host, $user, @comments ) {
        next unless defined $_;
        s/^\s+//;
        s/\s+$//;
      }

      my $new_comment = join ' ', @comments;
      push @addrs, $class->new($phrase, "$user\@$host", $new_comment, $original);
    }
    $PARSE_CACHE{$line} = [@addrs];
    @addrs;
}

=pod

=item new

  my $address = Email::Address->new(undef, 'casey@local');
  my $address = Email::Address->new('Casey West', 'casey@local');
  my $address = Email::Address->new(undef, 'casey@local', '(Casey)');

Constructs and returns a new C<Email::Address> object. Takes four
positional arguments: phrase, email, and comment, and original string.

The original string should only really be set using C<parse>.

=cut

sub _PHRASE   () { 0 }
sub _ADDRESS  () { 1 }
sub _COMMENT  () { 2 }
sub _ORIGINAL () { 3 }
sub new { bless [@_[1..4]], $_[0] }

=pod

=item purge_cache

  Email::Address->purge_cache;

One way this module stays fast is with internal caches. Caches live
in memory and there is the remote possibility that you will have a
memory problem. In the off chance that you think you're one of those
people, this class method will empty those caches.

I've loaded over 12000 objects and not encountered a memory problem.

=cut

sub purge_cache {
    %NAME_CACHE   = ();
    %FORMAT_CACHE = ();
    %PARSE_CACHE  = ();
}

=pod

=back

=head2 Instance Methods

=over 4

=item phrase

  my $phrase = $address->phrase;
  $address->phrase( "Me oh my" );

Accessor and mutator for the phrase portion of an address.

=item address

  my $addr = $address->address;
  $addr->address( "me@PROTECTED.com" );

Accessor and mutator for the address portion of an address.

=item comment

  my $comment = $address->comment;
  $address->comment( "(Work address)" );

Accessor and mutator for the comment portion of an address.

=item original

  my $orig = $address->original;

Accessor for the original address found when parsing, or passed
to C<new>.

=item host

  my $host = $address->host;

Accessor for the host portion of an address's address.

=item user

  my $user = $address->user;

Accessor for the user portion of an address's address.

=cut

sub phrase   { $_[1] ? $_[0]->[_PHRASE]   = $_[1] : $_[0]->[_PHRASE]   }
sub address  { $_[1] ? $_[0]->[_ADDRESS]  = $_[1] : $_[0]->[_ADDRESS]  }
sub comment  { $_[1] ? $_[0]->[_COMMENT]  = $_[1] : $_[0]->[_COMMENT]  }
sub original { $_[1] ? $_[0]->[_ORIGINAL] = $_[1] : $_[0]->[_ORIGINAL] }
sub host     { ($_[0]->[_ADDRESS] =~ /\@($domain)/o)[0]                }
sub user     { ($_[0]->[_ADDRESS] =~ /($local_part)\@/o)[0]            }


=pod

=item format

  my $printable = $address->format;

Returns a properly formatted RFC 2822 address representing the
object.

=cut

sub format {
    local $^W = 0;
    return $FORMAT_CACHE{"@{$_[0]}"} if exists $FORMAT_CACHE{"@{$_[0]}"};
    my ($self) = @_;
    my $format = sprintf '%s <%s> %s',
                 $self->[_PHRASE], $self->[_ADDRESS], $self->[_COMMENT];
    $format =~ s/^\s+//;
    $format =~ s/\s+$//;
    $FORMAT_CACHE{"@{$_[0]}"} = $format;
}

=pod

=item name

  my $name = $address->name;

This method tries very hard to determine the name belonging to the address.
First the C<phrase> is checked. If that doesn't work out the C<comment>
is looked into. If that still doesn't work out, the C<user> portion of
the C<address> is returned.

This method does B<not> try to massage any name it identifies and instead
leaves that up to someone else. Who is it to decide if someone wants their
name capitalized, or if they're Irish?

=cut

sub name {
    local $^W = 0;
    return $NAME_CACHE{"@{$_[0]}"} if exists $NAME_CACHE{"@{$_[0]}"};
    my ($self) = @_;
    my $name = '';
    if ( $name = $self->[_PHRASE] ) {
        $name =~ s/^"//;
        $name =~ s/"$//;
        $name =~ s/($quoted_pair)/substr $1, -1/goe;
    } elsif ( $name = $self->[_COMMENT] ) {
        $name =~ s/^\(//;
        $name =~ s/\)$//;
        $name =~ s/($quoted_pair)/substr $1, -1/goe;
        $name =~ s/$comment/ /go;
    } else {
        ($name) = $self->[_ADDRESS] =~ /($local_part)\@/o;
    }
    $NAME_CACHE{"@{$_[0]}"} = $name;
}

=pod

=back

=head2 Overloaded Operators

=over 4

=item stringify

  print "I have your email address, $address.";

Objects stringify to C<format> by default. It's possible that you don't
like that idea. Okay, then, you can change it by modifying
C<$Email:Address::STRINGIFY>. Please consider modifying this package
variable using C<local>. You might step on someone else's toes if you
don't.

  {
    local $Email::Address::STRINGIFY = 'address';
    print "I have your address, $address.";
    #   geeknest.com
  }
  print "I have your address, $address.";
  #   "Casey West" <casey@geeknest.com>

=cut

sub as_string { no strict 'refs'; goto &{$STRINGIFY} };
use overload '""' => \&as_string;

=pod

=back

=cut

1;

__END__

=head2 Did I Mention Fast?

On my 877Mhz 12" Apple Powerbook I can run the distributed benchmarks and
get results like this.

  $ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 5 
                 s/iter  Mail::Address Email::Address
  Mail::Address    1.59             --           -31%
  Email::Address   1.10            45%             --
  $ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 25
                 s/iter  Mail::Address Email::Address
  Mail::Address    1.58             --           -60%
  Email::Address  0.630           151%             --
  $ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 50
                 s/iter  Mail::Address Email::Address
  Mail::Address    1.58             --           -65%
  Email::Address  0.558           182%             --

=head1 SEE ALSO

L<Email::Simple>, L<perl>.

=head1 AUTHOR

Casey West, <F<casey@geeknest.com>>.

=head1 COPYRIGHT

  Copyright (c) 2004 Casey West.  All rights reserved.
  This module is free software; you can redistribute it and/or modify it
  under the same terms as Perl itself.

=cut
*/




See more files for this project here

AlphaMail

AlphaMail is an accelerated web mail interface with a C++ middleware layer that is more effective than an IMAP proxy which is a highly scalable (10k+ users). The interface includes modern features, Section 508 compliance, and universal browser support.

Project homepage: http://sourceforge.net/projects/alphamail
Programming language(s): C++,Java,JavaScript,Perl
License: other

  parser.cc