Show parser.cc syntax highlighted
#include <boost/bind.hpp>
#include <boost/spirit.hpp>
#include <boost/spirit/dynamic.hpp>
#include <boost/spirit/phoenix.hpp>
using namespace boost::spirit;
int main()
{
chset<> CTL("\x00-\x1F\x7F"), specials("()<>[]:;@\\,.\""),
NO_WS_CTL("\x01-\x08\x0b\x0c\x0e-\x1F\x7F"),
WSP(" \t"),
text("\x00-\x09\x0b\x0c\x0e-\x7f"),
ctext("\x00-\x08\x0b\x21-\x27\x2a-\x5b\x5d-\x7e"),
atext("0-9a-zA-Z!#$%&'*+-/=?^_`{|}~"),
qtext("\x01-\x08\x0b\x0c\x0e-\x1F\x7F\x21\x23-\x5b\x5d-\x7e"),
dtext("\x01-\x08\x0b\x0c\x0e-\x1F\x7F\x21-\x5b\x5e-\x7e"),
;
rule<> quoted_pair, ccontent, comment, CRLF, CFWS, FWS,
atom, dot_atom_text, dot_atom, qcontent, quoted_string, word,
phrase, local_part, dcontent, domain_literal, domain, display_name,
addr_spec, angle_addr, name_addr, mailbox, obs_phrase, obs_angle_addr,
obs_route, obs_domain_list, mailbox_list, obs_mbox_list;
quoted_pair = ch_p('\\') >> text
;
CRLF = ch_p('\r') >> ch_p('\n');
ccontent = ctext | quoted_pair | comment;
comment = ch_p('(') >> *(!FWS >> ccontent) >> !FWS >> ch_p(')');
FWS = !(*WSP >> CRLF) >> +WSP;
CFWS = *(!FWS >> comment) >> ((!FWS >> comment) | FWS);
atom = !CFWS >> +atext >> !CFWS;
dot_atom_text = +atext >> *(ch_p('.') >> +atext);
dot_atom = !CFWS >> +dot_atom_text >> !CFWS;
qcontent = qtext | quoted_pair;
quoted_string = !CFWS >> ch_p('"') >> *(!FWS >> qcontent) >>
!FWS >> ch_p('"') >> !CFWS;
word = atom | quoted_string;
phrase = +word | obs_phrase;
local_part = dot_atom|quoted_string;
dcontent = dtext|quoted_pair;
domain_literal = !CFWS >> ch_p('[') >> *(!FWS >> dcontent) >>
!FWS >> ch_p(']') >> !CFWS;
domain = dot_atom|domain_literal;
display_name = phrase;
addr_spec = local_part >> ch_p('@') >> domain;
angle_addr = !CFWS >> ch_p('<') >> addr_spec >>
ch_p('>') >> !CFWS | obs_angle_addr;
name_addr = !display_name >> angle_addr;
mailbox = name_addr | addr_spec;
mailbox_list = (mailbox >> *(ch_p(',') >> mailbox)) | obs_mbox_list;
obs_mbox_list = +(!mailbox >> !CFWS >> ch_p(',') >> !CFWS) >> !mailbox;
obs_angle_addr = !CFWS >> ch_p('<') >> !obs_route >> addr_spec >> ch_p('>')
>> !CFWS;
obs_phrase = word >> *(word | ch_p('.') | CFWS);
obs_route = !CFWS >> obs_domain_list >> ch_p(':') >> !CFWS;
obs_domain_list = ch_p('@') >> domain >> *(*(CFWS | ch_p(',')) >> !CFWS >> ch_p('@') >> domain);
}
/*
sub parse {
return @{$PARSE_CACHE{$_[1]}} if exists $PARSE_CACHE{$_[1]};
my ($class, $line) = @_;
my (@mailboxes) = ($line =~ /$mailbox/go);
my @addrs;
foreach (@mailboxes) {
my $original = $_;
my @comments = /($comment)/go;
s/$comment//go if @comments;
my ($user, $host, $com);
($user, $host) = ($1, $2) if s/<($local_part)\@($domain)>//o;
if (! defined($user) || ! defined($host)) {
s/($local_part)\@($domain)//o;
($user, $host) = ($1, $2);
}
my ($phrase) = /($display_name)/o;
for ( $phrase, $host, $user, @comments ) {
next unless defined $_;
s/^\s+//;
s/\s+$//;
}
my $new_comment = join ' ', @comments;
push @addrs, $class->new($phrase, "$user\@$host", $new_comment, $original);
}
$PARSE_CACHE{$line} = [@addrs];
@addrs;
}
=pod
=item new
my $address = Email::Address->new(undef, 'casey@local');
my $address = Email::Address->new('Casey West', 'casey@local');
my $address = Email::Address->new(undef, 'casey@local', '(Casey)');
Constructs and returns a new C<Email::Address> object. Takes four
positional arguments: phrase, email, and comment, and original string.
The original string should only really be set using C<parse>.
=cut
sub _PHRASE () { 0 }
sub _ADDRESS () { 1 }
sub _COMMENT () { 2 }
sub _ORIGINAL () { 3 }
sub new { bless [@_[1..4]], $_[0] }
=pod
=item purge_cache
Email::Address->purge_cache;
One way this module stays fast is with internal caches. Caches live
in memory and there is the remote possibility that you will have a
memory problem. In the off chance that you think you're one of those
people, this class method will empty those caches.
I've loaded over 12000 objects and not encountered a memory problem.
=cut
sub purge_cache {
%NAME_CACHE = ();
%FORMAT_CACHE = ();
%PARSE_CACHE = ();
}
=pod
=back
=head2 Instance Methods
=over 4
=item phrase
my $phrase = $address->phrase;
$address->phrase( "Me oh my" );
Accessor and mutator for the phrase portion of an address.
=item address
my $addr = $address->address;
$addr->address( "me@PROTECTED.com" );
Accessor and mutator for the address portion of an address.
=item comment
my $comment = $address->comment;
$address->comment( "(Work address)" );
Accessor and mutator for the comment portion of an address.
=item original
my $orig = $address->original;
Accessor for the original address found when parsing, or passed
to C<new>.
=item host
my $host = $address->host;
Accessor for the host portion of an address's address.
=item user
my $user = $address->user;
Accessor for the user portion of an address's address.
=cut
sub phrase { $_[1] ? $_[0]->[_PHRASE] = $_[1] : $_[0]->[_PHRASE] }
sub address { $_[1] ? $_[0]->[_ADDRESS] = $_[1] : $_[0]->[_ADDRESS] }
sub comment { $_[1] ? $_[0]->[_COMMENT] = $_[1] : $_[0]->[_COMMENT] }
sub original { $_[1] ? $_[0]->[_ORIGINAL] = $_[1] : $_[0]->[_ORIGINAL] }
sub host { ($_[0]->[_ADDRESS] =~ /\@($domain)/o)[0] }
sub user { ($_[0]->[_ADDRESS] =~ /($local_part)\@/o)[0] }
=pod
=item format
my $printable = $address->format;
Returns a properly formatted RFC 2822 address representing the
object.
=cut
sub format {
local $^W = 0;
return $FORMAT_CACHE{"@{$_[0]}"} if exists $FORMAT_CACHE{"@{$_[0]}"};
my ($self) = @_;
my $format = sprintf '%s <%s> %s',
$self->[_PHRASE], $self->[_ADDRESS], $self->[_COMMENT];
$format =~ s/^\s+//;
$format =~ s/\s+$//;
$FORMAT_CACHE{"@{$_[0]}"} = $format;
}
=pod
=item name
my $name = $address->name;
This method tries very hard to determine the name belonging to the address.
First the C<phrase> is checked. If that doesn't work out the C<comment>
is looked into. If that still doesn't work out, the C<user> portion of
the C<address> is returned.
This method does B<not> try to massage any name it identifies and instead
leaves that up to someone else. Who is it to decide if someone wants their
name capitalized, or if they're Irish?
=cut
sub name {
local $^W = 0;
return $NAME_CACHE{"@{$_[0]}"} if exists $NAME_CACHE{"@{$_[0]}"};
my ($self) = @_;
my $name = '';
if ( $name = $self->[_PHRASE] ) {
$name =~ s/^"//;
$name =~ s/"$//;
$name =~ s/($quoted_pair)/substr $1, -1/goe;
} elsif ( $name = $self->[_COMMENT] ) {
$name =~ s/^\(//;
$name =~ s/\)$//;
$name =~ s/($quoted_pair)/substr $1, -1/goe;
$name =~ s/$comment/ /go;
} else {
($name) = $self->[_ADDRESS] =~ /($local_part)\@/o;
}
$NAME_CACHE{"@{$_[0]}"} = $name;
}
=pod
=back
=head2 Overloaded Operators
=over 4
=item stringify
print "I have your email address, $address.";
Objects stringify to C<format> by default. It's possible that you don't
like that idea. Okay, then, you can change it by modifying
C<$Email:Address::STRINGIFY>. Please consider modifying this package
variable using C<local>. You might step on someone else's toes if you
don't.
{
local $Email::Address::STRINGIFY = 'address';
print "I have your address, $address.";
# geeknest.com
}
print "I have your address, $address.";
# "Casey West" <casey@geeknest.com>
=cut
sub as_string { no strict 'refs'; goto &{$STRINGIFY} };
use overload '""' => \&as_string;
=pod
=back
=cut
1;
__END__
=head2 Did I Mention Fast?
On my 877Mhz 12" Apple Powerbook I can run the distributed benchmarks and
get results like this.
$ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 5
s/iter Mail::Address Email::Address
Mail::Address 1.59 -- -31%
Email::Address 1.10 45% --
$ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 25
s/iter Mail::Address Email::Address
Mail::Address 1.58 -- -60%
Email::Address 0.630 151% --
$ perl -Ilib bench/ea-vs-ma.pl bench/corpus.txt 50
s/iter Mail::Address Email::Address
Mail::Address 1.58 -- -65%
Email::Address 0.558 182% --
=head1 SEE ALSO
L<Email::Simple>, L<perl>.
=head1 AUTHOR
Casey West, <F<casey@geeknest.com>>.
=head1 COPYRIGHT
Copyright (c) 2004 Casey West. All rights reserved.
This module is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
*/
See more files for this project here