]*)>/ ) {
$type = 'RDF';
}
return $type;
}
sub __parse_content {
my ($self, $contref, %opts) = @_;
my $feed_type = $self->__feed_type($contref);
# warn "FEED Type = [$feed_type]\n";
return undef unless $feed_type;
$$contref =~ s/>\s+>(.*?)<\/items([^>]*?)>//sgi;
$$contref =~ s/<\/?br(.*?)>/\n/sgi;
$$contref =~ s/<\/?nobr(.*?)>//sgi;
#$$contref =~ s//\n\n/sgi;
#$$contref =~ s/
/\n\n/sgi;
$$contref =~ s/<\/?strong\s(.*?)>//sgi;
if ( $opts{preserve_tags} ) {
$$contref =~ s/<(\/?[sib])>/\[$1\]/sgi;
$$contref =~ s/<(\/?strong)>/\[$1\]/sgi;
$$contref =~ s/<(\/?em)>/\[$1\]/sgi;
} else {
$$contref =~ s/<(\/?[sib])>//sgi;
$$contref =~ s/<(\/?strong)>//sgi;
$$contref =~ s/<(\/?em)>//sgi;
}
#$$contref =~ s/<\/p>//sgi;
#$$contref =~ s/<\/p\s(.*?)>//sgi;
my @items;
if ( $feed_type eq 'RSS' ) {
while ( $$contref =~ /- (.*?)<\/item([^>]*?)>/sgi ) {
my $item_params = $1;
my $item_body = $2;
# warn "BODY: [$item_body]\n\n";
my $params = $self->__parse_params ($item_params);
my $item = $self->__parse_item_RSS ($item_body) || {};
if ( ref $params eq 'HASH' ) {
foreach my $key ( %$params ) {
if ( exists $item->{$key} && ref $item->{$key} eq 'ARRAY' ) {
push @{ $item->{$key} }, $params->{$key};
} elsif ( exists $item->{$key} ) {
my @arr = ( $item->{$key}, $params->{$key} );
$item->{$key} = \@arr;
} else {
$item->{$key} = $params->{$key};
}
}
}
if ( ref $item eq 'HASH' && scalar keys %$item ) {
if ( exists $item->{'feedburner:origlink'} ) {
$item->{link} = ref $item->{'feedburner:origlink'} eq 'ARRAY' ? $item->{'feedburner:origlink'}->[0] : $item->{'feedburner:origlink'};
} elsif ( !exists $item->{link} ) {
foreach my $key ( qw( guid ) ) {
if ( exists $item->{$key} ) {
$item->{link} = $item->{$key};
last;
}
}
}
push @items, $item;
}
# warn Dumper($item);
}
}
if ( $feed_type eq 'RDF' ) {
while ( $$contref =~ /
- (.*?)<\/item([^>]*?)>/sgi ) {
my $item_params = $1;
my $item_body = $2;
# warn "BODY: [$item_body]\n\n";
my $params = $self->__parse_params ($item_params);
my $item = $self->__parse_item_RSS ($item_body) || {};
if ( ref $params eq 'HASH' ) {
foreach my $key ( %$params ) {
if ( exists $item->{$key} && ref $item->{$key} eq 'ARRAY' ) {
push @{ $item->{$key} }, $params->{$key};
} elsif ( exists $item->{$key} ) {
my @arr = ( $item->{$key}, $params->{$key} );
$item->{$key} = \@arr;
} else {
$item->{$key} = $params->{$key};
}
}
}
# warn Dumper($item);
if ( ref $item eq 'HASH' && scalar keys %$item ) {
if ( !exists $item->{pubdate} ) {
foreach my $key ( 'prism:publicationdate', 'dc:date' ) {
if ( exists $item->{$key} ) {
$item->{pubdate} = $item->{$key};
last;
}
}
}
push @items, $item;
}
}
}
if ( $feed_type eq 'ATOM' ) {
while ( $$contref =~ /(.*?)<\/entry([^>]*?)>/sgi ) {
my $item_params = $1;
my $item_body = $2;
my $item = $self->__parse_item_ATOM ($item_body) || {};
# warn Dumper($item);
if ( ref $item eq 'HASH' && scalar keys %$item ) {
if ( !exists $item->{pubdate} ) {
foreach my $key ( 'published', 'updated' ) {
if ( exists $item->{$key} ) {
$item->{pubdate} = $item->{$key};
last;
}
}
}
push @items, $item;
}
}
}
return ( scalar @items ? \@items : undef );
}
sub __parse_params {
my ($self, $params) = @_;
return undef unless $params;
my %params;
while ( $params =~ /([\w\:]+)(\s*?)=(\s*?)["'](.*?)["']/sgi ) {
my $name = $1;
my $value = $4;
if ( $name && $value ) {
$params{$name} = $value;
}
}
return ( scalar(keys %params) ? \%params : undef );
}
sub __parse_item_RSS {
my ($self, $item_body, $debug) = @_;
return undef unless $item_body;
my %item;
# my $embedded = $self->__item_cut_rss_embedded(\$item_body);
# if ( ref $embedded ) {
# %item = %$embedded;
# }
# my $content = $self->__item_cut_rss_description(\$item_body);
# $item{description} = $content if $content;
# my $one_string_elements = $self->__item_cut_single_elements (\$item_body);
# if ( ref $one_string_elements eq 'ARRAY' && @$one_string_elements ) {
# foreach my $elem ( @$one_string_elements ) {
# my ($elem_name) = keys %$elem if ref $elem eq 'HASH';
# if ( exists $item{$elem_name} && ref $item{$elem_name} eq 'ARRAY' ) {
# push @{ $item{$elem_name} }, $elem->{$elem_name};
# } elsif ( exists $item{$elem_name} ) {
# $item{$elem_name} = [$item{$elem_name}, $elem->{$elem_name}];
# } else {
# $item{$elem_name} = $elem->{$elem_name};
# }
# }
# }
my $parsed = $self->__make_tree (\$item_body, $debug);
# warn Dumper($parsed);
if ( ref $parsed && exists $parsed->{1} && exists $parsed->{1}{children} && ref $parsed->{1}{children} eq 'ARRAY' ) {
foreach my $tag ( @{ $parsed->{1}{children} } ) {
if ( ref $tag->{children} eq 'ARRAY' && scalar @{ $tag->{children} } ) {
my %params;
foreach my $it ( @{ $tag->{children} } ) {
next unless $it->{text};
if ( exists $params{$it->{type}} && ref $params{$it->{type}} eq 'ARRAY' ) {
push @{ $params{$it->{type}} }, $it->{text};
} elsif ( exists $params{$it->{type}} ) {
my @arr = ( $params{$it->{type}}, $it->{text} );
$params{$it->{type}} = \@arr;
} else {
$params{$it->{type}} = $it->{text};
}
}
if ( exists $item{$tag->{type}} && ref $item{$tag->{type}} eq 'ARRAY' ) {
push @{ $item{$tag->{type}} }, \%params;
} elsif ( exists $item{$tag->{type}} ) {
my @arr = ( $item{$tag->{type}}, \%params );
$item{$tag->{type}} = \@arr;
} else {
$item{$tag->{type}} = \%params;
}
} else {
my $body = $tag->{text} || $tag->{params};
if ( exists $item{$tag->{type}} && ref $item{$tag->{type}} eq 'ARRAY' ) {
push @{ $item{$tag->{type}} }, $body;
} elsif ( exists $item{$tag->{type}} ) {
my @arr = ( $item{$tag->{type}}, $body );
$item{$tag->{type}} = \@arr;
} else {
$item{$tag->{type}} = $body;
}
}
}
}
# warn Dumper(\%item);
return \%item;
}
sub __parse_item_ATOM {
my ($self, $item_body, $debug) = @_;
return undef unless $item_body;
my %item;
my $embedded = $self->__item_cut_rss_embedded(\$item_body);
if ( ref $embedded ) {
%item = %$embedded;
}
if ( exists $item{summary} ) {
$item{description} = delete $item{summary};
} else {
my $summary = $self->__item_cut_atom_summary(\$item_body);
$item{description} = $summary if $summary;
}
my $content = $self->__item_cut_atom_content(\$item_body);
if ( $content && $item{description} ) {
$item{fulltext} = $content;
} elsif ( $content ) {
$item{description} = $content;
}
my $one_string_elements = $self->__item_cut_single_elements (\$item_body);
# warn Dumper ($one_string_elements);
if ( ref $one_string_elements eq 'ARRAY' && @$one_string_elements ) {
foreach my $elem ( @$one_string_elements ) {
my ($elem_name) = keys %$elem if ref $elem eq 'HASH';
if ( exists $item{$elem_name} && ref $item{$elem_name} eq 'ARRAY' ) {
push @{$item{$elem_name}}, $elem->{$elem_name};
} elsif ( exists $item{$elem_name} ) {
my @arr = ($item{$elem_name}, $elem->{$elem_name});
$item{$elem_name} = \@arr;
} else {
$item{$elem_name} = $elem->{$elem_name};
}
if ( exists $elem->{$elem_name}{type} && $elem->{$elem_name}{type} =~ /^image/ ) {
my $enclosure = { url => $elem->{$elem_name}{href} || $elem->{$elem_name}{url}, type => $elem->{$elem_name}{type} };
if ( exists $item{enclosure} && ref $item{enclosure} eq 'ARRAY' ) {
push @{ $item{enclosure} }, $enclosure;
} elsif ( exists $item{enclosure} ) {
my @arr = ($item{enclosure}, $enclosure);
$item{enclosure} = \@arr;
} else {
$item{enclosure} = $enclosure;
}
}
if ( exists $elem->{$elem_name}{type} && $elem->{$elem_name}{type} =~ /^video/ ) {
my $enclosure = { url => $elem->{$elem_name}{href} || $elem->{$elem_name}{url}, type => $elem->{$elem_name}{type} };
if ( exists $item{enclosure} && ref $item{enclosure} eq 'ARRAY' ) {
push @{ $item{enclosure} }, $enclosure;
} elsif ( exists $item{enclosure} ) {
my @arr = ($item{enclosure}, $enclosure);
$item{enclosure} = \@arr;
} else {
$item{enclosure} = $enclosure;
}
}
}
}
my $parsed = $self->__make_tree (\$item_body, $debug);
# warn Dumper($parsed);
if ( ref $parsed && exists $parsed->{1} && exists $parsed->{1}{children} && ref $parsed->{1}{children} eq 'ARRAY' ) {
foreach my $tag ( @{ $parsed->{1}{children} } ) {
if ( ref $tag->{children} eq 'ARRAY' && scalar @{ $tag->{children} } ) {
my %params;
foreach my $it ( @{ $tag->{children} } ) {
next unless $it->{text};
if ( exists $params{$it->{type}} && ref $params{$it->{type}} eq 'ARRAY' ) {
push @{ $params{$it->{type}} }, $it->{text};
} elsif ( exists $params{$it->{type}} ) {
my @arr = ( $params{$it->{type}}, $it->{text} );
$params{$it->{type}} = \@arr;
} else {
$params{$it->{type}} = $it->{text};
}
}
if ( exists $tag->{params} && ref $tag->{params} eq 'HASH' ) {
while ( my ($param, $value) = each %{ $tag->{params} } ) {
if ( exists $params{$param} && ref $params{$param} eq 'ARRAY' ) {
push @{ $params{$param} }, $value;
} elsif ( exists $params{$param} ) {
my @arr = ( $params{$param}, $value );
$params{$param} = \@arr;
} else {
$params{$param} = $value;
}
}
}
if ( exists $item{$tag->{type}} && ref $item{$tag->{type}} eq 'ARRAY' ) {
push @{ $item{$tag->{type}} }, \%params;
} elsif ( exists $item{$tag->{type}} ) {
my @arr = ( $item{$tag->{type}}, \%params );
$item{$tag->{type}} = \@arr;
} else {
$item{$tag->{type}} = \%params;
}
} else {
my $body = $tag->{text} || $tag->{params};
if ( exists $item{$tag->{type}} && ref $item{$tag->{type}} eq 'ARRAY' ) {
push @{ $item{$tag->{type}} }, $body;
} elsif ( exists $item{$tag->{type}} ) {
my @arr = ( $item{$tag->{type}}, $body );
$item{$tag->{type}} = \@arr;
} else {
$item{$tag->{type}} = $body;
}
}
}
my $pubDate = exists $item{issued} ? $item{issued} : exists $item{modified} ? $item{modified} : undef;
$item{pubdate} = $pubDate if $pubDate;
}
# warn Dumper(\%item);
return \%item;
}
sub __make_tree {
my ($self, $content, $debug) = @_;
my @elems = split (//,$$content);
# warn "CONTENT: [$$content]\n\n";
my $id = 1;
my $level = 0;
my @stack;
my %tree = (
root => {
id => $id++,
text => '',
type => 'root',
children=> [],
parent => undef,
level => $level,
},
);
my %elem_hash = ( 1 => $tree{root} );
my $current = $tree{root};
while ( @elems ) {
if ( $elems[0] eq '<' && $elems[1] =~ /[\!a-zA-Z]/ ) {
my $tag = $self->__try_tag (\@elems);
if ( ref $tag && $tag->{type} eq 'text' ) {
$current->{text} .= $tag->{content};
splice @elems, 0, $tag->{count};
# warn "Tag: [".$current->{type}."]\n Text added:[".$tag->{content}."]\n";
} elsif ( ref $tag && exists $tag->{closed} && $tag->{closed} ) {
$tag->{id} = $id++;
$tag->{parent} = $current;
$tag->{level} = $level+1;
$elem_hash{$tag->{id}} = $tag;
push @{$current->{children}}, $tag;
splice @elems, 0, $tag->{count};
# warn "Tag: [".$current->{type}."]\n Text added:[".$tag->{content}."]\n";
} elsif ( ref $tag ) {
$tag->{id} = $id++;
$tag->{children} = [];
$tag->{parent} = $current;
$tag->{level} = ++$level;
$elem_hash{$tag->{id}} = $tag;
push @{$current->{children}}, $tag;
push @stack, $current;
$current = $tag;
splice @elems, 0, $tag->{count};
# warn "Tag: [".$current->{type}."]\n Text added:[".$tag->{content}."]\n";
} else {
# warn "!!!! Error: RSS analyse. Job on item broken... !!!!\n" if $debug;
return undef;
}
} elsif ( $elems[0] eq '<' && $elems[1] =~ /\// ) {
my $tag = $self->__try_end (\@elems);
if ( ref $tag && $tag->{type} eq 'text' ) {
$current->{text} .= $tag->{content};
$current->{count} += $tag->{count};
splice @elems, 0, $tag->{count};
} elsif ( ref $tag ) {
if ( $current->{type} ne $tag->{type} ) {
# warn "!!!!Wrong tag type for closing. It's [$tag->{type}]. It must be [$current->{type}]!!!!\n" if $debug;
return undef;
} else {
$current = pop @stack;
$level = $current->{level};
# warn "Text place: [".substr($current->{text}, 0, 20)."]\n" if exists $current->{text};
# warn "Close type: /$tag->{type}. Level: $level. Stack depth: ".scalar(@stack)."\n";
}
splice @elems, 0, $tag->{count};
} else {
# warn "!!!! Error: HTML analyse. Job broken... !!!!\n" if $debug;
return undef;
}
} else {
$current->{text} .= shift @elems;
$current->{count}++;
}
}
return (\%elem_hash);
}
sub __try_tag {
my ($self, $content) = @_;
my $i = 1;
my %tag;
my $tag = $content->[0];
if ( $content->[$i] eq '!' ) {
# warn "What? Think it's CDATA\n";
my $try_cdata = join '', @$content[1..8];
if ( $try_cdata eq '![CDATA[' ) {
$tag = '';
$i = 9;
while ( !($content->[$i-1] eq '>' && $content->[$i-2] eq ']' && $content->[$i-3] eq ']') && $i < scalar @$content ) {
$tag .= $content->[$i];
$i++;
}
chop $tag; chop $tag; chop $tag;
}
# warn "CDATA Found: [$tag]";
return {
type => 'text',
content => $tag,
count => $i,
};
}
while ( $content->[$i] ne '<' && $content->[$i] ne '>' && $i < scalar @$content ) {
$tag .= $content->[$i];
$i++;
}
if ( $content->[$i] eq '<' || $i >= scalar @$content ) {
return {
type => 'text',
content => $tag,
count => $i,
};
} else {
if ( $tag =~ /^<([\w:-]+)\s*(.*)/si ) {
my $elem_name = $1;
my $elem_body = $2;
unless ( $self->__is_valid_tag ($elem_name) ) {
return {
type => 'text',
content => $tag,
count => $i,
};
} else {
my $params = $self->__parse_params ($elem_body) if $elem_body;
if ( $content->[$i] eq '>' && $content->[$i-1] eq '/' ) {
$tag{closed} = 1;
}
$tag{type} = lc($elem_name);
$tag{count} = $i+1;
$tag{params} = $params if ref $params;
return \%tag;
}
} else {
return {
type => 'text',
content => $tag,
count => $i,
};
}
}
}
sub __try_end {
my ($self, $content) = @_;
my $i = 2;
my %tag;
my $tag = $content->[0].$content->[1];
while ( $content->[$i] ne '<' && $content->[$i] ne '>' && $i < scalar @$content ) {
$tag .= $content->[$i];
$i++;
}
if ( $content->[$i] eq '<' || $i >= scalar @$content ) {
return {
type => 'text',
content => $tag,
count => $i,
};
} else {
if ( $tag =~ /^<\/([\w:-]+)/i ) {
my $elem_name = $1;
unless ( $self->__is_valid_tag ($elem_name) ) {
return {
type => 'text',
content => $tag,
count => $i,
};
} else {
$tag{type} = lc($elem_name);
$tag{count} = $i+1;
return \%tag;
}
} else {
return {
type => 'text',
content => $tag,
count => $i,
};
}
}
}
sub __is_valid_tag {
my ($self, $tag) = @_;
foreach my $invtag ( @INVALID_TAGS ) {
return 0 if lc($invtag) eq lc($tag);
}
return 1;
}
sub __item_cut_atom_content {
my ($self, $item_body) = @_;
my %elem;
if ( $$item_body =~ /]*?)>(.*?)<\/content([^>]*)>/si ) {
my $content_params = $1;
my $content_body = $2;
my $params = $self->__parse_params ($content_params) if $content_params;
$$item_body =~ s/]*?)>(.*?)<\/content([^>]*)>//si;
return $content_body;
}
}
sub __item_cut_atom_summary {
my ($self, $item_body) = @_;
my %elem;
if ( $$item_body =~ /]*)>(.*?)<\/summary([^>]*)>/si ) {
my $content_params = $1;
my $content_body = $2;
my $params = $self->__parse_params ($content_params) if $content_params;
$$item_body =~ s/]*)>(.*?)<\/summary([^>]*)>//si;
return $content_body;
}
}
sub __item_cut_rss_description {
my ($self, $item_body) = @_;
my %elem;
if ( $$item_body =~ /]*?)>(.*?)<\/description([^>]*)>/si ) {
my $content_params = $1;
my $content_body = $2;
my $params = $self->__parse_params ($content_params) if $content_params;
$$item_body =~ s/]*?)>(.*?)<\/description([^>]*)>//si;
return $content_body;
}
}
sub __item_cut_rss_embedded {
my ($self, $item_body) = @_;
my %elem;
while ( $$item_body =~ /<([^>]*?)>\s*\s*<\/([^>]*)>/sgi ) {
my $tag = $3;
my $content_body = $2;
my $content_params = $1;
if ( $content_params =~ /([\w:-]+)\s+(.*)/ ) {
$tag = 1;
$content_params = $2;
}
my $params = $self->__parse_params ($content_params) if $content_params;
$elem{$tag} = $content_body;
$$item_body =~ s/<$tag([^>]*?)>(.*?)<\/$tag([^>]*)>//si;
}
return scalar keys %elem ? \%elem : undef;
}
sub __item_cut_single_elements {
my ($self, $item_body) = @_;
my @elems;
while ( $$item_body =~ /<([\w\:\-]+)\s*([^>]*?)\/>/sgi ) {
my $elem_name = $1;
my $elem_body = $2;
my $params = $self->__parse_params ($elem_body) if $elem_body;
if ( $elem_name && ref $params ) {
push @elems, { $elem_name => $params }
}
}
$$item_body =~ s/<(\w+)\s*([^>]*?)\/>//sgi;
return ( @elems ? \@elems : undef );
}
sub __field_prepare {
my ($self, $text) = @_;
return unless $text;
for ( $text ) {
s/^[\n\r\ \t]+//;
s/[\n\r\ \t]+$//;
s/\[(\/?strong)\]/<$1>/sgi;
s/\[(\/?em)\]/<$1>/sgi;
s/\[(\/?[sib])\]/<$1>/sgi;
}
$self->__cdata (\$text);
$self->__extchar (\$text);
# $text = HTML::Entities::decode_entities($text);
# Remove linebreaks inside incorrectly breaked paragraphs
if (length($text) > 100) {
my $pcount = 0;
while ($text =~ /
(.+?)(?=<\/?p>|$)/sgi) {
my $p = $1;
if (length $p > 50) {
my ($dcount, $ndcount) = (0,0);
# Count sentences normally ended vs breaked
$dcount++ while $p =~ /(\.|\?|\!)['"]?\s*[\r\n]+/g;
$ndcount++ while $p =~ /([^\.\?\!\s])\s*[\r\n]+/g;
# Found broken paragraph
last if $ndcount > $dcount and ++$pcount > 1;
}
}
if ($pcount > 0) {
$text =~ s/[\n\r]+/ /sg;
}
}
$text =~ s/
]*>/\n/sgi;
$text =~ s/
/\n\n/sgi;
$text =~ s/<\/p\s*>//sgi;
# $text = Contenido::Parser::Util::strip_html($text);
# $text = Contenido::Parser::Util::text_cleanup($text);
return $text;
}
sub __extchar {
my ($self, $textref) = @_;
for ( $$textref ) {
s/&/\&/sg;
s/\&/\&/sgi;
s/\&/\&/sgi;
s/\<//sgi;
s/\"/"/sgi;
s/\«/«/sg;
s/\»/»/sg;
s/\£/£/sg;
s/\/–/sg;
s/\/—/sg;
s/\/"/sg;
s/\/"/sg;
s/\/"/sg;
s/\´/'/sg;
s/\
/\.\.\./sg;
s/\
/\n/sg;
s/\"/"/sg;
}
# $$textref =~ s/(\d+);/{''.__normalise($1).';'}/eg;
# $$textref =~ s/«/«/sgi;
# $$textref =~ s/»/»/sgi;
# $$textref =~ s/©/©/sgi;
# $$textref =~ s/–/–/sgi;
# $$textref =~ s/—/—/sgi;
# $$textref =~ s/°/º/sgi;
# $$textref =~ s/ /\x20/sgi;
}
sub __normalise {
my $chr = shift;
return sprintf("%04d",$chr);
}
sub __cdata {
my ($self, $textref) = @_;
if ( $$textref =~ /^<\!\[CDATA\[/ ) {
$$textref =~ s/<\!\[CDATA\[//sgi;
$$textref =~ s/\]\]>//sgi;
}
}
sub __parse_rools {
my ($self, $rools) = @_;
return unless $rools;
$rools =~ s/\r//sgi;
my @rools = split /\n/, $rools;
return unless @rools;
my @parsed;
foreach my $rool ( @rools ) {
my %pr;
next if $rool =~ /^#/;
$rool =~ s/[\x20\t]+$//;
$rool =~ s/^[\x20\t]+//;
if ( $rool =~ /^([\w']+)\s+(.*)$/ || $rool =~ /^(\w+)(.*)$/ ) {
$pr{action} = lc($1);
my $params = $2;
if ( $pr{action} eq 'use' && $params =~ /^(current)\s+(date)$/ ) {
$pr{action} = 'rewrite';
$pr{target} = 'pubdate';
$pr{command} = 'set';
$pr{condition} = 'CURRENT_DATETIME';
push @parsed, \%pr;
} elsif ( $params =~ /^(\w+)\s+(.*)$/ ) {
$pr{target} = lc($1);
$params = $2;
if ( $params =~ /^([\w=]+)\s+(.*)$/ ) {
$pr{command} = lc($1);
$params = $2;
if ( $pr{action} eq 'filter' && $pr{command} eq 'cut' && $params =~ /^(\w+)\s+(.*)$/ ) {
$pr{subcommand} = lc($1); $params = $2;
next unless $pr{subcommand} =~ /^(untill|till|from|off|regex)$/;
$params =~ s|([*+?/\\\|])|\\$1|sg unless $pr{subcommand} eq 'regex';
$pr{condition} = $params;
} elsif ( $pr{action} eq 'filter' && $pr{command} eq 'regex' && substr($params,0,1) eq substr($params,-1,1) && substr($params,0,1) =~ /([\/\#\|])/ ) {
my $delim = $1;
$params = substr($params,1,length($params)-2);
my @params = split(//,$params);
my ($from, $to) = ('','');
my $prev = '';
while ( @params ) {
my $ch = shift @params;
if ( $ch eq $delim && $prev ne '\\' ) {
last;
} else {
$prev = $ch;
$from .= $ch;
}
}
$to = join ('', @params);
$pr{condition} = { from => $from, to => $to };
} elsif ( ($pr{action} eq 'ignore' || $pr{action} eq 'only') && $pr{command} =~ /^(regex|=|contain|contains)$/ ) {
$params =~ s|([*+?/\\\|])|\\$1|sg unless $pr{subcommand} eq 'regex';
$pr{condition} = $params;
} else {
next;
}
push @parsed, \%pr;
}
}
}
}
return ( scalar @parsed ? \@parsed : undef );
}
sub __parse_related {
my ($self, $related) = @_;
return unless ref $related eq 'HASH';
return unless exists $related->{url} && $related->{url} =~ /^http:\/\//i;
return unless exists $related->{rel} && $related->{rel} =~ /(news|discussion|teaser)/;
my $result = { url => $related->{url}, rel => $related->{rel} };
$result->{type} = $related->{type} if exists $related->{type};
$result->{title} = $self->__field_prepare($related->{title}) if exists $related->{title} && $related->{title};
$result->{author} = $self->__field_prepare($related->{author}) if exists $related->{author} && $related->{author};
$result->{description} = $self->__field_prepare($related->{description}) if exists $related->{description} && $related->{description};
if ( exists $related->{pubdate} && $related->{pubdate} ) {
my $pubdate = Class::Date::localdate(Date::Parse::str2time($related->{pubdate}));
$result->{pubdate} = $pubdate if $pubdate;
}
if ( $related->{rel} =~ /(news|teaser)/ ) {
return undef unless $result->{title} && $result->{pubdate};
} else {
$result->{title} ||= 'Обсудить';
}
if ( exists $related->{image} && $related->{image} ) {
if ( ref $related->{image} eq 'HASH' && (exists $related->{image}{url} || exists $related->{image}{href}) ) {
my $img = rchannel::Image->new( { src => ($related->{image}{url} || $related->{image}{href}) } );
$result->{image} = $img if ref $img;
} elsif ( !ref $related->{image} ) {
my $img = rchannel::Image->new( { src => $related->{image} } );
$result->{image} = $img if ref $img;
}
}
return $result;
}
sub __parse_date {
my $self = shift;
my $str = shift;
if ($str=~/(\d{2})(\d{2})(\d{4})T(\d{2})(\d{2})(\d{2})/){
return parsedate ("$3-$2-$1 $4:$5:$6");
} elsif ($str=~/(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}:\d{2})/){
return parsedate ("$1 $2");
} else {
return parsedate($str);
}
}
# TODO IMAGES:
# enclosure
# media:content
# media:thumbnail
# image
# img
# FOUDNED:
# author
# category
# comments
# content
# content:encoded
# content:format
# dc:creator
# dc:date
# dc:rights
# dc:subject
# description
# enclosure
# feedburner:awareness
# feedburner:origLink
# full-text
# fulltext
# guid
# guide
# habrahabr:ballsCount
# habrahabr:commentsCount
# id
# image
# img
# link
# media:content
# media:thumbnail
# pdalink
# pubDate
# pubdate
# pubid
# published
# rambler:full-text
# rambler:fulltext
# region
# section
# sections
# source
# sport
# summary
# text
# title
# updated
# wfw:commentRSS
# wfw:commentRss
# wmj:fulltext
# yandex:full-text
1;