Line # Revision Author
1 99 ahitrov package Contenido::Parser;
2
3 use strict;
4 use warnings;
5 use locale;
6
7 use Encode;
8 use URI;
9 use Data::Dumper;
10 use Contenido::Globals;
11 use LWP::UserAgent;
12 use Contenido::File::Scheme::FILE;
13 use Contenido::Parser::Util;
14
15 sub fetch {
16 my ($self, $input, %opts) = @_;
17
18 my ($fh, $content);
19 my $encoding = delete $opts{encoding};
20 if (not ref $input) {
21 no strict "refs";
22 my $scheme = uc(scheme($input));
23 if ( $scheme eq 'FILE' ) {
24 $fh = &{"Contenido::File::Scheme::".uc(scheme($input))."::get_fh"}($input);
25 } else {
26 my $request = new HTTP::Request GET => $input;
27 my $ua = new LWP::UserAgent;
28 $ua->timeout(10);
29 my $res = $ua->request($request);
30 if ($res->is_success) {
31 $self->{headers} = $res->headers;
32 my $content_length = $res->headers->header('content-length');
33 my $content_type = $res->headers->header('content-type');
34 $self->{content_type} = $content_type;
35 if ( $content_type =~ /charset\s*=\s*([a-z\d\-]+)/i ) {
36 $encoding = $1;
37 }
38 my $base_url = $input =~ /^([a-z]+:\/\/[a-z\.\d]+)/ ? $1 : '';
39 $self->{base_url} = $base_url if $base_url;
40 $content = $res->content;
41 } else {
42 warn $res->status_line." \n";
43 $self->{success} = 0;
44 $self->{reason} = $res->status_line;
45 return $self;
46 }
47 }
48 } elsif ((ref $input eq "GLOB") or (ref $input eq 'Apache::Upload') or (ref $input eq 'IO::File')) {
49 $fh = $input;
50 } elsif (ref $input eq "SCALAR") {
51 $fh = IO::Scalar->new($input);
52 } else {
53 warn("Path, scalar ref or fh needed");
54 $self->{success} = 0;
55 $self->{reason} = 'Path, scalar ref or fh needed';
56 return $self;
57 }
58
59 if ( ref $fh ) {
60 $content = <$fh>;
61 }
62 if ( $content ) {
63 warn Dumper($self);
64 unless ( $encoding ) {
65 $encoding = $self->__try_content_encoding( substr($content, 0, 350) );
66 $self->{encoding} = $encoding;
67 if ( $encoding && $encoding ne 'utf-8' ) {
68 Encode::from_to($content, $encoding, 'utf-8');
69 }
70 }
71 $self->{content} = $content;
72 $self->{success} = 1;
73 } else {
74 $self->{success} = 0;
75 $self->{reason} = 'Content is empty';
76 }
77 return $self;
78 }
79
80 sub is_success {
81 my ($self, $val) = @_;
82
83 if ( defined $val ) {
84 $self->{success} = $val;
85 return $self;
86 } else {
87 return $self->{success};
88 }
89 }
90
91 sub __try_content_encoding {
92 my ($self, $input)= @_;
93 if ( $input =~ /encoding[\ ]?=[\ ]?[\"\']?([a-z\-\d]+)/i ) {
94 return lc($1);
95 } elsif ( $input =~ /(utf-8|windows-1251|koi8-r)/i ) {
96 return lc($1);
97 } else {
98 return undef;
99 }
100 }
101
102 sub scheme {
103 my $uri = shift;
104 my $scheme;
105
106 $scheme = URI->new($uri)->scheme() || "file";
107
108 return $scheme;
109 }
110
111
112 1;