Index: Parser.pm =================================================================== --- Parser.pm (revision 141) +++ Parser.pm (revision 142) @@ -42,13 +42,17 @@ $self->{headers} = $res->headers; my $content_length = $res->headers->header('content-length'); my $content_type = $res->headers->header('content-type'); + my $headers_string = $res->headers->as_string; +# warn $res->content_type_charset."\n\n"; +# warn Dumper($res->headers) if $DEBUG; $self->{content_type} = $content_type; - if ( $content_type =~ /charset\s*=\s*([a-z\d\-]+)/i ) { - $encoding = $1; + if ( $res->content_type_charset ) { + $encoding = Encode::find_encoding($res->content_type_charset)->name; } my $base_url = $input =~ /^([a-z]+:\/\/[a-z\.\d]+)/ ? $1 : ''; $self->{base_url} = $base_url if $base_url; - $content = $res->content; + $content = $res->decoded_content( charset => 'none' ); +# warn "Charset: ".$res->content_charset."\n"; } else { warn $res->status_line." \n"; $self->{success} = 0; @@ -73,26 +77,64 @@ $content = <$fh>; } if ( $content ) { - unless ( $encoding ) { - $encoding = $self->__try_content_encoding( substr($content, 0, 350) ); - } - if ( $encoding && $encoding ne 'utf-8' ) { - warn "Encoding from $encoding\n..." if $DEBUG; - Encode::from_to($content, $encoding, 'utf-8'); - if ( exists $self->{headers} ) { - foreach my $header ( keys %{$self->{headers}} ) { - if ( ref $self->{headers}{$header} eq 'ARRAY' ) { - foreach my $val ( @{$self->{headers}{$header}} ) { - Encode::from_to($val, $encoding, 'utf-8'); + warn "starting content decoding...\n"; + if ( exists $self->{headers} && ref $self->{headers} && ($self->{headers}->content_is_html || $self->{headers}->content_is_xhtml || $self->{headers}->content_is_xml) ) { + unless ( $encoding ) { + $encoding = $self->__try_content_encoding( substr($content, 0, 350) ); + } + if ( $encoding && $encoding ne 'utf-8' && $encoding ne 'utf-8-strict' ) { + warn "Encoding from $encoding\n..." if $DEBUG; + Encode::from_to($content, $encoding, 'utf-8'); + if ( exists $self->{headers} ) { + foreach my $header ( keys %{$self->{headers}} ) { + if ( ref $self->{headers}{$header} eq 'ARRAY' ) { + foreach my $val ( @{$self->{headers}{$header}} ) { + Encode::from_to($val, $encoding, 'utf-8'); + } + } else { + Encode::from_to($self->{headers}{$header}, $encoding, 'utf-8'); } - } else { - Encode::from_to($self->{headers}{$header}, $encoding, 'utf-8'); } } + } else { +# Encode::_utf8_off($content); + if ( exists $self->{headers} ) { + foreach my $header ( keys %{$self->{headers}} ) { + if ( ref $self->{headers}{$header} eq 'ARRAY' ) { + foreach my $val ( @{$self->{headers}{$header}} ) { + Encode::_utf8_off($val); + } + } else { + warn "Test: ".$self->{headers}{$header}.": check flag: ".Encode::is_utf8($self->{headers}{$header}).". check: ".Encode::is_utf8($self->{headers}{$header},1)."\n"; + if ( Encode::is_utf8($self->{headers}{$header}) && Encode::is_utf8($self->{headers}{$header},1) ) { + Encode::_utf8_off($self->{headers}{$header}); +# Encode::_utf8_on($self->{headers}{$header}); +# $self->{headers}{$header} = Encode::encode('utf8', $self->{headers}{$header}, Encode::FB_QUIET); +# Encode::from_to($self->{headers}{$header}, $encoding, 'utf8'); + } + } + } + } } + $self->{encoding} = $encoding; + warn Dumper($self) if $DEBUG; + if ( $self->{headers}->content_is_html ) { + my $headers; + if ( $content =~ /