Line # Revision Author
1 98 ahitrov package Contenido::Parser::Util;
2
3 use strict;
4
# Strip, in place, the code points that XML 1.0 does not allow in a document
# (see the Char production: http://www.w3.org/TR/REC-xml/#NT-Char).
#
# Argument: a reference to the scalar to clean; the referenced string is
#           modified directly.
# Returns:  the result of the substitution, i.e. the number of characters
#           removed, or a false value when nothing was removed.
sub clean_invalid_chars {
    my ($cont_ref) = @_;

    # Removed: the C0 controls other than TAB (\x09), LF (\x0A) and CR (\x0D),
    # the surrogate range, and U+FFFE / U+FFFF.
    # The class must not contain '|': inside [...] it is a literal pipe, not
    # alternation, so it would delete every '|' from the text.
    return $$cont_ref =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F\x{D800}-\x{DFFF}\x{FFFE}\x{FFFF}]//g;
}
9
# Normalize a block of free text into cleanly separated paragraphs.
#
# Arguments:
#   $text  - the text to clean up (an undefined value is treated as '').
#   $delim - optional string placed between paragraphs in the result;
#            defaults to "\n\n" when omitted or false.
# Returns: the paragraphs joined with $delim. Empty paragraphs are dropped,
#          every line is trimmed, and runs of blanks become a single space.
sub text_cleanup {
    my $text  = shift;
    my $delim = shift || "\n\n";

    $text = '' unless defined $text;

    # Trim the text as a whole, then normalize Windows line endings.
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/\r\n/\n/g;

    # If the text has blank-line separated paragraphs, split only on those;
    # otherwise treat every line as its own paragraph.
    my @paragfs = $text =~ /\n{2,}/
        ? split(/\n{2,}/, $text)
        : split(/\n+/,    $text);

    for my $parag (@paragfs) {
        $parag =~ s/^\s+//mg;           # trim leading whitespace on each line
        $parag =~ s/\s+$//mg;           # trim trailing whitespace on each line
        $parag =~ s/[[:blank:]]+/ /g;   # collapse runs of spaces/tabs
    }

    # Join with the caller's delimiter; it was previously accepted but
    # ignored in favour of a hard-coded "\n\n".
    return join $delim, grep { length $_ } @paragfs;
}
28
29 1;