Repository List / Contenido / koi8 / core / ports / all / Jevix / MakeupKOI.pm @ r3

<!--#include virtual="/inc/header.html?context=svn"-->      <link rel="stylesheet" type="text/css" href="/css/trac/svnweb.css" />   <div>  <div id="navpath">
	<h1><a class="first" href="/">Repository List</a> /   <a href="/Contenido/">Contenido</a>    <span class="sep">/</span>    <a href="/Contenido/browse/koi8/?rev=3">koi8</a>     <span class="sep">/</span>    <a href="/Contenido/browse/koi8/core/?rev=3">core</a>     <span class="sep">/</span>    <a href="/Contenido/browse/koi8/core/ports/?rev=3">ports</a>     <span class="sep">/</span>    <a href="/Contenido/browse/koi8/core/ports/all/?rev=3">all</a>     <span class="sep">/</span>    <a href="/Contenido/browse/koi8/core/ports/all/Jevix/?rev=3">Jevix</a>      <span class="sep">/</span> MakeupKOI.pm    @ r3</h1>
      </div>  <div id="language-selection">
        <form action="/Contenido/view/koi8/core/ports/all/Jevix/MakeupKOI.pm">
          <select name="lang" onchange="this.form.submit();">             <option value="en" selected="selected">English</option>  </select>   <input type="hidden" name="rev" value="3" />  <noscript>
            <input type="submit" value="Go" />
          </noscript>
        </form>
      </div>
    </div>

    <div id="content">
                            <div class="actions">
  <ul>  <li><a href="/Contenido/log/koi8/core/ports/all/Jevix/MakeupKOI.pm?rev=3">View Revision Log</a></li>  <li><a href="/Contenido/blame/koi8/core/ports/all/Jevix/MakeupKOI.pm?rev=3">Blame/Annotate</a></li>  <li><a href="/Contenido/checkout/koi8/core/ports/all/Jevix/MakeupKOI.pm?rev=3">Checkout</a></li>  </ul>
</div>         <table id="info" summary="Revision Log">
  <tr>
    <th scope="row"> Revision <a href="/Contenido/revision?rev=3">3</a> (by ahitrov@rambler.ru, 2010/03/24 15:19:32)</th>
    <td class="message">The CORE<br />
</td>
  </tr>
</table>

<div id="preview">  <pre class="code-block">package Jevix::Makeup;

# ==========================================
#
#  Jevix Version 0.9.5 (windows-1251)
#
#  Developed by Igor Askarov
# 
#  Please send all suggestions to
#  Igor Askarov &lt;juks@juks.ru&gt;
#  http://www.jevix.ru/
#
#  Release date: 21/01/2008
#
# === Methods list==========================
#
#  new 						the constructor
#  process 					entry sub
#  preset 					presets selector
#  makeup 					makeup the text
#  quotes 					quotes processor
#  cuttags 					tags processor
#  tagend					looking for the tag end
#  planttags 					sub to bring the tags back
#  vanish 					sub to remove all the stuff and bring the text to plain mode
#  parseTagsAllowString     			parse the tagsAllow string to hash
#  parseTagsDenyString      			parse the tagsDeny string to hash
#
# ==========================================

use strict;
use warnings;

# Length of one tag placeholder inside $strip: 2 lead-in bytes, 3 flag bytes, 2 lead-out bytes
my $markLength = 7;
# Working copy of the text; in HTML mode each kept tag is replaced by a placeholder marker
my $strip;
# Hash returned by process(): error flag, errorLog list and the final text
my $result;
# Array ref of the tags cut out of the text (name, body, content), in the order they were found
my $tags;
# Configuration hash in effect for the current process() call
my $conf;

# Tags treated as single (no closing counterpart expected)
my @singleTags = qw/link input spacer img br hr/;
# Tags flagged as breaking; they also reset the open-nobr state in cuttags()
my @breakingTags = qw/p td div hr/;
# Tags flagged as space tags in the placeholder marker
my @spaceTags = qw/br/;
# Tags whose content is swallowed together with the opening tag
my @tagsToEat = qw/script style/;

# ==The constructor
sub new {
    # Nothing is blessed here: the invocant (normally the class name) is handed
    # straight back, and all working state lives in file-level lexicals.
    my Jevix::Makeup $invocant = $_[0];

    return $invocant;
}

# ==Here we've got the input
# Entry point. $text is a reference to the input string, $userConfig an optional
# configuration hash ref. Returns the result hash ref with the keys error,
# errorLog and text.
sub process($$$) {
    my($class, $text, $userConfig) = @_;

    # Without a caller-supplied configuration the basic preset is used
    $conf = $userConfig || {presetBasic=&gt;1};
    $class-&gt;preset();

    # Fresh working state for this call
    $strip = &quot;&quot;;
    $tags = [];
    $result = {};
    $result-&gt;{error} = 0;
    $result-&gt;{errorLog} = [];

    # In HTML mode the tags are cut out first; otherwise the text is taken as it is
    if($conf-&gt;{isHTML}) {
        $class-&gt;cuttags($text, $conf, $result);
    } else {
        $strip = $$text;
    }

    $class-&gt;quotes($conf) if($conf-&gt;{quotes});
    $class-&gt;makeup($conf);

    # In HTML mode the saved tags are put back in place of their markers
    $result-&gt;{text} = &quot;&quot;;
    if($conf-&gt;{isHTML}) {
        $class-&gt;planttags($result);
    } else {
        $result-&gt;{text} = $strip;
    }

    return $result;
}

# ==Choosing default setup when necessary
# Fills the file-level configuration with the basic preset and converts string
# forms of tagsAllow / tagsDeny into hashes. Values the caller already defined
# are never overwritten.
sub preset($$) {
    my ($class) = @_;

    if(!$conf || $conf-&gt;{presetBasic}) {
        # Option name / default value pairs of the basic preset
        my @basicDefaults = (
            [isHTML               =&gt; 1],     # HTML mode
            [lineBreaks           =&gt; 1],     # Linebreaks to &lt;br/&gt;
            [paragraphs           =&gt; 0],     # Paragraphs
            [dashes               =&gt; 1],     # Replace hyphens with dashes when necessary
            [dots                 =&gt; 1],     # Replace 3 dots with a symbol
            [edgeSpaces           =&gt; 1],     # Wipe edge space characters
            [multiSpaces          =&gt; 1],     # Wipe multispaces
            [redundantSpaces      =&gt; 1],     # Wipe redundant spaces
            [compositeWordsLength =&gt; 10],    # The maximum length of composite word to be put inside &lt;nobr&gt;
            [tagLf                =&gt; 1],     # Wipe crs and lfs after dropped tag
            [nbsp                 =&gt; 1],     # Insert non-breaking spaces
            [quotes               =&gt; 1],     # Makeup quotes
            [qaType               =&gt; 0],     # Main quotes type
            [qbType               =&gt; 2],     # Nested quotes type
            [misc                 =&gt; 1],     # Misc substitutions
            [codeMode             =&gt; 2],     # The way jevix should represent html special characters
        );

        foreach my $pair (@basicDefaults) {
            my ($option, $value) = @$pair;
            $conf-&gt;{$option} = $value if(!defined($conf-&gt;{$option}));
        }
    }

    # If tagsAllow came as a string
    if(defined($conf-&gt;{tagsAllow}) &amp;&amp; !ref($conf-&gt;{tagsAllow})) {
        my $parsed = $class-&gt;parseTagsAllowString($conf-&gt;{tagsAllow});
        @$conf{qw/tagsAllow tagsDenyAllAttributes/} = @$parsed{qw/tagsAllow tagsDenyAllAttributes/};
    }

    # If tagsDeny came as a string
    if(defined($conf-&gt;{tagsDeny}) &amp;&amp; !ref($conf-&gt;{tagsDeny})) {
        $conf-&gt;{tagsDeny} = $class-&gt;parseTagsDenyString($conf-&gt;{tagsDeny});
    }
}

# ==Imposing clear text
# Applies the typographic substitutions to the file-level $strip in place.
# $conf is the configuration hash; the flags read here are codeMode, edgeSpaces,
# tagSpaces, multiSpaces, redundantSpaces, nbsp, compositeWords,
# compositeWordsLength, dots, dashes, misc, paragraphs and lineBreaks.
# Tags are expected to be present only as 7-byte placeholder markers
# (\x00\x0F, three flag bytes, \x0F\x00); the patterns below select markers by
# their flag bytes.
sub makeup($$) {
    my ($class, $conf) = @_;
    
    # ==Misc
    # Prepositions
    my $prp_rus = &quot;�|���|����|�|���|��|��|���|��|��|�|��|���|���|�|���|��|���|����|��|��|��|�|��|���|�����|��|���|��|���|����|���|���|�|������|��|�|�����&quot;;
    my $prp_eng = &quot;aboard|about|above|absent|across|after|against|along|alongside|amid|amidst|among|amongst|around|as|astride|at|atop|before|behind|below|beneath|beside|besides|between|beyond|but|by|despite|down|during|except|following|for|from|in|inside|into|like|mid|minus|near|nearest|notwithstanding|of|off|on|onto|opposite|out|outside|over|past|re|round|save|since|than|through|throughout|till|to|toward|towards|under|underneath|unlike|until|up|upon|via|with|within|without&quot;;
    my $prp = &quot;$prp_rus|$prp_eng&quot;;

    my $letters = &quot;A-Za-z�-��-ѳ���&quot;;         # Characters
    my $cap_letters = &quot;A-Z�-�&quot;;                 # Capital characters

    my $sp = &quot; \xA0\t&quot;;                               # space class
    my $rt = &quot;\r?\n&quot;;                                 # cr class

    my $br = &quot;\x00\x0F.[\x01\x03].\x0F\x00&quot;;          # br tag
    my $pt = &quot;\x00\x0F.[\x02].\x0F\x00&quot;;              # Paragraph tag
    my $ps = &quot;\x00\x0F.[\x02][\x01\x03]\x0F\x00&quot;;     # Paragraph start
    my $pe = &quot;\x00\x0F.[\x02][\x02\x00]\x0F\x00&quot;;     # Paragraph end
    my $to = &quot;\x00\x0F..[\x03\x01]\x0F\x00&quot;;          # Opening tag
    my $tc = &quot;\x00\x0F..[\x02\x00]\x0F\x00&quot;;          # Closing tag
    my $bb = &quot;\x00\x0F..[\x02\x03]\x0F\x00&quot;;          # Tag where &lt;nobr&gt; is open
    my $nb = &quot;\x00\x0F..[\x01\x00]\x0F\x00&quot;;          # Tag where no &lt;nobr&gt; open
    my $ts = &quot;\x00\x0F&quot;;                              # Tag start
    my $te = &quot;\x0F\x00&quot;;                              # Tag end

    my $brt = &quot;&lt;br *\/?&gt;&quot;;                            # br tag in text mode
    my $pst = &quot;&lt;p&gt;&quot;;
    my $pet = &quot;&lt;/p&gt;&quot;;

    # Codes, metasymbols or what ever?
    # codeMode false: literal characters; 1: numeric entities; anything else: named entities
    my ($cdash, $cnbsp, $cdots, $cfracs, $ccopy, $creg);
    if(!$conf-&gt;{codeMode}) {
        ($cdash, $cnbsp, $cdots, $ccopy, $creg) = (&quot;�&quot;, &quot;�&quot;, &quot;�&quot;, &quot;�&quot;, &quot;�&quot;);
        $cfracs = {'1/4'=&gt;&quot;?&quot;, '1/2'=&gt;&quot;?&quot;, '3/4'=&gt;&quot;?&quot;};
    } elsif($conf-&gt;{codeMode} == 1) {
        ($cdash, $cnbsp, $cdots, $ccopy, $creg) = (&quot;&amp;#151;&quot;, &quot;&amp;#160;&quot;, &quot;&amp;#133;&quot;, &quot;&amp;#169;&quot;, &quot;&amp;#174;&quot;);
        $cfracs = {'1/4'=&gt;&quot;&amp;#188;&quot;, '1/2'=&gt;&quot;&amp;#189;&quot;, '3/4'=&gt;&quot;&amp;#190;&quot;};
    } else {
        ($cdash, $cnbsp, $cdots, $ccopy, $creg) = (&quot;&amp;mdash;&quot;, &quot;&amp;nbsp;&quot;, &quot;&amp;hellip;&quot;, &quot;&amp;copy;&quot;, &quot;&amp;reg;&quot;);
        $cfracs = {'1/4'=&gt;&quot;&amp;frac14;&quot;, '1/2'=&gt;&quot;&amp;frac12;&quot;, '3/4'=&gt;&quot;&amp;frac34;&quot;};
    }

    # Wiping edge spaces
    if($conf-&gt;{edgeSpaces}) { $strip =~ s/^[$sp\r\n]*(.+?)[$sp\r\n]*$/$1/isg; }

    # Wiping spaces between tags (&lt;/td&gt; &lt;/tr&gt;)
    if($conf-&gt;{tagSpaces}) { $strip =~ s/($tc)[$sp]($tc)/$1$2/isg; }

    # Wiping multispaces
    if($conf-&gt;{multiSpaces}) { $strip =~ s/([$sp]){2,}/$1/ig; }

    # Wiping redundant spaces
    if($conf-&gt;{redundantSpaces}) { $strip =~ s{([$sp]+(?![:;]-[)(])([;:,.)?!]))|(\()(?&lt;![:%;]-\()[$sp]+}{$1 ? $2 : $3}eig; } 

    if($conf-&gt;{nbsp}) {
        # Prepositions with &amp;nbsp;
        $strip =~ s/(^|\x00|[$sp])($prp)[$sp]([0-9$letters])/$1$2$cnbsp$3/gm;

        # &amp;nbsp; with digits
        $strip =~ s{($nb|^)(.*?)($bb|$)}{ my ($a, $b, $c) = ($1, $2, $3); $b =~ s/([0-9]+)([$sp]+|&amp;nbsp;|&amp;#160;)(?:(?=[0-9]{2,})|(?=%))/$1$cnbsp/ig; &quot;$a$b$c&quot;; }eisg;
    }    
    
    # Put composite words inside &lt;nobr&gt;
    # Only stretches between a no-nobr marker (or text start) and the next open-nobr marker (or text end) are touched
    if($conf-&gt;{compositeWords}) { $strip =~ s{($nb|^)(.*?)($bb|$)}{ my ($a, $b, $c) = ($1, $2, $3);
						$b =~ s{(^|[$sp\x00]|&amp;nbsp;)([$letters]+(?:-[$letters]+)+)(?=$|[$sp\x00])}{
							    my $d = !defined($1) ? &quot;&quot; : $1; my $e = !defined($2) ? &quot;&quot; : $2; my $f = !defined($3) ? &quot;&quot; : $3;
							    if(length($e) &lt;= $conf-&gt;{compositeWordsLength}) { &quot;$d&lt;nobr&gt;$e&lt;\/nobr&gt;&quot; } else {&quot;$d$e$f&quot;}
							}eig; &quot;$a$b$c&quot;;
					    }eisg; }

    # Dots
    if($conf-&gt;{dots}) { $strip =~ s/\.{3}|�|&amp;hellip;/$cdots/ig; }
   
    # Dashes
    if($conf-&gt;{dashes}) {
        # Hyphen
        $strip =~ s/([^$sp])([$sp]|&amp;#160;|&amp;nbsp;)(-{1,2}|�|&amp;mdash;|&amp;#151;)/$1$cnbsp$cdash/ig;
        # &quot;Speech&quot; hyphen
        $strip =~ s/((?:^|$ps|$br|$brt(?:$rt)*|[$rt]))[$sp]*(?:&amp;nbsp;)*(-{1,2}|�|&amp;mdash;|&amp;#151;)[$sp]*(?:&amp;nbsp;)*(.)/$1$cdash$cnbsp$3/ig;
    }

    # Misc stuff
    if($conf-&gt;{misc}) {
        # Fracs
        $strip =~ s{(?:(?&lt;=[$sp\x00])|(?&lt;=^))([13])/([24])(?:(?=[$sp\x00])|(?=$))}{if(defined($cfracs-&gt;{&quot;$1/$2&quot;})) { $cfracs-&gt;{&quot;$1/$2&quot;} } else { &quot;$1/$2&quot; } }esg;
        # Copyright &amp; registered
        # NOTE(review): the pattern shows a single capture group, so the trailing else branch reads an unset $2;
        # the replacement also tests for &amp;reg;, which is not among the readable alternatives -- confirm against the original bytes
        $strip =~ s{(?:(?&lt;=[$sp\x00])|(?&lt;=^))(\([cr]\)|&amp;copy;|�)(?:(?=[$sp\x00?!;.,])|(?=$))}{ if((lc($1) eq &quot;(c)&quot;) || (lc($1) eq &quot;&amp;copy;&quot;) || ($1 eq &quot;�&quot;)) {$ccopy} elsif((lc($1) eq &quot;(r)&quot;) || (lc($1) eq &quot;&amp;reg;&quot;) || ($1 eq &quot;�&quot;)) {$creg} else { $2 } }eig;
    }

    # Paragraphs
    if($conf-&gt;{paragraphs}) { $strip =~ s{(^|$pe(?:$rt$rt)?|$rt$rt)(?!$ps)(.+?)($br)?($brt)?(?&lt;!$pe)(?:(?=$)|(?=$rt$rt)|(?=$ps))}{ my ($a, $b, $c) = ($1,$2,$3||&quot;&quot;); (($b =~ /^[ \r\n]+$/) || ($b =~ /^(&lt;br *\/?&gt;|$br)+$/)) ? &quot;$a$b$c&quot; : &quot;$a&lt;p&gt;$b&lt;/p&gt;&quot;;}eisg; }

    # Line break
    # The lookbehinds skip line ends that directly follow a br or paragraph tag, in marker or text form
    if($conf-&gt;{lineBreaks}) { $strip =~ s/(?&lt;!$pt)(?&lt;!$br)(?&lt;!$br\r)(?&lt;!$pe\r\n\r\n)(?&lt;!$pe\n\n)(?&lt;!$pe\r\n)(?&lt;!$pe\n)(?&lt;!$pe\r)(?&lt;!$pe)(?&lt;!$pet\r\n\r\n)(?&lt;!$pet\r\n\r)(?&lt;!$pet\n\n)(?&lt;!$pet\r\n)(?&lt;!$pet\n)(?&lt;!$pet\r)(?&lt;!$pet)(?&lt;!$pst)($rt)(?!$brt)/&lt;br \/&gt;$1/isg; }
}

# ==impose quotes
# Rewrites the quotes in the file-level $strip in place.
# All known quote forms are first reset to a plain double quote; the text is
# then walked one character at a time, skipping placeholder markers, and each
# double quote is replaced by the opening or closing form of the main (qaType)
# or nested (qbType) quote pair. The open/closed state is reset at a breaking
# marker and at the end of the text; an unbalanced quote is reported through
# the file-level $result when logErrors is set.
sub quotes($$) {
    my ($class, $conf) = @_;

    my $i;
    my ($a_open, $b_open) = (0,0);
    my ($cp, $c, $cn, $cn_is_sp, $cp_is_sp) = ('', '', '', 0, 0);
    my ($qaStart, $qaEnd, $qbStart, $qbEnd);
    my (@qs, @qe, @qs_ansi, @qe_ansi, @qs_html, @qe_html, @qs_ent, @qe_ent,);

    # space class
    my $sp =&quot; \t\xA0&quot;;
    # characters
    my $letters = &quot;A-Za-z�-��-ѳ���&quot;;

    # Opening (qs) and closing (qe) forms; the same index selects a matching pair
    @qs_ansi = (&quot;�&quot;, &quot;�&quot;, &quot;�&quot;, &quot;�&quot;, &quot;�&quot;, '&quot;');    
    @qe_ansi = (&quot;�&quot;, &quot;�&quot;, &quot;�&quot;, &quot;�&quot;, &quot;�&quot;, '&quot;');
    @qs_html = (&quot;&amp;#171;&quot;, &quot;&amp;#147;&quot;, &quot;&amp;#132;&quot;, &quot;&amp;#145;&quot;, &quot;&amp;#130;&quot;, &quot;&amp;#34;&quot;);
    @qe_html = (&quot;&amp;#187;&quot;, &quot;&amp;#148;&quot;, &quot;&amp;#147;&quot;, &quot;&amp;#146;&quot;, &quot;&amp;#145;&quot;, &quot;&amp;#34;&quot;);
    #          &lt;&lt;         ``         ..         `          .          &quot;
    @qs_ent = (&quot;&amp;laquo;&quot;, &quot;&amp;ldquo;&quot;, &quot;&amp;bdquo;&quot;, &quot;&amp;lsquo;&quot;, &quot;&amp;sbquo;&quot;, &quot;&amp;quot;&quot;);    
    #          &gt;&gt;         ''         ''          '         `          &quot;
    @qe_ent = (&quot;&amp;raquo;&quot;, &quot;&amp;rdquo;&quot;, &quot;&amp;ldquo;&quot;, &quot;&amp;rsquo;&quot;, &quot;&amp;lsquo;&quot;, &quot;&amp;quot;&quot;);

    # Quotes collection
    if(!$conf-&gt;{codeMode}) {
        @qs = @qs_ansi; @qe = @qe_ansi;
    } elsif ($conf-&gt;{codeMode} == 1) {
        @qs = @qs_html; @qe = @qe_html;
    } else {
        @qs = @qs_ent; @qe = @qe_ent;
    }
    
    # Getting configuration setting
    # NOTE(review): because of ||= a qbType of 0 is turned into 1 here, and the fallback 1 differs from the preset default of 2 -- confirm intent
    $conf-&gt;{qaType} ||= 0;
    $conf-&gt;{qbType} ||= 1;
    $conf-&gt;{qaType} = ($conf-&gt;{qaType} &gt;= 0 &amp;&amp; $conf-&gt;{qaType} &lt;= 5) ? $conf-&gt;{qaType} : 0;
    $conf-&gt;{qbType} = ($conf-&gt;{qbType} &gt;= 0 &amp;&amp; $conf-&gt;{qbType} &lt;= 5) ? $conf-&gt;{qbType} : 1;
   
    # Selecting quotes as requested by user
    ($qaStart, $qaEnd) = ($qs[$conf-&gt;{qaType}], $qe[$conf-&gt;{qaType}]);
    ($qbStart, $qbEnd) = ($qs[$conf-&gt;{qbType}], $qe[$conf-&gt;{qbType}]);
    
    # Resetting all the quotes inside text to &lt;&quot;&gt;
    my $qa = join('|', @qs_ansi) . '|' . join('|', @qe_ansi) . '|' . join('|', @qs_html) . '|' . join('|', @qe_html) . '|' . join('|', @qs_ent) . '|' . join('|', @qe_ent);
    $strip =~ s/(?:(?:(?&lt;=[^$letters])|(?&lt;=^))($qa))|(?:($qa)(?:(?=[^$letters])|(?=$)))/\&quot;/ig;
    
    # Sliding window: $st[0] previous, $st[1] current, $st[2] next character;
    # @space and @break carry the flags of the markers skipped in front of each slot
    my $spread = 1;
    my $mv = 0;
    my $mvn = 0;
    my @st;
    $i = 0;
    my $skip = 0;
    my @space;          # Space tags flag
    my @break;          # Text break flags
    
    $st[$_] = '' foreach(0..$spread + 1);
    $space[$_] = 0 foreach(0 + 1..$spread + 1);
    $break[$_] = 0 foreach(0 + 1..$spread + 1);
    $space[0] = 1;
    $break[0] = 1;

    while(1) {
        # Skipping tags
        # $mv counts the marker bytes skipped so far in this step; $mvn is the part of it that lies before the current character
        foreach(0..$spread) {
            do {
                $skip = 0;
                if($i + $_ + $mv &lt;= length($strip)) {
                    if($i + $_ + $mv + 1 &lt; length($strip)) {
                        if((substr($strip, $i + $_ + $mv, 1) eq &quot;\x00&quot;) &amp;&amp; (substr($strip, $i + $_ + $mv + 1, 1) eq &quot;\x0F&quot;)) {
                            $space[$_ + 1] |= (ord(substr($strip, $i + $_ + $mv + 2, 1)) &amp; 2) &gt;&gt; 1;
                            $break[$_ + 1] |= ord(substr($strip, $i + $_ + $mv + 2, 1)) &amp; 1;
                            $mv += $markLength;
                            if(!$_) { $mvn = $mv; }
                            $st[$_ + 1] = &quot;&quot;;
                            $skip = 1;
                        }
                    }
                    if(!$skip) { $st[$_ + 1] = substr($strip, $i + $_ + $mv, 1); }
                } 
            } while($skip);
        }
        
        $i += $mvn;
        $mv = 0;
        $mvn = 0;

	($cp, $c, $cn) = ($st[0], $st[1], $st[2]);
	# NOTE(review): $cp_is_sp is assigned here but not read anywhere else in this sub
	$cp_is_sp = (($cp =~ /[^0-9$letters]/) || $space[0] || $space[1] || $break[0] || !$i) ? 1 : 0;
	$cn_is_sp = (($cn =~ /[^0-9$letters]/) || $space[2] || $break[2] || $cn eq '') ? 1 : 0;

        # Reset state if breaking tag appears
        if($break[1] || $i == length($strip)) {
            if($a_open || $b_open) {
                # Log quote error if appears
                if($conf-&gt;{logErrors}) {
                    my $quoteErrSampleLength = 100;
                    my $z = $i - 1;
                    my $y;
                    # Walk back to the start of the text, remembering the earliest space that is still within the sample length
                    while(1) {
                        if(substr($strip, $z, 1) eq &quot; &quot; || substr($strip, $z, 1) eq &quot;\xA0&quot; || !$z) { if($i-$z &lt;= $quoteErrSampleLength) {$y = $z}}
                        last if(!$z);
                        $z--;
                    }
                    my $sample = substr($strip, $y, ($i - $y));
                    $sample =~ s/\x00\x0F[^\x0F]+\x0F\x00//g;
                    $sample =~ s/&lt;\/?[a-z]+.*?&gt;//g;
                    push(@{$result-&gt;{errorLog}}, {type=&gt;&quot;Quote_error&quot;, message=&gt;&quot;Quote mismatch near [$sample]&lt;--&quot;});
                    $result-&gt;{error} = 1;
                }
            }
	    $a_open = 0;
	    $b_open = 0;
	}

        # The replacement may be longer than one character, so $i is advanced past it
        if($c eq '&quot;') {
	    if(!$a_open) {
	        $a_open = 1;
		substr($strip, $i, 1) = $qaStart;
		$i += length($qaStart) - 1;
	    } elsif ($a_open &amp;&amp; (($i == length($strip) - 1) || (!$b_open &amp;&amp; $cn_is_sp))) {
	        $a_open = 0;
		substr($strip, $i, 1) = $qaEnd;
		$i += length($qaEnd) - 1;
	    } elsif ($a_open &amp;&amp; !$b_open) {
	        $b_open = 1;
		substr($strip, $i, 1) = $qbStart;
		$i += length($qbStart) - 1;
	    } elsif ($a_open &amp;&amp; $b_open) {
	        $b_open = 0;
		substr($strip, $i, 1) = $qbEnd;
		$i += length($qbEnd) - 1;
	    }
	}
    
	last if($i == length($strip));
	# Shift the window by one character and clear the flags for the slots that will be refilled
	$st[0] = $st[1];
	$space[0] = $space[1];
        $break[0] = $break[1];
	$space[$_] = 0 foreach(0 + 1..$spread + 1);
        $break[$_] = 0 foreach(0 + 1..$spread + 1);
	$i++;
    }
}

# ==Cutting the tags away
# Walks the text referenced by $text, copies everything that is not a tag to the
# file-level $strip, and handles each tag in turn: decides whether it is dropped
# (tagsDenyAll, tagsDeny, tagsAllow), optionally normalises its name, attributes
# and quoting, saves it to the file-level $tags, and writes a 7-byte placeholder
# marker carrying the tag flags into $strip. HTML problems are reported through
# $result (error flag, and errorLog entries when logErrors is set).
sub cuttags($$$$) {
    my($class, $text, $conf, $result) = @_;
    my $i = 0;                                                                                                                    # loop counter
    my $hop;												                                                                      # Jump length
    my ($c, $cn);                                                                                                                 # current &amp; next character
    my ($tl, $ts, $te, $cl, $tagName, $tagBody, $tagContent);                                                                     # tag length, tag dimensions, tag name, tag body text, single tag flag, content inside the tag
    my ($isTag, $isTagStart, $isSingle, $isSingleClosed, $isSpace, $isBreaking, $nobrIsOpen, $flagSet2, $flagSet1, $flagSet0);    # some useful flags
    my @tagsOpen;                                                                                                                 # an array storing the info about all the tags currently open
    
    # space class
    my $sp =&quot; \t\xA0&quot;;

    while(1) {
        # Copy the plain text up to the next opening angle bracket (or the rest of the text when there is none)
        $hop = index($$text, &quot;&lt;&quot;, $i);

        if($hop &lt; 0) {
            $strip .= substr($$text, $i, length($$text) - $i);
            last;
        } elsif($hop &gt; 0) {
            $strip .= substr($$text, $i, $hop - $i);
            $i = $hop;
        }

        ($c, $cn) = unpack(&quot;aa&quot;, substr($$text, $i, 2));
	
	$isTag = 0;

        # =If tag opens
	$isTagStart = ($cn =~ /!|[a-z]/i) ? 1 : 0;
	if($isTagStart || ($cn eq &quot;/&quot;)) { $isTag = 1; }

	if($isTag) {
	    $ts = $i;                                                                # Tag start position 
	    # NOTE(review): for a closing tag index() yields -1 when nothing is found, and -1 is true in the tests on $te below -- confirm
	    $te = $isTagStart ? tagend($text, $ts) : index($$text, &quot;&gt;&quot;, $ts);        # Tag end position

            # NOTE(review): this extraction is repeated verbatim a few lines below; here tr runs even when no name matched
            if($te) {
                $tagBody = substr($$text, $ts, $te - $ts + 1);
		$tagName = $isTagStart ? ($tagBody =~ m/^&lt;([a-z]+)/i)[0] : ($tagBody =~ m/^&lt;\/\s*([a-z]+)/i)[0];
		$tagName =~ tr/A-Z/a-z/;
            }

	    if($te &amp;&amp; $tagName) {
		$tagBody = substr($$text, $ts, $te - $ts + 1);
		$tagName = $isTagStart ? ($tagBody =~ m/^&lt;([a-z]+)/i)[0] : ($tagBody =~ m/^&lt;\/\s*([a-z]+)/i)[0];
		$tagName =~ tr/A-Z/a-z/;

		# =Flags
		# Detecting whether the tag is single (self-closing) or double
		$isSingleClosed = 0;
                $isSingle = 0;
		if($isTagStart) {
  		    if(grep{$tagName eq $_} @singleTags) {
		        $isSingle = 1;
 		    } elsif (substr($tagBody, length($tagBody) - 2, 1) eq &quot;/&quot;) {
			$isSingle = 1;
			$isSingleClosed = 1;
		    }
		}

		# Detecting wether this is space tag or not
		$isSpace = (grep{$tagName eq $_} @spaceTags) ? 1 : 0;
		
		# Detecting wether this is breaking tag or not
		$isBreaking = (grep{$tagName eq $_} @breakingTags) ? 1 : 0;
		
		# Tag Length
		$tl = $te - $ts + 1;
		
		# Updating the status for tags open
		# NOTE(review): with an empty @tagsOpen the lookups below read an undefined element -- confirm whether a stray closing tag should be reported differently
		if($conf-&gt;{checkHTML} &amp;&amp; !$isSingle) {
  		    if($isTagStart) {
		        push(@tagsOpen, $tagName);
  		    } else {
		        if($tagsOpen[$#tagsOpen] ne $tagName) {
			# HTML error
			    $result-&gt;{error} = 1;
			    if($conf-&gt;{logErrors}) { push(@{$result-&gt;{errorLog}}, {type=&gt;&quot;HTML_Parse&quot;, position=&gt;$i, message=&gt;&quot;Found closing tag &lt;$tagName&gt; while waiting tag &lt;&quot; . $tagsOpen[$#tagsOpen] . &quot;&gt; to close!&quot;}); }
			} else {
			    pop(@tagsOpen);
			}
 		    }
		}

                # Eating tag content for some tags like &lt;script&gt;
                # $cl is the length of the text between the opening tag and its closing tag, or 0 when no closing tag is found
                $tagContent = &quot;&quot;;
                $cl = 0;
   	        if((grep{$tagName eq $_} @tagsToEat) &amp;&amp; $isTagStart) {
                      $cl = index($$text, &quot;&lt;/$tagName&gt;&quot;, $ts + $tl) - $ts - $tl;
                      if($cl &gt; 0) {
                          $tagContent = substr($$text, $ts + $tl, $cl);
                      } else {
                          $cl = 0;
		          $result-&gt;{error} = 1;
			  if($conf-&gt;{logErrors}) { push(@{$result-&gt;{errorLog}}, {type=&gt;&quot;HTML_Parse&quot;, position=&gt;$i, message=&gt;&quot;Can't find &lt;$tagName&gt; end!&quot;}); }
                      }
                }

     	        # Should I drop all the tags by default?
		my $dropTag = 0;
		if($conf-&gt;{tagsDenyAll}) { $dropTag = 1; }
		
                # Checking deny list
                if(defined($conf-&gt;{tagsDeny}) &amp;&amp; !$dropTag) {
                    if($conf-&gt;{tagsDeny}-&gt;{$tagName}) { $dropTag = 1; }
                }

		# Checking allow list
		if(defined($conf-&gt;{tagsAllow}) &amp;&amp; $dropTag) {
		    if($conf-&gt;{tagsAllow}-&gt;{$tagName}) { $dropTag = 0; }
                }

                # Nobr tag status
                if($tagName eq &quot;nobr&quot; &amp;&amp; $isTagStart) {
                    $nobrIsOpen = 1;
                } elsif(($tagName eq &quot;nobr&quot; &amp;&amp; !$isTagStart) || (grep{$tagName eq $_} @breakingTags)) {
                    $nobrIsOpen = 0;
                }

                # =Final part
                if(!$dropTag) {
		    # =Processing tags
		    # Tag name to lower case
		    if($conf-&gt;{tagNamesToLower}) {
		        if($isTagStart) { $tagBody = &quot;&lt;&quot; . $tagName . substr($tagBody, length($tagName) + 1, length($tagBody) - length($tagName) - 1); }
			else { $tagBody =~ tr/A-Z/a-z/; }
		    }                           
		    # Tag name to upper case
	            if($conf-&gt;{tagNamesToUpper}) {
	                if($isTagStart) { $tagBody = &quot;&lt;&quot; . uc($tagName) . substr($tagBody, length($tagName) + 1, length($tagBody) - length($tagName) - 1); }
	                else { $tagBody =~ tr/a-z/A-Z/; }
	            }
		    # =Tag parameters to lower or upper case
		    if($isTagStart &amp;&amp; ($conf-&gt;{tagAttributesToLower} || $conf-&gt;{tagAttributesToUpper})) {
		        # Regular parameters
		        # NOTE(review): $tmp is built here but never written back to $tagBody, and $5 has no matching capture group -- confirm
		        my $tmp = &quot;&quot;;
			while ($tagBody =~ m/([^\s]*\s*)(?:([a-z\r]+)(\s*)(?==)(=\s*))?/ig ) {
			    $tmp .= $1 if ($1); if($conf-&gt;{tagAttributesToLower}) { if($2) { $tmp .= lc($2); } } else { if($2) { $tmp .= uc($2); } } $tmp .= $3 if ($3); $tmp .= $4 if ($4); $tmp .= $5 if ($5);
			}

			# Single parameters (like &lt;checked&gt;)
                        if($conf-&gt;{tagAttributesToLower}) { $tagBody =~ s{(?&lt;!=)( +([a-z]+))}{lc($1)}eig; }
                        elsif($conf-&gt;{tagAttributesToUpper}) { $tagBody =~ s{(?&lt;!=)( +([a-z]+))}{uc($1)}eig; }
		    }
		    
		    # Simple XSS &amp; tag attributes protection
		    # Each name=value pair is either kept unchanged or replaced by an empty string
		    if($isTagStart &amp;&amp; ($conf-&gt;{simpleXSS} || $conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{validAttributes} || $conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{invalidAttributes} || $conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{denyAllAttributes} || $conf-&gt;{tagsDenyAllAttributes})) {
			$tagBody =~ s{(?&lt;!&lt;)(\s*)([a-z]+)([$sp]*=[$sp]*)(&quot;[^&quot;]+&quot;|[^$sp/&gt;]+)} {
			    my ($a, $b, $c, $d) = ($1||'', $2, $3, $4);
			    if($conf-&gt;{simpleXSS} &amp;&amp; ($b =~ /^on/ig || $d =~ /javascript|expression/ig)) {
				'';
			    } elsif(($conf-&gt;{tagsDenyAllAttributes} || $conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{denyAllAttributes} || ($conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{invalidAttributes} &amp;&amp; $conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{invalidAttributes}-&gt;{$b}))
								    &amp;&amp; !(($conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{validAttributes} &amp;&amp; $conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{validAttributes}-&gt;{$b})
								    || $conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{allowAllAttributes})
				    ) {
				'';
                            } elsif($conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{validAttributes} &amp;&amp; !$conf-&gt;{tagsAllow}-&gt;{$tagName}-&gt;{validAttributes}-&gt;{$b}) {
				'';
			    } else {
				$a . $b . $c . $d;
			    }
			}eig;			
		    }
		    
		    # Close single tag
		    if($conf-&gt;{tagCloseSingle} &amp;&amp; $isSingle &amp;&amp; !$isSingleClosed) {
		        if(substr($tagBody, length($tagBody) - 2, 1) ne &quot;/&quot;) {
			    if(substr($tagBody, length($tagBody) - 2, 1) ne &quot; &quot;) { substr($tagBody, length($tagBody) - 2, 1) .= &quot; /&quot;; } else { substr($tagBody, length($tagBody) - 2, 1) .= &quot;/&quot;; }
			}
		    }
						
		    # Quote attribute values
		    if($conf-&gt;{tagQuoteValues} &amp;&amp; $isTagStart) {
		        my $tmp = &quot;&quot;;
		        #                   1          23  4    5       6
		        while($tagBody =~ m/([&lt;a-z &gt;]+)?((=)(\s*)([^ &gt;]+)([ &gt;]+))?/ig) {
		            $tmp .= $1 if($1);
		            if($2) {
		    		$tmp .= $3 if($3);
		    		$tmp .= $4 if($4);
		        	if($5 &amp;&amp; substr($5, 0, 1) ne '&quot;' &amp;&amp; substr($5, length($5) - 1, 1) ne '&quot;') { $tmp .= &quot;\&quot;$5\&quot;&quot;; } else { $tmp .= $5; }
                                $tmp .= $6 if($6);
                            }
                        }
                        $tagBody = $tmp;
    		    }
																																														
		    # Unquote attribute values
		    if($conf-&gt;{tagUnQuoteValues}) {
		        $tagBody =~ s/([a-z]+)(\s*)(=)(\s*)&quot;([^\=\s&quot;&gt;]+)&quot;/$1$2$3$4$5/ig;   #&quot;
		    }
																																																						    
                    # Saving the tag
		    push(@$tags, {name=&gt;$tagName, body=&gt;$tagBody, content=&gt;$tagContent});
		
                    # Forming flagSet
                    #
                    # |byte2: _ _ _ _ _ _ isSpace isBreaking| byte1: _ _ _ _ _ p br| byte0: _ _ _ _ nobr isTagStart
                    $flagSet2 = 0;
                    if($isSpace) { $flagSet2 |= 2; }
                    if($isBreaking) { $flagSet2 |= 1; }
                    $flagSet1 = 0;
                    if($tagName eq &quot;br&quot;) { $flagSet1 |= 1; }
                    if($tagName eq &quot;p&quot;) { $flagSet1 |= 2; }
                    $flagSet0 = 0;
                    if($isTagStart) { $flagSet0 |= 1; }
                    if($nobrIsOpen) { $flagSet0 |= 2; }
	  	    # Planting the marker
		    $strip .= &quot;\x00\x0F&quot; . chr($flagSet2) . chr($flagSet1) . chr($flagSet0) . &quot;\x0F\x00&quot;;
		}
		
		# Moving the pointer (tag end position + content length)
		$i = $te + $cl;

                # Eating crs &amp; lfs after dropped tag
                if($conf-&gt;{tagLf} &amp;&amp; $dropTag) {
                    while(1) {
                        if(substr($$text, $i + 1, 1) eq &quot;\r&quot;) { $i++; } elsif(substr($$text, $i + 1, 1) eq &quot;\n&quot;) { $i++; last; } else { last }
                    }
                }
	    }
	} else {
        # This is not a tag, just add the &quot;&lt;&quot; to result
            $strip .= $c;
        }
	
	last if($i == length($$text));
	$i++;
    }
}

# ==Find where tag ends
# Finds the end of the tag that starts at offset $i of the text referenced by
# $text. Returns the offset of the first closing angle bracket that is not
# inside a double-quoted value, or 0 when there is none.
#
# The former three-argument prototype is gone: the sub takes two arguments and
# is called as a plain function with two, ahead of its definition.
sub tagend {
    my ($text, $i) = @_;

    my $quote = 0;
    my $length = length($$text);

    # A missing start offset means the beginning of the text
    $i |= 0;

    # The bounded loop also ends when the start offset lies past the end of the text
    for (; $i &lt; $length; $i++) {
        my $char = substr($$text, $i, 1);

        if ($char eq '&quot;') {
            $quote ^= 1;
        } elsif (!$quote &amp;&amp; $char eq '&gt;' &amp;&amp; $i) {
            # Offset 0 cannot be told apart from the not-found result, so it is skipped
            return $i;
        }
    }

    return 0;
}

# ==Bring everything back to HTML
# Rebuilds the HTML: walks the file-level $strip, copies plain text to
# $result-&gt;{text} and replaces every placeholder marker with the body (and the
# swallowed content, if any) of the next tag saved in the file-level $tags.
sub planttags($$) {
    my ($class, $result) = @_;
    my $i = 0;
    my $max = length($strip);
    my $ctag = 0;
    my $step;

    while (1) {
        if($i &lt; $max - 2 &amp;&amp; substr($strip, $i, 2) eq &quot;\x00\x0F&quot;) {
            # A marker: emit the matching saved tag. A marker without a saved tag adds nothing.
            my $tag = $$tags[$ctag];
            if(defined($tag)) {
                $result-&gt;{text} .= $tag-&gt;{body};
                if($tag-&gt;{content}) { $result-&gt;{text} .= $tag-&gt;{content}; }
            }
            $i += $markLength;
            $ctag++;
        } else {
            # Plain text: copy everything up to the next marker, or to the end of the text
            if($i &lt; $max - 2) { $step = index($strip, &quot;\x00\x0F&quot;, $i) - $i; } else { $step = $max - $i; }
            if($step &lt; 0) { $step = $max - $i; }

            if($step &gt;= 0) {
                $result-&gt;{text} .= substr($strip, $i, $step);
                $i += $step;
            }
        }

        # Skipping a marker that sits closer than a marker length to the end moves
        # $i past $max; testing for equality only would then never stop the loop.
        last if($i &gt;= $max);
    }
}

# ==Bring the text to plain mode==
# Brings the string referenced by $text back to plain text, in place: quote,
# non-breaking space, dash, ellipsis, copyright, registered and fraction forms
# (entities and literal characters) are replaced by plain ASCII equivalents.
sub vanish($$) {
    my($class, $text) = @_;

    # Every quote form becomes a plain double quote
    $$text =~ s/&amp;laquo;|&amp;ldquo;|&amp;bdquo;|&amp;lsquo;|&amp;sbquo;|&amp;quot;|&amp;raquo;|&amp;rdquo;|&amp;ldquo;|&amp;rsquo;|&amp;#171;|&amp;#147;|&amp;#132;|&amp;#145;|&amp;#130;|&amp;#34;|&amp;#187;|&amp;#148;|&amp;#146;|�|�|�|�|�|&quot;|�|�|�/&quot;/ig;
    $$text =~ s/&amp;nbsp;|&amp;#160;|�/ /ig;
    $$text =~ s/&amp;mdash;|&amp;ndash;|&amp;#151;|&amp;#150;|�|�/-/ig;
    $$text =~ s/&amp;hellip;|&amp;#133;|�/.../ig;
    $$text =~ s/&amp;copy;|&amp;#169;|�/(c)/ig;
    $$text =~ s/&amp;reg;|&amp;#174;|�/(r)/ig;
    $$text =~ s/&amp;frac14;|&amp;#188;/1\/4/ig;
    $$text =~ s/&amp;frac12;|&amp;#189;/1\/2/ig;
    $$text =~ s/&amp;frac34;|&amp;#190;/3\/4/ig;
}

# ==Parse the tagsAllow string advanced format==
# Parses the string form of tagsAllow.
#
# A leading | on the whole string denies all attributes by default. Each tag
# entry is a tag name optionally followed by :attr (allowed attribute) and
# |attr (denied attribute) parts; a bare trailing | denies all attributes of
# that tag and a bare trailing : allows all of them.
#
# Returns a hash ref with the keys tagsAllow (rules per tag name) and
# tagsDenyAllAttributes (0 or 1).
sub parseTagsAllowString($$) {
    my($class, $string) = @_;

    return {tagsAllow=&gt;{}, tagsDenyAllAttributes=&gt;0} if(!$string);

    my $tagsAllow = {};
    my $tagsDenyAllAttributes = 0;

    # Should I deny all tag attributes by default?
    if(substr($string,0,1) eq '|') {
        $tagsDenyAllAttributes = 1;
        substr($string,0,1) = '';
    }

    # Parsing the Configuration String
    while($string =~ /([a-z:|]+)/ig) {
        my $tBody = $1;
        my ($tagName) = ($tBody =~ /^([a-z]+)/i)[0];

        last if(!$tagName);

        my $rule = $tagsAllow-&gt;{$tagName} ||= {};
        $rule-&gt;{val} = 1;

        if($tBody =~ /^$tagName\|$/i) {
            $rule-&gt;{denyAllAttributes} = 1;
        } elsif($tBody =~ /^$tagName\:$/i) {
            $rule-&gt;{allowAllAttributes} = 1;
        } else {
            while($tBody =~ /:([a-z]+)/ig) {
                $rule-&gt;{validAttributes}-&gt;{$1} = 1;
            }

            while($tBody =~ /\|([a-z]+)/ig) {
                my $attribute = $1;

                # Look the attribute up only when an allow list exists. A bare
                # nested lookup would create an empty validAttributes hash, and
                # cuttags reads any validAttributes hash as a whitelist, which
                # would strip every attribute of the tag.
                next if($rule-&gt;{validAttributes} &amp;&amp; $rule-&gt;{validAttributes}-&gt;{$attribute});

                $rule-&gt;{invalidAttributes}-&gt;{$attribute} = 1;
            }
        }
    }

    return {tagsAllow=&gt;$tagsAllow, tagsDenyAllAttributes=&gt;$tagsDenyAllAttributes};
}

# ==Parse the tagsDeny string to a hash==
# Turns the string form of tagsDeny into a hash ref: every run of letters in
# $string becomes a key whose value is a hash with val set to 1. An empty or
# missing string gives an empty hash.
sub parseTagsDenyString($$) {
    my($class, $string) = @_;

    my %denied;

    if($string) {
        foreach my $name ($string =~ /([a-z]+)/ig) {
            $denied{$name}-&gt;{val} = 1;
        }
    }

    return \%denied;
}
                       
return 1;</pre>  </div>     </div>
    <div id="footer">
      <hr />
      <p class="right"><em><a href="http://search.cpan.org/dist/SVN-Web/">Powered by SVN::Web</a></em></p>
    </div>
<!--#include virtual="/inc/svn_foot.html"-->
<!--#include virtual="/inc/footer.html"-->