204 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			204 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
/**
 | 
						|
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 | 
						|
 * For an intro to the Lexer see:
 | 
						|
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 | 
						|
 *
 | 
						|
 * @author Marcus Baker http://www.lastcraft.com
 | 
						|
 */
 | 
						|
 | 
						|
namespace dokuwiki\Parsing\Lexer;
 | 
						|
 | 
						|
/**
 | 
						|
 * Compounded regular expression.
 | 
						|
 *
 | 
						|
 * Any of the contained patterns could match, and when one does, its label is returned.
 | 
						|
 */
 | 
						|
class ParallelRegex
{
    /** @var string[] patterns to match, stored exactly as passed to addPattern() */
    protected $patterns;
    /** @var string[] escaped and parenthesised form of each pattern, filled by getCompoundedRegex() */
    protected $compiled = array();
    /** @var (bool|string)[] labels for above patterns, same indexes as $patterns */
    protected $labels;
    /** @var string|null the compound regex matching all patterns, null while it needs (re)building */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case    True for case sensitive, false
     *                         for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->compiled = array();
        $this->labels = array();
        $this->regex = null;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * The cached compound regex is discarded, so it is rebuilt from all
     * patterns on the next match() or split() call.
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8
     *                             encoded. If it's a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject      String to match against.
     * @param string $match        First matched portion of
     *                             subject. Set to the empty string
     *                             whenever false is returned.
     * @return bool|string         False if no match found, label if label exists, true if not
     */
    public function match($subject, &$match)
    {
        // always leave the output parameter in a defined state, even without patterns
        $match = "";

        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // group $i belongs to pattern $i - 1; alternatives that did not take part
            // are empty strings. Compare strictly so a matched "0" is not mistaken
            // for "no match".
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject      String to match against.
     * @param array $split         The split result: array containing, pre-match, match & post-match strings.
     *                             Set to array($subject, "", "") whenever false is returned.
     * @return bool|string         False if no match found, label if label exists, true if not
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        // always leave the output parameter in a defined state, even without patterns
        $split = array($subject, "", "");

        if (count($this->patterns) == 0) {
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            // preg_match() yields 0 for "no match" and false for an engine error;
            // report the latter to the user
            switch (preg_last_error()) {
                case PREG_BACKTRACK_LIMIT_ERROR:
                    msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                    break;
                case PREG_RECURSION_LIMIT_ERROR:
                    msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                    break;
                case PREG_BAD_UTF8_ERROR:
                    msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                    break;
                case PREG_INTERNAL_ERROR:
                    msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                    break;
            }

            return false;
        }

        // preg_match() drops trailing groups that did not take part in the match, so the
        // last entry of $matches belongs to the pattern that matched: entry 0 is the whole
        // match and entry n + 1 is pattern n.
        $idx = count($matches) - 2;

        // the compiled fragment is wrapped in "(" ... ")", which PCRE accepts as a
        // bracket-style delimiter pair, so only the flags need to be appended
        list($pre, $post) = preg_split($this->compiled[$idx] . $this->getPerlMatchingFlags(), $subject, 2);
        $split = array($pre, $matches[0], $post);

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The raw patterns are left untouched; their compiled form is kept in
     * $this->compiled. This way a rebuild after a later addPattern() call
     * does not escape already compiled patterns a second time.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $this->compiled = array();
            foreach ($this->patterns as $i => $pattern) {
                $this->compiled[$i] = '(' . $this->compilePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $this->compiled) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single raw pattern so it can be embedded in the compound regex.
     *
     * @param string $raw pattern as passed to addPattern()
     * @return string     the pattern with plain (, ) and / escaped
     */
    protected function compilePattern($raw)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
                       '\(\?|' .
                       '[()]|' .
                       '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
                       '[^[()\\\\]+/', $raw, $elts);

        $pattern = "";
        $level = 0; // number of "(?" groups currently open

        foreach ($elts[0] as $elt) {
            /*
             * for "(", ")" remember the nesting level, add "\"
             * only to the non-"(?" ones.
             */
            switch ($elt) {
                case '(':
                    $pattern .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $pattern .= '\\';
                    }
                    $pattern .= ')';
                    break;
                case '(?':
                    $level++;
                    $pattern .= '(?';
                    break;
                default:
                    if (substr($elt, 0, 1) == '\\') {
                        // already an escape sequence, keep as is
                        $pattern .= $elt;
                    } else {
                        // "/" is the delimiter of the compound regex
                        $pattern .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return $pattern;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}
 |