Initial commit
This commit is contained in:
		
							
								
								
									
										349
									
								
								content/inc/Parsing/Lexer/Lexer.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										349
									
								
								content/inc/Parsing/Lexer/Lexer.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,349 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 | 
			
		||||
 * For an intro to the Lexer see:
 | 
			
		||||
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 | 
			
		||||
 *
 | 
			
		||||
 * @author Marcus Baker http://www.lastcraft.com
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
namespace dokuwiki\Parsing\Lexer;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Accepts text and breaks it into tokens.
 | 
			
		||||
 *
 | 
			
		||||
 * Some optimisation to make sure the content is only scanned by the PHP regex
 | 
			
		||||
 * parser once. Lexer modes must not start with leading underscores.
 | 
			
		||||
 */
 | 
			
		||||
class Lexer
{
    /** @var ParallelRegex[] compound regexes, keyed by the mode they apply in */
    protected $regexes;
    /** @var \Doku_Handler object receiving one method call per token */
    protected $handler;
    /** @var StateStack stack of the modes entered so far */
    protected $modeStack;
    /** @var array mode "rewrites": mode name => handler name to call instead */
    protected $mode_handlers;
    /** @var bool case sensitive? */
    protected $case;

    /**
     * Creates a lexer that matches case insensitively unless told otherwise.
     *
     * @param \Doku_Handler $handler  Object whose methods are called for each token.
     * @param string $start           Name of the initial mode.
     * @param boolean $case           True for case sensitive matching.
     */
    public function __construct($handler, $start = "accept", $case = false)
    {
        $this->handler = $handler;
        $this->case = $case;
        $this->modeStack = new StateStack($start);
        $this->regexes = [];
        $this->mode_handlers = [];
    }

    /**
     * Registers a token pattern for a mode without any mode change on match.
     *
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Mode in which the pattern is active.
     */
    public function addPattern($pattern, $mode = "accept")
    {
        $this->regexForMode($mode)->addPattern($pattern);
    }

    /**
     * Registers a pattern that pushes a new mode when it matches.
     *
     * Useful for entering parenthesis, strings, tags, etc.
     *
     * @param string $pattern      Perl style regex, but ( and ) lose the usual meaning.
     * @param string $mode         Mode in which the pattern is active.
     * @param string $new_mode     Nested mode to enter on a match.
     */
    public function addEntryPattern($pattern, $mode, $new_mode)
    {
        $this->regexForMode($mode)->addPattern($pattern, $new_mode);
    }

    /**
     * Registers a pattern that pops the current mode when it matches.
     *
     * @param string $pattern      Perl style regex, but ( and ) lose the usual meaning.
     * @param string $mode         Mode to leave.
     */
    public function addExitPattern($pattern, $mode)
    {
        // "__exit" is the label isModeEnd() recognises
        $this->regexForMode($mode)->addPattern($pattern, "__exit");
    }

    /**
     * Registers a pattern whose match is handled in a mode of its own.
     *
     * The special mode is entered and left again for the matched token only.
     *
     * @param string $pattern      Perl style regex, but ( and ) lose the usual meaning.
     * @param string $mode         Mode in which the pattern is active.
     * @param string $special      Mode used for this single token.
     */
    public function addSpecialPattern($pattern, $mode, $special)
    {
        // the leading underscore is the marker isSpecialMode() looks for
        $this->regexForMode($mode)->addPattern($pattern, "_$special");
    }

    /**
     * Makes tokens of one mode call a differently named handler method.
     *
     * @param string $mode        Mode to be remapped.
     * @param string $handler     Handler name to call instead.
     */
    public function mapHandler($mode, $handler)
    {
        $this->mode_handlers[$mode] = $handler;
    }

    /**
     * Tokenises the given text, calling the handler for every unmatched and matched piece.
     *
     * Stops with false as soon as a handler call returns a falsy value, when an iteration
     * consumes no input, or when the current mode has no patterns registered.
     *
     * @param string $raw        Text to tokenise.
     * @return boolean           True on success, else false.
     */
    public function parse($raw)
    {
        if (! isset($this->handler)) {
            return false;
        }

        $total = strlen($raw);
        $previousRemaining = $total;
        $pos = 0;

        while (true) {
            // reduce() strips the consumed prefix from $raw
            $parsed = $this->reduce($raw);
            if (! is_array($parsed)) {
                break;
            }

            $matched = $parsed[1];
            $remaining = strlen($raw);
            // byte offset at which the matched token starts in the original text
            $matchPos = $total - $remaining - strlen($matched);

            if (! $this->dispatchTokens($parsed[0], $matched, $parsed[2], $pos, $matchPos)) {
                return false;
            }
            // nothing was consumed: bail out instead of looping forever
            if ($remaining == $previousRemaining) {
                return false;
            }

            $previousRemaining = $remaining;
            $pos = $total - $remaining;
        }

        // reduce() returned false: the current mode has no regex
        if (! $parsed) {
            return false;
        }

        // whatever is left over is passed on as unmatched text
        return $this->invokeHandler($raw, DOKU_LEXER_UNMATCHED, $pos);
    }

    /**
     * Gives plugins access to the mode stack
     *
     * @return StateStack
     */
    public function getModeStack()
    {
        return $this->modeStack;
    }

    /**
     * Hands the leading unmatched text and then the matched token to the handler,
     * switching modes as requested by the pattern's label.
     *
     * @param string $unmatched Unmatched leading portion.
     * @param string $matched Actual token match.
     * @param bool|string $mode Label of the matching pattern. A non-string label causes no mode change.
     * @param int $initialPos Byte index at which the unmatched portion starts
     * @param int $matchPos Byte index at which the matched token starts
     * @return boolean             False if a handler call or leaving a mode failed.
     */
    protected function dispatchTokens($unmatched, $matched, $mode, $initialPos, $matchPos)
    {
        if (! $this->invokeHandler($unmatched, DOKU_LEXER_UNMATCHED, $initialPos)) {
            return false;
        }

        if ($this->isModeEnd($mode)) {
            // the exit token is still reported in the mode being left
            $reported = $this->invokeHandler($matched, DOKU_LEXER_EXIT, $matchPos);
            return $reported ? $this->modeStack->leave() : false;
        }

        if ($this->isSpecialMode($mode)) {
            // enter the one-token mode, report the token, then drop back
            $this->modeStack->enter($this->decodeSpecial($mode));
            $reported = $this->invokeHandler($matched, DOKU_LEXER_SPECIAL, $matchPos);
            return $reported ? $this->modeStack->leave() : false;
        }

        if (is_string($mode)) {
            // the entry token is reported in the mode just entered
            $this->modeStack->enter($mode);
            return $this->invokeHandler($matched, DOKU_LEXER_ENTER, $matchPos);
        }

        return $this->invokeHandler($matched, DOKU_LEXER_MATCHED, $matchPos);
    }

    /**
     * Checks whether a pattern label is the marker set by addExitPattern().
     *
     * @param string $mode    Label to test.
     * @return boolean        True if the label requests leaving the current mode.
     */
    protected function isModeEnd($mode)
    {
        return $mode === "__exit";
    }

    /**
     * Checks whether a pattern label denotes a mode that is entered for one token
     * only and left immediately afterwards.
     *
     * @param string $mode    Label to test.
     * @return boolean        True if the label starts with an underscore.
     */
    protected function isSpecialMode($mode)
    {
        return strncmp($mode, "_", 1) === 0;
    }

    /**
     * Removes the leading underscore that marks single token modes.
     *
     * @param string $mode    Label to decode.
     * @return string         Underlying mode name.
     */
    protected function decodeSpecial($mode)
    {
        return substr($mode, 1);
    }

    /**
     * Calls the handler method named after the current mode (or its mapped name).
     *
     * Empty content is skipped and counts as success.
     *
     * @param string $content Text to pass on.
     * @param int $is_match One of the DOKU_LEXER_* values passed in by parse()
     *                      and dispatchTokens().
     * @param int $pos Byte index of the content in the raw document
     * @return bool
     */
    protected function invokeHandler($content, $is_match, $pos)
    {
        if ($content === "" || $content === false) {
            return true;
        }

        $method = $this->modeStack->getCurrent();
        if (isset($this->mode_handlers[$method])) {
            $method = $this->mode_handlers[$method];
        }

        // modes starting with plugin_ all go to the method named "plugin",
        // which receives the rest of the mode name as an extra argument
        if (substr($method, 0, 7) == 'plugin_') {
            $parts = explode('_', $method, 2);
            $method = $parts[0];
            return $this->handler->$method($content, $is_match, $pos, $parts[1]);
        }

        return $this->handler->$method($content, $is_match, $pos);
    }

    /**
     * Tries to find the next token in the text and, if successful, cuts the token and
     * everything before it off the front of $raw.
     *
     * @param string $raw         The subject to parse. Shortened in place on a match.
     * @return array|bool         Three item list of unparsed content, recognised token
     *                            and the label of the matching pattern.
     *                            True if nothing matched or $raw is empty, false if the
     *                            current mode has no patterns registered.
     */
    protected function reduce(&$raw)
    {
        $current = $this->modeStack->getCurrent();
        if (! isset($this->regexes[$current])) {
            return false;
        }
        if ($raw === "") {
            return true;
        }

        $action = $this->regexes[$current]->split($raw, $split);
        if (! $action) {
            return true;
        }

        // $split is (text before match, match, text after match)
        $raw = $split[2];
        return [$split[0], $split[1], $action];
    }

    /**
     * Escapes regex characters other than (, ) and /
     *
     * Each of \ . + * ? [ ^ ] $ { } = ! < > | : gets a backslash prepended.
     *
     * @param string $str
     * @return string
     */
    public static function escape($str)
    {
        // "\\\\" in the replacement yields one literal backslash, $0 is the matched character
        return preg_replace('/[\\\\.+*?\\[^\\]${}=!<>|:]/', '\\\\$0', $str);
    }

    /**
     * Returns the compound regex of a mode, creating it on first use.
     *
     * @param string $mode
     * @return ParallelRegex
     */
    private function regexForMode($mode)
    {
        if (! isset($this->regexes[$mode])) {
            $this->regexes[$mode] = new ParallelRegex($this->case);
        }
        return $this->regexes[$mode];
    }
}
 | 
			
		||||
							
								
								
									
										203
									
								
								content/inc/Parsing/Lexer/ParallelRegex.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										203
									
								
								content/inc/Parsing/Lexer/ParallelRegex.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,203 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 | 
			
		||||
 * For an intro to the Lexer see:
 | 
			
		||||
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 | 
			
		||||
 *
 | 
			
		||||
 * @author Marcus Baker http://www.lastcraft.com
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
namespace dokuwiki\Parsing\Lexer;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Compounded regular expression.
 | 
			
		||||
 *
 | 
			
		||||
 * Any of the contained patterns could match and when one does its label is returned.
 | 
			
		||||
 */
 | 
			
		||||
class ParallelRegex
{
    /**
     * @var string[] patterns to match; the first $compiledCount entries have already
     *               been rewritten to their "(...)" form by getCompoundedRegex()
     */
    protected $patterns;
    /** @var array labels for above patterns (string, or true for unlabelled ones) */
    protected $labels;
    /** @var string|null the compound regex matching all patterns, null when it needs rebuilding */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;
    /** @var int number of leading entries in $patterns that are already compiled */
    protected $compiledCount = 0;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case    True for case sensitive, false
     *                         for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->labels = array();
        $this->regex = null;
        $this->compiledCount = 0;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * May be called at any time; the compound regex is rebuilt on the next
     * match()/split() call.
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8
     *                             encoded. If its a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertation.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        // invalidate the cached compound regex
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject      String to match against.
     * @param string $match        First matched portion of
     *                             subject.
     * @return bool|string         False if no match found, label if label exists, true if not
     */
    public function match($subject, &$match)
    {
        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // Compare against the empty string instead of testing truthiness:
            // a token that is exactly "0" is a valid match but evaluates to false.
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * On a PCRE error a message is emitted through msg() and the call is treated
     * as "no match".
     *
     * @param string $subject      String to match against.
     * @param array $split         The split result: array containing, pre-match, match & post-match strings
     * @return bool|string         False if nothing matched, otherwise the label of the
     *                             matching pattern (true if that label is not set).
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            if (function_exists('preg_last_error')) {
                $err = preg_last_error();
                switch ($err) {
                    case PREG_BACKTRACK_LIMIT_ERROR:
                        msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                        break;
                    case PREG_RECURSION_LIMIT_ERROR:
                        msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                        break;
                    case PREG_BAD_UTF8_ERROR:
                        msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                        break;
                    case PREG_INTERNAL_ERROR:
                        msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                        break;
                }
            }

            $split = array($subject, "", "");
            return false;
        }

        // preg_match() drops trailing groups that did not take part in the match,
        // so the last group belongs to the pattern that matched
        $idx = count($matches)-2;
        // the compiled pattern is wrapped in "(...)", which doubles as the regex delimiter pair here
        list($pre, $post) = preg_split($this->patterns[$idx].$this->getPerlMatchingFlags(), $subject, 2);
        $split = array($pre, $matches[0], $post);

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * Compiled patterns are stored back into $patterns. Only entries added since
     * the previous compilation are processed, so that patterns which were already
     * rewritten are not escaped a second time.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $cnt = count($this->patterns);
            for ($i = $this->compiledCount; $i < $cnt; $i++) {
                /*
                 * decompose the input pattern into "(", "(?", ")",
                 * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
                 * elements.
                 */
                preg_match_all('/\\\\.|' .
                               '\(\?|' .
                               '[()]|' .
                               '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
                               '[^[()\\\\]+/', $this->patterns[$i], $elts);

                $pattern = "";
                $level = 0;

                foreach ($elts[0] as $elt) {
                    /*
                     * for "(", ")" remember the nesting level, add "\"
                     * only to the non-"(?" ones.
                     */

                    switch ($elt) {
                        case '(':
                            $pattern .= '\(';
                            break;
                        case ')':
                            if ($level > 0) {
                                $level--; /* closing (? */
                            } else {
                                $pattern .= '\\';
                            }
                            $pattern .= ')';
                            break;
                        case '(?':
                            $level++;
                            $pattern .= '(?';
                            break;
                        default:
                            if (substr($elt, 0, 1) == '\\') {
                                // escape sequences are passed through untouched
                                $pattern .= $elt;
                            } else {
                                // "/" is the delimiter of the compound regex
                                $pattern .= str_replace('/', '\/', $elt);
                            }
                    }
                }
                $this->patterns[$i] = "($pattern)";
            }
            $this->compiledCount = $cnt;
            $this->regex = "/" . implode("|", $this->patterns) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}
 | 
			
		||||
							
								
								
									
										60
									
								
								content/inc/Parsing/Lexer/StateStack.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								content/inc/Parsing/Lexer/StateStack.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,60 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 | 
			
		||||
 * For an intro to the Lexer see:
 | 
			
		||||
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 | 
			
		||||
 *
 | 
			
		||||
 * @author Marcus Baker http://www.lastcraft.com
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
namespace dokuwiki\Parsing\Lexer;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * States for a stack machine.
 | 
			
		||||
 */
 | 
			
		||||
class StateStack
{
    /** @var string[] entered states, the last element being the current one */
    protected $stack;

    /**
     * Constructor. The stack initially holds only the given state.
     * @param string $start        Starting state name.
     */
    public function __construct($start)
    {
        $this->stack = [$start];
    }

    /**
     * Returns the state on top of the stack.
     * @return string       State.
     */
    public function getCurrent()
    {
        $top = count($this->stack) - 1;
        return $this->stack[$top];
    }

    /**
     * Pushes a state, making it the current one.
     *
     * @param string $state        New state.
     */
    public function enter($state)
    {
        $this->stack[] = $state;
    }

    /**
     * Pops the current state so that the previous
     * one becomes current again.
     * @return boolean    false if only one state is left, in which case nothing is popped.
     */
    public function leave()
    {
        $canLeave = count($this->stack) != 1;
        if ($canLeave) {
            array_pop($this->stack);
        }
        return $canLeave;
    }
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user