dockerwiki/content/inc/parser/metadata.php
2021-10-26 13:02:53 +02:00

752 lines
20 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/**
* The MetaData Renderer
*
* Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
* but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
* $persistent.
*
* Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
*
* @author Esther Brunner <wikidesign@gmail.com>
*/
class Doku_Renderer_metadata extends Doku_Renderer
{
    /** the approximate byte length to capture for the abstract */
    const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = array();

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = array();

    /** @var array the list of headers used to create unique link ids */
    protected $headers = array();

    /** @var string temporary $doc store */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
    public $capturing = true;

    /** @var bool determines if enough data for the abstract was collected, yet */
    public $capture = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat()
    {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet.
     */
    public function document_start()
    {
        global $ID;

        $this->headers = array();

        // external pages are missing create date
        if (!isset($this->persistent['date']['created']) || !$this->persistent['date']['created']) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if (!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if (!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }

        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data in the metadata
     */
    public function document_end()
    {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if (!isset($this->meta['description']['abstract'])) {
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            // ABSTRACT_MAX is a character limit, so the length has to be measured in
            // characters as well; a byte count would add the ellipsis to multibyte
            // text that is not actually shortened by the substr() call
            if (\dokuwiki\Utf8\PhpString::strlen($this->doc) > self::ABSTRACT_MAX) {
                $this->doc = \dokuwiki\Utf8\PhpString::substr($this->doc, 0, self::ABSTRACT_MAX).'…';
            }
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if (!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }
    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount captured data and will stop capturing when
     * enough abstract data is available
     *
     * @param string $text
     */
    public function cdata($text)
    {
        if (!$this->capture || !$this->capturing) {
            return;
        }

        $this->doc .= $text;

        // the capture budget is tracked in bytes, see ABSTRACT_LEN
        $this->captured += strlen($text);
        if ($this->captured > self::ABSTRACT_LEN) {
            $this->capture = false;
        }
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id the hash link
     * @param string $text the text to display
     * @param int $level the nesting level
     */
    public function toc_additem($id, $text, $level)
    {
        global $conf;

        //only add items within configured levels
        if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = array(
                'hid' => $id,
                'title' => $text,
                'type' => 'ul',
                'level' => $level - $conf['toptoclevel'] + 1
            );
        }
    }

    /**
     * Render a heading
     *
     * The first heading seen becomes the page title unless a title is already set.
     *
     * @param string $text the text to display
     * @param int $level header level
     * @param int $pos byte position in the original source
     */
    public function header($text, $level, $pos)
    {
        if (!isset($this->meta['title'])) {
            $this->meta['title'] = $text;
        }

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr()
    {
        $this->cdata(DOKU_LF.'----------'.DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * All following content will go to the footnote instead of
     * the document. To achieve this the previous rendered content
     * is moved to $store and $doc is cleared
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open()
    {
        if ($this->capture) {
            // move current content to store
            // this is required to ensure safe behaviour of plugins accessed within footnotes
            $this->store = $this->doc;
            $this->doc = '';

            // disable capturing
            $this->capturing = false;
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * All content rendered whilst within footnote syntax mode is discarded,
     * the previously rendered content is restored and capturing is re-enabled.
     *
     * @author Andreas Gohr
     */
    public function footnote_close()
    {
        if ($this->capture) {
            // re-enable capturing
            $this->capturing = true;

            // restore previously rendered content
            $this->doc = $this->store;
            $this->store = '';
        }
    }

    /**
     * Open an unordered list
     */
    public function listu_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     */
    public function listo_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level, $node = false)
    {
        $this->cdata(str_repeat(DOKU_TAB, $level).'* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text)
    {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open()
    {
        $this->cdata(DOKU_LF.DOKU_TAB.'"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close()
    {
        $this->cdata('"'.DOKU_LF);
    }

    /**
     * Display text as file content, optionally syntax highlighted
     *
     * @param string $text text to show
     * @param string $lang programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function file($text, $lang = null, $file = null)
    {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Display text as code content, optionally syntax highlighted
     *
     * @param string $text text to show
     * @param string $language programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function code($text, $language = null, $file = null)
    {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * Uses $this->acronyms
     *
     * @param string $acronym
     */
    public function acronym($acronym)
    {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * Uses $this->smiley
     *
     * @param string $smiley
     */
    public function smiley($smiley)
    {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements
     *
     * Uses $this->entities
     *
     * @param string $entity
     */
    public function entity($entity)
    {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y)
    {
        $this->cdata($x.'×'.$y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening()
    {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing()
    {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe()
    {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening()
    {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing()
    {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link)
    {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract; only a media file used as link name is tracked.
     *
     * @param string $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null)
    {
        $this->registerLinkMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string $id page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null)
    {
        global $ID;

        $this->registerLinkMedia($name);

        // drop a query string from the link target
        $parts = explode('?', $id, 2);
        if (count($parts) === 2) {
            $id = $parts[0];
        }

        // the fallback title is derived from the ID as written, before it is resolved
        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        // NOTE(review): resolve_pageid() is expected to update $id and fill $exists by reference
        resolve_pageid(getNS($ID), $id, $exists);

        // explode() always returns at least one element, no error suppression needed
        list($page) = explode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = $exists;
        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
        // p_set_metadata($id, $data);

        // add link title to summary
        if ($this->capture) {
            $name = $this->_getLinkTitle($name, $default, $id);
            $this->doc .= $name;
        }
    }

    /**
     * Render an external link
     *
     * @param string $url full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null)
    {
        $this->registerLinkMedia($name);

        if ($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render an interwiki link
     *
     * You may want to use $this->_resolveInterWiki() here
     *
     * @param string $match original link - probably not much use
     * @param string|array $name name for the link, array for media file
     * @param string $wikiName identifier (shortcut) for the remote wiki
     * @param string $wikiUri the fragment parsed from the original link
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri)
    {
        $this->registerLinkMedia($name);

        if ($this->capture) {
            // the part before a '#' serves as fallback title
            list($wikiUri) = explode('#', $wikiUri, 2);
            $name = $this->_getLinkTitle($name, $wikiUri);
            $this->doc .= $name;
        }
    }

    /**
     * Link to windows share
     *
     * @param string $url the link
     * @param string|array $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null)
    {
        $this->registerLinkMedia($name);

        if ($this->capture) {
            // $name may be a media array, which must not be concatenated directly;
            // _getLinkTitle() turns it into text, the same way externallink() does
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * Should honor $conf['mailguard'] setting
     *
     * @param string $address Email-Address
     * @param string|array $name name for the link, array for media file
     */
    public function emaillink($address, $name = null)
    {
        $this->registerLinkMedia($name);

        if ($this->capture) {
            // $name may be a media array, which must not be concatenated directly;
            // _getLinkTitle() turns it into text, the same way externallink() does
            $this->doc .= $this->_getLinkTitle($name, '<'.$address.'>');
        }
    }

    /**
     * Render an internal media file
     *
     * @param string $src media ID
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia($src, $title = null, $align = null, $width = null,
                                  $height = null, $cache = null, $linking = null)
    {
        if ($this->capture && $title) {
            $this->doc .= '['.$title.']';
        }
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * @param string $src full media URL
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia($src, $title = null, $align = null, $width = null,
                                  $height = null, $cache = null, $linking = null)
    {
        if ($this->capture && $title) {
            $this->doc .= '['.$title.']';
        }
        $this->_firstimage($src);
    }

    /**
     * Render the output of an RSS feed
     *
     * Records the feed as part of the page and keeps the smallest refresh
     * interval of all feeds as the page's valid age.
     *
     * @param string $url URL of the feed
     * @param array $params Finetuning of the output
     */
    public function rss($url, $params)
    {
        $this->meta['relation']['haspart'][$url] = true;

        $this->meta['date']['valid']['age'] =
            isset($this->meta['date']['valid']['age']) ?
                min($this->meta['date']['valid']['age'], $params['refresh']) :
                $params['refresh'];
    }

    #region Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $name
     *
     * @return string empty string when an array is passed
     */
    public function _simpleTitle($name)
    {
        global $conf;

        if (is_array($name)) {
            return '';
        }

        if ($conf['useslash']) {
            $nssep = '[:;/]';
        } else {
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*'.$nssep.'!', '', $name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!', '', $name);
        return $name;
    }

    /**
     * Construct a title and handle images in titles
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param string|array|null $title either string title or media array
     * @param string $default default title if nothing else is found
     * @param null|string $id linked page id (used to extract title from first heading)
     * @return string title text
     */
    public function _getLinkTitle($title, $default, $id = null)
    {
        if (is_array($title)) {
            // a media array without a usable 'title' entry falls back to the default
            if (!empty($title['title'])) {
                return '['.$title['title'].']';
            }
            return $default;
        }

        if (is_null($title) || trim($title) == '') {
            if (useHeading('content') && $id) {
                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                if ($heading) {
                    return $heading;
                }
            }
            return $default;
        }

        return $title;
    }

    /**
     * Remember first image
     *
     * Only the first reference ending in a jpg, jpeg, gif or png extension is kept.
     *
     * @param string $src image URL or ID
     */
    protected function _firstimage($src)
    {
        global $ID;

        if ($this->firstimage) {
            return;
        }

        list($src) = explode('#', $src, 2);
        if (!media_isexternal($src)) {
            resolve_mediaid(getNS($ID), $src, $exists);
        }
        // the dot is escaped so that a real file extension is required
        if (preg_match('/\.(jpe?g|gif|png)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is ignored.
     *
     * @param string $src media ID
     */
    protected function _recordMediaUsage($src)
    {
        global $ID;

        list($src) = explode('#', $src, 2);
        if (media_isexternal($src)) {
            return;
        }
        resolve_mediaid(getNS($ID), $src, $exists);
        $this->meta['relation']['media'][$src] = $exists;
    }

    /**
     * Track a media file that is used as the name of a link
     *
     * Does nothing unless the name is a media array. Otherwise the media source is
     * offered as first image and, when its type is 'internalmedia', recorded as used.
     *
     * @param string|array|null $name link name as passed to the link methods
     */
    private function registerLinkMedia($name)
    {
        if (!is_array($name)) {
            return;
        }

        $this->_firstimage($name['src']);
        if ($name['type'] === 'internalmedia') {
            $this->_recordMediaUsage($name['src']);
        }
    }

    #endregion
}
//Setup VIM: ex: et ts=4 :