302 lines
12 KiB
JavaScript
302 lines
12 KiB
JavaScript
import {
|
|
noopTest,
|
|
edit,
|
|
merge
|
|
} from './helpers.js';
|
|
|
|
/**
|
|
* Block-Level Grammar
|
|
*/
|
|
export const block = {
|
|
newline: /^(?: *(?:\n|$))+/,
|
|
code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
|
|
fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
|
|
hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
|
|
heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
|
|
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
|
|
list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
|
|
html: '^ {0,3}(?:' // optional indentation
|
|
+ '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
|
|
+ '|comment[^\\n]*(\\n+|$)' // (2)
|
|
+ '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
|
|
+ '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
|
|
+ '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
|
|
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
|
|
+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
|
|
+ '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
|
|
+ ')',
|
|
def: /^ {0,3}\[(label)\]: *(?:\n *)?<?([^\s>]+)>?(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
|
|
table: noopTest,
|
|
lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
|
|
// regex template, placeholders will be replaced according to different paragraph
|
|
// interruption rules of commonmark and the original markdown spec:
|
|
_paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
|
|
text: /^[^\n]+/
|
|
};
|
|
|
|
block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
|
|
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
|
|
block.def = edit(block.def)
|
|
.replace('label', block._label)
|
|
.replace('title', block._title)
|
|
.getRegex();
|
|
|
|
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
|
|
block.listItemStart = edit(/^( *)(bull) */)
|
|
.replace('bull', block.bullet)
|
|
.getRegex();
|
|
|
|
block.list = edit(block.list)
|
|
.replace(/bull/g, block.bullet)
|
|
.replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
|
|
.replace('def', '\\n+(?=' + block.def.source + ')')
|
|
.getRegex();
|
|
|
|
block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
|
|
+ '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
|
|
+ '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
|
|
+ '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
|
|
+ '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
|
|
+ '|track|ul';
|
|
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
|
|
block.html = edit(block.html, 'i')
|
|
.replace('comment', block._comment)
|
|
.replace('tag', block._tag)
|
|
.replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
|
|
.getRegex();
|
|
|
|
block.paragraph = edit(block._paragraph)
|
|
.replace('hr', block.hr)
|
|
.replace('heading', ' {0,3}#{1,6} ')
|
|
.replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
|
|
.replace('|table', '')
|
|
.replace('blockquote', ' {0,3}>')
|
|
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
|
|
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
|
|
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
|
|
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
|
|
.getRegex();
|
|
|
|
block.blockquote = edit(block.blockquote)
|
|
.replace('paragraph', block.paragraph)
|
|
.getRegex();
|
|
|
|
/**
|
|
* Normal Block Grammar
|
|
*/
|
|
|
|
block.normal = merge({}, block);
|
|
|
|
/**
|
|
* GFM Block Grammar
|
|
*/
|
|
|
|
block.gfm = merge({}, block.normal, {
|
|
table: '^ *([^\\n ].*\\|.*)\\n' // Header
|
|
+ ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)(?:\\| *)?' // Align
|
|
+ '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
|
|
});
|
|
|
|
block.gfm.table = edit(block.gfm.table)
|
|
.replace('hr', block.hr)
|
|
.replace('heading', ' {0,3}#{1,6} ')
|
|
.replace('blockquote', ' {0,3}>')
|
|
.replace('code', ' {4}[^\\n]')
|
|
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
|
|
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
|
|
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
|
|
.replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
|
|
.getRegex();
|
|
|
|
block.gfm.paragraph = edit(block._paragraph)
|
|
.replace('hr', block.hr)
|
|
.replace('heading', ' {0,3}#{1,6} ')
|
|
.replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
|
|
.replace('table', block.gfm.table) // interrupt paragraphs with table
|
|
.replace('blockquote', ' {0,3}>')
|
|
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
|
|
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
|
|
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
|
|
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
|
|
.getRegex();
|
|
/**
|
|
* Pedantic grammar (original John Gruber's loose markdown specification)
|
|
*/
|
|
|
|
block.pedantic = merge({}, block.normal, {
|
|
html: edit(
|
|
'^ *(?:comment *(?:\\n|\\s*$)'
|
|
+ '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
|
|
+ '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
|
|
.replace('comment', block._comment)
|
|
.replace(/tag/g, '(?!(?:'
|
|
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
|
|
+ '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
|
|
+ '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
|
|
.getRegex(),
|
|
def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
|
|
heading: /^(#{1,6})(.*)(?:\n+|$)/,
|
|
fences: noopTest, // fences not supported
|
|
paragraph: edit(block.normal._paragraph)
|
|
.replace('hr', block.hr)
|
|
.replace('heading', ' *#{1,6} *[^\n]')
|
|
.replace('lheading', block.lheading)
|
|
.replace('blockquote', ' {0,3}>')
|
|
.replace('|fences', '')
|
|
.replace('|list', '')
|
|
.replace('|html', '')
|
|
.getRegex()
|
|
});
|
|
|
|
/**
|
|
* Inline-Level Grammar
|
|
*/
|
|
export const inline = {
|
|
escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
|
|
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
|
|
url: noopTest,
|
|
tag: '^comment'
|
|
+ '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag
|
|
+ '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
|
|
+ '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
|
|
+ '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
|
|
+ '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
|
|
link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
|
|
reflink: /^!?\[(label)\]\[(ref)\]/,
|
|
nolink: /^!?\[(ref)\](?:\[\])?/,
|
|
reflinkSearch: 'reflink|nolink(?!\\()',
|
|
emStrong: {
|
|
lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
|
|
// (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
|
|
// () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
|
|
rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
|
|
rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
|
|
},
|
|
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
|
|
br: /^( {2,}|\\)\n(?!\s*$)/,
|
|
del: noopTest,
|
|
text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
|
|
punctuation: /^([\spunctuation])/
|
|
};
|
|
|
|
// list of punctuation marks from CommonMark spec
|
|
// without * and _ to handle the different emphasis markers * and _
|
|
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
|
|
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
|
|
|
|
// sequences em should skip over [title](link), `code`, <html>
|
|
inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;
|
|
inline.escapedEmSt = /\\\*|\\_/g;
|
|
|
|
inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();
|
|
|
|
inline.emStrong.lDelim = edit(inline.emStrong.lDelim)
|
|
.replace(/punct/g, inline._punctuation)
|
|
.getRegex();
|
|
|
|
inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'g')
|
|
.replace(/punct/g, inline._punctuation)
|
|
.getRegex();
|
|
|
|
inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'g')
|
|
.replace(/punct/g, inline._punctuation)
|
|
.getRegex();
|
|
|
|
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
|
|
|
|
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
|
|
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
|
|
inline.autolink = edit(inline.autolink)
|
|
.replace('scheme', inline._scheme)
|
|
.replace('email', inline._email)
|
|
.getRegex();
|
|
|
|
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;
|
|
|
|
inline.tag = edit(inline.tag)
|
|
.replace('comment', inline._comment)
|
|
.replace('attribute', inline._attribute)
|
|
.getRegex();
|
|
|
|
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
|
|
inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
|
|
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;
|
|
|
|
inline.link = edit(inline.link)
|
|
.replace('label', inline._label)
|
|
.replace('href', inline._href)
|
|
.replace('title', inline._title)
|
|
.getRegex();
|
|
|
|
inline.reflink = edit(inline.reflink)
|
|
.replace('label', inline._label)
|
|
.replace('ref', block._label)
|
|
.getRegex();
|
|
|
|
inline.nolink = edit(inline.nolink)
|
|
.replace('ref', block._label)
|
|
.getRegex();
|
|
|
|
inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
|
|
.replace('reflink', inline.reflink)
|
|
.replace('nolink', inline.nolink)
|
|
.getRegex();
|
|
|
|
/**
|
|
* Normal Inline Grammar
|
|
*/
|
|
|
|
inline.normal = merge({}, inline);
|
|
|
|
/**
|
|
* Pedantic Inline Grammar
|
|
*/
|
|
|
|
inline.pedantic = merge({}, inline.normal, {
|
|
strong: {
|
|
start: /^__|\*\*/,
|
|
middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
|
|
endAst: /\*\*(?!\*)/g,
|
|
endUnd: /__(?!_)/g
|
|
},
|
|
em: {
|
|
start: /^_|\*/,
|
|
middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
|
|
endAst: /\*(?!\*)/g,
|
|
endUnd: /_(?!_)/g
|
|
},
|
|
link: edit(/^!?\[(label)\]\((.*?)\)/)
|
|
.replace('label', inline._label)
|
|
.getRegex(),
|
|
reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
|
|
.replace('label', inline._label)
|
|
.getRegex()
|
|
});
|
|
|
|
/**
|
|
* GFM Inline Grammar
|
|
*/
|
|
|
|
inline.gfm = merge({}, inline.normal, {
|
|
escape: edit(inline.escape).replace('])', '~|])').getRegex(),
|
|
_extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
|
|
url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
|
|
_backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
|
|
del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
|
|
text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
|
|
});
|
|
|
|
inline.gfm.url = edit(inline.gfm.url, 'i')
|
|
.replace('email', inline.gfm._extended_email)
|
|
.getRegex();
|
|
/**
|
|
* GFM + Line Breaks Inline Grammar
|
|
*/
|
|
|
|
inline.breaks = merge({}, inline.gfm, {
|
|
br: edit(inline.br).replace('{2,}', '*').getRegex(),
|
|
text: edit(inline.gfm.text)
|
|
.replace('\\b_', '\\b_| {2,}\\n')
|
|
.replace(/\{2,\}/g, '*')
|
|
.getRegex()
|
|
});
|