You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Markdown.Converter.js 51 KiB

4 years ago

  1. //>>excludeStart('excludeMdown', pragmas.excludeMdown)
  2. //
  3. // edited original source code to convert API and wrap into
  4. // an AMD module, changes are before/after the wrapped code.
  5. //
  6. // you can replace the markdown converter as long as it
  7. // implements the `makeHtml()` method
  8. //
  9. define(['require', 'exports'], function(require, exports){
  10. // ======= START WRAP
  // Namespace object for the converter API. When both `exports` and `require`
  // are available we're in a CommonJS (e.g. Node.js) module and attach to
  // `exports`; otherwise a fresh object is used.
  var Markdown = (typeof exports === "object" && typeof require === "function")
    ? exports
    : {};
  16. // The following text is included for historical reasons, but should
  17. // be taken with a pinch of salt; it's not all true anymore.
  18. //
  19. // Wherever possible, Showdown is a straight, line-by-line port
  20. // of the Perl version of Markdown.
  21. //
  22. // This is not a normal parser design; it's basically just a
  23. // series of string substitutions. It's hard to read and
  24. // maintain this way, but keeping Showdown close to the original
  25. // design makes it easier to port new features.
  26. //
  27. // More importantly, Showdown behaves like markdown.pl in most
  28. // edge cases. So web applications can do client-side preview
  29. // in Javascript, and then build identical HTML on the server.
  30. //
  31. // This port needs the new RegExp functionality of ECMA 262,
  32. // 3rd Edition (i.e. Javascript 1.5). Most modern web browsers
  33. // should do fine. Even with the new regular expression features,
  34. // We do a lot of work to emulate Perl's regex functionality.
  35. // The tricky changes in this file mostly have the "attacklab:"
  36. // label. Major or self-explanatory changes don't.
  37. //
  38. // Smart diff tools like Araxis Merge will be able to match up
  39. // this file with markdown.pl in a useful way. A little tweaking
  40. // helps: in a copy of markdown.pl, replace "#" with "//" and
  41. // replace "$text" with "text". Be sure to ignore whitespace
  42. // and line endings.
  43. //
  44. //
  45. // Usage:
  46. //
  47. // var text = "Markdown *rocks*.";
  48. //
  49. // var converter = new Markdown.Converter();
  50. // var html = converter.makeHtml(text);
  51. //
  52. // alert(html);
  53. //
  54. // Note: move the sample code to the bottom of this
  55. // file before uncommenting it.
  56. //
  57. (function () {
  // Default hook implementation: hands its argument back untouched.
  function identity(value) {
    return value;
  }

  // Default hook implementation for yes/no hooks: always answers false.
  function returnFalse(ignored) {
    return false;
  }

  // Registry of named plugin hooks. Each hook is stored as a function-valued
  // property on the instance, keyed by hook name.
  function HookCollection() { }

  HookCollection.prototype = {
    // Appends `func` to the hook `hookname`: the new hook feeds the result of
    // the previously registered function into `func`. A hook that is still the
    // no-op `identity` is simply replaced. Throws for unregistered hook names.
    chain: function (hookname, func) {
      var previous = this[hookname];
      if (!previous) {
        throw new Error("unknown hook " + hookname);
      }
      this[hookname] = (previous === identity)
        ? func
        : function (x) { return func(previous(x)); };
    },

    // Replaces the hook `hookname` outright with `func`.
    // Throws for unregistered hook names.
    set: function (hookname, func) {
      var registered = this[hookname];
      if (!registered) {
        throw new Error("unknown hook " + hookname);
      }
      this[hookname] = func;
    },

    // Registers `hookname` with a pass-through default.
    addNoop: function (hookname) {
      this[hookname] = identity;
    },

    // Registers `hookname` with a default that always returns false.
    addFalse: function (hookname) {
      this[hookname] = returnFalse;
    }
  };
  83. Markdown.HookCollection = HookCollection;
  84. // g_urls and g_titles allow arbitrary user-entered strings as keys. This
  85. // caused an exception (and hence stopped the rendering) when the user entered
  86. // e.g. [push] or [__proto__]. Adding a prefix to the actual key prevents this
  87. // (since no builtin property starts with "s_"). See
  88. // http://meta.stackoverflow.com/questions/64655/strange-wmd-bug
  89. // (granted, switching from Array() to Object() alone would have left only __proto__
  90. // to be a problem)
  // String-keyed store that is safe for arbitrary user-supplied keys: every
  // key is stored under an "s_" prefix so it can never collide with a builtin
  // property such as `push` or `__proto__`.
  function SaveHash() { }

  SaveHash.prototype = {
    // Stores `value` under the prefixed form of `key`.
    set: function (key, value) {
      var slot = "s_" + key;
      this[slot] = value;
    },

    // Returns the value stored for `key`, or undefined when nothing was stored.
    get: function (key) {
      var slot = "s_" + key;
      return this[slot];
    }
  };
  100. Markdown.Converter = function () {
  // Hook registry for this converter instance; also exposed publicly as `this.hooks`.
  // All three hooks start out as pass-through (no-op) functions.
  var pluginHooks = this.hooks = new HookCollection();
  pluginHooks.addNoop("plainLinkText"); // given a URL that was encountered by itself (without markup), should return the link text that's to be given to this link
  pluginHooks.addNoop("preConversion"); // called with the original text as given to makeHtml. The result of this plugin hook is the actual markdown source that will be cooked
  pluginHooks.addNoop("postConversion"); // called with the final cooked HTML code. The result of this plugin hook is the actual output of makeHtml
  //
  // Private state of the converter instance:
  //
  // Global hashes, used by various utility routines.
  // g_urls / g_titles are SaveHash instances created at the start of makeHtml()
  // and filled by _StripLinkDefinitions() (keyed by lower-cased link id);
  // g_html_blocks is an array whose indexes appear in the "~KxK" placeholders.
  // A non-null g_urls also doubles as the "conversion in progress" flag.
  var g_urls;
  var g_titles;
  var g_html_blocks;
  // Used to track when we're inside an ordered or unordered list
  // (see _ProcessListItems() for details):
  var g_list_level;
  this.makeHtml = function (text) {
    //
    // Main function: converts the Markdown source `text` to HTML and returns it.
    //
    // The order in which other subs are called here is essential. Link and
    // image substitutions need to happen before
    // _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the
    // <a> and <img> tags get encoded.
    //
    // Throws an Error when called re-entrantly on the same converter instance.
    // Any error thrown by a plugin hook or a conversion step propagates to the
    // caller, but the per-conversion state is always released first, so the
    // converter stays usable afterwards.
    //

    // This will only happen if makeHtml on the same converter instance is called from a plugin hook.
    // Don't do that.
    // (This check stays outside the try block on purpose: a rejected recursive
    // call must not wipe the state of the conversion that is still running.)
    if (g_urls)
      throw new Error("Recursive call to converter.makeHtml");

    // Create the private state objects.
    g_urls = new SaveHash();
    g_titles = new SaveHash();
    g_html_blocks = [];
    g_list_level = 0;

    try {
      text = pluginHooks.preConversion(text);

      // attacklab: Replace ~ with ~T
      // This lets us use tilde as an escape char to avoid md5 hashes
      // The choice of character is arbitrary; anything that isn't
      // magic in Markdown will work.
      text = text.replace(/~/g, "~T");

      // attacklab: Replace $ with ~D
      // RegExp interprets $ as a special character
      // when it's in a replacement string
      text = text.replace(/\$/g, "~D");

      // Standardize line endings
      text = text.replace(/\r\n/g, "\n"); // DOS to Unix
      text = text.replace(/\r/g, "\n"); // Mac to Unix

      // Make sure text begins and ends with a couple of newlines:
      text = "\n\n" + text + "\n\n";

      // Convert all tabs to spaces.
      text = _Detab(text);

      // Strip any lines consisting only of spaces and tabs.
      // This makes subsequent regexen easier to write, because we can
      // match consecutive blank lines with /\n+/ instead of something
      // contorted like /[ \t]*\n+/ .
      text = text.replace(/^[ \t]+$/mg, "");

      // Turn block-level HTML blocks into hash entries
      text = _HashHTMLBlocks(text);

      // Strip link definitions, store in hashes.
      text = _StripLinkDefinitions(text);

      text = _RunBlockGamut(text);
      text = _UnescapeSpecialChars(text);

      // attacklab: Restore dollar signs
      // ("$$" in a replacement string inserts a single literal "$")
      text = text.replace(/~D/g, "$$");

      // attacklab: Restore tildes
      text = text.replace(/~T/g, "~");

      // The post-conversion hook still runs while the state is set, so a
      // makeHtml call from inside it is rejected as recursive.
      text = pluginHooks.postConversion(text);
      return text;
    } finally {
      // Release the per-conversion state even when something above threw;
      // otherwise every later call would be rejected as "recursive".
      g_html_blocks = g_titles = g_urls = null;
    }
  };
  function _StripLinkDefinitions(text) {
    //
    // Removes link definitions from `text` and records their URLs and titles
    // in g_urls / g_titles. Returns the text without the definitions.
    //
    // Link defs are in the form: ^[id]: url "optional title"
    //
    // Capture groups of the pattern below:
    //   $1  id (up to three leading spaces are allowed before the bracket)
    //   $2  url, optionally wrapped in <...>, must be followed by whitespace
    //   $3  the whole (potential) title chunk, optional
    //   $4  any newlines skipped in front of the title's opening quote/paren
    //   $5  the title text between "..." or (...)
    // The definition must be terminated by at least one newline.
    //
    var linkDefinition = /^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?(?=\s|$)[ \t]*\n?[ \t]*((\n*)["(](.+?)[")][ \t]*)?(?:\n+)/gm;

    return text.replace(linkDefinition,
      function (wholeMatch, rawId, url, titleChunk, skippedLines, title) {
        // Link IDs are case-insensitive
        var id = rawId.toLowerCase();
        g_urls.set(id, _EncodeAmpsAndAngles(url));

        if (skippedLines) {
          // Oops, found blank lines, so it's not a title.
          // Put back the parenthetical statement we stole.
          return titleChunk;
        }
        if (title) {
          g_titles.set(id, title.replace(/"/g, "&quot;"));
        }

        // Completely remove the definition from the text
        return "";
      }
    );
  }
  function _HashHTMLBlocks(text) {
    // Hashify HTML blocks:
    // We only want to do this for block-level HTML tags, such as headers,
    // lists, and tables. That's because we still want to wrap <p>s around
    // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    // phrase emphasis, and spans. The list of tags we're looking for is
    // hard-coded in the patterns below.
    //
    // Every match is handed to hashElement(), which swaps it for a "~KxK"
    // placeholder. The patterns are applied strictly in the order listed.
    var blockPatterns = [
      // 1. Nested blocks, e.g.:
      //      <div>
      //          <div>
      //          tags for inner block must be indented.
      //          </div>
      //      </div>
      //    The outermost tags must start at the left margin for this to match,
      //    and the inner nested divs must be indented. This has to run before
      //    the next, more liberal match, because that one will start at the
      //    first `<div>` and stop at the first `</div>`.
      //    Tag list here additionally includes ins|del.
      //    $1 = whole block, $2 = tag name; the closing tag must begin a line
      //    and may be followed only by spaces/tabs and then a newline.
      //    attacklab: This regex can be expensive when it fails.
      //    attacklab: [^\r]*? is a hack around a khtml/pcre bug.
      /^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,

      // 2. Now match more liberally, simply from `\n<tag>` to `</tag>\n`
      //    (same tag list minus ins|del; the closing tag may sit anywhere on
      //    its line).
      /^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,

      // 3. Special case just for <hr />. It was easier to make a special case
      //    than to make the other regex more complicated. The tag may be
      //    indented by up to three spaces and must be followed by a blank line.
      /\n[ ]{0,3}((<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,

      // 4. Special case for standalone HTML comments: preceded and followed by
      //    a blank line, indented by at most three spaces. For the comment
      //    grammar see http://www.w3.org/TR/html-markup/syntax.html#comments
      //    and http://meta.stackoverflow.com/q/95256
      /\n\n[ ]{0,3}(<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>[ \t]*(?=\n{2,}))/g,

      // 5. PHP and ASP-style processor instructions (<?...?> and <%...%>),
      //    again between blank lines and indented by at most three spaces.
      /(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g
    ];

    var hashed = text;
    for (var i = 0; i < blockPatterns.length; i++) {
      hashed = hashed.replace(blockPatterns[i], hashElement);
    }
    return hashed;
  }
  // Replacement callback used by _HashHTMLBlocks(): stores the captured block
  // (m1, with leading and trailing newlines stripped) in g_html_blocks and
  // returns a marker ("~KxK" where x is its key) surrounded by blank lines.
  function hashElement(wholeMatch, m1) {
    var stored = m1
      .replace(/^\n+/, "")    // Undo double lines
      .replace(/\n+$/g, "");  // strip trailing blank lines
    // Array.push returns the new length, so the block's key is length - 1.
    var key = g_html_blocks.push(stored) - 1;
    return "\n\n~K" + key + "K\n\n";
  }
  function _RunBlockGamut(text, doNotUnhash) {
    //
    // Runs all the transformations that form block-level tags like
    // paragraphs, headers, and list items, in a fixed order.
    // `doNotUnhash` is passed through unchanged to _FormParagraphs().
    //
    // Horizontal rules: a line of three or more *, - or _ characters,
    // optionally separated by single spaces.
    var horizontalRules = [
      /^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,
      /^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$/gm,
      /^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$/gm
    ];

    var out = _DoHeaders(text);

    for (var i = 0; i < horizontalRules.length; i++) {
      out = out.replace(horizontalRules[i], "<hr />\n");
    }

    out = _DoBlockQuotes(_DoCodeBlocks(_DoLists(out)));

    // We already ran _HashHTMLBlocks() before, in Markdown(), but that
    // was to escape raw HTML in the original Markdown source. This time,
    // we're escaping the markup we've just created, so that we don't wrap
    // <p> tags around block-level tags.
    return _FormParagraphs(_HashHTMLBlocks(out), doNotUnhash);
  }
  function _RunSpanGamut(text) {
    //
    // Runs all the transformations that occur *within* block-level tags like
    // paragraphs, headers, and list items. The order of the steps matters.
    //
    var escaped = _EncodeBackslashEscapes(
      _EscapeSpecialCharsWithinTagAttributes(
        _DoCodeSpans(text)));

    // Process anchor and image tags. Images must come first,
    // because ![foo][f] looks like an anchor.
    var withLinks = _DoAnchors(_DoImages(escaped));

    // Make links out of things like `<http://example.com/>`
    // Must come after _DoAnchors(), because you can use < and >
    // delimiters in inline links like [this](<url>).
    var autoLinked = _DoAutoLinks(withLinks);

    // "~P" was put in place to prevent autolinking; reset it now
    var restored = autoLinked.replace(/~P/g, "://");

    var emphasized = _DoItalicsAndBold(_EncodeAmpsAndAngles(restored));

    // Do hard breaks:
    return emphasized.replace(/ +\n/g, " <br>\n");
  }
  function _EscapeSpecialCharsWithinTagAttributes(text) {
    //
    // Within tags -- meaning between < and > -- encode [\ ` * _] so they
    // don't conflict with their use in Markdown for code, italics and strong.
    //
    // The pattern finds HTML tags and comments. See Friedl's
    // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
    // SE: changed the comment part of the regex
    var tagOrComment = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--(?:|(?:[^>-]|-[^>])(?:[^-]|-[^-])*)--)>)/gi;

    return text.replace(tagOrComment, function (wholeMatch) {
      // also escape slashes in comments to prevent autolinking there -- http://meta.stackoverflow.com/questions/95987
      var isBangConstruct = wholeMatch.charAt(1) == "!";
      var charsToEscape = isBangConstruct ? "\\`*_/" : "\\`*_";
      // Turn <code>/</code> markers found inside the match back into backticks.
      var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g, "$1`");
      return escapeCharacters(tag, charsToEscape);
    });
  }
  function _DoAnchors(text) {
    //
    // Turn Markdown link shortcuts into XHTML <a> tags.
    //
    // All three patterns expose the same capture layout to writeAnchorTag():
    //   $1 whole match, $2 link text, $3 id, $4 href, $5 title chunk,
    //   $6 quote char, $7 title. Empty "()" groups pad the slots a given
    //   syntax doesn't have.
    //

    // Reference-style links: [link text] [id]
    // Link text may contain brackets nested one level; one optional space
    // and one optional newline (followed by spaces) may separate the two
    // bracket pairs.
    var referenceLink = /(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g;

    // Inline-style links: [link text](url "optional title")
    // The href may be wrapped in <...> and may contain one level of
    // (correctly nested) parens (think MSDN); the title is quoted with ' or "
    // and is optional.
    var inlineLink = /(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?((?:\([^)]*\)|[^()])*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g;

    // Reference-style shortcuts: [link text]
    // Link text can't contain '[' or ']'.
    var shortcutLink = /(\[([^\[\]]+)\])()()()()()/g;

    // Shortcuts must come last in case you've also got [link test][1]
    // or [link test](/foo)
    return text
      .replace(referenceLink, writeAnchorTag)
      .replace(inlineLink, writeAnchorTag)
      .replace(shortcutLink, writeAnchorTag);
  }
  // Replacement callback for _DoAnchors(). Builds the <a> tag for one matched
  // link, or returns the matched text unchanged when the link refers to an id
  // that has no definition.
  //   m1 = whole match, m2 = link text, m3 = link id, m4 = href, m7 = title
  //   (m5/m6 are the title chunk and quote char and are not used here).
  function writeAnchorTag(wholeMatch, m1, m2, m3, m4, m5, m6, m7) {
    var matched = m1;
    // to prevent auto-linking within the link text; converted back after the auto-linker runs
    var linkText = m2.replace(/:\/\//g, "~P");
    var linkId = m3.toLowerCase();
    var href = m4;
    var title = (m7 == undefined) ? "" : m7;

    if (href == "") {
      if (linkId == "") {
        // lower-case and turn embedded newlines into spaces
        linkId = linkText.toLowerCase().replace(/ ?\n/g, " ");
      }

      var definedUrl = g_urls.get(linkId);
      if (definedUrl != undefined) {
        href = definedUrl;
        var definedTitle = g_titles.get(linkId);
        if (definedTitle != undefined) {
          title = definedTitle;
        }
      } else if (matched.search(/\(\s*\)$/m) > -1) {
        // Special case for explicit empty url
        href = "";
      } else {
        // Unknown reference: leave the source text as it was.
        return matched;
      }
    }

    href = escapeCharacters(encodeProblemUrlChars(href), "*_");

    var html = "<a href=\"" + href + "\"";
    if (title != "") {
      html += " title=\"" + escapeCharacters(attributeEncode(title), "*_") + "\"";
    }
    return html + ">" + linkText + "</a>";
  }
  function _DoImages(text) {
    //
    // Turn Markdown image shortcuts into <img> tags.
    //
    // Both patterns expose the same capture layout to writeImageTag():
    //   $1 whole match, $2 alt text, $3 id, $4 src url, $5 title chunk,
    //   $6 quote char, $7 title. Empty "()" groups pad the unused slots.
    //

    // Reference-style labeled images: ![alt text][id]
    // One optional space and one optional newline (followed by spaces) may
    // separate the two bracket pairs.
    var referenceImage = /(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g;

    // Inline images: ![alt text](url "optional title")
    // One optional whitespace character may precede the paren; the url may
    // be wrapped in <...>; the title is quoted with ' or " and is optional.
    var inlineImage = /(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g;

    return text
      .replace(referenceImage, writeImageTag)
      .replace(inlineImage, writeImageTag);
  }
  // Encodes `text` for use inside a double-quoted HTML attribute value.
  function attributeEncode(text) {
    // unconditionally replace angle brackets here -- what ends up in an attribute (e.g. alt or title)
    // never makes sense to have verbatim HTML in it (and the sanitizer would totally break it)
    var encoded = text.replace(/>/g, "&gt;");
    encoded = encoded.replace(/</g, "&lt;");
    encoded = encoded.replace(/"/g, "&quot;");
    return encoded;
  }
  // Replacement callback for _DoImages(). Builds the <img> tag for one matched
  // image, or returns the matched text unchanged when the image refers to an
  // id that has no definition.
  //   m1 = whole match, m2 = alt text, m3 = link id, m4 = src url, m7 = title
  //   (m5/m6 are the title chunk and quote char and are not used here).
  function writeImageTag(wholeMatch, m1, m2, m3, m4, m5, m6, m7) {
    var matched = m1;
    var altText = m2;
    var linkId = m3.toLowerCase();
    var src = m4;
    var title = m7 ? m7 : "";

    if (src == "") {
      if (linkId == "") {
        // lower-case and turn embedded newlines into spaces
        linkId = altText.toLowerCase().replace(/ ?\n/g, " ");
      }

      var definedUrl = g_urls.get(linkId);
      if (definedUrl == undefined) {
        // Unknown reference: leave the source text as it was.
        return matched;
      }
      src = definedUrl;

      var definedTitle = g_titles.get(linkId);
      if (definedTitle != undefined) {
        title = definedTitle;
      }
    }

    altText = escapeCharacters(attributeEncode(altText), "*_[]()");
    src = escapeCharacters(src, "*_");
    // attacklab: Markdown.pl adds empty title attributes to images.
    // Replicate this bug: the title attribute is emitted even when empty.
    title = escapeCharacters(attributeEncode(title), "*_");

    return "<img src=\"" + src + "\" alt=\"" + altText + "\"" +
      " title=\"" + title + "\"" +
      " />";
  }
  function _DoHeaders(text) {
    // Converts Setext-style and atx-style headers into <h1>..<h6> elements.
    // Header content goes through _RunSpanGamut().

    // Wraps already-matched header content in an <hN> element followed by a blank line.
    function heading(level, content) {
      return "<h" + level + ">" + _RunSpanGamut(content) + "</h" + level + ">\n\n";
    }

    // Setext-style headers:
    //   Header 1
    //   ========
    //
    //   Header 2
    //   --------
    //
    var underlinedWithEquals = /^(.+)[ \t]*\n=+[ \t]*\n+/gm;
    var underlinedWithDashes = /^(.+)[ \t]*\n-+[ \t]*\n+/gm;

    // atx-style headers:
    //   # Header 1
    //   ## Header 2
    //   ## Header 2 with closing hashes ##
    //   ...
    //   ###### Header 6
    //
    // $1 = string of #'s, $2 = header text; optional closing #'s are not counted.
    var hashPrefixed = /^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm;

    return text
      .replace(underlinedWithEquals, function (wholeMatch, title) {
        return heading(1, title);
      })
      .replace(underlinedWithDashes, function (wholeMatch, title) {
        return heading(2, title);
      })
      .replace(hashPrefixed, function (wholeMatch, hashes, title) {
        return heading(hashes.length, title);
      });
  }
  function _DoLists(text) {
    //
    // Form HTML ordered (numbered) and unordered (bulleted) lists.
    //
    // Pattern for any entire ul or ol list when we are already inside a list:
    //   $1 = whole list
    //   $2 = first item's indentation (up to 3 spaces), marker and spacing
    //   $3 = first list item marker ([*+-] or digits followed by a dot)
    //   $4 = terminator: the "~0" sentinel, or a blank line followed by
    //        non-space text that is not another list item marker
    var nestedList = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;

    // Same list body, but at top level the list must follow a blank line or
    // start the text; that run-up is captured as an extra leading group.
    var topLevelList = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;

    // A bullet character anywhere in the first marker chunk means "ul".
    function listTagFor(markerChunk) {
      return (markerChunk.search(/[*+-]/g) > -1) ? "ul" : "ol";
    }

    // attacklab: add sentinel to hack around khtml/safari bug:
    // http://bugs.webkit.org/show_bug.cgi?id=11231
    var withSentinel = text + "~0";
    var converted;

    if (g_list_level) {
      converted = withSentinel.replace(nestedList, function (wholeMatch, list, firstMarker) {
        var tag = listTagFor(firstMarker);
        // Trim any trailing whitespace, to put the closing `</$list_type>`
        // up on the preceding line, to get it past the current stupid
        // HTML block parser. This is a hack to work around the terrible
        // hack that is the HTML block parser.
        var items = _ProcessListItems(list, tag).replace(/\s+$/, "");
        return "<" + tag + ">" + items + "</" + tag + ">\n";
      });
    } else {
      converted = withSentinel.replace(topLevelList, function (wholeMatch, runup, list, firstMarker) {
        var tag = listTagFor(firstMarker);
        var items = _ProcessListItems(list, tag);
        return runup + "<" + tag + ">\n" + items + "</" + tag + ">\n";
      });
    }

    // attacklab: strip sentinel
    return converted.replace(/~0/, "");
  }
  // Regex source for the item marker of each list type.
  var _listItemMarkers = { ol: "\\d+[.]", ul: "[*+-]" };

  function _ProcessListItems(list_str, list_type) {
    //
    // Process the contents of a single ordered or unordered list, splitting it
    // into individual list items and returning the resulting <li> markup.
    //
    // list_type is either "ul" or "ol".
    //
    // The $g_list_level global keeps track of when we're inside a list.
    // Each time we enter a list, we increment it; when we leave a list,
    // we decrement. If it's zero, we're not in a list anymore.
    //
    // We do this because when we're not inside a list, we want to treat
    // something like this:
    //
    //    I recommend upgrading to version
    //    8. Oops, now this line is treated
    //    as a sub-list.
    //
    // As a single paragraph, despite the fact that the second line starts
    // with a digit-period-space sequence.
    //
    // Whereas when we're inside a list (or sub-list), that line will be
    // treated as the start of a sub-list. What a kludge, huh? This is
    // an aspect of Markdown's syntax that's hard to parse perfectly
    // without resorting to mind-reading. Perhaps the solution is to
    // change the syntax rules such that sub-lists must start with a
    // starting cardinal number; e.g. "1." or "a.".
    g_list_level++;

    // In the original attacklab showdown, list_type was not given to this
    // function, and anything that matched /[*+-]|\d+[.]/ would just create the
    // next <li>. That was changed to behave identically to MarkdownSharp: only
    // a marker of this list's own type starts the next item.
    //
    // Constructed pattern, with {MARKER} being \d+[.] or [*+-]:
    //   $1 = leading whitespace
    //   $2 = list marker
    //   $3 = list item text including its trailing newline(s) ($4)
    //   followed (lookahead only) by the "~0" sentinel or by another
    //   {MARKER} at the same leading whitespace.
    var marker = _listItemMarkers[list_type];
    var itemPattern = new RegExp("(^[ \\t]*)(" + marker + ")[ \\t]+([^\\r]+?(\\n+))(?=(~0|\\1(" + marker + ")[ \\t]+))", "gm");

    // Carried from one item to the next while replacing.
    var previousItemEndedBlank = false;

    // trim trailing blank lines, then
    // attacklab: add sentinel to emulate \z
    var source = list_str.replace(/\n{2,}$/, "\n") + "~0";

    var rendered = source.replace(itemPattern,
      function (wholeMatch, indent, itemMarker, body) {
        var endsBlank = /\n\n$/.test(body);
        var hasBlankLine = endsBlank || body.search(/\n{2,}/) > -1;
        var html;

        if (hasBlankLine || previousItemEndedBlank) {
          // Blank lines involved: run the item through the block gamut.
          html = _RunBlockGamut(_Outdent(body), /* doNotUnhash = */true);
        } else {
          // Recursion for sub-lists:
          html = _DoLists(_Outdent(body));
          html = _RunSpanGamut(html.replace(/\n$/, "")); // chomp(item)
        }

        previousItemEndedBlank = endsBlank;
        return "<li>" + html + "</li>\n";
      }
    );

    // attacklab: strip sentinel
    rendered = rendered.replace(/~0/g, "");

    g_list_level--;
    return rendered;
  }
  function _DoCodeBlocks(text) {
    //
    // Process Markdown `<pre><code>` blocks.
    //
    // Pattern:
    //   starts after a blank line or at the start of the text
    //   $1 = the code block -- one or more lines, each starting with a tab
    //        or a tab-width (4) of spaces
    //   $2 = what follows: the first character of a line indented by less
    //        than a tab-width, or nothing when the "~0" sentinel is next
    var indentedRun = /(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g;

    // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
    var withSentinel = text + "~0";

    var converted = withSentinel.replace(indentedRun,
      function (wholeMatch, rawBlock, following) {
        var code = _Detab(_EncodeCode(_Outdent(rawBlock)))
          .replace(/^\n+/g, "")   // trim leading newlines
          .replace(/\n+$/g, "");  // trim trailing newlines
        var html = "<pre><code>" + code + "\n</code></pre>";
        return "\n\n" + html + "\n\n" + following;
      }
    );

    // attacklab: strip sentinel
    return converted.replace(/~0/, "");
  }
  // Stores `text` (with leading and trailing newlines removed) in
  // g_html_blocks and returns its "~KxK" marker surrounded by blank lines.
  function hashBlock(text) {
    var trimmed = text.replace(/(^\n+|\n+$)/g, "");
    // Array.push returns the new length, so the block's key is length - 1.
    var key = g_html_blocks.push(trimmed) - 1;
    return "\n\n~K" + key + "K\n\n";
  }
  function _DoCodeSpans(text) {
    //
    // * Backtick quotes are used for <code></code> spans.
    //
    // * You can use multiple backticks as the delimiters if you want to
    //   include literal backticks in the code span. So, this input:
    //
    //       Just type ``foo `bar` baz`` at the prompt.
    //
    //   Will translate to:
    //
    //       <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    //
    //   There's no arbitrary limit to the number of backticks you
    //   can use as delimiters. If you need three consecutive backticks
    //   in your code, use four for delimiters, etc.
    //
    // * You can use spaces to get literal backticks at the edges:
    //
    //       ... type `` `bar` `` ...
    //
    //   Turns to:
    //
    //       ... type <code>`bar`</code> ...
    //
    // Pattern:
    //   $1 = character before the opening ` (can't be a backslash), or line start
    //   $2 = opening run of `
    //   $3 = the code; must end in a non-backtick
    //        (attacklab: work around lack of lookbehind)
    //   then the matching closer, not followed by another `
    var codeSpan = /(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm;

    return text.replace(codeSpan, function (wholeMatch, before, ticks, inner) {
      var code = inner
        .replace(/^([ \t]*)/g, "")  // leading whitespace
        .replace(/[ \t]*$/g, "");   // trailing whitespace
      // "://" becomes "~P" to prevent auto-linking. Not necessary in code
      // *blocks*, but in code spans. Will be converted back after the
      // auto-linker runs.
      code = _EncodeCode(code).replace(/:\/\//g, "~P");
      return before + "<code>" + code + "</code>";
    });
  }
  function _EncodeCode(text) {
      //
      // Escape characters inside a Markdown code run so that they are
      // treated as literals rather than as HTML or Markdown syntax.
      //
      // Ampersands go first (entities are not entities inside code, and
      // the "&" produced by the following steps must not be re-encoded),
      // then the angle brackets.
      var htmlSafe = text
          .replace(/&/g, "&amp;")
          .replace(/</g, "&lt;")
          .replace(/>/g, "&gt;");

      // Finally hide the characters that are magic in Markdown.
      // jj: this step is reported to break input such as
      //---
      //* Item
      //  1. Subitem
      //     special char: *
      //---
      return escapeCharacters(htmlSafe, "\*_{}[]\\", false);
  }
  function _DoItalicsAndBold(text) {
      //
      // Convert **bold** / __bold__ to <strong> and *em* / _em_ to <em>.
      //
      // In both patterns the delimiter must be preceded and followed by a
      // non-word character (or "_") or by the start/end of the text, which
      // keeps intra-word underscores such as snake_case untouched.
      //
      var strongRe = /([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/g;
      var emRe = /([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/g;

      // <strong> must be handled first, otherwise the <em> pattern would
      // consume one delimiter character of each "**" / "__" pair.
      return text
          .replace(strongRe, "$1<strong>$3</strong>$4")
          .replace(emRe, "$1<em>$3</em>$4");
  }
  function _DoBlockQuotes(text) {
      //
      // Convert runs of ">"-quoted lines into hashed <blockquote> blocks.
      //
      // A quote run ($1) is one or more groups consisting of:
      //   - a line starting with optional whitespace and ">",
      //   - any directly following non-empty lines,
      //   - any trailing blank lines.
      var quoteRunRe = /((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm;

      // Removes the indentation added below from the lines of a <pre>
      // block, where leading spaces would be visible.
      // attacklab: the two-step "~0" replacement works around a
      // Konqueror 3.5.4 bug ("----------bug".replace(/^-/g,"") == "bug").
      function unindentPre(wholeMatch, preBlock) {
          return preBlock.replace(/^ /mg, "~0").replace(/~0/g, "");
      }

      function renderQuote(wholeMatch, quoted) {
          var inner = quoted
              .replace(/^[ \t]*>[ \t]?/gm, "~0") // trim one level of quoting
              .replace(/~0/g, "")                // (Konqueror workaround, as above)
              .replace(/^[ \t]+$/gm, "");        // blank out whitespace-only lines

          // Recurse: the quote body is Markdown itself. Then indent the
          // resulting HTML inside the <blockquote>.
          inner = _RunBlockGamut(inner).replace(/(^|\n)/g, "$1 ");

          // The indentation must not leak into <pre> content.
          inner = inner.replace(/(\s*<pre>[^\r]+?<\/pre>)/gm, unindentPre);

          return hashBlock("<blockquote>\n" + inner + "\n</blockquote>");
      }

      return text.replace(quoteRunRe, renderQuote);
  }
  function _FormParagraphs(text, doNotUnhash) {
      //
      // Split the text on blank lines and wrap each chunk in <p> tags.
      //
      // Params:
      //   text        - string to process
      //   doNotUnhash - when truthy, "~K<n>K" placeholders are left in
      //                 place instead of being replaced by the blocks
      //                 stored in g_html_blocks
      //
      // Returns the chunks joined by blank lines.
      //
      var markerRe = /~K(\d+)K/;
      var rendered = [];
      var idx, chunk, replacedAny;

      // Strip leading and trailing blank lines, then split into chunks.
      var chunks = text
          .replace(/^\n+/g, "")
          .replace(/\n+$/g, "")
          .split(/\n{2,}/g);

      for (idx = 0; idx < chunks.length; idx++) {
          chunk = chunks[idx];

          // A chunk holding a hashed-block marker is copied verbatim.
          if (markerRe.test(chunk)) {
              rendered.push(chunk);
              continue;
          }

          // Whitespace-only chunks produce no output.
          if (!/\S/.test(chunk)) {
              continue;
          }

          // Regular text: run span-level processing; the opening tag
          // replaces any leading spaces/tabs.
          rendered.push(_RunSpanGamut(chunk).replace(/^([ \t]*)/g, "<p>") + "</p>");
      }

      if (!doNotUnhash) {
          // Substitute the stored blocks for their markers. Stored blocks
          // may contain markers themselves, so repeat until a pass
          // replaces nothing.
          var restoreBlock = function (wholeMatch, id) {
              replacedAny = true;
              return g_html_blocks[id];
          };

          for (idx = 0; idx < rendered.length; idx++) {
              do {
                  replacedAny = false;
                  rendered[idx] = rendered[idx].replace(/~K(\d+)K/g, restoreBlock);
              } while (replacedAny);
          }
      }

      return rendered.join("\n\n");
  }
  function _EncodeAmpsAndAngles(text) {
      //
      // Encode only those ampersands and "<" characters that are not
      // already part of an HTML entity or tag.
      //
      // Ampersand handling is based on Nat Irons's Amputator MT plugin:
      //   http://bumppo.net/projects/amputator/
      //
      // "&" that does not start something shaped like an entity
      // (&name; / &#123; / &#x1F;)
      var nakedAmpRe = /&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g;
      // "<" that is not followed by a letter, "/", "?", "$" or "!"
      var nakedLtRe = /<(?![a-z\/?\$!])/gi;

      return text
          .replace(nakedAmpRe, "&amp;")
          .replace(nakedLtRe, "&lt;");
  }
  function _EncodeBackslashEscapes(text) {
      //
      // Parameter:  String.
      // Returns:    The string, after processing the Markdown backslash
      //             escape sequences: each backslash + escapable character
      //             pair is replaced by the "~E<charCode>E" placeholder
      //             produced by escapeCharacters_callback.
      //
      // Escapable characters: \ ` * _ { } [ ] ( ) > # + - . !
      //
      // attacklab: The polite way to do this is with the
      // escapeCharacters() function:
      //
      //     text = escapeCharacters(text,"\\",true);
      //     text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
      //
      // ...but we're sidestepping its use of the (slow) RegExp constructor
      // as an optimization for Firefox. This function gets called a LOT.

      // Escaped backslashes go first so that "\\*" is read as an escaped
      // backslash followed by a plain "*".
      text = text.replace(/\\(\\)/g, escapeCharacters_callback);

      // The hyphen is escaped inside the class: an unescaped "+-." is the
      // range U+002B..U+002E, which also contains "," and would make "\,"
      // lose its backslash.
      text = text.replace(/\\([`*_{}\[\]()>#+\-.!])/g, escapeCharacters_callback);
      return text;
  }
  function _DoAutoLinks(text) {
      //
      // Turn raw and angle-bracketed URLs into <a> links.
      //
      // At this point every other URL in the text has already been
      // hyperlinked as <a href=""></a>; only bare URLs and the
      // <http://www.foo.com> form remain.
      //
      // Email autolinking (<address@domain.foo>) is intentionally not
      // performed, since the server does not do it either.
      //

      // A bare http/https/ftp URL; it must be preceded by whitespace or
      // the start of the text and followed by a non-word character or the
      // end of the text.
      var bareUrlRe = /(^|\s)(https?|ftp)(:\/\/[-A-Z0-9+&@#\/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#\/%=~_|\[\]])($|\W)/gi;
      // Anything like <http://example.com>
      var bracketedUrlRe = /<((https?|ftp):[^'">\s]+)>/gi;

      function toAnchor(wholematch, url) {
          return "<a href=\"" + url + "\">" + pluginHooks.plainLinkText(url) + "</a>";
      }

      // Step 1: wrap bare URLs in "<" and ">" so that step 2 handles both
      // forms the same way.
      var bracketed = text.replace(bareUrlRe, "$1<$2$3>$4");

      // Step 2: turn every bracketed URL into a link.
      return bracketed.replace(bracketedUrlRe, toAnchor);
  }
  function _UnescapeSpecialChars(text) {
      //
      // Swap back in all the special characters we've hidden: every
      // "~E<n>E" placeholder is replaced by the character with the
      // decimal character code <n>.
      //
      text = text.replace(/~E(\d+)E/g,
          function (wholeMatch, m1) {
              // The code is always decimal; pass the radix explicitly so a
              // leading zero can never be interpreted as octal.
              var charCodeToReplace = parseInt(m1, 10);
              return String.fromCharCode(charCodeToReplace);
          }
      );
      return text;
  }
  function _Outdent(text) {
      //
      // Remove one level of indentation (a tab, or up to four spaces)
      // from the start of every line.
      //
      // attacklab: the indentation is first turned into a "~0" marker and
      // the markers are removed in a second pass, to work around a
      // Konqueror 3.5.4 bug:
      //
      //     "----------bug".replace(/^-/g,"") == "bug"
      //
      return text
          .replace(/^(\t|[ ]{1,4})/gm, "~0") // attacklab: g_tab_width
          .replace(/~0/g, "");               // clean up the markers
  }
  function _Detab(text) {
      //
      // Expand every tab to spaces, up to the next 4-column tab stop.
      //
      // Returns the text unchanged when it contains no tab.
      //
      if (!/\t/.test(text))
          return text;

      // Padding indexed by the column offset inside the current tab stop:
      // at offset 0 a tab is worth four spaces, at offset 3 just one.
      var spaces = ["    ", "   ", "  ", " "],
          // Offset (in the original string) of the most recent tab stop:
          // the character after the last newline or the last tab.
          skew = 0,
          v;

      return text.replace(/[\n\t]/g, function (match, offset) {
          if (match === "\n") {
              // A new line restarts the column count.
              skew = offset + 1;
              return match;
          }
          v = (offset - skew) % 4;
          // The expanded tab ends exactly on a tab stop.
          skew = offset + 1;
          return spaces[v];
      });
  }
  1084. //
  1085. // attacklab: Utility functions
  1086. //
  // Characters (and the "~D" dollar placeholder) that confuse URL detection.
  var _problemUrlChars = /(?:["'*()[\]:]|~D)/g;

  function encodeProblemUrlChars(url) {
      //
      // Percent-encode some unusual "problem" characters in a URL to
      // avoid URL detection problems.
      //
      // Returns "" for an empty or missing url. A colon is kept as-is
      // when it is the last character or is followed by a digit or "/".
      //
      if (!url)
          return "";

      var lastIndex = url.length - 1;

      function encodeOne(found, position) {
          if (found === "~D") // escape for dollar
              return "%24";

          var keepColon = found === ":" &&
              (position === lastIndex || /[0-9\/]/.test(url.charAt(position + 1)));

          return keepColon ? ":" : "%" + found.charCodeAt(0).toString(16);
      }

      return url.replace(_problemUrlChars, encodeOne);
  }
  function escapeCharacters(text, charsToEscape, afterBackslash) {
      //
      // Replace every occurrence of one of the given characters by the
      // placeholder produced by escapeCharacters_callback.
      //
      // Params:
      //   text           - string to process
      //   charsToEscape  - string listing the characters to replace
      //   afterBackslash - when truthy, a character is only replaced if a
      //                    backslash precedes it, and the backslash is
      //                    consumed together with the character
      //
      // The characters become a regex character class, so everything that
      // is special inside a class has to be escaped first: "[", "]" and
      // "\", but also "-" (it would form a range such as "+-.", which
      // contains ",") and "^" (it would negate the class when it comes
      // first).
      var regexString = "([" + charsToEscape.replace(/([\[\]\\\-\^])/g, "\\$1") + "])";

      if (afterBackslash) {
          regexString = "\\\\" + regexString;
      }

      var regex = new RegExp(regexString, "g");
      text = text.replace(regex, escapeCharacters_callback);

      return text;
  }
  function escapeCharacters_callback(wholeMatch, m1) {
      //
      // Replacement callback: returns the "~E<charCode>E" placeholder for
      // the first character of the captured group m1, using its decimal
      // character code.
      //
      return "~E" + m1.charCodeAt(0) + "E";
  }
  1118. }; // end of the Markdown.Converter constructor
  1119. })();
  1120. // ======= END WRAP
  1121. // no reason for multiple instances,
  1122. // just call `makeHtml`
  1123. return new Markdown.Converter();
  1124. });
  1125. //>>excludeEnd('excludeMdown')