Skip to content

Commit

Permalink
feat: 支持 CommonMark 0.31.2。
Browse files Browse the repository at this point in the history
  • Loading branch information
CYJB committed Mar 21, 2024
1 parent 21c5942 commit a9d566e
Show file tree
Hide file tree
Showing 39 changed files with 9,146 additions and 9,203 deletions.
4 changes: 2 additions & 2 deletions Cyjb.Markdown/ParseBlock/BlockLexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace Cyjb.Markdown.ParseBlock;
/// <summary>
/// 表示 Markdown 的块级元素词法分析器。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/"/>
/// <see href="https://spec.commonmark.org/0.31.2/"/>
[LexerRejectable]
[LexerRegex("WS", "[ \t]")]
[LexerRegex("WS_1", @"[ \t]+|[ \t]*\r?\n[ \t]*")]
Expand Down Expand Up @@ -273,7 +273,7 @@ private void HtmlCDataAction()
"nav|noframes|" +
"ol|optgroup|option|" +
"p|param|" +
"section|source|summary|" +
"search|section|summary|" +
"table|tbody|td|tfoot|th|thead|title|tr|track|" +
"ul" +
")(( |\t|\\/?>).*)?$", RegexOptions.IgnoreCase, Kind = BlockKind.HtmlStart)]
Expand Down
2 changes: 1 addition & 1 deletion Cyjb.Markdown/ParseBlock/BlockParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Cyjb.Markdown.ParseBlock;
/// <summary>
/// 表示 Markdown 的块级语法分析器。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/"/>
/// <see href="https://spec.commonmark.org/0.31.2/"/>
internal sealed class BlockParser
{
/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public IEnumerable<BlockProcessor> TryStart(BlockParser parser, BlockLine line,
public static HtmlAttributeList? ParseAttributes(BlockText text)
{
// 最后一个字符是 }
if (text.Length == 0 || text[^1] != '}')
if (text.Length == 0 || text.PeekBack().Text[^1] != '}')
{
return null;
}
Expand Down
19 changes: 6 additions & 13 deletions Cyjb.Markdown/ParseInline/InlineLexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Cyjb.Markdown.ParseInline;
/// <summary>
/// 表示 Markdown 的行级元素词法分析器。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#inlines"/>
/// <see href="https://spec.commonmark.org/0.31.2/#inlines"/>
[LexerRejectable]
[LexerInclusiveContext("LinkClose")]
[LexerRegex("WS", @"[ \t]*\r?\n?[ \t]*")]
Expand Down Expand Up @@ -263,20 +263,13 @@ private void HtmlEndTagAction()
/// <summary>
/// HTML 注释的动作。
/// </summary>
[LexerSymbol(@"[<]!---->", Kind = InlineKind.Node)]
[LexerSymbol(@"[<]!--[^>-]-->", RegexOptions.Singleline, Kind = InlineKind.Node)]
[LexerSymbol(@"[<]!--[^>].*[^-]-->", RegexOptions.Singleline, Kind = InlineKind.Node)]
[LexerSymbol(@"[<]!-->", Kind = InlineKind.Node, UseShortest = true)]
[LexerSymbol(@"[<]!--->", Kind = InlineKind.Node, UseShortest = true)]
[LexerSymbol(@"[<]!---->", Kind = InlineKind.Node, UseShortest = true)]
[LexerSymbol(@"[<]!--.*-->", RegexOptions.Singleline, Kind = InlineKind.Node, UseShortest = true)]
private void HtmlCommentAction()
{
var content = Text[4..^3];
if (content.StartsWith("->") || content.Contains("--", StringComparison.Ordinal))
{
Reject();
}
else
{
Accept(new Html(MarkdownKind.HtmlComment, Text.ToString(), Span));
}
Accept(new Html(MarkdownKind.HtmlComment, Text.ToString(), Span));
}

/// <summary>
Expand Down
5,708 changes: 2,809 additions & 2,899 deletions Cyjb.Markdown/ParseInline/InlineLexer.designed.cs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cyjb.Markdown/ParseInline/InlineParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Cyjb.Markdown.ParseInline;
/// <summary>
/// 表示 Markdown 的行级语法分析器。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/"/>
/// <see href="https://spec.commonmark.org/0.31.2/"/>
internal sealed class InlineParser
{
/// <summary>
Expand Down
28 changes: 19 additions & 9 deletions Cyjb.Markdown/Syntax/Html.cs
Original file line number Diff line number Diff line change
Expand Up @@ -146,22 +146,32 @@ private static string GetContent(MarkdownKind kind, string text)
switch (kind)
{
case MarkdownKind.HtmlStartTag:
ReadOnlySpan<char> span = text.AsSpan()[1..];
int idx = span.IndexOfAny(" \t\r\n>");
if (idx > 0)
{
span = span[..idx];
ReadOnlySpan<char> span = text.AsSpan(1);
int idx = span.IndexOfAny(" \t\r\n>");
if (idx > 0)
{
span = span[..idx];
}
return span.Trim().ToString();
}
return span.Trim().ToString();
case MarkdownKind.HtmlEndTag:
case MarkdownKind.HtmlDeclaration:
return text.AsSpan()[2..^1].Trim().ToString();
return text.AsSpan(2, text.Length - 3).Trim().ToString();
case MarkdownKind.HtmlComment:
return text.AsSpan()[4..^3].Trim().ToString();
{
// 存在 <!--> 等总长度不足 7 的场景。
int len = text.Length - 7;
if (len <= 0)
{
return string.Empty;
}
return text.AsSpan(4, len).Trim().ToString();
}
case MarkdownKind.HtmlCData:
return text.AsSpan()[9..^3].Trim().ToString();
return text.AsSpan(9, text.Length - 12).Trim().ToString();
case MarkdownKind.HtmlProcessing:
return text.AsSpan()[2..^2].Trim().ToString();
return text.AsSpan(2, text.Length - 4).Trim().ToString();
default:
throw CommonExceptions.Unreachable();
}
Expand Down
2 changes: 1 addition & 1 deletion Cyjb.Markdown/Utils/LinkUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public static string NormalizeLabel(BlockText label)
/// <returns>标准化后的标签。</returns>
public static string NormalizeLabel(ReadOnlySpan<char> label)
{
MarkdownUtil.Trim(ref label);
label = label.Trim(MarkdownUtil.Whitespace);
if (label.IsEmpty)
{
return string.Empty;
Expand Down
44 changes: 21 additions & 23 deletions Cyjb.Markdown/Utils/MarkdownUtil.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Cyjb.Collections;

namespace Cyjb.Markdown.Utils;

/// <summary>
Expand All @@ -20,6 +22,11 @@ internal static partial class MarkdownUtil
/// </summary>
public static readonly char[] WhitespaceCharsWithoutNewLine = new char[] { ' ', '\t' };

/// <summary>
/// Unicode 标点符号的集合。
/// </summary>
private static readonly ReadOnlyCharSet UnicodePunctuations = GetUnicodePunctuations();

/// <summary>
/// 返回指定字符是否表示 Markdown 空白。
/// </summary>
Expand All @@ -38,10 +45,7 @@ public static bool IsWhitespace(char ch)
/// <returns>指定字符是否是 Markdown 的标点符号。</returns>
public static bool IsPunctuation(char ch)
{
return char.IsPunctuation(ch) ||
ch == '$' || ch == '+' || ch == '<' || ch == '=' ||
ch == '>' || ch == '^' || ch == '`' || ch == '|' ||
ch == '~' || ch == '\'';
return UnicodePunctuations.Contains(ch);
}

/// <summary>
Expand Down Expand Up @@ -89,26 +93,20 @@ public static bool TrimStart(ref StringView text)
}

/// <summary>
/// 移除指定文本末尾的空白。
/// 返回 Unicode 标点符号的集合。
/// </summary>
/// <param name="text">要移除末尾空白的文本。</param>
/// <returns>如果移除了任何末尾空白,则返回 <c>true</c>;否则返回 <c>false</c>。</returns>
public static bool TrimEnd(ref ReadOnlySpan<char> text)
/// <remarks>包含 Unicode P 和 S 类别。</remarks>
private static ReadOnlyCharSet GetUnicodePunctuations()
{
int len = text.Length;
text = text.TrimEnd(Whitespace);
return text.Length < len;
}

/// <summary>
/// 移除指定文本的起始和尾随空白。
/// </summary>
/// <param name="text">要移除起始和尾随空白的文本。</param>
/// <returns>如果移除了任何起始和尾随空白,则返回 <c>true</c>;否则返回 <c>false</c>。</returns>
public static bool Trim(ref ReadOnlySpan<char> text)
{
int len = text.Length;
text = text.TrimStart(Whitespace).TrimEnd(Whitespace);
return text.Length < len;
CharSet set = new();
for (int i = 0; i <= char.MaxValue; i++)
{
char ch = (char)i;
if (char.IsPunctuation(ch) || char.IsSymbol(ch))
{
set.Add(ch);
}
}
return set.AsReadOnly();
}
}
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Console.WriteLine(renderer);

## 支持的 Markdown 特性

- 支持完整的 [CommonMark 0.30](https://spec.commonmark.org/0.30),具体请参见[这里](https://commonmark.org/help/)
- 支持完整的 [CommonMark 0.31.2](https://spec.commonmark.org/0.31.2),具体请参见[这里](https://commonmark.org/help/)
- 支持部分 [GitHub Flavored Markdown (GFM)](https://docs.github.com/zh/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax),具体包括:
- [删除线](https://docs.github.com/zh/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#styling-text):`~~ ~~`。
- [任务列表](https://docs.github.com/zh/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#task-lists):`- [ ] task`。
Expand Down Expand Up @@ -59,3 +59,8 @@ Markdown 解析用到了由 [Cyjb.Compilers.Design](https://github.com/CYJB/Cyjb

欢迎访问我的[博客](http://www.cnblogs.com/cyjb/)获取更多信息。

## 参考

- [CommonMark Spec](https://spec.commonmark.org/)
- [babelmark3](https://babelmark.github.io/)
- [AST Explorer](https://astexplorer.net/)
38 changes: 19 additions & 19 deletions TestMarkdown/CommonMark/UnitTestATXHeading.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ namespace TestMarkdown.CommonMark;
/// <summary>
/// ATX 标题的单元测试。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#atx-headings"/>
/// <see href="https://spec.commonmark.org/0.31.2/#atx-headings"/>
[TestClass]
public class UnitTestATXHeading : BaseTest
{
/// <see href="https://spec.commonmark.org/0.30/#example-62"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-62"/>
[TestMethod]
public void Test62()
{
Expand Down Expand Up @@ -44,7 +44,7 @@ public void Test62()
/// <summary>
/// 多于 6 个 # 不是标题。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-63"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-63"/>
[TestMethod]
public void Test63()
{
Expand All @@ -59,7 +59,7 @@ public void Test63()
/// <summary>
/// # 后至少需要一个空格或 Tab。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-64"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-64"/>
[TestMethod]
public void Test64()
{
Expand All @@ -78,7 +78,7 @@ public void Test64()
/// <summary>
/// 首个 # 被转义,不是标题。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-65"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-65"/>
[TestMethod]
public void Test65()
{
Expand All @@ -93,7 +93,7 @@ public void Test65()
/// <summary>
/// 标题的内容作为行级节点解析。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-66"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-66"/>
[TestMethod]
public void Test66()
{
Expand All @@ -113,7 +113,7 @@ public void Test66()
/// <summary>
/// 起始和结束空格会被忽略。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-67"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-67"/>
[TestMethod]
public void Test67()
{
Expand All @@ -128,7 +128,7 @@ public void Test67()
/// <summary>
/// 最多允许三个空格的缩进。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-68"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-68"/>
[TestMethod]
public void Test68()
{
Expand All @@ -151,7 +151,7 @@ public void Test68()
/// <summary>
/// 四个空格就太多了。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-69"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-69"/>
[TestMethod]
public void Test69()
{
Expand All @@ -160,7 +160,7 @@ public void Test69()
CodeBlock(0..11, "# foo\r\n");
});
}
/// <see href="https://spec.commonmark.org/0.30/#example-70"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-70"/>
[TestMethod]
public void Test70()
{
Expand All @@ -177,7 +177,7 @@ public void Test70()
/// <summary>
/// 闭合的 # 是可选的。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-71"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-71"/>
[TestMethod]
public void Test71()
{
Expand All @@ -196,7 +196,7 @@ public void Test71()
/// <summary>
/// 闭合的 # 个数不需要与起始 # 个数相同。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-72"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-72"/>
[TestMethod]
public void Test72()
{
Expand All @@ -215,7 +215,7 @@ public void Test72()
/// <summary>
/// 闭合 # 后允许出现空格或 Tab。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-73"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-73"/>
[TestMethod]
public void Test73()
{
Expand All @@ -230,7 +230,7 @@ public void Test73()
/// <summary>
/// 闭合 # 后不允许出现非空白字符。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-74"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-74"/>
[TestMethod]
public void Test74()
{
Expand All @@ -245,7 +245,7 @@ public void Test74()
/// <summary>
/// 闭合 # 前必须包含空格或 Tab。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-75"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-75"/>
[TestMethod]
public void Test75()
{
Expand All @@ -260,7 +260,7 @@ public void Test75()
/// <summary>
/// 被转义的 # 不计入闭合序列中。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-76"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-76"/>
[TestMethod]
public void Test76()
{
Expand All @@ -283,7 +283,7 @@ public void Test76()
/// <summary>
/// ATX 标题不需要使用空行分割,并且可以中断段落。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-77"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-77"/>
[TestMethod]
public void Test77()
{
Expand All @@ -297,7 +297,7 @@ public void Test77()
ThematicBreak(14..20);
});
}
/// <see href="https://spec.commonmark.org/0.30/#example-78"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-78"/>
[TestMethod]
public void Test78()
{
Expand All @@ -320,7 +320,7 @@ public void Test78()
/// <summary>
/// ATX 标题可以是空的。
/// </summary>
/// <see href="https://spec.commonmark.org/0.30/#example-79"/>
/// <see href="https://spec.commonmark.org/0.31.2/#example-79"/>
[TestMethod]
public void Test79()
{
Expand Down
Loading

0 comments on commit a9d566e

Please sign in to comment.