Last update: 2025-12-17
Parsing and Rendering Markdown in Go
In pkg/markdown/ast.go:
package markdown
import "time"
// Data model produced by the parser.
type (
	// Document is a parsed Markdown file: the front-matter Header followed by
	// the body blocks in source order.
	Document struct {
		Header *Header
		Body   []Block
	}

	// Header holds the front-matter metadata of a Document. Each field
	// corresponds to one key of the metadata section.
	Header struct {
		Title       string
		Slug        string
		Description string
		LastUpdate  time.Time // value of the "last-update" key
		Draft       bool      // value of the "draft" key
		Tags        []string  // value of the "tags" key, one entry per tag
	}
)
// Block is a top-level element of a document body. Its only method is
// unexported, so the set of implementations is closed to this package.
type Block interface{ _guardBlock() }

// Block implementations.
type (
	H1        string    // first-level heading
	H2        string    // second-level heading
	Paragraph Text      // paragraph of rich text
	List      []Text    // list, one Text per item
	Pre       string    // preformatted text
	Image     [2]string // {alt text, source URL}
)

// Marker methods tagging each type above as a Block.

func (Image) _guardBlock() {}

func (Pre) _guardBlock() {}

func (List) _guardBlock() {}

func (Paragraph) _guardBlock() {}

func (H2) _guardBlock() {}

func (H1) _guardBlock() {}
// Inline is one span of rich text. Its only method is unexported, so the set
// of implementations is closed to this package.
type Inline interface{ _guardInline() }

// Rich-text types.
type (
	Text      []Inline  // sequence of inline spans
	PlainText string    // unformatted text
	Link      [2]string // {label, URL}
	Code      string    // inline code
	Emphasis  string    // emphasized text
)

// Marker methods tagging each span type as an Inline.

func (Emphasis) _guardInline() {}

func (Link) _guardInline() {}

func (Code) _guardInline() {}

func (PlainText) _guardInline() {}
In pkg/markdown/parser.go:
package markdown
import (
"fmt"
"strings"
"time"
)
// Parse parses a complete document from src: the metadata header first, then
// the body. The body is split into raw blocks, each of which is then parsed
// into its final Block and appended to the document in order.
//
// Header and body failures are wrapped with "parse metadata" and "parse body"
// respectively; a nil document is returned with any error.
func Parse(src string) (doc *Document, err error) {
	parser := NewParser([]rune(src))

	if doc, err = parser.ParseHeader(); err != nil {
		return nil, fmt.Errorf("parse metadata: %w", err)
	}

	raw, err := parser.ParseRawBlocks()
	if err != nil {
		return nil, fmt.Errorf("parse body: %w", err)
	}

	for i := range raw {
		doc.Body = append(doc.Body, raw[i].Parse())
	}
	return doc, nil
}
// ParseRichText parses the inline markup of src (surrounding whitespace is
// trimmed first) into a Text.
//
// Recognized constructs, selected by the next unread character:
//
//	[label](url)  -> Link{label, url}
//	<url>         -> Link{url, url}
//	`code`        -> Code
//	*text*        -> Emphasis
//
// Anything else is accumulated as PlainText. A backslash escapes the
// following character, so an escaped marker or delimiter is kept verbatim.
//
// In every branch the opening delimiter is consumed explicitly and readInline
// consumes (but does not return) the closing one, so no delimiter trimming is
// needed afterwards. Trimming the result would strip delimiters that the
// author escaped on purpose.
func ParseRichText(src string) (rtxt Text) {
	src = strings.TrimSpace(src)
	p := &Parser{src: []rune(src)}
	for {
		switch p.peekChar() {
		case charEOF:
			return rtxt
		case '[':
			_ = p.readChar() // opening '['
			label := p.readInline(']')
			// The label is expected to be followed by "(url)". readInline
			// stops at ')', so only the opening parenthesis is left to drop.
			url := strings.TrimPrefix(p.readInline(')'), "(")
			rtxt = append(rtxt, Link{label, url})
		case '<':
			_ = p.readChar() // opening '<'
			url := strings.TrimSpace(p.readInline('>'))
			rtxt = append(rtxt, Link{url, url})
		case '`':
			_ = p.readChar() // opening backtick
			rtxt = append(rtxt, Code(strings.TrimSpace(p.readInline('`'))))
		case '*':
			_ = p.readChar() // opening '*'
			rtxt = append(rtxt, Emphasis(strings.TrimSpace(p.readInline('*'))))
		default:
			rtxt = append(rtxt, PlainText(p.readPlainText()))
		}
	}
}
// Parser is a cursor over the runes of a Markdown source.
type Parser struct {
	src  []rune // input being parsed
	i    int    // index in src of the next rune to read
	line int    // current line number, advanced each time a '\n' is read
}

// NewParser returns a Parser positioned at the start of src, with line
// numbering starting at 1.
func NewParser(src []rune) *Parser {
	p := new(Parser)
	p.src = src
	p.line = 1
	return p
}
// RawBlock is a body block whose kind has been identified but whose source
// text has not been parsed yet.
type RawBlock struct {
	// Type is the block kind tag (for example "h1", "h2" or "ul") and Code is
	// the raw source text of the block.
	Type, Code string
}
// ParseHeader parses the metadata section at the current position and returns
// a new Document whose Header is populated (Body is left empty).
//
// The section must open with a "---" line and runs until the next "---" line.
// Every line in between has the form "key: value"; keys are matched
// case-insensitively. Supported keys are title, slug, description,
// last-update (a time.DateOnly date), draft (true only for the exact value
// "true") and tags (a comma-separated list).
//
// Any failure is returned as a *ParserError carrying the number of the
// offending line, together with a nil document.
func (p *Parser) ParseHeader() (doc *Document, err error) {
	// lineNo is the number of the line being examined. It is captured before
	// each readLine call: readLine consumes the trailing newline, after which
	// p.line already designates the following line.
	lineNo := p.line
	errAt := func(cause error) error {
		return &ParserError{Err: cause, Line: lineNo}
	}

	line := p.readLine()
	if line != "---" {
		return nil, errAt(fmt.Errorf("unexpected metadata start line: %q", line))
	}

	doc = &Document{Header: &Header{}}
	for {
		lineNo = p.line
		line = p.readLine()
		if line == "---" {
			return doc, nil
		}

		key, value, ok := strings.Cut(line, ":")
		if !ok {
			return nil, errAt(fmt.Errorf("missing ':' in metadata line: %q", line))
		}
		key = strings.TrimSpace(key)
		value = strings.TrimSpace(value)

		switch strings.ToLower(key) {
		case "title":
			doc.Header.Title = value
		case "slug":
			doc.Header.Slug = value
		case "description":
			doc.Header.Description = value
		case "last-update":
			doc.Header.LastUpdate, err = time.Parse(time.DateOnly, value)
			if err != nil {
				return nil, errAt(fmt.Errorf("invalid last update value: %w", err))
			}
		case "draft":
			doc.Header.Draft = value == "true"
		case "tags":
			// Split on the bare comma and trim each entry so that "a,b" and
			// "a,  b" behave like "a, b". Empty entries are dropped, so an
			// empty value yields no tags rather than a single "" tag.
			var tags []string
			for _, tag := range strings.Split(value, ",") {
				if tag = strings.TrimSpace(tag); tag != "" {
					tags = append(tags, tag)
				}
			}
			doc.Header.Tags = tags
		default:
			return nil, errAt(fmt.Errorf("unknown blog post key: %q", key))
		}
	}
}
func (p *Parser) ParseRawBlocks() (blocks []*RawBlock, err error) {
for {
p.ignoreSpace()
switch {
case p.peekChar() == charEOF:
return blocks, nil
case p.peeks("# "):
blocks = append(blocks, &RawBlock{Type: "h1", Code: p.readLine()})
case p.peeks("## "):
blocks = append(blocks, &RawBlock{Type: "h2", Code: p.readLine()})
case p.peeks("- "):
blocks = append(blocks, &RawBlock{Type: "ul", Code: p.readUntil("\n\n")})
case p.peeks("
if !ok {
url = block
}
return Image{strings.TrimSpace(label), strings.TrimSpace(url)}
default:
panic("unreachable")
}
}
// ParserError is a parse failure annotated with a source line number.
type ParserError struct {
	Err  error // underlying cause
	Line int   // line number recorded when the error was created
}

// Error renders the cause followed by the line number.
func (e *ParserError) Error() string {
	return fmt.Sprintf("%s (at line %d)", e.Err, e.Line)
}

// Unwrap returns the underlying cause, making it visible to errors.Is and
// errors.As.
func (e *ParserError) Unwrap() error {
	return e.Err
}
// errf returns a *ParserError whose cause is built with fmt.Errorf from v and
// args (so %w wraps as usual) and whose Line is the parser's current line.
func (p *Parser) errf(v string, args ...any) error {
	cause := fmt.Errorf(v, args...)
	return &ParserError{Err: cause, Line: p.line}
}
// charEOF is the sentinel returned by the character-level helpers once the
// input is exhausted.
const charEOF = 0

// peekChar returns the rune at the cursor without consuming it, or charEOF
// when the cursor is at the end of the input.
func (p *Parser) peekChar() (c rune) {
	if p.i < len(p.src) {
		return p.src[p.i]
	}
	return charEOF
}
// peek returns the next n runes as a string without consuming them. When
// fewer than n runes remain, it returns whatever is left.
func (p *Parser) peek(n int) (v string) {
	end := p.i + n
	if end > len(p.src) {
		end = len(p.src)
	}
	return string(p.src[p.i:end])
}

// peeks reports whether the unread input starts with v.
//
// peek counts runes, so the look-ahead length must be the rune count of v and
// not its byte length; otherwise a non-ASCII v could never match.
func (p *Parser) peeks(v string) bool {
	return p.peek(len([]rune(v))) == v
}
// ignoreSpace consumes consecutive newlines, spaces and tabs, stopping at the
// first other character or at end of input.
func (p *Parser) ignoreSpace() {
	for {
		switch p.peekChar() {
		case '\n', ' ', '\t':
			p.readChar()
		default:
			return
		}
	}
}
// readChar consumes and returns the rune at the cursor, incrementing the line
// counter when that rune is a newline. At end of input it returns charEOF and
// leaves the parser untouched.
func (p *Parser) readChar() (c rune) {
	if p.i >= len(p.src) {
		return charEOF
	}
	c = p.src[p.i]
	p.i++
	if c == '\n' {
		p.line++
	}
	return c
}
// readLine consumes input up to and including the next newline (or up to end
// of input) and returns it without the newline.
func (p *Parser) readLine() (v string) {
	var sb strings.Builder
	for c := p.readChar(); c != '\n' && c != charEOF; c = p.readChar() {
		sb.WriteRune(c)
	}
	return sb.String()
}
// readUntil consumes input up to and including the first occurrence of seq and
// returns everything consumed, seq included. When seq never occurs, the rest
// of the input is consumed and returned.
func (p *Parser) readUntil(seq string) (v string) {
	var sb strings.Builder
	for p.peekChar() != charEOF {
		if p.peeks(seq) {
			// Consume the terminator itself, one rune at a time.
			for range seq {
				sb.WriteRune(p.readChar())
			}
			break
		}
		sb.WriteRune(p.readChar())
	}
	return sb.String()
}
// readUntilLine consumes whole lines up to and including the first one equal
// to line, and returns them with each line terminated by "\n". Reading also
// stops, without emitting anything more, when an empty line is read and the
// input is exhausted.
func (p *Parser) readUntilLine(line string) (v string) {
	var sb strings.Builder
	for {
		cur := p.readLine()
		if cur == "" && p.peekChar() == charEOF {
			break
		}
		sb.WriteString(cur)
		sb.WriteByte('\n')
		if cur == line {
			break
		}
	}
	return sb.String()
}
// readUntilBlankLineOrNewBlock accumulates input until one of:
//
//   - end of input;
//   - a blank line ("\n\n" ahead): "\n\n" is appended to the result, but the
//     two newlines are left unread;
//   - a block prefix right after a newline: the prefix is left unread.
func (p *Parser) readUntilBlankLineOrNewBlock() (v string) {
	var sb strings.Builder
	last := rune(0) // most recently consumed rune
	for p.peekChar() != charEOF {
		if p.peeks("\n\n") {
			sb.WriteString("\n\n")
			break
		}
		if last == '\n' && p.peeksBlockPrefix() {
			break
		}
		last = p.readChar()
		sb.WriteRune(last)
	}
	return sb.String()
}
// peeksBlockPrefix reports whether the unread input starts with one of the
// block-opening prefixes: "# ", "## ", "- " or "```".
func (p *Parser) peeksBlockPrefix() bool {
	return p.peeks("# ") || p.peeks("## ") || p.peeks("- ") || p.peeks("```")
}
// readInline consumes input up to and including the first unescaped
// occurrence of until (or up to end of input) and returns what precedes it.
// A backslash is dropped and makes the following character literal, so an
// escaped terminator does not end the span.
func (p *Parser) readInline(until rune) (v string) {
	var sb strings.Builder
	afterBackslash := false
	for {
		c := p.readChar()
		if c == charEOF {
			break
		}
		if afterBackslash {
			sb.WriteRune(c)
			afterBackslash = false
			continue
		}
		if c == until {
			break
		}
		if c == '\\' {
			afterBackslash = true
			continue
		}
		sb.WriteRune(c)
	}
	return sb.String()
}
// readPlainText consumes and returns input up to, but not including, the next
// unescaped rich-text marker or the end of input. A backslash is dropped and
// makes the following character literal, so an escaped marker is kept as
// plain text.
func (p *Parser) readPlainText() (v string) {
	var sb strings.Builder
	afterBackslash := false
	for {
		next := p.peekChar()
		if next == charEOF || (!afterBackslash && isRichTextMarker(next)) {
			break
		}
		c := p.readChar()
		if !afterBackslash && c == '\\' {
			afterBackslash = true
			continue
		}
		sb.WriteRune(c)
		afterBackslash = false
	}
	return sb.String()
}
// isRichTextMarker reports whether c opens an inline construct: '[', '<',
// '`' or '*'.
func isRichTextMarker(c rune) bool {
	switch c {
	case '[', '<', '`', '*':
		return true
	}
	return false
}
In pkg/markdown/util.go:
package markdown
import (
"fmt"
"html"
"io/fs"
"slices"
"strings"
)
// ParseFS walks fsys from its root, parses every regular file as a document
// and returns the documents sorted by LastUpdate, most recent first.
// Documents with the same LastUpdate keep the order in which they were
// walked.
//
// The walk stops at the first failure. The returned error is prefixed with
// "walk FS" once, and read and parse failures name the offending path.
func ParseFS(fsys fs.FS) (docs []*Document, err error) {
	err = fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Returned as is: the "walk FS" context is added once, below.
			return err
		}
		if !d.Type().IsRegular() {
			return nil
		}
		b, err := fs.ReadFile(fsys, path)
		if err != nil {
			return fmt.Errorf("read file %q: %w", path, err)
		}
		doc, err := Parse(string(b))
		if err != nil {
			return fmt.Errorf("parse document %q: %w", path, err)
		}
		docs = append(docs, doc)
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("walk FS: %w", err)
	}

	// Stable sort, so that equal dates do not reshuffle the walk order.
	slices.SortStableFunc(docs, func(a, b *Document) int {
		return b.Header.LastUpdate.Compare(a.Header.LastUpdate)
	})
	return docs, nil
}
// BlockHTML renders a single block as an HTML fragment.
//
// Headings and preformatted text are HTML-escaped, paragraphs and list items
// are rendered through RichTextHTML, and both attributes of an image are
// escaped. It panics when b is not one of the known block types.
func BlockHTML(b Block) (v string) {
	// wrap encloses inner in an opening and a closing tag.
	wrap := func(tag, inner string) string {
		return "<" + tag + ">" + inner + "</" + tag + ">"
	}

	switch blk := b.(type) {
	case H1:
		return wrap("h1", html.EscapeString(string(blk)))
	case H2:
		return wrap("h2", html.EscapeString(string(blk)))
	case Paragraph:
		return wrap("p", RichTextHTML(Text(blk)))
	case List:
		var items strings.Builder
		for _, item := range blk {
			items.WriteString(wrap("li", RichTextHTML(item)))
		}
		return wrap("ul", items.String())
	case Pre:
		return wrap("pre", html.EscapeString(string(blk)))
	case Image:
		return `<img alt="` + html.EscapeString(blk[0]) + `" src="` + html.EscapeString(blk[1]) + `" />`
	}
	panic(fmt.Errorf("unexpected block type: %T", b))
}
// RichTextHTML renders rich text as an HTML fragment.
//
// All text coming from the document is HTML-escaped before being written:
// plain text, link labels and link URLs (inside the href attribute), code and
// emphasis. In plain text, two spaces followed by a line feed become a
// "<br />" forced line break.
//
// It panics when txt contains an unknown inline type.
func RichTextHTML(txt Text) (v string) {
	// Two consecutive spaces before LF == forced line break.
	const lineBreakMarker = "  \n"

	var sb strings.Builder
	for _, inline := range txt {
		switch n := inline.(type) {
		default:
			panic(fmt.Errorf("unexpected inline type: %T", n))
		case PlainText:
			// Escape first: escaping leaves spaces and newlines untouched,
			// so the marker is still recognizable, and the inserted tag is
			// not itself escaped.
			escaped := html.EscapeString(string(n))
			sb.WriteString(strings.ReplaceAll(escaped, lineBreakMarker, "<br />"))
		case Link:
			// The URL is escaped too, so that a quote in it cannot terminate
			// the attribute and '&' is emitted as a valid entity.
			sb.WriteString(`<a href="` + html.EscapeString(n[1]) + `">` + html.EscapeString(n[0]) + "</a>")
		case Code:
			sb.WriteString("<code>" + html.EscapeString(string(n)) + "</code>")
		case Emphasis:
			sb.WriteString("<strong>" + html.EscapeString(string(n)) + "</strong>")
		}
	}
	return sb.String()
}
// TaggedDocuments is a tag together with the documents carrying it.
type TaggedDocuments struct {
	Tag       string
	Documents []*Document
}

// GroupByTag groups docs by tag. A document appears in the group of each tag
// listed in its header, in the order documents occur in docs.
//
// Groups are sorted by decreasing number of documents. Groups of equal size
// are ordered by tag name, so the result does not depend on map iteration
// order and is identical from one call to the next.
func GroupByTag(docs []*Document) (groups []*TaggedDocuments) {
	byTag := map[string][]*Document{}
	for _, doc := range docs {
		for _, tag := range doc.Header.Tags {
			byTag[tag] = append(byTag[tag], doc)
		}
	}

	groups = make([]*TaggedDocuments, 0, len(byTag))
	for tag, tagged := range byTag {
		groups = append(groups, &TaggedDocuments{Tag: tag, Documents: tagged})
	}

	slices.SortFunc(groups, func(a, b *TaggedDocuments) int {
		if d := len(b.Documents) - len(a.Documents); d != 0 {
			return d
		}
		return strings.Compare(a.Tag, b.Tag)
	})
	return groups
}