Julien Sellier

Last update: 2025-12-17

Parsing and Rendering Markdown in Go

In pkg/markdown/ast.go:

package markdown

import "time"

// Document is a parsed Markdown file: the metadata header followed by the
// ordered list of top-level blocks that make up its body.
type Document struct {
	// Header holds the metadata read from the "---"-delimited block that opens
	// the file.
	Header *Header
	// Body holds the blocks of the document in source order.
	Body   []Block
}

// Header is the metadata of a Document. Each field is filled from the
// "key: value" line of the same purpose in the metadata block (see
// Parser.ParseHeader).
type Header struct {
	// Title is the value of the "title" key.
	Title       string
	// Slug is the value of the "slug" key.
	Slug        string
	// Description is the value of the "description" key.
	Description string
	// LastUpdate is the value of the "last-update" key, parsed as a date.
	LastUpdate  time.Time
	// Draft is set from the "draft" key.
	Draft       bool
	// Tags is the list of tags read from the "tags" key.
	Tags        []string
}

// Block is a top-level element of a Document body. The unexported guard
// method restricts the set of implementations to the types of this package.
type Block interface{ _guardBlock() }

// H1 is the text of a level-one heading (a "# " line).
type H1 string
// H2 is the text of a level-two heading (a "## " line).
type H2 string
// Paragraph is a block of rich text.
type Paragraph Text
// List is a bullet list; each element is the rich text of one item.
type List []Text
// Pre is the verbatim content of a fenced code block.
type Pre string
// Image is an image reference: index 0 is the label (rendered as the alt
// text), index 1 is the URL (rendered as the src attribute).
type Image [2]string

// Guard methods marking the types above as Block implementations.
func (H1) _guardBlock()        {}
func (H2) _guardBlock()        {}
func (Paragraph) _guardBlock() {}
func (List) _guardBlock()      {}
func (Pre) _guardBlock()       {}
func (Image) _guardBlock()     {}

// Inline is a single element of rich text. The unexported guard method
// restricts the set of implementations to the types of this package.
type Inline interface{ _guardInline() }

// Text is a run of rich text: an ordered sequence of inline elements.
type Text []Inline
// PlainText is literal text without markup.
type PlainText string
// Link is a hyperlink: index 0 is the label, index 1 is the URL.
type Link [2]string
// Code is an inline code span.
type Code string
// Emphasis is emphasized text (rendered as <strong> by RichTextHTML).
type Emphasis string

// Guard methods marking the types above as Inline implementations.
func (PlainText) _guardInline() {}
func (Code) _guardInline()      {}
func (Link) _guardInline()      {}
func (Emphasis) _guardInline()  {}

In pkg/markdown/parser.go:

package markdown

import (
	"fmt"
	"strings"
	"time"
)

// Parse converts the source of a complete Markdown document into its AST.
//
// The metadata header is read first with Parser.ParseHeader, then the rest of
// the input is split into raw blocks with Parser.ParseRawBlocks and each raw
// block is converted into a typed Block appended to doc.Body. An error from
// either stage is returned wrapped with the name of that stage, along with a
// nil document.
func Parse(src string) (doc *Document, err error) {
	parser := NewParser([]rune(src))
	if doc, err = parser.ParseHeader(); err != nil {
		return nil, fmt.Errorf("parse metadata: %w", err)
	}
	rawBlocks, err := parser.ParseRawBlocks()
	if err != nil {
		return nil, fmt.Errorf("parse body: %w", err)
	}
	for _, raw := range rawBlocks {
		doc.Body = append(doc.Body, raw.Parse())
	}
	return doc, nil
}

// ParseRichText parses the inline markup of a paragraph or list item.
//
// Surrounding whitespace is trimmed from src, which is then split into:
//   - Link for "[label](url)" and for autolinks "<url>" (label and URL equal),
//   - Code for "`code`",
//   - Emphasis for "*text*",
//   - PlainText for everything else.
//
// A backslash escapes the character that follows it, so an escaped marker or
// delimiter is kept as literal text. A construct whose closing delimiter is
// missing extends to the end of the input.
//
// NOTE: a "[label]" that is not directly followed by "(" is still turned into
// a Link, whose URL is the text up to the next ")".
func ParseRichText(src string) (rtxt Text) {
	src = strings.TrimSpace(src)
	p := &Parser{src: []rune(src)}
	for {
		switch p.peekChar() {
		case charEOF:
			return rtxt
		case '[':
			// readInline starts on the opening delimiter, so its result carries
			// the opener but never the unescaped closer. Only the opener is
			// trimmed: trimming the closer too would eat a legitimately escaped
			// "\]" or "\)" sitting at the edge of the text.
			label := strings.TrimPrefix(p.readInline(']'), "[")
			url := strings.TrimPrefix(p.readInline(')'), "(")
			rtxt = append(rtxt, Link{label, url})
		case '<':
			url := strings.TrimSpace(p.readInline('>'))
			url = strings.TrimPrefix(url, "<")
			rtxt = append(rtxt, Link{url, url})
		case '`':
			_ = p.readChar() // Skip the opening backtick.
			code := strings.TrimSpace(p.readInline('`'))
			rtxt = append(rtxt, Code(code))
		case '*':
			_ = p.readChar() // Skip the opening asterisk.
			text := strings.TrimSpace(p.readInline('*'))
			rtxt = append(rtxt, Emphasis(text))
		default:
			rtxt = append(rtxt, PlainText(p.readPlainText()))
		}
	}
}

// Parser is a cursor over the runes of a Markdown source.
type Parser struct {
	src  []rune // Input being parsed.
	i    int    // Index in src of the next rune to read.
	line int    // Line counter, advanced each time a '\n' is read.
}

// NewParser returns a Parser positioned on the first rune of src, with its
// line counter set to 1.
func NewParser(src []rune) *Parser {
	p := new(Parser)
	p.src = src
	p.line = 1
	return p
}

// RawBlock is a top-level block as cut out of the source by
// Parser.ParseRawBlocks, before its content is interpreted by Parse.
type RawBlock struct {
	// Type is the kind of block: "h1", "h2", "ul", "img", "pre" or "p".
	Type string
	// Code is the source text of the block, markers included.
	Code string
}

// ParseHeader parses the metadata block that opens a document.
//
// The block starts and ends with a line containing exactly "---". Every line
// in between has the form "key: value"; keys are matched case-insensitively
// and surrounding whitespace is trimmed from both key and value. Supported
// keys:
//
//	title, slug, description  copied verbatim
//	last-update               a date in time.DateOnly layout (YYYY-MM-DD)
//	draft                     "true" (in any letter case) marks a draft
//	tags                      comma-separated list; blank entries are dropped
//
// On success it returns a Document whose Header is populated and whose Body
// is empty. On failure it returns a nil document and a *ParserError carrying
// the number of the offending line.
func (p *Parser) ParseHeader() (doc *Document, err error) {
	// readLine moves p.line past the line it returns, so the number of the
	// line under examination is captured before each read and used for error
	// reporting instead of the parser's current position.
	lineNo := p.line
	errAt := func(format string, args ...any) error {
		return &ParserError{Err: fmt.Errorf(format, args...), Line: lineNo}
	}

	line := p.readLine()
	if line != "---" {
		return nil, errAt("unexpected metadata start line: %q", line)
	}
	doc = &Document{Header: &Header{}}
	for {
		lineNo = p.line
		line = p.readLine()
		if line == "---" {
			return doc, nil
		}
		if line == "" && p.peekChar() == charEOF {
			// Input ran out before the closing delimiter.
			return nil, errAt("unterminated metadata: missing closing %q line", "---")
		}
		key, value, ok := strings.Cut(line, ":")
		if !ok {
			return nil, errAt("missing ':' in metadata line: %q", line)
		}
		key = strings.TrimSpace(key)
		value = strings.TrimSpace(value)
		switch strings.ToLower(key) {
		case "title":
			doc.Header.Title = value
		case "slug":
			doc.Header.Slug = value
		case "description":
			doc.Header.Description = value
		case "last-update":
			doc.Header.LastUpdate, err = time.Parse(time.DateOnly, value)
			if err != nil {
				return nil, errAt("invalid last update value: %w", err)
			}
		case "draft":
			doc.Header.Draft = strings.EqualFold(value, "true")
		case "tags":
			// Split on the bare comma and trim each entry so that "a,b",
			// "a, b" and "a ,  b" are equivalent, and an empty value yields no
			// tag rather than a single empty one.
			var tags []string
			for _, tag := range strings.Split(value, ",") {
				if tag = strings.TrimSpace(tag); tag != "" {
					tags = append(tags, tag)
				}
			}
			doc.Header.Tags = tags
		default:
			return nil, errAt("unknown blog post key: %q", key)
		}
	}
}

// ParseRawBlocks splits the remaining input into raw blocks.
//
// Whitespace between blocks is skipped. The kind of each block is decided by
// the characters it starts with:
//
//	"# "   h1   one line
//	"## "  h2   one line
//	"- "   ul   up to and including the next blank line
//	"!["   img  one line
//	"```"  pre  up to and including the closing "```" line
//	other  p    up to the next blank line or block prefix
//
// The returned error is always nil.
func (p *Parser) ParseRawBlocks() (blocks []*RawBlock, err error) {
	for {
		p.ignoreSpace()
		switch {
		case p.peekChar() == charEOF:
			return blocks, nil
		case p.peeks("# "):
			blocks = append(blocks, &RawBlock{Type: "h1", Code: p.readLine()})
		case p.peeks("## "):
			blocks = append(blocks, &RawBlock{Type: "h2", Code: p.readLine()})
		case p.peeks("- "):
			blocks = append(blocks, &RawBlock{Type: "ul", Code: p.readUntil("\n\n")})
		case p.peeks("!["):
			blocks = append(blocks, &RawBlock{Type: "img", Code: p.readLine()})
		case p.peeks("```"):
			// Consume the opening fence line on its own before searching for
			// the closing one: an opening fence without format hint is exactly
			// "```" and would otherwise be mistaken for the closing fence,
			// ending the block before its content.
			code := p.readLine() + "\n"
			code += p.readUntilLine("```")
			blocks = append(blocks, &RawBlock{Type: "pre", Code: code})
		default:
			blocks = append(blocks, &RawBlock{Type: "p", Code: p.readUntilBlankLineOrNewBlock()})
		}
	}
}

// Parse converts the raw block into its typed Block, according to b.Type:
//
//	"h1", "h2"  the heading marker is removed and the text trimmed
//	"p"         the content is parsed as rich text
//	"ul"        each line is one item; its "- " marker is removed and the
//	            rest parsed as rich text
//	"pre"       the fences and the opening line (optional format hint) are
//	            removed; a line starting with "\```" is an escaped fence and
//	            loses its backslash
//	"img"       "![label](url)" is split into label and URL; without "]("
//	            the whole content is used as URL
//
// It panics on any other type, which cannot be produced by
// Parser.ParseRawBlocks.
func (b *RawBlock) Parse() Block {
	switch b.Type {
	case "h1":
		return H1(strings.TrimSpace(strings.TrimPrefix(b.Code, "# ")))
	case "h2":
		return H2(strings.TrimSpace(strings.TrimPrefix(b.Code, "## ")))
	case "p":
		return Paragraph(ParseRichText(b.Code))
	case "ul":
		items := strings.Split(strings.TrimSpace(b.Code), "\n")
		block := make(List, 0, len(items))
		for _, item := range items {
			item = strings.TrimPrefix(item, "- ")
			block = append(block, ParseRichText(item))
		}
		return block
	case "pre":
		block := strings.TrimSpace(b.Code)
		block = strings.TrimPrefix(block, "```")
		_, block, _ = strings.Cut(block, "\n") // Ignore first line (optional format hint).
		block = strings.TrimSuffix(block, "```")
		// Unescape fences line by line so that the line break in front of an
		// escaped fence is preserved and an escaped fence on the very first
		// content line is handled too.
		lines := strings.Split(block, "\n")
		for i, line := range lines {
			if strings.HasPrefix(line, "\\```") {
				lines[i] = strings.TrimPrefix(line, "\\")
			}
		}
		return Pre(strings.Join(lines, "\n"))
	case "img":
		block := strings.TrimSpace(b.Code)
		block = strings.TrimPrefix(block, "![")
		block = strings.TrimSuffix(block, ")")
		label, url, ok := strings.Cut(block, "](")
		if !ok {
			url = block
		}
		return Image{strings.TrimSpace(label), strings.TrimSpace(url)}
	default:
		// Programming error: name the offending type to ease debugging.
		panic(fmt.Errorf("unexpected raw block type: %q", b.Type))
	}
}

// ParserError is a parse failure annotated with a source line number.
type ParserError struct {
	Err  error // Underlying cause.
	Line int   // Line number reported with the cause.
}

// Error returns the message of the cause followed by the line number.
func (e *ParserError) Error() string {
	return fmt.Sprintf("%s (at line %d)", e.Err, e.Line)
}

// Unwrap returns the underlying cause, making it reachable through errors.Is
// and errors.As.
func (e *ParserError) Unwrap() error {
	return e.Err
}

// errf builds a *ParserError whose cause is formatted like fmt.Errorf (so %w
// is supported) and whose line is the current value of the parser's line
// counter.
func (p *Parser) errf(v string, args ...any) error {
	cause := fmt.Errorf(v, args...)
	return &ParserError{Err: cause, Line: p.line}
}

// charEOF is the value returned by peekChar and readChar once the input is
// exhausted. Because it is 0, a NUL rune present in the input cannot be told
// apart from the end of input by callers comparing against it.
const charEOF = 0

// peekChar returns the next rune without consuming it, or charEOF when the
// input is exhausted.
func (p *Parser) peekChar() (c rune) {
	if p.i < len(p.src) {
		return p.src[p.i]
	}
	return charEOF
}

// peek returns the next n runes as a string without consuming them. When
// fewer than n runes remain, it returns all of them.
func (p *Parser) peek(n int) (v string) {
	end := p.i + n
	if end >= len(p.src) {
		end = len(p.src)
	}
	return string(p.src[p.i:end])
}

// peeks reports whether the upcoming input starts with v, without consuming
// anything. An empty v always matches.
//
// The comparison is done rune by rune against the source: the parser indexes
// runes, so using the byte length of v as a rune count would never match a v
// holding multi-byte characters. It also avoids building a string per call.
func (p *Parser) peeks(v string) bool {
	i := p.i
	for _, r := range v {
		if i >= len(p.src) || p.src[i] != r {
			return false
		}
		i++
	}
	return true
}

// ignoreSpace consumes every upcoming line feed, space and tab, stopping on
// the first other rune or at the end of input.
func (p *Parser) ignoreSpace() {
	for {
		switch p.peekChar() {
		case '\n', ' ', '\t':
			p.readChar()
		default:
			return
		}
	}
}

// readChar consumes and returns the next rune, incrementing the line counter
// when that rune is a line feed. At the end of input it returns charEOF and
// consumes nothing.
func (p *Parser) readChar() (c rune) {
	if p.i >= len(p.src) {
		return charEOF
	}
	c = p.src[p.i]
	p.i++
	if c == '\n' {
		p.line++
	}
	return c
}

// readLine consumes the input up to and including the next line feed and
// returns it without that line feed. At the end of input it returns what is
// left, which may be empty.
//
// The line is accumulated in a strings.Builder: appending to a string rune by
// rune copies the whole line on each step.
func (p *Parser) readLine() (v string) {
	var b strings.Builder
	for {
		c := p.readChar()
		if c == '\n' || c == charEOF {
			return b.String()
		}
		b.WriteRune(c)
	}
}

// readUntil consumes and returns the input up to and including the first
// occurrence of seq. When seq never occurs, it returns the rest of the input.
//
// The result is accumulated in a strings.Builder: appending to a string rune
// by rune copies the whole text on each step.
func (p *Parser) readUntil(seq string) (v string) {
	var b strings.Builder
	for p.peekChar() != charEOF {
		if p.peeks(seq) {
			// Consume the terminator itself, one rune at a time.
			for range seq {
				b.WriteRune(p.readChar())
			}
			break
		}
		b.WriteRune(p.readChar())
	}
	return b.String()
}

// readUntilLine consumes whole lines up to and including the first one equal
// to line, and returns them with each line terminated by a line feed. When no
// such line exists, it returns everything up to the end of input.
func (p *Parser) readUntilLine(line string) (v string) {
	for {
		current := p.readLine()
		exhausted := current == "" && p.peekChar() == charEOF
		if exhausted {
			break
		}
		v = v + current + "\n"
		if current == line {
			break
		}
	}
	return v
}

// readUntilBlankLineOrNewBlock consumes and returns the text of a paragraph.
// It stops at the end of input, before a blank line, or at the start of a
// line that begins with a block prefix (see peeksBlockPrefix).
//
// When it stops on a blank line, "\n\n" is appended to the result but the two
// line feeds are left in the input for the caller to skip.
//
// The result is accumulated in a strings.Builder: appending to a string rune
// by rune copies the whole paragraph on each step.
func (p *Parser) readUntilBlankLineOrNewBlock() (v string) {
	var b strings.Builder
	prev := rune(0)
	for {
		switch {
		case p.peekChar() == charEOF:
			return b.String()
		case p.peeks("\n\n"):
			b.WriteString("\n\n")
			return b.String()
		case prev == '\n' && p.peeksBlockPrefix():
			// A block prefix only counts at the very start of a line.
			return b.String()
		}
		prev = p.readChar()
		b.WriteRune(prev)
	}
}

// peeksBlockPrefix reports whether the upcoming input starts with one of the
// prefixes that interrupt a paragraph: a heading ("# ", "## "), a list item
// ("- ") or a code fence ("```").
func (p *Parser) peeksBlockPrefix() bool {
	return p.peeks("# ") ||
		p.peeks("## ") ||
		p.peeks("- ") ||
		p.peeks("```")
}

// readInline consumes the input up to and including the first unescaped
// occurrence of until, and returns what precedes it. A backslash escapes the
// rune that follows it: the backslash is dropped and the rune kept verbatim,
// even when it is until. When until never occurs, the rest of the input is
// returned.
//
// The result is accumulated in a strings.Builder: appending to a string rune
// by rune copies the whole text on each step.
func (p *Parser) readInline(until rune) (v string) {
	var b strings.Builder
	escaped := false
	for {
		c := p.readChar()
		switch {
		case c == charEOF:
			return b.String()
		case escaped:
			b.WriteRune(c)
			escaped = false
		case c == until:
			return b.String()
		case c == '\\':
			escaped = true
		default:
			b.WriteRune(c)
		}
	}
}

// readPlainText consumes and returns the text up to, but excluding, the next
// unescaped rich-text marker (see isRichTextMarker) or the end of input. A
// backslash escapes the rune that follows it: the backslash is dropped and
// the rune kept verbatim, even when it is a marker.
//
// The result is accumulated in a strings.Builder: appending to a string rune
// by rune copies the whole text on each step.
func (p *Parser) readPlainText() (v string) {
	var b strings.Builder
	escaped := false
	for {
		next := p.peekChar()
		if next == charEOF || (!escaped && isRichTextMarker(next)) {
			return b.String()
		}
		c := p.readChar()
		switch {
		case escaped:
			b.WriteRune(c)
			escaped = false
		case c == '\\':
			escaped = true
		default:
			b.WriteRune(c)
		}
	}
}

// isRichTextMarker reports whether c opens an inline construct: a link
// ('['), an autolink ('<'), a code span ('`') or an emphasis ('*').
func isRichTextMarker(c rune) bool {
	switch c {
	case '[', '<', '`', '*':
		return true
	}
	return false
}

In pkg/markdown/util.go:

package markdown

import (
	"fmt"
	"html"
	"io/fs"
	"slices"
	"strings"
)

// ParseFS parses every regular file of fsys as a Markdown document and
// returns the documents sorted by last update, most recent first.
//
// Directories and other non-regular entries are skipped. The walk stops on
// the first error, which is returned wrapped; parse errors name the file they
// come from.
func ParseFS(fsys fs.FS) (docs []*Document, err error) {
	err = fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Returned as is: the "walk FS" context is added once, below.
			return err
		}
		if !d.Type().IsRegular() {
			return nil
		}
		b, err := fs.ReadFile(fsys, path)
		if err != nil {
			return fmt.Errorf("read file: %w", err)
		}
		doc, err := Parse(string(b))
		if err != nil {
			return fmt.Errorf("parse document %q: %w", path, err)
		}
		docs = append(docs, doc)
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("walk FS: %w", err)
	}
	slices.SortFunc(docs, func(a, b *Document) int { return b.Header.LastUpdate.Compare(a.Header.LastUpdate) })
	return docs, nil
}

// BlockHTML renders a block as an HTML fragment:
//
//	H1, H2     <h1>, <h2> with escaped text
//	Paragraph  <p> wrapping the rendered rich text
//	List       <ul> with one <li> of rendered rich text per item
//	Pre        <pre> with escaped content
//	Image      <img> with escaped alt and src attributes
//
// It panics on any other block type.
func BlockHTML(b Block) (v string) {
	switch block := b.(type) {
	case H1:
		return "<h1>" + html.EscapeString(string(block)) + "</h1>"
	case H2:
		return "<h2>" + html.EscapeString(string(block)) + "</h2>"
	case Paragraph:
		return "<p>" + RichTextHTML(Text(block)) + "</p>"
	case List:
		v = "<ul>"
		for i := range block {
			v += "<li>" + RichTextHTML(block[i]) + "</li>"
		}
		return v + "</ul>"
	case Pre:
		return "<pre>" + html.EscapeString(string(block)) + "</pre>"
	case Image:
		alt, src := html.EscapeString(block[0]), html.EscapeString(block[1])
		return "<img alt=\"" + alt + "\" src=\"" + src + "\" />"
	}
	panic(fmt.Errorf("unexpected block type: %T", b))
}

// RichTextHTML renders rich text as an HTML fragment:
//
//	PlainText  escaped text; two spaces followed by a line feed become <br />
//	Link       <a> with the URL as href and the label as content
//	Code       <code>
//	Emphasis   <strong>
//
// Every piece of text coming from the document, attribute values included, is
// HTML-escaped. It panics on any other inline type.
func RichTextHTML(txt Text) (v string) {
	const lineBreakMarker = " " + " " + "\n" // Two-consecutive space before LF == forced line-break.

	var b strings.Builder
	for _, inline := range txt {
		switch n := inline.(type) {
		case PlainText:
			// Escape before inserting the line breaks: escaping leaves spaces
			// and line feeds untouched, so the marker is still found, and the
			// <br /> added afterwards is not escaped itself.
			escaped := html.EscapeString(string(n))
			b.WriteString(strings.ReplaceAll(escaped, lineBreakMarker, "<br />"))
		case Link:
			// The URL lands inside a quoted attribute and must be escaped, or
			// a double quote in it would close the attribute.
			b.WriteString("<a href=\"" + html.EscapeString(n[1]) + "\">" + html.EscapeString(n[0]) + "</a>")
		case Code:
			b.WriteString("<code>" + html.EscapeString(string(n)) + "</code>")
		case Emphasis:
			b.WriteString("<strong>" + html.EscapeString(string(n)) + "</strong>")
		default:
			panic(fmt.Errorf("unexpected inline type: %T", n))
		}
	}
	return b.String()
}

// TaggedDocuments is the set of documents sharing one tag, as produced by
// GroupByTag.
type TaggedDocuments struct {
	// Tag is the tag shared by Documents.
	Tag       string
	// Documents lists the documents whose header carries Tag.
	Documents []*Document
}

// GroupByTag groups docs by tag. A document appears in the group of each of
// its tags, in the order of docs.
//
// Groups are sorted by decreasing number of documents; groups of equal size
// are sorted by tag name, so the result does not depend on map iteration
// order.
func GroupByTag(docs []*Document) (groups []*TaggedDocuments) {
	// Group documents by tag.
	byTag := map[string][]*Document{}
	for _, doc := range docs {
		for _, tag := range doc.Header.Tags {
			byTag[tag] = append(byTag[tag], doc)
		}
	}

	// Convert map to slice and sort by number of documents, then by tag.
	groups = make([]*TaggedDocuments, 0, len(byTag))
	for tag, tagged := range byTag {
		groups = append(groups, &TaggedDocuments{Tag: tag, Documents: tagged})
	}
	slices.SortFunc(groups, func(a, b *TaggedDocuments) int {
		if diff := len(b.Documents) - len(a.Documents); diff != 0 {
			return diff
		}
		return strings.Compare(a.Tag, b.Tag)
	})
	return groups
}