Implement some features of mycomarkup

2025-12-04 07:38:06 +00:00 · 2020-10-30 18:25:48 +05:00
parent 657fb5d357
commit d6c6ad4ae3
20 changed files with 335 additions and 59 deletions
--- a/markup/lexer.go
+++ b/markup/lexer.go
@@ -0,0 +1,182 @@
+package markup
+
+import (
+	"fmt"
+	"html"
+	"path"
+	"strings"
+)
+
+// HyphaExists holds the function that reports whether a hypha with the given
+// name is present. It is declared here without a value; wikilink calls it when
+// resolving "./" and "../" links, so it has to be assigned before such links
+// are lexed.
+var HyphaExists func(string) bool
+
+// HyphaAccess holds the function that accesses a hypha by its name. It is
+// declared here without a value; Transclude calls it and uses all three
+// results.
+var HyphaAccess func(string) (rawText, binaryHtml string, err error)
+
+// GemLexerState is used by markup parser to remember what is going on.
+// One value is created per lex call and mutated by geminiLineToAST.
+type GemLexerState struct {
+	// Name of hypha being parsed
+	name  string
+	// Kind of multi-line block the lexer is currently inside of
+	where string // "", "list", "pre"
+	// Line id
+	id  int
+	// HTML of the list or preformatted block collected so far
+	buf string
+}
+
+// Line is one element of the AST produced by lex.
+type Line struct {
+	// Value of the lexer's line id counter at the moment the element was added
+	id int
+	// interface{} may be bad. What I need is a sum of string and Transclusion
+	// (Parse handles exactly these two dynamic types.)
+	contents interface{}
+}
+
+// wikilink parses a markup line starting with "=>" according to wikilink rules.
+// See http://localhost:1737/page/wikilink
+//
+// The first whitespace-separated word after "=>" is the link target, the rest
+// of the line is the link text (the target itself when there is no text).
+// All three results are empty for a line that has nothing after "=>".
+func wikilink(src string, state *GemLexerState) (href, text, class string) {
+	src = strings.TrimSpace(remover("=>")(src))
+	if src == "" {
+		return
+	}
+
+	target := strings.Fields(src)[0]
+	label := strings.TrimPrefix(src, target)
+	if label == "" {
+		label = target
+	}
+	label = strings.TrimSpace(label)
+
+	// Links to hyphae that do not exist yet get a class of their own.
+	classOf := func(hyphaName string) string {
+		if HyphaExists(hyphaName) {
+			return "wikilink_internal"
+		}
+		return "wikilink_new"
+	}
+
+	// Relative to the hypha being parsed.
+	if strings.HasPrefix(target, "./") {
+		hyphaName := canonicalName(path.Join(
+			state.name, strings.TrimPrefix(target, "./")))
+		return path.Join("/page", hyphaName), label, classOf(hyphaName)
+	}
+	// Relative to the parent of the hypha being parsed.
+	if strings.HasPrefix(target, "../") {
+		hyphaName := canonicalName(path.Join(
+			path.Dir(state.name), strings.TrimPrefix(target, "../")))
+		return path.Join("/page", hyphaName), label, classOf(hyphaName)
+	}
+	// Absolute paths are left as they are.
+	if strings.HasPrefix(target, "/") {
+		return target, label, "wikilink_internal"
+	}
+	// Anything with a colon is left untouched and marked as external.
+	if strings.ContainsRune(target, ':') {
+		return target, label, "wikilink_external"
+	}
+	return path.Join("/page", target), label, "wikilink_internal"
+}
+
+// lex splits `content` into lines and lexes them one by one, returning the
+// resulting AST. `name` is the name of the hypha the content belongs to.
+func lex(name, content string) (ast []Line) {
+	state := &GemLexerState{name: name}
+
+	lines := strings.Split(content, "\n")
+	// The extra empty line lets the lexer close a list that runs to the end.
+	lines = append(lines, "")
+	for i := range lines {
+		geminiLineToAST(lines[i], state, &ast)
+	}
+	return ast
+}
+
+// Lex `line` in markup and save it to `ast` using `state`.
+//
+// The lexer is a small state machine driven by `state.where`:
+//   - ""     — normal state, every line is a block of its own;
+//   - "list" — list items are being collected in `state.buf`;
+//   - "pre"  — preformatted text is being collected in `state.buf`.
+// A finished block is appended to `ast` with the current `state.id`.
+func geminiLineToAST(line string, state *GemLexerState, ast *[]Line) {
+	addLine := func(text interface{}) {
+		*ast = append(*ast, Line{id: state.id, contents: text})
+	}
+
+	// Blank lines are content inside a preformatted block, so they must reach
+	// the "pre" state below. Everywhere else they produce nothing and only
+	// close a list that is still open.
+	if state.where != "pre" && "" == strings.TrimSpace(line) {
+		if state.where == "list" {
+			state.where = ""
+			addLine(state.buf + "</ul>")
+		}
+		return
+	}
+
+	startsWith := func(token string) bool {
+		return strings.HasPrefix(line, token)
+	}
+
+	// Beware! Usage of goto. Some may say it is considered evil but in this case it helped to make a better-structured code.
+	switch state.where {
+	case "pre":
+		goto preformattedState
+	case "list":
+		goto listState
+	default:
+		goto normalState
+	}
+
+preformattedState:
+	switch {
+	case startsWith("```"):
+		// Closing fence: emit the collected block without the last newline.
+		state.where = ""
+		state.buf = strings.TrimSuffix(state.buf, "\n")
+		addLine(state.buf + "</code></pre>")
+		state.buf = ""
+	default:
+		state.buf += html.EscapeString(line) + "\n"
+	}
+	return
+
+listState:
+	switch {
+	case startsWith("*"):
+		state.buf += fmt.Sprintf("\t<li>%s</li>\n", remover("*")(line))
+	case startsWith("```"):
+		// A fence right after a list closes the list and opens a new block,
+		// which gets an id of its own.
+		state.where = "pre"
+		addLine(state.buf + "</ul>")
+		state.id++
+		state.buf = fmt.Sprintf("<pre id='%d' alt='%s' class='codeblock'><code>", state.id, strings.TrimPrefix(line, "```"))
+	default:
+		// Any other line closes the list and is lexed as a normal one.
+		state.where = ""
+		addLine(state.buf + "</ul>")
+		goto normalState
+	}
+	return
+
+normalState:
+	state.id++
+	switch {
+
+	case startsWith("```"):
+		state.where = "pre"
+		state.buf = fmt.Sprintf("<pre id='%d' alt='%s' class='codeblock'><code>", state.id, strings.TrimPrefix(line, "```"))
+	case startsWith("* "):
+		state.where = "list"
+		state.buf = fmt.Sprintf("<ul id='%d'>\n", state.id)
+		goto listState
+
+	// Longer heading prefixes go first so that they are not shadowed.
+	case startsWith("###### "):
+		addLine(fmt.Sprintf(
+			"<h6 id='%d'>%s</h6>", state.id, line[7:]))
+	case startsWith("##### "):
+		addLine(fmt.Sprintf(
+			"<h5 id='%d'>%s</h5>", state.id, line[6:]))
+	case startsWith("#### "):
+		addLine(fmt.Sprintf(
+			"<h4 id='%d'>%s</h4>", state.id, line[5:]))
+	case startsWith("### "):
+		addLine(fmt.Sprintf(
+			"<h3 id='%d'>%s</h3>", state.id, line[4:]))
+	case startsWith("## "):
+		addLine(fmt.Sprintf(
+			"<h2 id='%d'>%s</h2>", state.id, line[3:]))
+	case startsWith("# "):
+		addLine(fmt.Sprintf(
+			"<h1 id='%d'>%s</h1>", state.id, line[2:]))
+
+	case startsWith(">"):
+		addLine(fmt.Sprintf(
+			"<blockquote id='%d'>%s</blockquote>", state.id, remover(">")(line)))
+	case startsWith("=>"):
+		source, content, class := wikilink(line, state)
+		addLine(fmt.Sprintf(
+			`<p><a id='%d' class='%s' href="%s">%s</a></p>`, state.id, class, source, content))
+
+	case startsWith("<="):
+		// Transclusions are stored as values and expanded later by Parse.
+		addLine(parseTransclusion(line, state.name))
+	default:
+		addLine(fmt.Sprintf("<p id='%d'>%s</p>", state.id, ParagraphToHtml(line)))
+	}
+}
--- a/markup/lexer_test.go
+++ b/markup/lexer_test.go
@@ -0,0 +1,57 @@
+package markup
+
+import (
+	"fmt"
+	"io/ioutil"
+	"reflect"
+	"testing"
+)
+
+// TODO: move test markup docs to files, perhaps? These strings sure are ugly
+
+// TestLex lexes testdata/test.myco and compares the result with the expected
+// AST element by element.
+func TestLex(t *testing.T) {
+	check := func(name, content string, expectedAst []Line) {
+		if ast := lex(name, content); !reflect.DeepEqual(ast, expectedAst) {
+			if len(ast) != len(expectedAst) {
+				t.Error("Expected and generated AST length of", name, "do not match. Printed generated AST.")
+				for _, l := range ast {
+					fmt.Printf("%d: %s\n", l.id, l.contents)
+				}
+				return
+			}
+			for i, e := range ast {
+				if e != expectedAst[i] {
+					t.Error("Mismatch when lexing", name, "\nExpected:", expectedAst[i], "\nGot:", e)
+				}
+			}
+		}
+	}
+	contentsB, err := ioutil.ReadFile("testdata/test.myco")
+	if err != nil {
+		// Without the fixture there is nothing to lex; stop here instead of
+		// reporting a misleading AST mismatch for empty content as well.
+		t.Fatalf("Could not read test markup file: %v", err)
+	}
+	contents := string(contentsB)
+	check("Apple", contents, []Line{
+		{1, "<h1 id='1'>1</h1>"},
+		{2, "<h2 id='2'>2</h2>"},
+		{3, "<h3 id='3'>3</h3>"},
+		{4, "<blockquote id='4'>quote</blockquote>"},
+		{5, `<ul id='5'>
+	<li>li 1</li>
+	<li>li 2</li>
+</ul>`},
+		{6, "<p id='6'>text</p>"},
+		{7, "<p id='7'>more text</p>"},
+		{8, `<p><a id='8' class='wikilink_internal' href="/page/Pear">some link</a></p>`},
+		{9, `<ul id='9'>
+	<li>li\n"+</li>
+</ul>`},
+		{10, `<pre id='10' alt='alt text goes here' class='codeblock'><code>=&gt; preformatted text
+where markup is not lexed</code></pre>`},
+		{11, `<p><a id='11' class='wikilink_internal' href="/page/linking">linking</a></p>`},
+		{12, "<p id='12'>text</p>"},
+		{13, `<pre id='13' alt='' class='codeblock'><code>()
+/\</code></pre>`},
+		// More thorough testing of xclusions is done in xclusion_test.go
+		{14, Transclusion{"apple", 1, 3}},
+	})
+}
--- a/markup/mycomarkup.go
+++ b/markup/mycomarkup.go
@@ -0,0 +1,102 @@
+// This is not done yet
+package markup
+
+import (
+	"html"
+	"strings"
+)
+
+// MycoDoc is a Mycomarkup-formatted document.
+type MycoDoc struct {
+	// data
+	hyphaName string
+	contents  string
+
+	// state
+	recursionDepth int
+
+	// results
+}
+
+// Doc creates a document for the hypha called `hyphaName` with the given
+// markup `contents`.
+func Doc(hyphaName, contents string) *MycoDoc {
+	doc := new(MycoDoc)
+	doc.hyphaName = hyphaName
+	doc.contents = contents
+	return doc
+}
+
+// AsHtml returns an html representation of the document.
+// It is not implemented yet and always returns an empty string.
+func (md *MycoDoc) AsHtml() string {
+	return ""
+}
+
+// BlockType is an integer type for the kinds of blocks listed below.
+type BlockType int
+
+// Block kinds. NOTE(review): the constants are declared without an explicit
+// type, so they are untyped integer constants rather than BlockType values —
+// confirm whether that is intended.
+const (
+	BlockH1 = iota
+	BlockH2
+	BlockH3
+	BlockH4
+	BlockH5
+	BlockH6
+	BlockRocket
+	BlockPre
+	BlockQuote
+	BlockPara
+)
+
+// CrawlWhere is the type of the entries kept on crawl's state stack.
+type CrawlWhere int
+
+// States used by crawl. They are untyped integer constants; crawl converts
+// them to CrawlWhere implicitly. inEnd is not referenced by crawl.
+const (
+	inSomewhere = iota
+	inPre
+	inEnd
+)
+
+// crawl walks `content` line by line, keeping track of whether it is inside
+// a ``` fenced block. It is work in progress: the escaped text of fenced
+// blocks is accumulated but not returned yet, so the result is always an
+// empty slice. `name` is not used yet.
+func crawl(name, content string) []string {
+	stateStack := []CrawlWhere{inSomewhere}
+
+	startsWith := func(token string) bool {
+		return strings.HasPrefix(content, token)
+	}
+
+	pop := func() {
+		stateStack = stateStack[:len(stateStack)-1]
+	}
+
+	push := func(s CrawlWhere) {
+		stateStack = append(stateStack, s)
+	}
+
+	// readln returns the first line of `c` and everything after it. The rest
+	// is empty when `c` has no newline, which is what ends the loop below.
+	readln := func(c string) (string, string) {
+		parts := strings.SplitN(c, "\n", 2)
+		if len(parts) < 2 {
+			return parts[0], ""
+		}
+		return parts[0], parts[1]
+	}
+
+	preAcc := ""
+	line := ""
+
+	// Every iteration consumes one line, so the loop always terminates.
+	for content != "" {
+		// The current state is the one on top of the stack.
+		switch stateStack[len(stateStack)-1] {
+		case inSomewhere:
+			switch {
+			case startsWith("```"):
+				push(inPre)
+				_, content = readln(content)
+			default:
+				// Other kinds of lines are not handled yet; skip them.
+				_, content = readln(content)
+			}
+		case inPre:
+			switch {
+			case startsWith("```"):
+				pop()
+				_, content = readln(content)
+			default:
+				line, content = readln(content)
+				preAcc += html.EscapeString(line)
+			}
+		}
+	}
+
+	return []string{}
+}
--- a/markup/paragraph.go
+++ b/markup/paragraph.go
@@ -0,0 +1,108 @@
+package markup
+
+import (
+	"bytes"
+	"fmt"
+	"html"
+	"strings"
+)
+
+// spanTokenType identifies a kind of inline span; it is the key type of the
+// open/closed tag map used by ParagraphToHtml and tagFromState.
+type spanTokenType int
+
+// Span kinds. They are untyped integer constants that are converted to
+// spanTokenType where they are used.
+const (
+	spanTextNode = iota
+	spanItalic
+	spanBold
+	spanMono
+	spanSuper
+	spanSub
+	spanMark
+)
+
+// tagFromState returns the HTML that a span token of kind `stt` turns into and
+// flips the open/closed flag of that kind in `tagState`: an opening tag if the
+// span was closed, a closing tag if it was open. Inside a monospace span every
+// token except the monospace one itself is returned as `originalForm` and no
+// flag is changed.
+func tagFromState(stt spanTokenType, tagState map[spanTokenType]bool, tagName, originalForm string) string {
+	if stt != spanMono && tagState[spanMono] {
+		return originalForm
+	}
+
+	wasOpen := tagState[stt]
+	tagState[stt] = !wasOpen
+
+	format := "<%s>"
+	if wasOpen {
+		format = "</%s>"
+	}
+	return fmt.Sprintf(format, tagName)
+}
+
+// getTextNode reads a text node from `input` and returns it. Reading stops
+// before the first unescaped byte that may start a span tag (one of "/*`^,!")
+// or at the end of input; whatever is left stays in `input`. A backslash
+// escapes the byte that follows it: the backslash is dropped and the next byte
+// is taken literally.
+//
+// At least one byte is always consumed from a non-empty `input`, even if it
+// looks like the start of a span tag. The caller relies on that to make
+// progress.
+func getTextNode(input *bytes.Buffer) string {
+	var (
+		textNodeBuffer = bytes.Buffer{}
+		escaping       = false
+	)
+	// Always read the first byte in advance to avoid endless loops that kill computers (sad experience)
+	if input.Len() != 0 {
+		b, _ := input.ReadByte()
+		if b == '\\' {
+			// A leading backslash is an escape like any other one.
+			escaping = true
+		} else {
+			textNodeBuffer.WriteByte(b)
+		}
+	}
+	for input.Len() != 0 {
+		// Assume no error is possible because we check for length
+		b, _ := input.ReadByte()
+		if escaping {
+			textNodeBuffer.WriteByte(b)
+			escaping = false
+		} else if b == '\\' {
+			escaping = true
+		} else if strings.IndexByte("/*`^,!", b) >= 0 {
+			// Leave the possible tag start for the caller. UnreadByte cannot
+			// fail here because the previous operation was a successful read.
+			_ = input.UnreadByte()
+			break
+		} else {
+			textNodeBuffer.WriteByte(b)
+		}
+	}
+	return textNodeBuffer.String()
+}
+
+// ParagraphToHtml converts inline markup of one paragraph to HTML. Span tokens
+// toggle their tags, everything else is HTML-escaped text. Tags that are left
+// open at the end of `input` are not closed.
+func ParagraphToHtml(input string) string {
+	// Span tokens in the order they are tried.
+	type spanToken struct {
+		marker string
+		kind   spanTokenType
+		tag    string
+	}
+	tokens := []spanToken{
+		{"//", spanItalic, "em"},
+		{"**", spanBold, "strong"},
+		{"`", spanMono, "code"},
+		{"^", spanSuper, "sup"},
+		{",,", spanSub, "sub"},
+		{"!!", spanMark, "mark"},
+	}
+
+	var (
+		rest = bytes.NewBufferString(input)
+		out  strings.Builder
+		// true = tag is opened, missing or false = tag is not opened
+		opened = make(map[spanTokenType]bool)
+	)
+
+scan:
+	for rest.Len() != 0 {
+		for _, tok := range tokens {
+			if !bytes.HasPrefix(rest.Bytes(), []byte(tok.marker)) {
+				continue
+			}
+			out.WriteString(tagFromState(tok.kind, opened, tok.tag, tok.marker))
+			rest.Next(len(tok.marker))
+			continue scan
+		}
+		// No token at this position, so a text node starts here.
+		out.WriteString(html.EscapeString(getTextNode(rest)))
+	}
+
+	return out.String()
+}
--- a/markup/paragraph_test.go
+++ b/markup/paragraph_test.go
@@ -0,0 +1,44 @@
+package markup
+
+import (
+	"fmt"
+	"testing"
+)
+
+/*
+func TestGetTextNode(t *testing.T) {
+	tests := [][]string{
+		// input   textNode  rest
+		{"barab", "barab", ""},
+		{"test, ", "test", ", "},
+		{"/test/", "", "/test/"},
+		{"\\/test/", "/test", "/"},
+		{"test \\/ar", "test /ar", ""},
+		{"test //italian// test", "test ", "//italian// test"},
+	}
+	for _, triplet := range tests {
+		a, b := getTextNode([]byte(triplet[0]))
+		if a != triplet[1] || string(b) != triplet[2] {
+			t.Error(fmt.Sprintf("Wanted: %q\nGot: %q %q", triplet, a, b))
+		}
+	}
+}
+*/
+
+// TestParagraphToHtml checks the HTML produced for a set of sample paragraphs.
+func TestParagraphToHtml(t *testing.T) {
+	type sample struct {
+		markup string
+		want   string
+	}
+	samples := []sample{
+		{"a simple paragraph", "a simple paragraph"},
+		{"//italic//", "<em>italic</em>"},
+		{"Embedded //italic//", "Embedded <em>italic</em>"},
+		{"double //italian// //text//", "double <em>italian</em> <em>text</em>"},
+		{"it has `mono`", "it has <code>mono</code>"},
+		{"this is a left **bold", "this is a left <strong>bold"},
+		{"this line has a ,comma, two of them", "this line has a ,comma, two of them"},
+		{"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."},
+	}
+	for _, s := range samples {
+		got := ParagraphToHtml(s.markup)
+		if got == s.want {
+			continue
+		}
+		t.Error(fmt.Sprintf("%q: Wanted %q, got %q", s.markup, s.want, got))
+	}
+}
--- a/markup/parser.go
+++ b/markup/parser.go
@@ -0,0 +1,31 @@
+package markup
+
+import ()
+
+// maxRecursionLevel is the recursion level above which Parse stops and
+// returns a "Transclusion depth limit" message instead of HTML.
+const maxRecursionLevel = 3
+
+// GemParserState is the state that is passed through Parse and Transclude.
+type GemParserState struct {
+	// Incremented by Transclude, checked by Parse
+	recursionLevel int
+}
+
+// Parse turns the elements of `ast` whose ids lie between `from` and `to`
+// (both inclusive) into html. `to` equal to 0 means there is no upper bound.
+// Strings are copied as they are, transclusions are expanded with Transclude.
+// If the recursion level in `state` exceeds maxRecursionLevel, a message is
+// returned instead.
+func Parse(ast []Line, from, to int, state GemParserState) (html string) {
+	if state.recursionLevel > maxRecursionLevel {
+		return "Transclusion depth limit"
+	}
+	for i := range ast {
+		id := ast[i].id
+		if id < from {
+			continue
+		}
+		if to != 0 && id > to {
+			continue
+		}
+		if xcl, isXcl := ast[i].contents.(Transclusion); isXcl {
+			html += Transclude(xcl, state)
+		} else if text, isText := ast[i].contents.(string); isText {
+			html += text
+		}
+	}
+	return html
+}
+
+// ToHtml lexes and parses the whole `text` of the hypha called `name` and
+// returns its html representation.
+func ToHtml(name, text string) string {
+	ast := lex(name, text)
+	// 0, 0 selects every line; the zero state starts at recursion level 0.
+	return Parse(ast, 0, 0, GemParserState{})
+}
--- a/markup/testdata/test.myco
+++ b/markup/testdata/test.myco
@@ -0,0 +1,24 @@
+# 1
+## 2
+### 3
+> quote
+
+* li 1
+* li 2
+text
+more text
+=> Pear some link
+
+* li\n"+
+```alt text goes here
+=> preformatted text
+where markup is not lexed
+```it ends here"
+=>linking
+
+text
+```
+()
+/\
+```
+<= Apple : 1..3
--- a/markup/utils.go
+++ b/markup/utils.go
@@ -0,0 +1,23 @@
+package markup
+
+import (
+	"strings"
+)
+
+// remover returns a function that strips `prefix` from the beginning of its
+// argument, if it is there, and then trims the surrounding whitespace.
+func remover(prefix string) func(string) string {
+	strip := func(l string) string {
+		if strings.HasPrefix(l, prefix) {
+			l = l[len(prefix):]
+		}
+		return strings.TrimSpace(l)
+	}
+	return strip
+}
+
+// removeHeadingOctothorps removes up to three leading "#" from `line`,
+// trimming the surrounding whitespace after each step.
+func removeHeadingOctothorps(line string) string {
+	stripOne := remover("#")
+	for i := 0; i < 3; i++ {
+		line = stripOne(line)
+	}
+	return line
+}
+
+// canonicalName returns a canonical representation of a hypha `name`:
+// surrounding whitespace is trimmed, spaces become underscores and everything
+// is lowercased.
+func canonicalName(name string) string {
+	trimmed := strings.TrimSpace(name)
+	underscored := strings.Replace(trimmed, " ", "_", -1)
+	return strings.ToLower(underscored)
+}
--- a/markup/xclusion.go
+++ b/markup/xclusion.go
@@ -0,0 +1,106 @@
+package markup
+
+import (
+	"fmt"
+	"path"
+	"strconv"
+	"strings"
+)
+
+// xclError is stored in Transclusion.from and Transclusion.to when the
+// transclusion line could not be parsed.
+const xclError = -9
+
+// Transclusion is used by markup parser to remember what hyphae shall be transcluded.
+type Transclusion struct {
+	// Canonical name of the hypha to transclude
+	name string
+	// Line ids; 0 is what the parser stores for an omitted bound
+	from int // inclusive
+	to   int // inclusive
+}
+
+// Transclude transcludes `xcl` and returns html representation.
+//
+// The hypha is fetched with HyphaAccess, lexed and parsed with the recursion
+// level of `state` increased by one. A failure block is returned if the
+// transclusion is malformed or the hypha cannot be accessed.
+// NOTE(review): the hypha name is put into the html as is, without escaping.
+func Transclude(xcl Transclusion, state GemParserState) (html string) {
+	state.recursionLevel++
+	tmptOk := `<section class="transclusion transclusion_ok">
+	<a class="transclusion__link" href="/page/%s">%s</a>
+	<div class="transclusion__content">%s</div>
+</section>`
+	tmptFailed := `<section class="transclusion transclusion_failed">
+	<p>Failed to transclude <a href="/page/%s">%s</a></p>
+</section>`
+	// `to` equal to 0 means "till the end" (see Parse), so a range like "3.."
+	// is valid even though its `from` is greater than its `to`.
+	reversed := xcl.to != 0 && xcl.from > xcl.to
+	if xcl.from == xclError || xcl.to == xclError || reversed {
+		return fmt.Sprintf(tmptFailed, xcl.name, xcl.name)
+	}
+
+	rawText, binaryHtml, err := HyphaAccess(xcl.name)
+	if err != nil {
+		return fmt.Sprintf(tmptFailed, xcl.name, xcl.name)
+	}
+	xclText := Parse(lex(xcl.name, rawText), xcl.from, xcl.to, state)
+	return fmt.Sprintf(tmptOk, xcl.name, xcl.name, binaryHtml+xclText)
+}
+
+/* Grammar from hypha ‘transclusion’:
+transclusion_line  ::= transclusion_token hypha_name LWS* [":" LWS* range LWS*]
+transclusion_token ::= "<=" LWS+
+hypha_name         ::= canonical_name | noncanonical_name
+range              ::= id | (from_id two_dots to_id) | (from_id two_dots) | (two_dots to_id)
+two_dots           ::= ".."
+*/
+
+// parseTransclusion parses a markup `line` starting with "<=" that was found
+// in the hypha called `hyphaName`. The part before the first colon is the name
+// of the hypha to transclude, the part after it is the line selector. Both
+// bounds are xclError if there is nothing after "<=".
+func parseTransclusion(line, hyphaName string) (xclusion Transclusion) {
+	line = strings.TrimSpace(remover("<=")(line))
+	if line == "" {
+		return Transclusion{name: "", from: xclError, to: xclError}
+	}
+
+	colon := strings.IndexRune(line, ':')
+	if colon < 0 {
+		// No selector: the bounds keep their zero values.
+		xclusion.name = xclCanonicalName(hyphaName, strings.TrimSpace(line))
+		return xclusion
+	}
+
+	target, selector := line[:colon], line[colon+1:]
+	xclusion.name = xclCanonicalName(hyphaName, strings.TrimSpace(target))
+	xclusion.from, xclusion.to = parseSelector(strings.TrimSpace(selector))
+	return xclusion
+}
+
+// xclCanonicalName returns the canonical name of the hypha `xclName` refers
+// to. Names starting with "./" are resolved against `hyphaName`, names
+// starting with "../" against its parent; other names are used as they are.
+func xclCanonicalName(hyphaName, xclName string) string {
+	resolved := xclName
+	if strings.HasPrefix(xclName, "./") {
+		resolved = path.Join(hyphaName, strings.TrimPrefix(xclName, "./"))
+	} else if strings.HasPrefix(xclName, "../") {
+		resolved = path.Join(path.Dir(hyphaName), strings.TrimPrefix(xclName, "../"))
+	}
+	return canonicalName(resolved)
+}
+
+// At this point:
+// selector ::= id
+//            | from ".."
+//            | from ".." to
+//            |      ".." to
+// If it is not, return (xclError, xclError).
+//
+// An omitted bound is returned as 0. An empty selector and a bare ".." both
+// select everything, i.e. (0, 0). A bound that is present but is not a number
+// makes the whole selector invalid.
+func parseSelector(selector string) (from, to int) {
+	if selector == "" {
+		return 0, 0
+	}
+	if !strings.Contains(selector, "..") {
+		id, err := strconv.Atoi(selector)
+		if err != nil {
+			return xclError, xclError
+		}
+		return id, id
+	}
+
+	// Split on the first ".." only: any further ".." stays in the second part,
+	// where it is rejected as not a number.
+	parts := strings.SplitN(selector, "..", 2)
+	from, fromOk := parseSelectorBound(parts[0])
+	to, toOk := parseSelectorBound(parts[1])
+	if !fromOk || !toOk {
+		return xclError, xclError
+	}
+	return from, to
+}
+
+// parseSelectorBound parses one side of a range selector. An empty bound is
+// valid and yields 0; anything else has to be an integer.
+func parseSelectorBound(bound string) (value int, ok bool) {
+	bound = strings.TrimSpace(bound)
+	if bound == "" {
+		return 0, true
+	}
+	value, err := strconv.Atoi(bound)
+	if err != nil {
+		return 0, false
+	}
+	return value, true
+}
--- a/markup/xclusion_test.go
+++ b/markup/xclusion_test.go
@@ -0,0 +1,22 @@
+package markup
+
+import (
+	"testing"
+)
+
+// TestParseTransclusion checks parsing of transclusion lines found in a hypha
+// called "t".
+func TestParseTransclusion(t *testing.T) {
+	cases := []struct {
+		line string
+		want Transclusion
+	}{
+		{"<=  ", Transclusion{"", -9, -9}},
+		{"<=hypha", Transclusion{"hypha", 0, 0}},
+		{"<=  hypha\t", Transclusion{"hypha", 0, 0}},
+		{"<= hypha :", Transclusion{"hypha", 0, 0}},
+		{"<= hypha : ..", Transclusion{"hypha", 0, 0}},
+		{"<= hypha : 3", Transclusion{"hypha", 3, 3}},
+		{"<= hypha : 3..", Transclusion{"hypha", 3, 0}},
+		{"<= hypha : ..3", Transclusion{"hypha", 0, 3}},
+		{"<= hypha : 3..4", Transclusion{"hypha", 3, 4}},
+	}
+	for _, c := range cases {
+		got := parseTransclusion(c.line, "t")
+		if got == c.want {
+			continue
+		}
+		t.Error(c.line, "; got:", got, "wanted:", c.want)
+	}
+}