1
0
mirror of https://github.com/osmarks/mycorrhiza.git synced 2025-12-04 07:38:06 +00:00

Implement some features of mycomarkup

This commit is contained in:
bouncepaw
2020-10-30 18:25:48 +05:00
parent 657fb5d357
commit d6c6ad4ae3
20 changed files with 335 additions and 59 deletions

182
markup/lexer.go Normal file
View File

@@ -0,0 +1,182 @@
package markup
import (
"fmt"
"html"
"path"
"strings"
)
// HyphaExists holds the function that reports whether a hypha with the given
// name is present. wikilink calls it to decide between the "wikilink_internal"
// and "wikilink_new" classes for relative links.
// NOTE(review): it is not assigned anywhere in the visible code; presumably the
// host application sets it before lexing — confirm.
var HyphaExists func(string) bool
// HyphaAccess holds the function that accesses a hypha by its name. Transclude
// lexes the returned rawText and prepends binaryHtml to the rendered result; a
// non-nil err makes the transclusion render as failed.
// NOTE(review): like HyphaExists, presumably set by the host application — confirm.
var HyphaAccess func(string) (rawText, binaryHtml string, err error)
// GemLexerState is used by markup parser to remember what is going on.
// lex creates one value per document and passes it to geminiLineToAST for
// every line.
type GemLexerState struct {
// Name of hypha being parsed; relative wikilinks and transclusions are
// resolved against it
name string
// Kind of multi-line block that is currently open
where string // "", "list", "pre"
// Line id: incremented when a new block starts and copied into Line.id
id int
// HTML accumulated so far for the open list or preformatted block
buf string
}
// Line is one lexed block of a document, as produced by geminiLineToAST.
type Line struct {
// Block id, taken from GemLexerState.id at the moment the block is emitted
id int
// Either a string of ready HTML or a Transclusion (see the type switch in Parse).
// interface{} may be bad. What I need is a sum of string and Transclusion
contents interface{}
}
// wikilink parses a markup line starting with "=>" according to wikilink rules.
//
// The first whitespace-delimited word after "=>" is the link target, the rest
// of the line is the link text (the target itself when no text is given).
// Targets are resolved as follows:
//   - "./x"  is relative to the hypha being parsed (state.name);
//   - "../x" is relative to the parent of the hypha being parsed;
//   - "/x"   is left untouched;
//   - anything containing ':' is left untouched and classed as external;
//   - everything else is placed under "/page".
//
// Relative targets that HyphaExists does not know get the class
// "wikilink_new". An empty link yields three empty strings.
func wikilink(src string, state *GemLexerState) (href, text, class string) {
	body := strings.TrimSpace(remover("=>")(src))
	if len(body) == 0 {
		return "", "", ""
	}

	target := strings.Fields(body)[0]
	label := strings.TrimPrefix(body, target)
	if label == "" {
		label = target
	}
	label = strings.TrimSpace(label)

	// relative turns a target relative to `base` into a /page link and picks
	// the class depending on whether the hypha exists.
	relative := func(base, rest string) (string, string) {
		hyphaName := canonicalName(path.Join(base, rest))
		kind := "wikilink_internal"
		if !HyphaExists(hyphaName) {
			kind = "wikilink_new"
		}
		return path.Join("/page", hyphaName), kind
	}

	if strings.HasPrefix(target, "./") {
		href, class = relative(state.name, strings.TrimPrefix(target, "./"))
		return href, label, class
	}
	if strings.HasPrefix(target, "../") {
		href, class = relative(path.Dir(state.name), strings.TrimPrefix(target, "../"))
		return href, label, class
	}
	if strings.HasPrefix(target, "/") {
		return target, label, "wikilink_internal"
	}
	if strings.ContainsRune(target, ':') {
		return target, label, "wikilink_external"
	}
	return path.Join("/page", target), label, "wikilink_internal"
}
// lex splits `content` of the hypha called `name` into lines and feeds them one
// by one to geminiLineToAST, returning the collected blocks.
func lex(name, content string) (ast []Line) {
	state := &GemLexerState{name: name}
	// The extra empty line at the end lets the lexer close a list that is still
	// open when the document ends.
	lines := append(strings.Split(content, "\n"), "")
	for i := range lines {
		geminiLineToAST(lines[i], state, &ast)
	}
	return ast
}
// geminiLineToAST lexes one `line` of markup and appends every block that is
// finished by it to `ast`. `state` carries the context between calls: which
// multi-line block ("list" or "pre") is open, its accumulated HTML and the id
// of the current block.
//
// Blank lines close an open list and are otherwise ignored, except inside a
// preformatted block where they are kept as part of the text.
//
// NOTE(review): heading, blockquote and list item text is still emitted
// verbatim, without HTML escaping.
func geminiLineToAST(line string, state *GemLexerState, ast *[]Line) {
	// All helpers are declared before the first goto because Go does not allow
	// a goto to jump over a variable declaration.
	addLine := func(text interface{}) {
		*ast = append(*ast, Line{id: state.id, contents: text})
	}
	startsWith := func(token string) bool {
		return strings.HasPrefix(line, token)
	}
	// openPre builds the opening tags of a preformatted block. The text after
	// the backticks becomes the alt attribute and is escaped so that it cannot
	// break out of the attribute.
	openPre := func() string {
		alt := html.EscapeString(strings.TrimPrefix(line, "```"))
		return fmt.Sprintf("<pre id='%d' alt='%s' class='codeblock'><code>", state.id, alt)
	}
	// heading emits an <hN> element; the text starts after N octothorps and a space.
	heading := func(level int) {
		addLine(fmt.Sprintf("<h%d id='%d'>%s</h%d>", level, state.id, line[level+1:], level))
	}

	// Empty lines belong to the text of a preformatted block, so they are only
	// handled here when no such block is open.
	if state.where != "pre" && strings.TrimSpace(line) == "" {
		if state.where == "list" {
			state.where = ""
			addLine(state.buf + "</ul>")
		}
		return
	}

	// Beware! Usage of goto. Some may say it is considered evil but in this case it helped to make a better-structured code.
	switch state.where {
	case "pre":
		goto preformattedState
	case "list":
		goto listState
	default:
		goto normalState
	}

preformattedState:
	switch {
	case startsWith("```"):
		state.where = ""
		state.buf = strings.TrimSuffix(state.buf, "\n")
		addLine(state.buf + "</code></pre>")
		state.buf = ""
	default:
		state.buf += html.EscapeString(line) + "\n"
	}
	return

listState:
	switch {
	// The same "* " token that opens a list in normalState continues it, so a
	// paragraph starting with "**bold**" is not mistaken for a list item.
	case startsWith("* "):
		state.buf += fmt.Sprintf("\t<li>%s</li>\n", remover("*")(line))
	case startsWith("```"):
		state.where = "pre"
		addLine(state.buf + "</ul>")
		state.id++
		state.buf = openPre()
	default:
		state.where = ""
		addLine(state.buf + "</ul>")
		goto normalState
	}
	return

normalState:
	state.id++
	switch {
	case startsWith("```"):
		state.where = "pre"
		state.buf = openPre()
	case startsWith("* "):
		state.where = "list"
		state.buf = fmt.Sprintf("<ul id='%d'>\n", state.id)
		goto listState

	case startsWith("###### "):
		heading(6)
	case startsWith("##### "):
		heading(5)
	case startsWith("#### "):
		heading(4)
	case startsWith("### "):
		heading(3)
	case startsWith("## "):
		heading(2)
	case startsWith("# "):
		heading(1)

	case startsWith(">"):
		addLine(fmt.Sprintf(
			"<blockquote id='%d'>%s</blockquote>", state.id, remover(">")(line)))
	case startsWith("=>"):
		source, content, class := wikilink(line, state)
		// Both the target and the text come straight from the document, so
		// they are escaped before being placed into the markup.
		addLine(fmt.Sprintf(
			`<p><a id='%d' class='%s' href="%s">%s</a></p>`,
			state.id, class, html.EscapeString(source), html.EscapeString(content)))
	case startsWith("<="):
		addLine(parseTransclusion(line, state.name))
	default:
		addLine(fmt.Sprintf("<p id='%d'>%s</p>", state.id, ParagraphToHtml(line)))
	}
}

57
markup/lexer_test.go Normal file
View File

@@ -0,0 +1,57 @@
package markup
import (
"fmt"
"io/ioutil"
"reflect"
"testing"
)
// TestLex lexes testdata/test.myco and compares the result block by block with
// the expected AST.
//
// Expected multi-line values are written with explicit \n and \t escapes so
// that the whitespace the lexer emits (list items are indented with a tab) is
// visible and cannot be lost when the file is reformatted.
func TestLex(t *testing.T) {
	check := func(name, content string, expectedAst []Line) {
		ast := lex(name, content)
		if reflect.DeepEqual(ast, expectedAst) {
			return
		}
		if len(ast) != len(expectedAst) {
			t.Error("Expected and generated AST length of", name, "do not match. Printed generated AST.")
			for _, l := range ast {
				t.Log(fmt.Sprintf("%d: %v", l.id, l.contents))
			}
			return
		}
		for i, e := range ast {
			if e != expectedAst[i] {
				t.Error("Mismatch when lexing", name, "\nExpected:", expectedAst[i], "\nGot:", e)
			}
		}
	}
	contentsB, err := ioutil.ReadFile("testdata/test.myco")
	if err != nil {
		// Without the fixture every comparison below would be meaningless.
		t.Fatal("Could not read test markup file!", err)
	}
	contents := string(contentsB)
	check("Apple", contents, []Line{
		{1, "<h1 id='1'>1</h1>"},
		{2, "<h2 id='2'>2</h2>"},
		{3, "<h3 id='3'>3</h3>"},
		{4, "<blockquote id='4'>quote</blockquote>"},
		{5, "<ul id='5'>\n\t<li>li 1</li>\n\t<li>li 2</li>\n</ul>"},
		{6, "<p id='6'>text</p>"},
		{7, "<p id='7'>more text</p>"},
		{8, `<p><a id='8' class='wikilink_internal' href="/page/Pear">some link</a></p>`},
		// The fixture contains a literal backslash followed by 'n', not a newline.
		{9, "<ul id='9'>\n\t<li>li\\n\"+</li>\n</ul>"},
		{10, "<pre id='10' alt='alt text goes here' class='codeblock'><code>=&gt; preformatted text\nwhere markup is not lexed</code></pre>"},
		{11, `<p><a id='11' class='wikilink_internal' href="/page/linking">linking</a></p>`},
		{12, "<p id='12'>text</p>"},
		{13, "<pre id='13' alt='' class='codeblock'><code>()\n/\\</code></pre>"},
		// More thorough testing of xclusions is done in xclusion_test.go
		{14, Transclusion{"apple", 1, 3}},
	})
}

102
markup/mycomarkup.go Normal file
View File

@@ -0,0 +1,102 @@
// This is not done yet
package markup
import (
"html"
"strings"
)
// MycoDoc is a Mycomarkup-formatted document.
type MycoDoc struct {
	// Input data: the name of the hypha and its raw text.
	hyphaName, contents string

	// Processing state.
	recursionDepth int

	// Results will go here once rendering is implemented.
}

// Doc creates a MycoDoc for the hypha `hyphaName` with the given raw
// `contents`. All state fields start at their zero values.
func Doc(hyphaName, contents string) *MycoDoc {
	doc := new(MycoDoc)
	doc.hyphaName = hyphaName
	doc.contents = contents
	return doc
}

// AsHtml returns an html representation of the document.
// It is a stub for now and always returns an empty string.
func (md *MycoDoc) AsHtml() string {
	return ""
}
// BlockType enumerates the kinds of blocks a Mycomarkup document consists of.
type BlockType int

// The constants are declared with an explicit type: with a bare `= iota` they
// would be untyped integers and BlockType would give no type safety at all.
const (
	BlockH1 BlockType = iota
	BlockH2
	BlockH3
	BlockH4
	BlockH5
	BlockH6
	BlockRocket
	BlockPre
	BlockQuote
	BlockPara
)
// CrawlWhere identifies the context crawl is in while walking a document.
type CrawlWhere int

// Typed explicitly so that the constants are CrawlWhere values rather than
// untyped integers.
const (
	inSomewhere CrawlWhere = iota
	inPre
	inEnd
)

// crawl walks `content` line by line, tracking whether the current line is
// inside a ``` preformatted block. Text inside such blocks is HTML-escaped and
// accumulated.
//
// The function is unfinished: nothing is produced from the accumulated text
// yet and the result is always an empty slice. `name` is not used so far.
func crawl(name, content string) []string {
	stateStack := []CrawlWhere{inSomewhere}

	startsWith := func(token string) bool {
		return strings.HasPrefix(content, token)
	}
	pop := func() {
		stateStack = stateStack[:len(stateStack)-1]
	}
	push := func(s CrawlWhere) {
		stateStack = append(stateStack, s)
	}
	// readln splits off the first line of `c`. The limit must be 2: with 1
	// SplitN never splits and there is no second element to return.
	readln := func(c string) (string, string) {
		parts := strings.SplitN(c, "\n", 2)
		if len(parts) < 2 {
			return parts[0], ""
		}
		return parts[0], parts[1]
	}

	preAcc := ""
	line := ""
	// Every iteration consumes one line, so the loop ends once the content is
	// exhausted.
	for content != "" {
		// The current state is the top of the stack, i.e. its last element.
		switch stateStack[len(stateStack)-1] {
		case inSomewhere:
			if startsWith("```") {
				push(inPre)
			}
			_, content = readln(content)
		case inPre:
			if startsWith("```") {
				pop()
				_, content = readln(content)
			} else {
				line, content = readln(content)
				preAcc += html.EscapeString(line)
			}
		}
	}
	return []string{}
}

108
markup/paragraph.go Normal file
View File

@@ -0,0 +1,108 @@
package markup
import (
"bytes"
"fmt"
"html"
"strings"
)
// spanTokenType enumerates the inline (span-level) elements of a paragraph.
type spanTokenType int

// Typed explicitly so that the constants are spanTokenType values rather than
// untyped integers.
const (
	spanTextNode spanTokenType = iota
	spanItalic
	spanBold
	spanMono
	spanSuper
	spanSub
	spanMark
)
// tagFromState returns the markup to emit for the span token `stt` and flips
// its open/closed flag in `tagState`.
//
// While a monospace span is open every other token is returned in its
// `originalForm`, untouched. Otherwise the token closes the span if it is
// open (returning "</tagName>") or opens it (returning "<tagName>").
func tagFromState(stt spanTokenType, tagState map[spanTokenType]bool, tagName, originalForm string) string {
	insideMono := tagState[spanMono]
	if insideMono && stt != spanMono {
		return originalForm
	}

	wasOpen := tagState[stt]
	tagState[stt] = !wasOpen

	format := "<%s>"
	if wasOpen {
		format = "</%s>"
	}
	return fmt.Sprintf(format, tagName)
}
// getTextNode reads a text node from the front of `input` and returns it; the
// bytes that follow the node are left in `input`.
//
// The node ends right before the first byte that may start a span tag (one of
// "/*`^,!") or at the end of the input. A backslash escapes the byte after it:
// the backslash itself is dropped and the next byte is copied verbatim, even
// if it is a span tag byte. This also holds when the backslash is the very
// first byte of the node.
//
// At least one byte is always consumed from a non-empty input, so callers
// looping until the input is empty are guaranteed to make progress.
func getTextNode(input *bytes.Buffer) string {
	var (
		textNodeBuffer bytes.Buffer
		escaping       bool
	)
	// Always read the first byte in advance to avoid endless loops that kill computers (sad experience).
	// It is copied even when it is a span tag byte; only a backslash is
	// special here, as it must escape the byte that follows.
	if input.Len() != 0 {
		b, _ := input.ReadByte()
		if b == '\\' {
			escaping = true
		} else {
			textNodeBuffer.WriteByte(b)
		}
	}
	for input.Len() != 0 {
		// Assume no error is possible because we check for length
		b, _ := input.ReadByte()
		switch {
		case escaping:
			textNodeBuffer.WriteByte(b)
			escaping = false
		case b == '\\':
			escaping = true
		case strings.IndexByte("/*`^,!", b) >= 0:
			// Cannot fail: the previous operation was a successful ReadByte.
			_ = input.UnreadByte()
			return textNodeBuffer.String()
		default:
			textNodeBuffer.WriteByte(b)
		}
	}
	return textNodeBuffer.String()
}
// ParagraphToHtml converts the inline markup of one paragraph to HTML.
//
// The span tokens //, **, `, ^, ,, and !! toggle <em>, <strong>, <code>,
// <sup>, <sub> and <mark> respectively; everything else is HTML-escaped text.
// Spans that are still open at the end of the paragraph are not closed.
func ParagraphToHtml(input string) string {
	type span struct {
		token string
		kind  spanTokenType
		tag   string
	}
	// The tokens start with different bytes, so at most one of them matches.
	spans := []span{
		{"//", spanItalic, "em"},
		{"**", spanBold, "strong"},
		{"`", spanMono, "code"},
		{"^", spanSuper, "sup"},
		{",,", spanSub, "sub"},
		{"!!", spanMark, "mark"},
	}

	var (
		rest = bytes.NewBufferString(input)
		out  strings.Builder
		// true = tag is opened, absent or false = tag is not opened
		open = make(map[spanTokenType]bool, len(spans))
	)

scan:
	for rest.Len() > 0 {
		for _, s := range spans {
			if !bytes.HasPrefix(rest.Bytes(), []byte(s.token)) {
				continue
			}
			out.WriteString(tagFromState(s.kind, open, s.tag, s.token))
			rest.Next(len(s.token))
			continue scan
		}
		// No token at the current position: consume a run of plain text.
		out.WriteString(html.EscapeString(getTextNode(rest)))
	}
	return out.String()
}

44
markup/paragraph_test.go Normal file
View File

@@ -0,0 +1,44 @@
package markup
import (
"fmt"
"testing"
)
/*
func TestGetTextNode(t *testing.T) {
tests := [][]string{
// input textNode rest
{"barab", "barab", ""},
{"test, ", "test", ", "},
{"/test/", "", "/test/"},
{"\\/test/", "/test", "/"},
{"test \\/ar", "test /ar", ""},
{"test //italian// test", "test ", "//italian// test"},
}
for _, triplet := range tests {
a, b := getTextNode([]byte(triplet[0]))
if a != triplet[1] || string(b) != triplet[2] {
t.Error(fmt.Sprintf("Wanted: %q\nGot: %q %q", triplet, a, b))
}
}
}
*/
// TestParagraphToHtml checks the HTML produced for a set of paragraphs,
// including a long one without any markup that must come out unchanged.
func TestParagraphToHtml(t *testing.T) {
	const lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."

	cases := []struct {
		markup string
		want   string
	}{
		{"a simple paragraph", "a simple paragraph"},
		{"//italic//", "<em>italic</em>"},
		{"Embedded //italic//", "Embedded <em>italic</em>"},
		{"double //italian// //text//", "double <em>italian</em> <em>text</em>"},
		{"it has `mono`", "it has <code>mono</code>"},
		{"this is a left **bold", "this is a left <strong>bold"},
		{"this line has a ,comma, two of them", "this line has a ,comma, two of them"},
		{lorem, lorem},
	}
	for _, c := range cases {
		got := ParagraphToHtml(c.markup)
		if got == c.want {
			continue
		}
		t.Error(fmt.Sprintf("%q: Wanted %q, got %q", c.markup, c.want, got))
	}
}

31
markup/parser.go Normal file
View File

@@ -0,0 +1,31 @@
package markup
import ()
// maxRecursionLevel is the highest recursionLevel at which Parse still renders
// its input; above it Parse returns a "Transclusion depth limit" notice.
const maxRecursionLevel = 3
// GemParserState carries the parser context. It is passed by value, so each
// nested Parse call works on its own copy.
type GemParserState struct {
// Transclusion nesting depth; Transclude increments it before parsing the
// transcluded hypha
recursionLevel int
}
// Parse renders the blocks of `ast` whose ids lie between `from` and `to`
// (both inclusive) and returns the concatenated HTML. A `to` of 0 means there
// is no upper bound. String blocks are emitted as they are, transclusions are
// rendered through Transclude. Once state.recursionLevel exceeds
// maxRecursionLevel the text "Transclusion depth limit" is returned instead.
func Parse(ast []Line, from, to int, state GemParserState) (html string) {
	if state.recursionLevel > maxRecursionLevel {
		return "Transclusion depth limit"
	}
	for i := range ast {
		id := ast[i].id
		if id < from || (to != 0 && id > to) {
			continue
		}
		if xcl, ok := ast[i].contents.(Transclusion); ok {
			html += Transclude(xcl, state)
		} else if text, ok := ast[i].contents.(string); ok {
			html += text
		}
	}
	return html
}
func ToHtml(name, text string) string {
state := GemParserState{}
return Parse(lex(name, text), 0, 0, state)
}

24
markup/testdata/test.myco vendored Normal file
View File

@@ -0,0 +1,24 @@
# 1
## 2
### 3
> quote
* li 1
* li 2
text
more text
=> Pear some link
* li\n"+
```alt text goes here
=> preformatted text
where markup is not lexed
```it ends here"
=>linking
text
```
()
/\
```
<= Apple : 1..3

23
markup/utils.go Normal file
View File

@@ -0,0 +1,23 @@
package markup
import (
"strings"
)
// remover returns a function that strips one leading `prefix` from its
// argument, if present, and then trims the surrounding whitespace.
func remover(prefix string) func(string) string {
	strip := func(l string) string {
		rest := strings.TrimPrefix(l, prefix)
		return strings.TrimSpace(rest)
	}
	return strip
}
// removeHeadingOctothorps strips up to three leading '#' characters from
// `line`, trimming the surrounding whitespace after every step.
func removeHeadingOctothorps(line string) string {
	strip := remover("#")
	for i := 0; i < 3; i++ {
		line = strip(line)
	}
	return line
}
// canonicalName returns the canonical representation of a hypha `name`:
// surrounding whitespace is removed, the remaining spaces become underscores
// and all letters are lowercased.
func canonicalName(name string) string {
	trimmed := strings.TrimSpace(name)
	underscored := strings.Replace(trimmed, " ", "_", -1)
	return strings.ToLower(underscored)
}

106
markup/xclusion.go Normal file
View File

@@ -0,0 +1,106 @@
package markup
import (
"fmt"
"path"
"strconv"
"strings"
)
// xclError is the value stored in Transclusion.from and Transclusion.to when
// the transclusion line or its selector could not be parsed. Transclude
// renders a failure notice for such transclusions.
const xclError = -9
// Transclusion is used by markup parser to remember what hyphae shall be transcluded.
// The lexer stores it as the contents of a Line and Parse passes it to Transclude.
type Transclusion struct {
// Canonical name of the hypha to transclude
name string
// Id of the first block to include; 0 when the selector gives no lower bound
from int // inclusive
// Id of the last block to include; 0 when the selector gives no upper bound
// (Parse then includes everything starting at `from`)
to int // inclusive
}
// Transclude transcludes `xcl` and returns html representation.
//
// The hypha is fetched with HyphaAccess, lexed, and the blocks selected by
// xcl.from and xcl.to are rendered with Parse one recursion level deeper than
// `state`. A failure notice is returned instead when the selector could not be
// parsed, when the range is reversed or when the hypha cannot be accessed.
//
// NOTE(review): xcl.name is placed into the markup without HTML escaping.
func Transclude(xcl Transclusion, state GemParserState) (html string) {
	state.recursionLevel++
	tmptOk := `<section class="transclusion transclusion_ok">
<a class="transclusion__link" href="/page/%s">%s</a>
<div class="transclusion__content">%s</div>
</section>`
	tmptFailed := `<section class="transclusion transclusion_failed">
<p>Failed to transclude <a href="/page/%s">%s</a></p>
</section>`

	unparsable := xcl.from == xclError || xcl.to == xclError
	// A `to` of 0 means "no upper bound" (see Parse), so open-ended ranges
	// such as "3.." must not be treated as reversed.
	reversed := xcl.to != 0 && xcl.from > xcl.to
	if unparsable || reversed {
		return fmt.Sprintf(tmptFailed, xcl.name, xcl.name)
	}

	rawText, binaryHtml, err := HyphaAccess(xcl.name)
	if err != nil {
		return fmt.Sprintf(tmptFailed, xcl.name, xcl.name)
	}

	xclText := Parse(lex(xcl.name, rawText), xcl.from, xcl.to, state)
	return fmt.Sprintf(tmptOk, xcl.name, xcl.name, binaryHtml+xclText)
}
/* Grammar for hypha transclusion:
transclusion_line ::= transclusion_token hypha_name LWS* [":" LWS* range LWS*]
transclusion_token ::= "<=" LWS+
hypha_name ::= canonical_name | noncanonical_name
range ::= id | (from_id two_dots to_id) | (from_id two_dots) | (two_dots to_id)
two_dots ::= ".."
*/

// parseTransclusion parses a markup `line` starting with "<=" found in the
// hypha `hyphaName`. The part before the first colon is the name of the hypha
// to transclude, resolved relative to `hyphaName`; the part after it is the
// range selector. A line without a name yields a Transclusion whose bounds are
// both xclError.
func parseTransclusion(line, hyphaName string) (xclusion Transclusion) {
	rest := strings.TrimSpace(remover("<=")(line))
	if rest == "" {
		return Transclusion{"", xclError, xclError}
	}

	colon := strings.IndexByte(rest, ':')
	if colon < 0 {
		// No selector: the whole hypha is transcluded.
		return Transclusion{name: xclCanonicalName(hyphaName, strings.TrimSpace(rest))}
	}

	target := xclCanonicalName(hyphaName, strings.TrimSpace(rest[:colon]))
	from, to := parseSelector(strings.TrimSpace(rest[colon+1:]))
	return Transclusion{name: target, from: from, to: to}
}
// xclCanonicalName returns the canonical name of the hypha `xclName` refers to
// when it appears in the hypha `hyphaName`: "./x" is resolved relative to
// `hyphaName`, "../x" relative to its parent, any other name is taken as is.
func xclCanonicalName(hyphaName, xclName string) string {
	resolved := xclName
	if strings.HasPrefix(xclName, "./") {
		resolved = path.Join(hyphaName, strings.TrimPrefix(xclName, "./"))
	} else if strings.HasPrefix(xclName, "../") {
		resolved = path.Join(path.Dir(hyphaName), strings.TrimPrefix(xclName, "../"))
	}
	return canonicalName(resolved)
}
// parseSelector parses the range part of a transclusion line.
//
// At this point:
// selector ::= id
//            | from ".."
//            | from ".." to
//            | ".." to
// If it is not, return (xclError, xclError).
//
// An omitted bound is returned as 0; an empty selector and a bare ".." both
// give (0, 0). A single id gives (id, id). A bound that is present must be a
// valid integer: "3..x" or "1..2..3" are errors rather than being partially
// accepted.
func parseSelector(selector string) (from, to int) {
	if selector == "" {
		return 0, 0
	}

	if !strings.Contains(selector, "..") {
		id, err := strconv.Atoi(selector)
		if err != nil {
			return xclError, xclError
		}
		return id, id
	}

	// bound parses one side of the range; an empty side means "not given".
	bound := func(s string) (int, bool) {
		s = strings.TrimSpace(s)
		if s == "" {
			return 0, true
		}
		n, err := strconv.Atoi(s)
		return n, err == nil
	}

	// Split once only, so that anything after a second ".." ends up in the
	// upper bound and is rejected there instead of being silently ignored.
	parts := strings.SplitN(selector, "..", 2)
	from, fromOk := bound(parts[0])
	to, toOk := bound(parts[1])
	if !fromOk || !toOk {
		return xclError, xclError
	}
	return from, to
}

22
markup/xclusion_test.go Normal file
View File

@@ -0,0 +1,22 @@
package markup
import (
"testing"
)
// TestParseTransclusion checks how transclusion lines found in the hypha "t"
// are turned into Transclusion values.
func TestParseTransclusion(t *testing.T) {
	cases := []struct {
		line string
		want Transclusion
	}{
		{"<= ", Transclusion{"", -9, -9}},
		{"<=hypha", Transclusion{"hypha", 0, 0}},
		{"<= hypha\t", Transclusion{"hypha", 0, 0}},
		{"<= hypha :", Transclusion{"hypha", 0, 0}},
		{"<= hypha : ..", Transclusion{"hypha", 0, 0}},
		{"<= hypha : 3", Transclusion{"hypha", 3, 3}},
		{"<= hypha : 3..", Transclusion{"hypha", 3, 0}},
		{"<= hypha : ..3", Transclusion{"hypha", 0, 3}},
		{"<= hypha : 3..4", Transclusion{"hypha", 3, 4}},
	}
	for _, c := range cases {
		xcl := parseTransclusion(c.line, "t")
		if xcl == c.want {
			continue
		}
		t.Error(c.line, "; got:", xcl, "wanted:", c.want)
	}
}