1
0
mirror of https://github.com/osmarks/mycorrhiza.git synced 2024-12-12 13:30:26 +00:00
mycorrhiza/markup/paragraph.go

185 lines
5.1 KiB
Go
Raw Normal View History

2020-10-30 13:25:48 +00:00
package markup
import (
"bytes"
"fmt"
"html"
"strings"
2021-01-03 21:10:33 +00:00
"unicode"
2020-10-30 13:25:48 +00:00
)
// spanTokenType identifies the kind of inline span found inside a paragraph.
type spanTokenType int

// Inline span kinds. The constants carry the spanTokenType type explicitly so
// that they are not plain untyped integers.
const (
	spanTextNode spanTokenType = iota // plain text
	spanItalic                        // // ... //
	spanBold                          // ** ... **
	spanMono                          // ` ... `
	spanSuper                         // ^ ... ^
	spanSub                           // ,, ... ,,
	spanMark                          // !! ... !!
	spanStrike                        // ~~ ... ~~
	spanLink                          // [[ ... ]]
)
// tagFromState toggles the span stt in tagState and returns the HTML tag that
// corresponds to the transition: an opening <tagName> when the span was closed,
// a closing </tagName> when it was open.
//
// While a monospace span is open every marker other than the monospace one is
// literal text: originalForm is returned and tagState is left untouched.
func tagFromState(stt spanTokenType, tagState map[spanTokenType]bool, tagName, originalForm string) string {
	if stt != spanMono && tagState[spanMono] {
		return originalForm
	}

	wasOpen := tagState[stt]
	tagState[stt] = !wasOpen

	format := "<%s>"
	if wasOpen {
		format = "</%s>"
	}
	return fmt.Sprintf(format, tagName)
}
2021-01-03 21:10:33 +00:00
func getLinkNode(input *bytes.Buffer, hyphaName string, isBracketedLink bool) string {
if isBracketedLink {
input.Next(2) // drop those [[
}
2020-11-04 17:42:02 +00:00
var (
escaping = false
addrBuf = bytes.Buffer{}
displayBuf = bytes.Buffer{}
currBuf = &addrBuf
)
for input.Len() != 0 {
b, _ := input.ReadByte()
if escaping {
currBuf.WriteByte(b)
escaping = false
2021-01-03 21:10:33 +00:00
} else if isBracketedLink && b == '|' && currBuf == &addrBuf {
2020-11-04 17:42:02 +00:00
currBuf = &displayBuf
2021-01-03 21:10:33 +00:00
} else if isBracketedLink && b == ']' && bytes.HasPrefix(input.Bytes(), []byte{']'}) {
2020-11-04 17:42:02 +00:00
input.Next(1)
break
2021-01-03 21:10:33 +00:00
} else if !isBracketedLink && unicode.IsSpace(rune(b)) {
break
2020-11-04 17:42:02 +00:00
} else {
currBuf.WriteByte(b)
}
}
href, text, class := LinkParts(addrBuf.String(), displayBuf.String(), hyphaName)
return fmt.Sprintf(`<a href="%s" class="%s">%s</a>`, href, class, html.EscapeString(text))
2020-11-04 17:42:02 +00:00
}
// getTextNode consumes a run of plain text from the front of input and returns
// it. Whatever was not consumed stays in input. No HTML escaping is done here.
//
// The run ends
//   - just before a byte that may start a span marker (one of /*`^,![~), or
//   - just before something that looks like the start of a bare link
//     (https://, http://, gemini://, gopher:// or ftp://), or
//   - at the end of input.
//
// A backslash escapes the byte that follows it: the backslash is dropped and
// the next byte is copied literally, even if it is a marker byte. A backslash
// that is the very last byte of input is dropped.
//
// The first byte is always consumed, even if it is a marker byte, so that a
// caller looping over input is guaranteed to make progress.
func getTextNode(input *bytes.Buffer) string {
	var (
		textNodeBuffer bytes.Buffer
		escaping       bool
		startsWith     = func(t string) bool {
			return bytes.HasPrefix(input.Bytes(), []byte(t))
		}
		couldBeLinkStart = func() bool {
			return startsWith("https://") || startsWith("http://") || startsWith("gemini://") || startsWith("gopher://") || startsWith("ftp://")
		}
	)
	// Always take the first byte in advance: the caller only gets here when no
	// marker matched, so refusing to consume it would loop forever.
	if input.Len() != 0 {
		b, _ := input.ReadByte()
		if b == '\\' {
			// A leading backslash is an escape like any other.
			escaping = true
		} else {
			textNodeBuffer.WriteByte(b)
			if couldBeLinkStart() {
				// A link starts right after the first byte; stop so the caller sees it.
				return textNodeBuffer.String()
			}
		}
	}
	for input.Len() != 0 {
		// No error is possible because the length was just checked.
		b, _ := input.ReadByte()
		switch {
		case escaping:
			textNodeBuffer.WriteByte(b)
			escaping = false
		case b == '\\':
			escaping = true
		case strings.IndexByte("/*`^,![~", b) >= 0:
			// Leave the possible marker for the caller. UnreadByte cannot fail
			// directly after a successful ReadByte.
			_ = input.UnreadByte()
			return textNodeBuffer.String()
		default:
			textNodeBuffer.WriteByte(b)
			if couldBeLinkStart() {
				return textNodeBuffer.String()
			}
		}
	}
	return textNodeBuffer.String()
}
2020-11-04 17:42:02 +00:00
func ParagraphToHtml(hyphaName, input string) string {
2020-10-30 13:25:48 +00:00
var (
p = bytes.NewBufferString(input)
ret strings.Builder
// true = tag is opened, false = tag is not opened
tagState = map[spanTokenType]bool{
spanItalic: false,
spanBold: false,
spanMono: false,
spanSuper: false,
spanSub: false,
spanMark: false,
2020-11-04 17:42:02 +00:00
spanLink: false,
2020-10-30 13:25:48 +00:00
}
startsWith = func(t string) bool {
return bytes.HasPrefix(p.Bytes(), []byte(t))
}
noTagsActive = func() bool {
return !(tagState[spanItalic] || tagState[spanBold] || tagState[spanMono] || tagState[spanSuper] || tagState[spanSub] || tagState[spanMark] || tagState[spanLink])
}
2020-10-30 13:25:48 +00:00
)
for p.Len() != 0 {
switch {
case startsWith("//"):
ret.WriteString(tagFromState(spanItalic, tagState, "em", "//"))
p.Next(2)
case startsWith("**"):
ret.WriteString(tagFromState(spanBold, tagState, "strong", "**"))
p.Next(2)
case startsWith("`"):
ret.WriteString(tagFromState(spanMono, tagState, "code", "`"))
p.Next(1)
case startsWith("^"):
ret.WriteString(tagFromState(spanSuper, tagState, "sup", "^"))
p.Next(1)
case startsWith(",,"):
ret.WriteString(tagFromState(spanSub, tagState, "sub", ",,"))
p.Next(2)
case startsWith("!!"):
ret.WriteString(tagFromState(spanMark, tagState, "mark", "!!"))
p.Next(2)
2020-11-11 17:42:31 +00:00
case startsWith("~~"):
ret.WriteString(tagFromState(spanMark, tagState, "s", "~~"))
p.Next(2)
2020-11-04 17:42:02 +00:00
case startsWith("[["):
2021-01-03 21:10:33 +00:00
ret.WriteString(getLinkNode(p, hyphaName, true))
case (startsWith("https://") || startsWith("http://") || startsWith("gemini://") || startsWith("gopher://") || startsWith("ftp://")) && noTagsActive():
2021-01-03 21:10:33 +00:00
ret.WriteString(getLinkNode(p, hyphaName, false))
2020-10-30 13:25:48 +00:00
default:
ret.WriteString(html.EscapeString(getTextNode(p)))
}
}
for stt, open := range tagState {
if open {
switch stt {
case spanItalic:
ret.WriteString(tagFromState(spanItalic, tagState, "em", "//"))
case spanBold:
ret.WriteString(tagFromState(spanBold, tagState, "strong", "**"))
case spanMono:
ret.WriteString(tagFromState(spanMono, tagState, "code", "`"))
case spanSuper:
ret.WriteString(tagFromState(spanSuper, tagState, "sup", "^"))
case spanSub:
ret.WriteString(tagFromState(spanSub, tagState, "sub", ",,"))
case spanMark:
ret.WriteString(tagFromState(spanMark, tagState, "mark", "!!"))
2020-11-11 17:42:31 +00:00
case spanStrike:
ret.WriteString(tagFromState(spanMark, tagState, "s", "~~"))
2020-11-04 17:42:02 +00:00
case spanLink:
ret.WriteString(tagFromState(spanLink, tagState, "a", "[["))
}
}
}
2020-10-30 13:25:48 +00:00
return ret.String()
}