2020-10-30 13:25:48 +00:00
package markup
import (
"bytes"
"fmt"
"html"
"strings"
2021-01-03 21:10:33 +00:00
"unicode"
2020-10-30 13:25:48 +00:00
)
// spanTokenType identifies a kind of inline span. Values of this type are used
// as keys of the open/closed state map kept while a paragraph is rendered.
type spanTokenType int

// Kinds of inline spans. The constants are explicitly typed so that they
// cannot be mixed up with plain integers.
const (
	spanTextNode spanTokenType = iota
	spanItalic                 // toggled by "//", rendered as <em>
	spanBold                   // toggled by "**", rendered as <strong>
	spanMono                   // toggled by "`", rendered as <code>
	spanSuper                  // toggled by "^", rendered as <sup>
	spanSub                    // toggled by ",,", rendered as <sub>
	spanMark                   // toggled by "!!", rendered as <mark>
	spanStrike                 // toggled by "~~", rendered as <s>
	spanLink
)
// tagFromState flips the open/closed flag of span stt in tagState and returns
// the HTML tag that corresponds to the transition: "<tagName>" when the span
// gets opened, "</tagName>" when it gets closed.
//
// While a spanMono span is open, every other span marker is inert: its
// originalForm is returned verbatim and tagState is left untouched.
func tagFromState(stt spanTokenType, tagState map[spanTokenType]bool, tagName, originalForm string) string {
	// Inside a code span only the code marker itself has any meaning.
	if stt != spanMono && tagState[spanMono] {
		return originalForm
	}

	wasOpen := tagState[stt]
	tagState[stt] = !wasOpen

	format := "<%s>"
	if wasOpen {
		format = "</%s>"
	}
	return fmt.Sprintf(format, tagName)
}
2021-01-03 21:10:33 +00:00
func getLinkNode ( input * bytes . Buffer , hyphaName string , isBracketedLink bool ) string {
if isBracketedLink {
input . Next ( 2 ) // drop those [[
}
2020-11-04 17:42:02 +00:00
var (
escaping = false
addrBuf = bytes . Buffer { }
displayBuf = bytes . Buffer { }
currBuf = & addrBuf
)
for input . Len ( ) != 0 {
b , _ := input . ReadByte ( )
if escaping {
currBuf . WriteByte ( b )
escaping = false
2021-01-03 21:10:33 +00:00
} else if isBracketedLink && b == '|' && currBuf == & addrBuf {
2020-11-04 17:42:02 +00:00
currBuf = & displayBuf
2021-01-03 21:10:33 +00:00
} else if isBracketedLink && b == ']' && bytes . HasPrefix ( input . Bytes ( ) , [ ] byte { ']' } ) {
2020-11-04 17:42:02 +00:00
input . Next ( 1 )
break
2021-01-03 21:10:33 +00:00
} else if ! isBracketedLink && unicode . IsSpace ( rune ( b ) ) {
break
2020-11-04 17:42:02 +00:00
} else {
currBuf . WriteByte ( b )
}
}
2021-01-24 08:18:59 +00:00
href , text , class := LinkParts ( addrBuf . String ( ) , displayBuf . String ( ) , hyphaName )
return fmt . Sprintf ( ` <a href="%s" class="%s">%s</a> ` , href , class , html . EscapeString ( text ) )
2020-11-04 17:42:02 +00:00
}
// getTextNode consumes a run of plain text from the front of input and returns
// it; whatever follows the run is left in input for the caller.
//
// The run ends:
//   - before the first unescaped byte that may start a span or a link (one of
//     "/*`^,![~"), unless that byte is the very first one read;
//   - right before a URL scheme ("https://", "http://", "gemini://",
//     "gopher://" or "ftp://"), so that the caller can turn it into a link;
//   - at the end of input.
//
// A backslash makes the following byte literal and is itself dropped. A
// backslash that is the last byte of input has nothing to escape and is kept.
//
// At least one byte is consumed whenever input is not empty, which guarantees
// that a caller looping until input is exhausted always makes progress.
func getTextNode(input *bytes.Buffer) string {
	const spanStartBytes = "/*`^,![~"
	var (
		textNodeBuffer bytes.Buffer
		escaping       bool
	)
	startsWith := func(t string) bool {
		return bytes.HasPrefix(input.Bytes(), []byte(t))
	}
	couldBeLinkStart := func() bool {
		return startsWith("https://") || startsWith("http://") || startsWith("gemini://") || startsWith("gopher://") || startsWith("ftp://")
	}

	for first := true; input.Len() != 0; first = false {
		// ReadByte cannot fail here: the buffer is known to be non-empty.
		b, _ := input.ReadByte()
		switch {
		case escaping:
			textNodeBuffer.WriteByte(b)
			escaping = false
		case b == '\\':
			escaping = true
		case !first && strings.IndexByte(spanStartBytes, b) >= 0:
			// Give the byte back so the caller sees the marker. UnreadByte
			// cannot fail right after a successful ReadByte. The first byte is
			// never given back: the caller has already decided that it does
			// not start a span, so returning it would loop forever.
			_ = input.UnreadByte()
			return textNodeBuffer.String()
		default:
			textNodeBuffer.WriteByte(b)
			if couldBeLinkStart() {
				return textNodeBuffer.String()
			}
		}
	}
	if escaping {
		textNodeBuffer.WriteByte('\\')
	}
	return textNodeBuffer.String()
}
2020-11-04 17:42:02 +00:00
func ParagraphToHtml ( hyphaName , input string ) string {
2020-10-30 13:25:48 +00:00
var (
p = bytes . NewBufferString ( input )
ret strings . Builder
// true = tag is opened, false = tag is not opened
tagState = map [ spanTokenType ] bool {
spanItalic : false ,
spanBold : false ,
spanMono : false ,
spanSuper : false ,
spanSub : false ,
spanMark : false ,
2020-11-04 17:42:02 +00:00
spanLink : false ,
2020-10-30 13:25:48 +00:00
}
startsWith = func ( t string ) bool {
return bytes . HasPrefix ( p . Bytes ( ) , [ ] byte ( t ) )
}
2021-01-09 20:49:48 +00:00
noTagsActive = func ( ) bool {
return ! ( tagState [ spanItalic ] || tagState [ spanBold ] || tagState [ spanMono ] || tagState [ spanSuper ] || tagState [ spanSub ] || tagState [ spanMark ] || tagState [ spanLink ] )
}
2020-10-30 13:25:48 +00:00
)
for p . Len ( ) != 0 {
switch {
case startsWith ( "//" ) :
ret . WriteString ( tagFromState ( spanItalic , tagState , "em" , "//" ) )
p . Next ( 2 )
case startsWith ( "**" ) :
ret . WriteString ( tagFromState ( spanBold , tagState , "strong" , "**" ) )
p . Next ( 2 )
case startsWith ( "`" ) :
ret . WriteString ( tagFromState ( spanMono , tagState , "code" , "`" ) )
p . Next ( 1 )
case startsWith ( "^" ) :
ret . WriteString ( tagFromState ( spanSuper , tagState , "sup" , "^" ) )
p . Next ( 1 )
case startsWith ( ",," ) :
ret . WriteString ( tagFromState ( spanSub , tagState , "sub" , ",," ) )
p . Next ( 2 )
case startsWith ( "!!" ) :
ret . WriteString ( tagFromState ( spanMark , tagState , "mark" , "!!" ) )
p . Next ( 2 )
2020-11-11 17:42:31 +00:00
case startsWith ( "~~" ) :
ret . WriteString ( tagFromState ( spanMark , tagState , "s" , "~~" ) )
p . Next ( 2 )
2020-11-04 17:42:02 +00:00
case startsWith ( "[[" ) :
2021-01-03 21:10:33 +00:00
ret . WriteString ( getLinkNode ( p , hyphaName , true ) )
2021-01-09 20:49:48 +00:00
case ( startsWith ( "https://" ) || startsWith ( "http://" ) || startsWith ( "gemini://" ) || startsWith ( "gopher://" ) || startsWith ( "ftp://" ) ) && noTagsActive ( ) :
2021-01-03 21:10:33 +00:00
ret . WriteString ( getLinkNode ( p , hyphaName , false ) )
2020-10-30 13:25:48 +00:00
default :
ret . WriteString ( html . EscapeString ( getTextNode ( p ) ) )
}
}
2020-10-30 13:34:10 +00:00
for stt , open := range tagState {
if open {
switch stt {
case spanItalic :
ret . WriteString ( tagFromState ( spanItalic , tagState , "em" , "//" ) )
case spanBold :
ret . WriteString ( tagFromState ( spanBold , tagState , "strong" , "**" ) )
case spanMono :
ret . WriteString ( tagFromState ( spanMono , tagState , "code" , "`" ) )
case spanSuper :
ret . WriteString ( tagFromState ( spanSuper , tagState , "sup" , "^" ) )
case spanSub :
ret . WriteString ( tagFromState ( spanSub , tagState , "sub" , ",," ) )
case spanMark :
ret . WriteString ( tagFromState ( spanMark , tagState , "mark" , "!!" ) )
2020-11-11 17:42:31 +00:00
case spanStrike :
ret . WriteString ( tagFromState ( spanMark , tagState , "s" , "~~" ) )
2020-11-04 17:42:02 +00:00
case spanLink :
ret . WriteString ( tagFromState ( spanLink , tagState , "a" , "[[" ) )
2020-10-30 13:34:10 +00:00
}
}
}
2020-10-30 13:25:48 +00:00
return ret . String ( )
}