`)
w.WriteString(spoiler)
w.WriteString("")
}
w.WriteByte('<')
w.WriteString("span")
if isSpoiler {
writeAttribute(w, "class", "hicli-spoiler")
}
var style string
if bgColor != "" {
style += fmt.Sprintf("background-color: %s;", bgColor)
}
if textColor != "" {
style += fmt.Sprintf("color: %s;", textColor)
}
if style != "" {
writeAttribute(w, "style", style)
}
}
// tagStack is a stack of HTML tag atoms. The last element of the slice is the
// top of the stack: push appends to the end and pop removes from the end.
type tagStack []atom.Atom
// contains reports whether at least one entry currently on the stack is equal
// to any of the given tags. It returns false when the stack is empty or when
// no tags are passed.
func (ts *tagStack) contains(tags ...atom.Atom) bool {
	for _, open := range *ts {
		if slices.Contains(tags, open) {
			return true
		}
	}
	return false
}
// push places tag on top of the stack.
func (ts *tagStack) push(tag atom.Atom) {
	stack := *ts
	*ts = append(stack, tag)
}
// pop removes the top entry of the stack, but only if that entry is tag.
// It returns true when an entry was removed, and false (leaving the stack
// untouched) when the stack is empty or its top entry is a different tag.
func (ts *tagStack) pop(tag atom.Atom) bool {
	stack := *ts
	top := len(stack) - 1
	if top < 0 || stack[top] != tag {
		return false
	}
	*ts = stack[:top]
	return true
}
// getCodeBlockLanguage scans the attributes of token for a "class" attribute
// whose value matches languageRegex and returns the regex's capture group.
// Class attributes that do not produce exactly one capture group are skipped;
// if none qualifies, the empty string is returned.
func getCodeBlockLanguage(token html.Token) string {
	for i := range token.Attr {
		attr := &token.Attr[i]
		if attr.Key != "class" {
			continue
		}
		// A result of length 2 is the full match plus the single capture group.
		if groups := languageRegex.FindStringSubmatch(attr.Val); len(groups) == 2 {
			return groups[1]
		}
	}
	return ""
}
// builderPreallocBuffer is the number of extra bytes, on top of the input
// length, that sanitizeAndLinkifyHTML reserves in its output builder up front.
const builderPreallocBuffer = 100
// sanitizeAndLinkifyHTML tokenizes body as HTML and rebuilds it into a new
// string, token by token.
//
// Behavior visible in this function:
//   - Start and end tags that fail tagIsAllowed are skipped.
//   - While inside a pre element, text is collected into a separate buffer and
//     every nested tag is skipped; a nested code start tag is only inspected
//     for its language. The matching pre end tag hands the buffer to
//     writeCodeBlock.
//   - Text that has a pre, code or a tag on the open-tag stack is written
//     escaped; all other text goes through linkifyAndWriteBytes.
//   - Doctype and comment tokens are ignored.
//   - After the input ends, the tags still on the stack are written out in
//     reverse (innermost first) order.
//
// It returns the rebuilt string, the non-empty content URIs reported by
// writeA and writeImg, and a non-nil error only when the tokenizer fails with
// something other than io.EOF (in which case the other results are zero).
//
// NOTE(review): several string literals in this function look truncated (empty
// WriteString arguments, and an unterminated raw string in the div case that
// appears to swallow the remaining start-tag cases) — confirm against the
// canonical copy of this file before relying on the details above.
func sanitizeAndLinkifyHTML(body string) (string, []id.ContentURI, error) {
tz := html.NewTokenizer(strings.NewReader(body))
var built strings.Builder
built.Grow(len(body) + builderPreallocBuffer)
// Non-nil only while inside a pre element; collects the raw code block text.
var codeBlock *strings.Builder
var codeBlockLanguage string
var inlineImages []id.ContentURI
ts := make(tagStack, 0, 2)
Loop:
for {
switch tz.Next() {
case html.ErrorToken:
err := tz.Err()
// EOF is the normal end of input; any other tokenizer error aborts.
if errors.Is(err, io.EOF) {
break Loop
}
return "", nil, err
case html.StartTagToken, html.SelfClosingTagToken:
token := tz.Token()
if codeBlock != nil {
if token.DataAtom == atom.Code {
codeBlockLanguage = getCodeBlockLanguage(token)
}
// Don't allow any tags inside code blocks
continue
}
if !tagIsAllowed(token.DataAtom) {
continue
}
switch token.DataAtom {
case atom.Pre:
// Start buffering; output is produced when the matching end tag arrives.
codeBlock = &strings.Builder{}
continue
case atom.A:
mxc := writeA(&built, token.Attr)
if !mxc.IsEmpty() {
inlineImages = append(inlineImages, mxc)
}
case atom.Img:
mxc := writeImg(&built, token.Attr)
if !mxc.IsEmpty() {
inlineImages = append(inlineImages, mxc)
}
case atom.Div:
math, ok := getAttribute(token.Attr, "data-mx-maths")
if ok {
built.WriteString(`')
if !isSelfClosing(token.DataAtom) && token.Type != html.SelfClosingTagToken {
ts.push(token.DataAtom)
}
case html.EndTagToken:
tagName, _ := tz.TagName()
tag := atom.Lookup(tagName)
if !tagIsAllowed(tag) {
continue
}
if tag == atom.Pre && codeBlock != nil {
writeCodeBlock(&built, codeBlockLanguage, codeBlock)
codeBlockLanguage = ""
codeBlock = nil
} else if ts.pop(tag) {
// TODO instead of only popping when the last tag in the stack matches, this should go through the stack
// and close all tags until it finds the matching tag
if tag == atom.Font {
built.WriteString("")
} else {
built.WriteString("")
built.Write(tagName)
built.WriteByte('>')
}
} else if (tag == atom.Span || tag == atom.Div) && ts.pop(atom.Math) {
built.WriteString("")
}
case html.TextToken:
if codeBlock != nil {
codeBlock.Write(tz.Text())
} else if ts.contains(atom.Pre, atom.Code, atom.A) {
writeEscapedBytes(&built, tz.Text())
} else {
linkifyAndWriteBytes(&built, tz.Text())
}
case html.DoctypeToken, html.CommentToken:
// ignore
}
}
slices.Reverse(ts)
for _, t := range ts {
built.WriteString("")
built.WriteString(t.String())
built.WriteByte('>')
}
return built.String(), inlineImages, nil
}
// CodeBlockFormatter is the chroma HTML formatter used by writeCodeBlock to
// render highlighted code. It is configured to emit CSS classes and to
// include line numbers.
var CodeBlockFormatter = chromahtml.New(
chromahtml.WithClasses(true),
chromahtml.WithLineNumbers(true),
)
// lineRewriter wraps a strings.Builder and rewrites data written to it
// (see its Write method) before appending the result to the builder.
type lineRewriter struct {
// w is the destination that receives the rewritten output.
w *strings.Builder
}
// lineNumberRewriter and lineNumberReplacement are the pattern and replacement
// that lineRewriter.Write applies to every chunk it receives.
//
// NOTE(review): as written, the pattern matches any run of optional whitespace
// followed by digits and the replacement is empty; the names suggest these are
// meant to target line-number markup — confirm both literals are intact.
var lineNumberRewriter = regexp.MustCompile(`(\s*\d+)`)
var lineNumberReplacement = []byte(``)
// Write runs p through lineNumberRewriter, substituting lineNumberReplacement
// for every match, and appends the result to the wrapped builder.
//
// The reported byte count is always len(p) — the size of the input, not of
// the rewritten output — and the returned error is always nil.
func (lr *lineRewriter) Write(p []byte) (n int, err error) {
	rewritten := lineNumberRewriter.ReplaceAll(p, lineNumberReplacement)
	lr.w.Write(rewritten)
	return len(p), nil
}
// writeCodeBlock renders the text collected in block into w as a code block.
//
// It looks up a chroma lexer for language, substituting lexers.Fallback when
// the lookup returns nil, and tokenises the block contents. If tokenising
// fails, the escaped raw text is written instead of highlighted output.
// Otherwise the tokens are formatted with CodeBlockFormatter and
// styles.Fallback, with the output passed through a lineRewriter on its way
// into w. A formatting error causes a panic.
//
// NOTE(review): the string literals in the tokenise-error branch look
// truncated — confirm against the canonical copy of this file.
func writeCodeBlock(w *strings.Builder, language string, block *strings.Builder) {
lexer := lexers.Get(language)
if lexer == nil {
lexer = lexers.Fallback
}
lexer = chroma.Coalesce(lexer)
iter, err := lexer.Tokenise(nil, block.String())
if err != nil {
w.WriteString("')
writeEscapedString(w, block.String())
w.WriteString("
")
return
}
err = CodeBlockFormatter.Format(&lineRewriter{w}, styles.Fallback, iter)
if err != nil {
// This should never fail
panic(err)
}
}