1
0
mirror of https://github.com/janet-lang/janet synced 2025-01-23 13:46:52 +00:00

Support dedenting longstrings with Windows EOLs

This commit is contained in:
Michael Camilleri 2024-12-17 05:14:59 +09:00
parent 4daecc9a41
commit 67e8518ba6
No known key found for this signature in database
GPG Key ID: 7EB218A48DF8B572
2 changed files with 43 additions and 22 deletions

View File

@ -363,8 +363,7 @@ static int stringend(JanetParser *p, JanetParseState *state) {
JanetParseState top = p->states[p->statecount - 1]; JanetParseState top = p->states[p->statecount - 1];
int32_t indent_col = (int32_t) top.column - 1; int32_t indent_col = (int32_t) top.column - 1;
uint8_t *r = bufstart, *end = r + buflen; uint8_t *r = bufstart, *end = r + buflen;
/* Check if there are any characters before the start column - /* Unless there are only spaces before EOLs, disable reindenting */
* if so, do not reindent. */
int reindent = 1; int reindent = 1;
while (reindent && (r < end)) { while (reindent && (r < end)) {
if (*r++ == '\n') { if (*r++ == '\n') {
@ -374,34 +373,36 @@ static int stringend(JanetParser *p, JanetParseState *state) {
break; break;
} }
} }
if ((r + 1) < end && *r == '\r' && *(r + 1) == '\n') reindent = 1;
} }
} }
/* Now reindent if able to, otherwise just drop leading newline. */ /* Now reindent if able */
if (!reindent) { if (reindent) {
if (buflen > 0 && bufstart[0] == '\n') {
buflen--;
bufstart++;
}
} else {
uint8_t *w = bufstart; uint8_t *w = bufstart;
r = bufstart; r = bufstart;
while (r < end) { while (r < end) {
if (*r == '\n') { if (*r == '\n') {
if (r == bufstart) { *w++ = *r++;
/* Skip leading newline */
r++;
} else {
*w++ = *r++;
}
for (int32_t j = 0; (r < end) && (*r != '\n') && (j < indent_col); j++, r++); for (int32_t j = 0; (r < end) && (*r != '\n') && (j < indent_col); j++, r++);
if ((r + 1) < end && *r == '\r' && *(r + 1) == '\n') *w++ = *r++;
} else { } else {
*w++ = *r++; *w++ = *r++;
} }
} }
buflen = (int32_t)(w - bufstart); buflen = (int32_t)(w - bufstart);
} }
/* Check for trailing newline character so we can remove it */ /* Check for leading EOL so we can remove it */
if (buflen > 0 && bufstart[buflen - 1] == '\n') { if (buflen > 1 && bufstart[0] == '\r' && bufstart[1] == '\n') { /* Windows EOL */
buflen = buflen - 2;
bufstart = bufstart + 2;
} else if (buflen > 0 && bufstart[0] == '\n') { /* Unix EOL */
buflen--;
bufstart++;
}
/* Check for trailing EOL so we can remove it */
if (buflen > 1 && bufstart[buflen - 2] == '\r' && bufstart[buflen - 1] == '\n') { /* Windows EOL */
buflen = buflen - 2;
} else if (buflen > 0 && bufstart[buflen - 1] == '\n') { /* Unix EOL */
buflen--; buflen--;
} }
} }

View File

@ -57,6 +57,8 @@
(for i (+ index 1) (+ index indent 1) (for i (+ index 1) (+ index indent 1)
(case (get text i) (case (get text i)
nil (break) nil (break)
(chr "\r") (if-not (= (chr "\n") (get text (inc i)))
(set rewrite false))
(chr "\n") (break) (chr "\n") (break)
(chr " ") nil (chr " ") nil
(set rewrite false)))) (set rewrite false))))
@ -64,12 +66,17 @@
# Only re-indent if no dedented characters. # Only re-indent if no dedented characters.
(def str (def str
(if rewrite (if rewrite
(peg/replace-all ~(* "\n" (between 0 ,indent " ")) "\n" text) (peg/replace-all ~(* '(* (? "\r") "\n") (between 0 ,indent " "))
(fn [mtch eol] eol) text)
text)) text))
(def first-nl (= (chr "\n") (first str))) (def first-eol (cond
(def last-nl (= (chr "\n") (last str))) (string/has-prefix? "\r\n" str) :crlf
(string/slice str (if first-nl 1 0) (if last-nl -2))) (string/has-prefix? "\n" str) :lf))
(def last-eol (cond
(string/has-suffix? "\r\n" str) :crlf
(string/has-suffix? "\n" str) :lf))
(string/slice str (case first-eol :crlf 2 :lf 1 0) (case last-eol :crlf -3 :lf -2)))
(defn reindent-reference (defn reindent-reference
"Same as reindent but use parser functionality. Useful for "Same as reindent but use parser functionality. Useful for
@ -89,8 +96,10 @@
(let [a (reindent text indent) (let [a (reindent text indent)
b (reindent-reference text indent)] b (reindent-reference text indent)]
(assert (= a b) (assert (= a b)
(string "indent " indent-counter " (indent=" indent ")")))) (string/format "reindent: %q, parse: %q (indent-test #%d with indent of %d)" a b indent-counter indent)
)))
# Unix EOLs
(check-indent "" 0) (check-indent "" 0)
(check-indent "\n" 0) (check-indent "\n" 0)
(check-indent "\n" 1) (check-indent "\n" 1)
@ -106,6 +115,17 @@
(check-indent "\n Hello, world!\n " 4) (check-indent "\n Hello, world!\n " 4)
(check-indent "\n Hello, world!\n dedented text\n " 4) (check-indent "\n Hello, world!\n dedented text\n " 4)
(check-indent "\n Hello, world!\n indented text\n " 4) (check-indent "\n Hello, world!\n indented text\n " 4)
# Windows EOLs
(check-indent "\r\n" 0)
(check-indent "\r\n" 1)
(check-indent "\r\n\r\n" 0)
(check-indent "\r\n\r\n" 1)
(check-indent "\r\nHello, world!" 0)
(check-indent "\r\nHello, world!" 1)
(check-indent "\r\n Hello, world!\r\n " 4)
(check-indent "\r\n Hello, world!\r\n " 4)
(check-indent "\r\n Hello, world!\r\n dedented text\r\n " 4)
(check-indent "\r\n Hello, world!\r\n indented text\r\n " 4)
# Symbols with @ character # Symbols with @ character
# d68eae9 # d68eae9