1
0
mirror of https://github.com/janet-lang/janet synced 2025-01-23 13:46:52 +00:00

Support dedenting longstrings with Windows EOLs

This commit is contained in:
Michael Camilleri 2024-12-17 05:14:59 +09:00
parent 4daecc9a41
commit 67e8518ba6
No known key found for this signature in database
GPG Key ID: 7EB218A48DF8B572
2 changed files with 43 additions and 22 deletions

View File

@ -363,8 +363,7 @@ static int stringend(JanetParser *p, JanetParseState *state) {
JanetParseState top = p->states[p->statecount - 1];
int32_t indent_col = (int32_t) top.column - 1;
uint8_t *r = bufstart, *end = r + buflen;
/* Check if there are any characters before the start column -
* if so, do not reindent. */
/* Unless there are only spaces before EOLs, disable reindenting */
int reindent = 1;
while (reindent && (r < end)) {
if (*r++ == '\n') {
@ -374,34 +373,36 @@ static int stringend(JanetParser *p, JanetParseState *state) {
break;
}
}
if ((r + 1) < end && *r == '\r' && *(r + 1) == '\n') reindent = 1;
}
}
/* Now reindent if able to, otherwise just drop leading newline. */
if (!reindent) {
if (buflen > 0 && bufstart[0] == '\n') {
buflen--;
bufstart++;
}
} else {
/* Now reindent if able */
if (reindent) {
uint8_t *w = bufstart;
r = bufstart;
while (r < end) {
if (*r == '\n') {
if (r == bufstart) {
/* Skip leading newline */
r++;
} else {
*w++ = *r++;
}
*w++ = *r++;
for (int32_t j = 0; (r < end) && (*r != '\n') && (j < indent_col); j++, r++);
if ((r + 1) < end && *r == '\r' && *(r + 1) == '\n') *w++ = *r++;
} else {
*w++ = *r++;
}
}
buflen = (int32_t)(w - bufstart);
}
/* Check for trailing newline character so we can remove it */
if (buflen > 0 && bufstart[buflen - 1] == '\n') {
/* Check for leading EOL so we can remove it */
if (buflen > 1 && bufstart[0] == '\r' && bufstart[1] == '\n') { /* Windows EOL */
buflen = buflen - 2;
bufstart = bufstart + 2;
} else if (buflen > 0 && bufstart[0] == '\n') { /* Unix EOL */
buflen--;
bufstart++;
}
/* Check for trailing EOL so we can remove it */
if (buflen > 1 && bufstart[buflen - 2] == '\r' && bufstart[buflen - 1] == '\n') { /* Windows EOL */
buflen = buflen - 2;
} else if (buflen > 0 && bufstart[buflen - 1] == '\n') { /* Unix EOL */
buflen--;
}
}

View File

@ -57,6 +57,8 @@
(for i (+ index 1) (+ index indent 1)
(case (get text i)
nil (break)
(chr "\r") (if-not (= (chr "\n") (get text (inc i)))
(set rewrite false))
(chr "\n") (break)
(chr " ") nil
(set rewrite false))))
@ -64,12 +66,17 @@
# Only re-indent if no dedented characters.
(def str
(if rewrite
(peg/replace-all ~(* "\n" (between 0 ,indent " ")) "\n" text)
(peg/replace-all ~(* '(* (? "\r") "\n") (between 0 ,indent " "))
(fn [mtch eol] eol) text)
text))
(def first-nl (= (chr "\n") (first str)))
(def last-nl (= (chr "\n") (last str)))
(string/slice str (if first-nl 1 0) (if last-nl -2)))
(def first-eol (cond
(string/has-prefix? "\r\n" str) :crlf
(string/has-prefix? "\n" str) :lf))
(def last-eol (cond
(string/has-suffix? "\r\n" str) :crlf
(string/has-suffix? "\n" str) :lf))
(string/slice str (case first-eol :crlf 2 :lf 1 0) (case last-eol :crlf -3 :lf -2)))
(defn reindent-reference
"Same as reindent but use parser functionality. Useful for
@ -89,8 +96,10 @@
(let [a (reindent text indent)
b (reindent-reference text indent)]
(assert (= a b)
(string "indent " indent-counter " (indent=" indent ")"))))
(string/format "reindent: %q, parse: %q (indent-test #%d with indent of %d)" a b indent-counter indent)
)))
# Unix EOLs
(check-indent "" 0)
(check-indent "\n" 0)
(check-indent "\n" 1)
@ -106,6 +115,17 @@
(check-indent "\n Hello, world!\n " 4)
(check-indent "\n Hello, world!\n dedented text\n " 4)
(check-indent "\n Hello, world!\n indented text\n " 4)
# Windows EOLs
(check-indent "\r\n" 0)
(check-indent "\r\n" 1)
(check-indent "\r\n\r\n" 0)
(check-indent "\r\n\r\n" 1)
(check-indent "\r\nHello, world!" 0)
(check-indent "\r\nHello, world!" 1)
(check-indent "\r\n Hello, world!\r\n " 4)
(check-indent "\r\n Hello, world!\r\n " 4)
(check-indent "\r\n Hello, world!\r\n dedented text\r\n " 4)
(check-indent "\r\n Hello, world!\r\n indented text\r\n " 4)
# Symbols with @ character
# d68eae9