mirror of
https://github.com/janet-lang/janet
synced 2024-11-29 11:29:54 +00:00
2ea28f29b0
Not particularly useful actually, by and large false positives.
554 lines
18 KiB
C
554 lines
18 KiB
C
/*
|
|
* Copyright (c) 2019 Calvin Rose
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to
|
|
* deal in the Software without restriction, including without limitation the
|
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
* sell copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <string.h>
|
|
|
|
#ifndef JANET_AMALG
|
|
#include <janet.h>
|
|
#include "gc.h"
|
|
#include "util.h"
|
|
#include "state.h"
|
|
#endif
|
|
|
|
/* Begin building a string */
|
|
uint8_t *janet_string_begin(int32_t length) {
|
|
JanetStringHead *head = janet_gcalloc(JANET_MEMORY_STRING, sizeof(JanetStringHead) + length + 1);
|
|
head->length = length;
|
|
uint8_t *data = (uint8_t *)head->data;
|
|
data[length] = 0;
|
|
return data;
|
|
}
|
|
|
|
/* Finish building a string */
|
|
const uint8_t *janet_string_end(uint8_t *str) {
|
|
janet_string_hash(str) = janet_string_calchash(str, janet_string_length(str));
|
|
return str;
|
|
}
|
|
|
|
/* Load a buffer as a string */
|
|
const uint8_t *janet_string(const uint8_t *buf, int32_t len) {
|
|
JanetStringHead *head = janet_gcalloc(JANET_MEMORY_STRING, sizeof(JanetStringHead) + len + 1);
|
|
head->length = len;
|
|
head->hash = janet_string_calchash(buf, len);
|
|
uint8_t *data = (uint8_t *)head->data;
|
|
memcpy(data, buf, len);
|
|
data[len] = 0;
|
|
return data;
|
|
}
|
|
|
|
/* Three-way comparison of two janet strings. The common prefix is compared
 * bytewise with memcmp; if it is identical, the shorter string orders
 * first. Returns a negative value, 0, or a positive value. */
int janet_string_compare(const uint8_t *lhs, const uint8_t *rhs) {
    int32_t llen = janet_string_length(lhs);
    int32_t rlen = janet_string_length(rhs);
    int32_t common = llen < rlen ? llen : rlen;
    int order = memcmp(lhs, rhs, common);
    if (order != 0) {
        return order;
    }
    if (llen < rlen) {
        return -1;
    }
    if (llen > rlen) {
        return 1;
    }
    return 0;
}
|
|
|
|
/* Test a janet string `lhs` for equality against a raw memory region `rhs`
 * of `rlen` bytes whose hash `rhash` the caller supplies. Returns 1 when
 * equal and 0 otherwise. Pointer identity short-circuits to equal; a hash
 * or length mismatch short-circuits to unequal before any bytes are
 * compared. */
int janet_string_equalconst(const uint8_t *lhs, const uint8_t *rhs, int32_t rlen, int32_t rhash) {
    int32_t llen = janet_string_length(lhs);
    int32_t lhash = janet_string_hash(lhs);
    if (lhs == rhs) {
        return 1;
    }
    if (llen == rlen && lhash == rhash) {
        return memcmp(lhs, rhs, rlen) == 0;
    }
    return 0;
}
|
|
|
|
/* Test two janet strings for equality by delegating to
 * janet_string_equalconst with the length and hash read from `rhs`. */
int janet_string_equal(const uint8_t *lhs, const uint8_t *rhs) {
    int32_t rlen = janet_string_length(rhs);
    int32_t rhash = janet_string_hash(rhs);
    return janet_string_equalconst(lhs, rhs, rlen, rhash);
}
|
|
|
|
/* Create a janet string from a NUL-terminated C string. The length is
 * taken with strlen and narrowed to int32_t. */
const uint8_t *janet_cstring(const char *str) {
    int32_t len = (int32_t) strlen(str);
    return janet_string((const uint8_t *) str, len);
}
|
|
|
|
/* Knuth Morris Pratt Algorithm */
|
|
|
|
/* State of an in-progress Knuth-Morris-Pratt search. Set up by kmp_init,
 * advanced by kmp_next, released by kmp_deinit. */
struct kmp_state {
    int32_t i, j;               /* current index into text / into pat */
    int32_t textlen, patlen;    /* byte lengths of text / of pat */
    int32_t *lookup;            /* table of patlen entries built by kmp_init; freed by kmp_deinit */
    const uint8_t *text, *pat;  /* searched bytes / pattern bytes (not copied by kmp_init) */
};
|
|
|
|
static void kmp_init(
|
|
struct kmp_state *s,
|
|
const uint8_t *text, int32_t textlen,
|
|
const uint8_t *pat, int32_t patlen) {
|
|
int32_t *lookup = calloc(patlen, sizeof(int32_t));
|
|
if (!lookup) {
|
|
JANET_OUT_OF_MEMORY;
|
|
}
|
|
s->lookup = lookup;
|
|
s->i = 0;
|
|
s->j = 0;
|
|
s->text = text;
|
|
s->pat = pat;
|
|
s->textlen = textlen;
|
|
s->patlen = patlen;
|
|
/* Init state machine */
|
|
{
|
|
int32_t i, j;
|
|
for (i = 1, j = 0; i < patlen; i++) {
|
|
while (j && pat[j] != pat[i]) j = lookup[j - 1];
|
|
if (pat[j] == pat[i]) j++;
|
|
lookup[i] = j;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void kmp_deinit(struct kmp_state *state) {
|
|
free(state->lookup);
|
|
}
|
|
|
|
static void kmp_seti(struct kmp_state *state, int32_t i) {
|
|
state->i = i;
|
|
state->j = 0;
|
|
}
|
|
|
|
static int32_t kmp_next(struct kmp_state *state) {
|
|
int32_t i = state->i;
|
|
int32_t j = state->j;
|
|
int32_t textlen = state->textlen;
|
|
int32_t patlen = state->patlen;
|
|
const uint8_t *text = state->text;
|
|
const uint8_t *pat = state->pat;
|
|
int32_t *lookup = state->lookup;
|
|
while (i < textlen) {
|
|
if (text[i] == pat[j]) {
|
|
if (j == patlen - 1) {
|
|
state->i = i + 1;
|
|
state->j = lookup[j];
|
|
return i - j;
|
|
} else {
|
|
i++;
|
|
j++;
|
|
}
|
|
} else {
|
|
if (j > 0) {
|
|
j = lookup[j - 1];
|
|
} else {
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* CFuns */
|
|
|
|
/* (string/slice bytes [start [end]]) - return a new string holding the
 * bytes of argument 0 in the range resolved by janet_getslice. */
static Janet cfun_string_slice(int32_t argc, Janet *argv) {
    JanetRange range = janet_getslice(argc, argv);
    JanetByteView view = janet_getbytes(argv, 0);
    const uint8_t *first = view.bytes + range.start;
    int32_t count = range.end - range.start;
    return janet_stringv(first, count);
}
|
|
|
|
/* (string/repeat bytes n) - return a string made of n copies of bytes.
 * Panics when n is negative or when the result would exceed INT32_MAX
 * bytes. n == 0 yields the empty string. */
static Janet cfun_string_repeat(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 2);
    JanetByteView view = janet_getbytes(argv, 0);
    int32_t rep = janet_getinteger(argv, 1);
    if (rep < 0) {
        janet_panic("expected non-negative number of repetitions");
    }
    if (rep == 0) {
        return janet_cstringv("");
    }
    /* Compute the total in 64 bits so the overflow check itself cannot
     * overflow. */
    int64_t total = (int64_t) rep * view.len;
    if (total > INT32_MAX) {
        janet_panic("result string is too long");
    }
    uint8_t *out = janet_string_begin((int32_t) total);
    uint8_t *stop = out + total;
    uint8_t *cursor = out;
    while (cursor < stop) {
        memcpy(cursor, view.bytes, view.len);
        cursor += view.len;
    }
    return janet_wrap_string(janet_string_end(out));
}
|
|
|
|
/* (string/bytes str) - return a tuple containing each byte of str as an
 * integer, in order. */
static Janet cfun_string_bytes(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    JanetByteView view = janet_getbytes(argv, 0);
    Janet *items = janet_tuple_begin(view.len);
    for (int32_t k = 0; k < view.len; k++) {
        uint8_t byte = view.bytes[k];
        items[k] = janet_wrap_integer((int32_t) byte);
    }
    return janet_wrap_tuple(janet_tuple_end(items));
}
|
|
|
|
/* (string/from-bytes & byte-vals) - build a string with one byte per
 * argument. Each argument must be an integer; only its low 8 bits are
 * kept. */
static Janet cfun_string_frombytes(int32_t argc, Janet *argv) {
    uint8_t *out = janet_string_begin(argc);
    for (int32_t k = 0; k < argc; k++) {
        int32_t value = janet_getinteger(argv, k);
        out[k] = value & 0xFF;
    }
    return janet_wrap_string(janet_string_end(out));
}
|
|
|
|
/* (string/ascii-lower str) - return a copy of str in which bytes 65..90
 * (ASCII 'A'..'Z') are shifted up by 32 to their lowercase form. All other
 * bytes are copied unchanged. */
static Janet cfun_string_asciilower(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    JanetByteView view = janet_getbytes(argv, 0);
    uint8_t *out = janet_string_begin(view.len);
    for (int32_t k = 0; k < view.len; k++) {
        uint8_t ch = view.bytes[k];
        int is_upper = ch >= 65 && ch <= 90;
        out[k] = is_upper ? (uint8_t)(ch + 32) : ch;
    }
    return janet_wrap_string(janet_string_end(out));
}
|
|
|
|
/* (string/ascii-upper str) - return a copy of str in which bytes 97..122
 * (ASCII 'a'..'z') are shifted down by 32 to their uppercase form. All
 * other bytes are copied unchanged. */
static Janet cfun_string_asciiupper(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    JanetByteView view = janet_getbytes(argv, 0);
    uint8_t *out = janet_string_begin(view.len);
    for (int32_t k = 0; k < view.len; k++) {
        uint8_t ch = view.bytes[k];
        int is_lower = ch >= 97 && ch <= 122;
        out[k] = is_lower ? (uint8_t)(ch - 32) : ch;
    }
    return janet_wrap_string(janet_string_end(out));
}
|
|
|
|
/* (string/reverse str) - return a new string with the bytes of str in
 * reverse order. */
static Janet cfun_string_reverse(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    JanetByteView view = janet_getbytes(argv, 0);
    uint8_t *out = janet_string_begin(view.len);
    for (int32_t k = 0; k < view.len; k++) {
        /* Output position k takes the byte k places from the end. */
        out[k] = view.bytes[view.len - 1 - k];
    }
    return janet_wrap_string(janet_string_end(out));
}
|
|
|
|
/* Shared argument handling for the search cfuns. Expects
 * (patt str [start ...]) with between 2 and 3 + extra arguments, reads the
 * pattern from argument 0 and the text from argument 1, and initialises
 * `s` with kmp_init. When a third argument is present it is the text index
 * at which the search begins; a negative value panics. The caller must
 * call kmp_deinit on `s` afterwards. */
static void findsetup(int32_t argc, Janet *argv, struct kmp_state *s, int32_t extra) {
    janet_arity(argc, 2, 3 + extra);
    JanetByteView pat = janet_getbytes(argv, 0);
    JanetByteView text = janet_getbytes(argv, 1);
    int32_t start = argc < 3 ? 0 : janet_getinteger(argv, 2);
    if (start < 0) {
        janet_panic("expected non-negative start index");
    }
    kmp_init(s, text.bytes, text.len, pat.bytes, pat.len);
    s->i = start;
}
|
|
|
|
/* (string/find patt str [start]) - return the index of the first
 * occurrence of patt in str, or nil when there is none. */
static Janet cfun_string_find(int32_t argc, Janet *argv) {
    struct kmp_state state;
    findsetup(argc, argv, &state, 0);
    int32_t found = kmp_next(&state);
    kmp_deinit(&state);
    if (found < 0) {
        return janet_wrap_nil();
    }
    return janet_wrap_integer(found);
}
|
|
|
|
/* (string/find-all patt str [start]) - return an array with the index of
 * every match that kmp_next reports, in increasing order. The array is
 * empty when there is no match. */
static Janet cfun_string_findall(int32_t argc, Janet *argv) {
    struct kmp_state state;
    findsetup(argc, argv, &state, 0);
    JanetArray *hits = janet_array(0);
    for (;;) {
        int32_t at = kmp_next(&state);
        if (at < 0) {
            break;
        }
        janet_array_push(hits, janet_wrap_integer(at));
    }
    kmp_deinit(&state);
    return janet_wrap_array(hits);
}
|
|
|
|
struct replace_state {
|
|
struct kmp_state kmp;
|
|
const uint8_t *subst;
|
|
int32_t substlen;
|
|
};
|
|
|
|
/* Shared argument handling for the replace cfuns. Expects
 * (patt subst str [start]): reads the pattern, replacement and text from
 * arguments 0, 1 and 2, initialises the search with kmp_init, and records
 * the replacement in `s`. An optional fourth argument is the text index at
 * which the search begins; a negative value panics. The caller must call
 * kmp_deinit on s->kmp afterwards. */
static void replacesetup(int32_t argc, Janet *argv, struct replace_state *s) {
    janet_arity(argc, 3, 4);
    JanetByteView pat = janet_getbytes(argv, 0);
    JanetByteView subst = janet_getbytes(argv, 1);
    JanetByteView text = janet_getbytes(argv, 2);
    int32_t start = argc == 4 ? janet_getinteger(argv, 3) : 0;
    if (start < 0) {
        janet_panic("expected non-negative start index");
    }
    kmp_init(&s->kmp, text.bytes, text.len, pat.bytes, pat.len);
    s->kmp.i = start;
    s->substlen = subst.len;
    s->subst = subst.bytes;
}
|
|
|
|
/* (string/replace patt subst str [start]) - return a string in which the
 * first occurrence of patt in str is replaced by subst. When patt does not
 * occur, a new string with the same bytes as str is returned. */
static Janet cfun_string_replace(int32_t argc, Janet *argv) {
    struct replace_state s;
    replacesetup(argc, argv, &s);
    int32_t at = kmp_next(&s.kmp);
    if (at < 0) {
        kmp_deinit(&s.kmp);
        return janet_stringv(s.kmp.text, s.kmp.textlen);
    }
    uint8_t *out = janet_string_begin(s.kmp.textlen - s.kmp.patlen + s.substlen);
    uint8_t *cursor = out;
    /* Text before the match. */
    memcpy(cursor, s.kmp.text, at);
    cursor += at;
    /* The replacement. */
    memcpy(cursor, s.subst, s.substlen);
    cursor += s.substlen;
    /* Text after the match. */
    memcpy(cursor,
           s.kmp.text + at + s.kmp.patlen,
           s.kmp.textlen - at - s.kmp.patlen);
    kmp_deinit(&s.kmp);
    return janet_wrap_string(janet_string_end(out));
}
|
|
|
|
/* (string/replace-all patt subst str [start]) - return a string in which
 * every non-overlapping occurrence of patt in str is replaced by subst.
 * The result is assembled in a temporary buffer and then copied into a
 * new string. */
static Janet cfun_string_replaceall(int32_t argc, Janet *argv) {
    struct replace_state s;
    JanetBuffer b;
    replacesetup(argc, argv, &s);
    janet_buffer_init(&b, s.kmp.textlen);
    /* Index of the first text byte not yet copied to the buffer. */
    int32_t copied = 0;
    for (int32_t at = kmp_next(&s.kmp); at >= 0; at = kmp_next(&s.kmp)) {
        janet_buffer_push_bytes(&b, s.kmp.text + copied, at - copied);
        janet_buffer_push_bytes(&b, s.subst, s.substlen);
        copied = at + s.kmp.patlen;
        /* Resume after the replaced match so matches never overlap. */
        kmp_seti(&s.kmp, copied);
    }
    /* Whatever follows the last match. */
    janet_buffer_push_bytes(&b, s.kmp.text + copied, s.kmp.textlen - copied);
    const uint8_t *ret = janet_string(b.data, b.count);
    janet_buffer_deinit(&b);
    kmp_deinit(&s.kmp);
    return janet_wrap_string(ret);
}
|
|
|
|
/* (string/split delim str [start [limit]]) - split str at occurrences of
 * delim and return an array of the pieces, without the delimiters. The
 * array always ends with the text after the last split, so it has at least
 * one element. The optional third argument is the index at which the
 * search for delimiters begins (handled by findsetup). The optional fourth
 * argument limits how many splits are made; without it every occurrence is
 * split on. */
static Janet cfun_string_split(int32_t argc, Janet *argv) {
    int32_t result;
    JanetArray *array;
    struct kmp_state state;
    int32_t limit = -1, lastindex = 0;
    if (argc == 4) {
        limit = janet_getinteger(argv, 3);
    }
    findsetup(argc, argv, &state, 1);
    array = janet_array(0);
    while ((result = kmp_next(&state)) >= 0 && limit--) {
        const uint8_t *slice = janet_string(state.text + lastindex, result - lastindex);
        janet_array_push(array, janet_wrap_string(slice));
        lastindex = result + state.patlen;
        /* Restart the search after the delimiter just consumed. Without
         * this, kmp_next also reports matches that overlap it (e.g. "aa"
         * in "aaa"), giving result < lastindex and a negative slice
         * length on the next iteration. */
        kmp_seti(&state, lastindex);
    }
    {
        const uint8_t *slice = janet_string(state.text + lastindex, state.textlen - lastindex);
        janet_array_push(array, janet_wrap_string(slice));
    }
    kmp_deinit(&state);
    return janet_wrap_array(array);
}
|
|
|
|
/* (string/check-set set str [invert]) - return true when every byte of str
 * is a member of the byte set given by `set`, false otherwise. When the
 * optional third argument is true the set is complemented first, so the
 * check becomes "no byte of str appears in set".
 *
 * Membership is kept in a 256-bit bitmap stored as eight 32-bit words:
 * the top 3 bits of a byte select the word, the low 5 bits select the bit
 * within it. */
static Janet cfun_string_checkset(int32_t argc, Janet *argv) {
    uint32_t bitset[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    janet_arity(argc, 2, 3);
    JanetByteView set = janet_getbytes(argv, 0);
    JanetByteView str = janet_getbytes(argv, 1);
    /* Populate set */
    for (int32_t i = 0; i < set.len; i++) {
        int index = set.bytes[i] >> 5;
        /* The mask must use the low 5 bits to pair with the >> 5 word
         * index; masking with 7 made distinct bytes share a bit. The
         * unsigned 1 keeps a shift by 31 well-defined. */
        uint32_t mask = (uint32_t) 1 << (set.bytes[i] & 0x1F);
        bitset[index] |= mask;
    }
    if (argc == 3 && janet_getboolean(argv, 2)) {
        /* Invert the set. */
        for (int i = 0; i < 8; i++) {
            bitset[i] = ~bitset[i];
        }
    }
    /* Check set */
    for (int32_t i = 0; i < str.len; i++) {
        int index = str.bytes[i] >> 5;
        uint32_t mask = (uint32_t) 1 << (str.bytes[i] & 0x1F);
        if (!(bitset[index] & mask)) {
            return janet_wrap_false();
        }
    }
    return janet_wrap_true();
}
|
|
|
|
/* (string/join parts [sep]) - concatenate the byte sequences in the
 * indexed collection `parts` into one string, placing `sep` between
 * consecutive items when it is given. Panics when an item is not a byte
 * sequence or when the result would exceed INT32_MAX bytes. */
static Janet cfun_string_join(int32_t argc, Janet *argv) {
    janet_arity(argc, 1, 2);
    JanetView parts = janet_getindexed(argv, 0);
    JanetByteView joiner;
    if (argc == 2) {
        joiner = janet_getbytes(argv, 1);
    } else {
        joiner.bytes = NULL;
        joiner.len = 0;
    }
    /* Check args and compute the final length. The running total is kept
     * in 64 bits and checked after every item so it cannot overflow. */
    int32_t i;
    int64_t finallen = 0;
    for (i = 0; i < parts.len; i++) {
        const uint8_t *chunk;
        int32_t chunklen = 0;
        if (!janet_bytes_view(parts.items[i], &chunk, &chunklen)) {
            janet_panicf("item %d of parts is not a byte sequence, got %v", i, parts.items[i]);
        }
        if (i) finallen += joiner.len;
        finallen += chunklen;
        if (finallen > INT32_MAX)
            janet_panic("result string too long");
    }
    uint8_t *buf, *out;
    out = buf = janet_string_begin((int32_t) finallen);
    for (i = 0; i < parts.len; i++) {
        const uint8_t *chunk = NULL;
        int32_t chunklen = 0;
        /* Skip zero-length copies: joiner.bytes is NULL when no separator
         * was given, and memcpy requires valid pointers even for a length
         * of 0. */
        if (i && joiner.len > 0) {
            memcpy(out, joiner.bytes, joiner.len);
            out += joiner.len;
        }
        /* Every item was validated in the first pass. */
        janet_bytes_view(parts.items[i], &chunk, &chunklen);
        if (chunklen > 0) {
            memcpy(out, chunk, chunklen);
            out += chunklen;
        }
    }
    return janet_wrap_string(janet_string_end(buf));
}
|
|
|
|
/* (string/format format & values) - format the arguments into a scratch
 * buffer with janet_buffer_format, using argument 0 as the format string,
 * and return the buffer contents as a new string. */
static Janet cfun_string_format(int32_t argc, Janet *argv) {
    janet_arity(argc, 1, -1);
    JanetBuffer *scratch = janet_buffer(0);
    const char *fmt = (const char *) janet_getstring(argv, 0);
    janet_buffer_format(scratch, fmt, 0, argc, argv);
    return janet_stringv(scratch->data, scratch->count);
}
|
|
|
|
static const JanetReg string_cfuns[] = {
|
|
{
|
|
"string/slice", cfun_string_slice,
|
|
JDOC("(string/slice bytes [,start=0 [,end=(length str)]])\n\n"
|
|
"Returns a substring from a byte sequence. The substring is from "
|
|
"index start inclusive to index end exclusive. All indexing "
|
|
"is from 0. 'start' and 'end' can also be negative to indicate indexing "
|
|
"from the end of the string.")
|
|
},
|
|
{
|
|
"string/repeat", cfun_string_repeat,
|
|
JDOC("(string/repeat bytes n)\n\n"
|
|
"Returns a string that is n copies of bytes concatenated.")
|
|
},
|
|
{
|
|
"string/bytes", cfun_string_bytes,
|
|
JDOC("(string/bytes str)\n\n"
|
|
"Returns an array of integers that are the byte values of the string.")
|
|
},
|
|
{
|
|
"string/from-bytes", cfun_string_frombytes,
|
|
JDOC("(string/from-bytes byte-array)\n\n"
|
|
"Creates a string from an array of integers with byte values. All integers "
|
|
"will be coerced to the range of 1 byte 0-255.")
|
|
},
|
|
{
|
|
"string/ascii-lower", cfun_string_asciilower,
|
|
JDOC("(string/ascii-lower str)\n\n"
|
|
"Returns a new string where all bytes are replaced with the "
|
|
"lowercase version of themselves in ASCII. Does only a very simple "
|
|
"case check, meaning no unicode support.")
|
|
},
|
|
{
|
|
"string/ascii-upper", cfun_string_asciiupper,
|
|
JDOC("(string/ascii-upper str)\n\n"
|
|
"Returns a new string where all bytes are replaced with the "
|
|
"uppercase version of themselves in ASCII. Does only a very simple "
|
|
"case check, meaning no unicode support.")
|
|
},
|
|
{
|
|
"string/reverse", cfun_string_reverse,
|
|
JDOC("(string/reverse str)\n\n"
|
|
"Returns a string that is the reversed version of str.")
|
|
},
|
|
{
|
|
"string/find", cfun_string_find,
|
|
JDOC("(string/find patt str)\n\n"
|
|
"Searches for the first instance of pattern patt in string "
|
|
"str. Returns the index of the first character in patt if found, "
|
|
"otherwise returns nil.")
|
|
},
|
|
{
|
|
"string/find-all", cfun_string_findall,
|
|
JDOC("(string/find patt str)\n\n"
|
|
"Searches for all instances of pattern patt in string "
|
|
"str. Returns an array of all indices of found patterns. Overlapping "
|
|
"instances of the pattern are not counted, meaning a byte in string "
|
|
"will only contribute to finding at most on occurrence of pattern. If no "
|
|
"occurrences are found, will return an empty array.")
|
|
},
|
|
{
|
|
"string/replace", cfun_string_replace,
|
|
JDOC("(string/replace patt subst str)\n\n"
|
|
"Replace the first occurrence of patt with subst in the string str. "
|
|
"Will return the new string if patt is found, otherwise returns str.")
|
|
},
|
|
{
|
|
"string/replace-all", cfun_string_replaceall,
|
|
JDOC("(string/replace-all patt subst str)\n\n"
|
|
"Replace all instances of patt with subst in the string str. "
|
|
"Will return the new string if patt is found, otherwise returns str.")
|
|
},
|
|
{
|
|
"string/split", cfun_string_split,
|
|
JDOC("(string/split delim str)\n\n"
|
|
"Splits a string str with delimiter delim and returns an array of "
|
|
"substrings. The substrings will not contain the delimiter delim. If delim "
|
|
"is not found, the returned array will have one element.")
|
|
},
|
|
{
|
|
"string/check-set", cfun_string_checkset,
|
|
JDOC("(string/check-set set str)\n\n"
|
|
"Checks if any of the bytes in the string set appear in the string str. "
|
|
"Returns true if some bytes in set do appear in str, false if no bytes do.")
|
|
},
|
|
{
|
|
"string/join", cfun_string_join,
|
|
JDOC("(string/join parts [,sep])\n\n"
|
|
"Joins an array of strings into one string, optionally separated by "
|
|
"a separator string sep.")
|
|
},
|
|
{
|
|
"string/format", cfun_string_format,
|
|
JDOC("(string/format format & values)\n\n"
|
|
"Similar to snprintf, but specialized for operating with janet. Returns "
|
|
"a new string.")
|
|
},
|
|
{NULL, NULL, NULL}
|
|
};
|
|
|
|
/* Module entry point */
|
|
void janet_lib_string(JanetTable *env) {
|
|
janet_core_cfuns(env, NULL, string_cfuns);
|
|
}
|