mirror of
				https://github.com/janet-lang/janet
				synced 2025-10-25 04:37:42 +00:00 
			
		
		
		
	Address #547 - don't drop references.
Keep a separate stack for tagged references. This may cause pegs to use more memory, but it makes the backref and backmatch features much more powerful. The second stack is disabled if backref and backmatch are not used in the peg.
This commit is contained in:
		| @@ -2,6 +2,8 @@ | |||||||
| All notable changes to this project will be documented in this file. | All notable changes to this project will be documented in this file. | ||||||
|  |  | ||||||
| ## Unreleased - ??? | ## Unreleased - ??? | ||||||
|  | - Make `backmatch` and `backref` more expressive in pegs. | ||||||
|  | - Fix buggy `string/split`. | ||||||
| - Add `fiber/last-value` to get the value that was last yielded, errored, or signaled | - Add `fiber/last-value` to get the value that was last yielded, errored, or signaled | ||||||
|   by a fiber. |   by a fiber. | ||||||
| - Remove `:generate` verb from `loop` macros. Instead, use the `:in` verb | - Remove `:generate` verb from `loop` macros. Instead, use the `:in` verb | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @@ -41,7 +41,7 @@ SONAME_SETTER=-Wl,-soname, | |||||||
| # For cross compilation | # For cross compilation | ||||||
| HOSTCC?=$(CC) | HOSTCC?=$(CC) | ||||||
| HOSTAR?=$(AR) | HOSTAR?=$(AR) | ||||||
| CFLAGS?=-O2 | CFLAGS?=-Og -g3 | ||||||
| LDFLAGS?=-rdynamic | LDFLAGS?=-rdynamic | ||||||
|  |  | ||||||
| COMMON_CFLAGS:=-std=c99 -Wall -Wextra -Isrc/include -Isrc/conf -fvisibility=hidden -fPIC | COMMON_CFLAGS:=-std=c99 -Wall -Wextra -Isrc/include -Isrc/conf -fvisibility=hidden -fPIC | ||||||
|   | |||||||
| @@ -351,7 +351,7 @@ static Janet cfun_net_connect(int32_t argc, Janet *argv) { | |||||||
|     /* Create socket */ |     /* Create socket */ | ||||||
|     JSock sock = JSOCKDEFAULT; |     JSock sock = JSOCKDEFAULT; | ||||||
|     void *addr = NULL; |     void *addr = NULL; | ||||||
|     socklen_t addrlen; |     socklen_t addrlen = 0; | ||||||
| #ifndef JANET_WINDOWS | #ifndef JANET_WINDOWS | ||||||
|     if (is_unix) { |     if (is_unix) { | ||||||
|         sock = socket(AF_UNIX, socktype | JSOCKFLAGS, 0); |         sock = socket(AF_UNIX, socktype | JSOCKFLAGS, 0); | ||||||
|   | |||||||
| @@ -44,11 +44,13 @@ typedef struct { | |||||||
|     JanetArray *captures; |     JanetArray *captures; | ||||||
|     JanetBuffer *scratch; |     JanetBuffer *scratch; | ||||||
|     JanetBuffer *tags; |     JanetBuffer *tags; | ||||||
|  |     JanetArray *tagged_captures; | ||||||
|     const Janet *extrav; |     const Janet *extrav; | ||||||
|     int32_t *linemap; |     int32_t *linemap; | ||||||
|     int32_t extrac; |     int32_t extrac; | ||||||
|     int32_t depth; |     int32_t depth; | ||||||
|     int32_t linemaplen; |     int32_t linemaplen; | ||||||
|  |     int32_t has_backref; | ||||||
|     enum { |     enum { | ||||||
|         PEG_MODE_NORMAL, |         PEG_MODE_NORMAL, | ||||||
|         PEG_MODE_ACCUMULATE |         PEG_MODE_ACCUMULATE | ||||||
| @@ -60,6 +62,7 @@ typedef struct { | |||||||
|  * if one branch fails and try a new branch. */ |  * if one branch fails and try a new branch. */ | ||||||
| typedef struct { | typedef struct { | ||||||
|     int32_t cap; |     int32_t cap; | ||||||
|  |     int32_t tcap; | ||||||
|     int32_t scratch; |     int32_t scratch; | ||||||
| } CapState; | } CapState; | ||||||
|  |  | ||||||
| @@ -68,6 +71,7 @@ static CapState cap_save(PegState *s) { | |||||||
|     CapState cs; |     CapState cs; | ||||||
|     cs.scratch = s->scratch->count; |     cs.scratch = s->scratch->count; | ||||||
|     cs.cap = s->captures->count; |     cs.cap = s->captures->count; | ||||||
|  |     cs.tcap = s->tagged_captures->count; | ||||||
|     return cs; |     return cs; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -75,7 +79,15 @@ static CapState cap_save(PegState *s) { | |||||||
| static void cap_load(PegState *s, CapState cs) { | static void cap_load(PegState *s, CapState cs) { | ||||||
|     s->scratch->count = cs.scratch; |     s->scratch->count = cs.scratch; | ||||||
|     s->captures->count = cs.cap; |     s->captures->count = cs.cap; | ||||||
|     s->tags->count = cs.cap; |     s->tags->count = cs.tcap; | ||||||
|  |     s->tagged_captures->count = cs.tcap; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Load a saved capture state in the case of success. Keeps | ||||||
|  |  * tagged captures around for backref. */ | ||||||
|  | static void cap_load_keept(PegState *s, CapState cs) { | ||||||
|  |     s->scratch->count = cs.scratch; | ||||||
|  |     s->captures->count = cs.cap; | ||||||
| } | } | ||||||
|  |  | ||||||
| /* Add a capture */ | /* Add a capture */ | ||||||
| @@ -83,8 +95,11 @@ static void pushcap(PegState *s, Janet capture, uint32_t tag) { | |||||||
|     if (s->mode == PEG_MODE_ACCUMULATE) { |     if (s->mode == PEG_MODE_ACCUMULATE) { | ||||||
|         janet_to_string_b(s->scratch, capture); |         janet_to_string_b(s->scratch, capture); | ||||||
|     } |     } | ||||||
|     if (tag || s->mode == PEG_MODE_NORMAL) { |     if (s->mode == PEG_MODE_NORMAL) { | ||||||
|         janet_array_push(s->captures, capture); |         janet_array_push(s->captures, capture); | ||||||
|  |     } | ||||||
|  |     if (s->has_backref) { | ||||||
|  |         janet_array_push(s->tagged_captures, capture); | ||||||
|         janet_buffer_push_u8(s->tags, tag); |         janet_buffer_push_u8(s->tags, tag); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -321,7 +336,7 @@ tail: | |||||||
|             uint32_t tag = rule[2]; |             uint32_t tag = rule[2]; | ||||||
|             for (int32_t i = s->tags->count - 1; i >= 0; i--) { |             for (int32_t i = s->tags->count - 1; i >= 0; i--) { | ||||||
|                 if (s->tags->data[i] == search) { |                 if (s->tags->data[i] == search) { | ||||||
|                     pushcap(s, s->captures->data[i], tag); |                     pushcap(s, s->tagged_captures->data[i], tag); | ||||||
|                     return text; |                     return text; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| @@ -358,15 +373,15 @@ tail: | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         case RULE_CAPTURE: { |         case RULE_CAPTURE: { | ||||||
|             uint32_t tag = rule[2]; |  | ||||||
|             down1(s); |             down1(s); | ||||||
|             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text); |             const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text); | ||||||
|             up1(s); |             up1(s); | ||||||
|             if (!result) return NULL; |             if (!result) return NULL; | ||||||
|             /* Specialized pushcap - avoid intermediate string creation */ |             /* Specialized pushcap - avoid intermediate string creation */ | ||||||
|             if (!tag && s->mode == PEG_MODE_ACCUMULATE) { |             if (!s->has_backref && s->mode == PEG_MODE_ACCUMULATE) { | ||||||
|                 janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text)); |                 janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text)); | ||||||
|             } else { |             } else { | ||||||
|  |                 uint32_t tag = rule[2]; | ||||||
|                 pushcap(s, janet_stringv(text, (int32_t)(result - text)), tag); |                 pushcap(s, janet_stringv(text, (int32_t)(result - text)), tag); | ||||||
|             } |             } | ||||||
|             return result; |             return result; | ||||||
| @@ -388,7 +403,7 @@ tail: | |||||||
|             if (!result) return NULL; |             if (!result) return NULL; | ||||||
|             Janet cap = janet_stringv(s->scratch->data + cs.scratch, |             Janet cap = janet_stringv(s->scratch->data + cs.scratch, | ||||||
|                                       s->scratch->count - cs.scratch); |                                       s->scratch->count - cs.scratch); | ||||||
|             cap_load(s, cs); |             cap_load_keept(s, cs); | ||||||
|             pushcap(s, cap, tag); |             pushcap(s, cap, tag); | ||||||
|             return result; |             return result; | ||||||
|         } |         } | ||||||
| @@ -419,7 +434,7 @@ tail: | |||||||
|                         s->captures->data + cs.cap, |                         s->captures->data + cs.cap, | ||||||
|                         sizeof(Janet) * num_sub_captures); |                         sizeof(Janet) * num_sub_captures); | ||||||
|             sub_captures->count = num_sub_captures; |             sub_captures->count = num_sub_captures; | ||||||
|             cap_load(s, cs); |             cap_load_keept(s, cs); | ||||||
|             pushcap(s, janet_wrap_array(sub_captures), tag); |             pushcap(s, janet_wrap_array(sub_captures), tag); | ||||||
|             return result; |             return result; | ||||||
|         } |         } | ||||||
| @@ -464,7 +479,7 @@ tail: | |||||||
|                                      s->captures->data + cs.cap); |                                      s->captures->data + cs.cap); | ||||||
|                     break; |                     break; | ||||||
|             } |             } | ||||||
|             cap_load(s, cs); |             cap_load_keept(s, cs); | ||||||
|             if (rule[0] == RULE_MATCHTIME && !janet_truthy(cap)) return NULL; |             if (rule[0] == RULE_MATCHTIME && !janet_truthy(cap)) return NULL; | ||||||
|             pushcap(s, cap, tag); |             pushcap(s, cap, tag); | ||||||
|             return result; |             return result; | ||||||
| @@ -495,7 +510,7 @@ tail: | |||||||
|             uint32_t search = rule[1]; |             uint32_t search = rule[1]; | ||||||
|             for (int32_t i = s->tags->count - 1; i >= 0; i--) { |             for (int32_t i = s->tags->count - 1; i >= 0; i--) { | ||||||
|                 if (s->tags->data[i] == search) { |                 if (s->tags->data[i] == search) { | ||||||
|                     Janet capture = s->captures->data[i]; |                     Janet capture = s->tagged_captures->data[i]; | ||||||
|                     if (!janet_checktype(capture, JANET_STRING)) |                     if (!janet_checktype(capture, JANET_STRING)) | ||||||
|                         return NULL; |                         return NULL; | ||||||
|                     const uint8_t *bytes = janet_unwrap_string(capture); |                     const uint8_t *bytes = janet_unwrap_string(capture); | ||||||
| @@ -597,6 +612,7 @@ typedef struct { | |||||||
|     Janet form; |     Janet form; | ||||||
|     int depth; |     int depth; | ||||||
|     uint32_t nexttag; |     uint32_t nexttag; | ||||||
|  |     int has_backref; | ||||||
| } Builder; | } Builder; | ||||||
|  |  | ||||||
| /* Forward declaration to allow recursion */ | /* Forward declaration to allow recursion */ | ||||||
| @@ -937,6 +953,7 @@ static void spec_reference(Builder *b, int32_t argc, const Janet *argv) { | |||||||
|     Reserve r = reserve(b, 3); |     Reserve r = reserve(b, 3); | ||||||
|     uint32_t search = emit_tag(b, argv[0]); |     uint32_t search = emit_tag(b, argv[0]); | ||||||
|     uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0; |     uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0; | ||||||
|  |     b->has_backref = 1; | ||||||
|     emit_2(r, RULE_GETTAG, search, tag); |     emit_2(r, RULE_GETTAG, search, tag); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -959,6 +976,7 @@ static void spec_column(Builder *b, int32_t argc, const Janet *argv) { | |||||||
| } | } | ||||||
|  |  | ||||||
| static void spec_backmatch(Builder *b, int32_t argc, const Janet *argv) { | static void spec_backmatch(Builder *b, int32_t argc, const Janet *argv) { | ||||||
|  |     b->has_backref = 1; | ||||||
|     spec_tag1(b, argc, argv, RULE_BACKMATCH); |     spec_tag1(b, argc, argv, RULE_BACKMATCH); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -1295,6 +1313,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* verify peg bytecode */ |     /* verify peg bytecode */ | ||||||
|  |     int32_t has_backref = 0; | ||||||
|     uint32_t i = 0; |     uint32_t i = 0; | ||||||
|     while (i < blen) { |     while (i < blen) { | ||||||
|         uint32_t instr = bytecode[i]; |         uint32_t instr = bytecode[i]; | ||||||
| @@ -1310,9 +1329,13 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) { | |||||||
|             case RULE_POSITION: |             case RULE_POSITION: | ||||||
|             case RULE_LINE: |             case RULE_LINE: | ||||||
|             case RULE_COLUMN: |             case RULE_COLUMN: | ||||||
|  |                 /* [1 word] */ | ||||||
|  |                 i += 2; | ||||||
|  |                 break; | ||||||
|             case RULE_BACKMATCH: |             case RULE_BACKMATCH: | ||||||
|                 /* [1 word] */ |                 /* [1 word] */ | ||||||
|                 i += 2; |                 i += 2; | ||||||
|  |                 has_backref = 1; | ||||||
|                 break; |                 break; | ||||||
|             case RULE_SET: |             case RULE_SET: | ||||||
|                 /* [8 words] */ |                 /* [8 words] */ | ||||||
| @@ -1353,9 +1376,13 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) { | |||||||
|                 i += 4; |                 i += 4; | ||||||
|                 break; |                 break; | ||||||
|             case RULE_ARGUMENT: |             case RULE_ARGUMENT: | ||||||
|  |                 /* [searchtag, tag] */ | ||||||
|  |                 i += 3; | ||||||
|  |                 break; | ||||||
|             case RULE_GETTAG: |             case RULE_GETTAG: | ||||||
|                 /* [searchtag, tag] */ |                 /* [searchtag, tag] */ | ||||||
|                 i += 3; |                 i += 3; | ||||||
|  |                 has_backref = 1; | ||||||
|                 break; |                 break; | ||||||
|             case RULE_CONSTANT: |             case RULE_CONSTANT: | ||||||
|                 /* [constant, tag] */ |                 /* [constant, tag] */ | ||||||
| @@ -1409,6 +1436,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) { | |||||||
|     /* Good return */ |     /* Good return */ | ||||||
|     peg->bytecode = bytecode; |     peg->bytecode = bytecode; | ||||||
|     peg->constants = constants; |     peg->constants = constants; | ||||||
|  |     peg->has_backref = has_backref; | ||||||
|     free(op_flags); |     free(op_flags); | ||||||
|     return peg; |     return peg; | ||||||
|  |  | ||||||
| @@ -1445,6 +1473,7 @@ static JanetPeg *make_peg(Builder *b) { | |||||||
|     safe_memcpy(peg->bytecode, b->bytecode, bytecode_size); |     safe_memcpy(peg->bytecode, b->bytecode, bytecode_size); | ||||||
|     safe_memcpy(peg->constants, b->constants, constants_size); |     safe_memcpy(peg->constants, b->constants, constants_size); | ||||||
|     peg->bytecode_len = janet_v_count(b->bytecode); |     peg->bytecode_len = janet_v_count(b->bytecode); | ||||||
|  |     peg->has_backref = b->has_backref; | ||||||
|     return peg; |     return peg; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -1459,6 +1488,7 @@ static JanetPeg *compile_peg(Janet x) { | |||||||
|     builder.nexttag = 1; |     builder.nexttag = 1; | ||||||
|     builder.form = x; |     builder.form = x; | ||||||
|     builder.depth = JANET_RECURSION_GUARD; |     builder.depth = JANET_RECURSION_GUARD; | ||||||
|  |     builder.has_backref = 0; | ||||||
|     peg_compile1(&builder, x); |     peg_compile1(&builder, x); | ||||||
|     JanetPeg *peg = make_peg(&builder); |     JanetPeg *peg = make_peg(&builder); | ||||||
|     builder_cleanup(&builder); |     builder_cleanup(&builder); | ||||||
| @@ -1515,12 +1545,14 @@ static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) { | |||||||
|     ret.s.text_end = ret.bytes.bytes + ret.bytes.len; |     ret.s.text_end = ret.bytes.bytes + ret.bytes.len; | ||||||
|     ret.s.depth = JANET_RECURSION_GUARD; |     ret.s.depth = JANET_RECURSION_GUARD; | ||||||
|     ret.s.captures = janet_array(0); |     ret.s.captures = janet_array(0); | ||||||
|  |     ret.s.tagged_captures = janet_array(0); | ||||||
|     ret.s.scratch = janet_buffer(10); |     ret.s.scratch = janet_buffer(10); | ||||||
|     ret.s.tags = janet_buffer(10); |     ret.s.tags = janet_buffer(10); | ||||||
|     ret.s.constants = ret.peg->constants; |     ret.s.constants = ret.peg->constants; | ||||||
|     ret.s.bytecode = ret.peg->bytecode; |     ret.s.bytecode = ret.peg->bytecode; | ||||||
|     ret.s.linemap = NULL; |     ret.s.linemap = NULL; | ||||||
|     ret.s.linemaplen = -1; |     ret.s.linemaplen = -1; | ||||||
|  |     ret.s.has_backref = ret.peg->has_backref; | ||||||
|     return ret; |     return ret; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1839,6 +1839,7 @@ typedef struct { | |||||||
|     Janet *constants; |     Janet *constants; | ||||||
|     size_t bytecode_len; |     size_t bytecode_len; | ||||||
|     uint32_t num_constants; |     uint32_t num_constants; | ||||||
|  |     int has_backref; | ||||||
| } JanetPeg; | } JanetPeg; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Calvin Rose
					Calvin Rose