1 files changed, 3044 insertions, 0 deletions
diff --git a/src/regexp.c b/src/regexp.c
new file mode 100644
index 0000000..6f3544e
--- /dev/null
+++ b/src/regexp.c
@@ -0,0 +1,3044 @@
+/* vim: set et ts=4 sw=4 : */
+/*
+ * Regular Expression Engine
+ * 
+ * Copyright (c) 2017-2018 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <assert.h>
+#include <alloca.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regexp.h"
+
+/* must be provided by the user; default implementations are supplied here */
+bool lre_check_stack_overflow(void *opaque, size_t alloca_size) {
+    (void)opaque;      /* unused */
+    (void)alloca_size; /* unused */
+    return false;      /* this default never reports an overflow */
+}
+
+void *lre_realloc(void *opaque, void *ptr, size_t size) { /* default: libc realloc() */
+    (void)opaque; /* unused */
+    return realloc(ptr, size);
+}
+
+/* quickjs/cutils: */
+#define likely(x)       __builtin_expect(!!(x), 1)
+#define unlikely(x)     __builtin_expect(!!(x), 0)
+#ifndef countof
+#define countof(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* Return the greater of a and b. */
+static inline int max_int(int a, int b)
+{
+    if (b >= a)
+        return b;
+    return a;
+}
+
+/* Bounded string copy: stores at most buf_size - 1 characters of str in
+   buf and always NUL-terminates; does nothing when buf_size <= 0. */
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+    char *dst, *limit;
+
+    if (buf_size <= 0)
+        return;
+
+    dst = buf;
+    limit = buf + buf_size - 1; /* last slot is kept for the terminator */
+
+    while (*str != '\0' && dst < limit)
+        *dst++ = *str++;
+    *dst = '\0';
+}
+
+typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size); /* allocator callback */
+
+typedef struct DynBuf {
+    uint8_t *buf; /* storage, (re)allocated through realloc_func */
+    size_t size; /* number of bytes currently stored */
+    size_t allocated_size; /* capacity of buf, in bytes */
+    bool error; /* true if a memory allocation error occurred */
+    DynBufReallocFunc *realloc_func; /* called with size 0 by dbuf_free() */
+    void *opaque; /* for realloc_func */
+} DynBuf;
+
+int dbuf_put(DynBuf *s, const uint8_t *data, size_t len);
+
+struct __attribute__((packed)) packed_u16 { /* accessed through get_u16() */
+    uint16_t v;
+};
+
+struct __attribute__((packed)) packed_u32 { /* accessed through get_u32()/put_u32() */
+    uint32_t v;
+};
+
+static inline uint32_t get_u16(const uint8_t *tab) /* 16-bit load in host byte order */
+{
+    return ((const struct packed_u16 *)tab)->v; /* read through the packed wrapper */
+}
+
+static inline uint32_t get_u32(const uint8_t *tab) /* 32-bit load in host byte order */
+{
+    return ((const struct packed_u32 *)tab)->v; /* read through the packed wrapper */
+}
+
+static inline void put_u32(uint8_t *tab, uint32_t val) /* 32-bit store in host byte order */
+{
+    ((struct packed_u32 *)tab)->v = val; /* written through the packed wrapper */
+}
+
+static inline int dbuf_put_u16(DynBuf *s, uint16_t val) /* append val's 2 bytes as stored in memory */
+{
+    return dbuf_put(s, (uint8_t *)&val, 2); /* status of dbuf_put() */
+}
+static inline int dbuf_put_u32(DynBuf *s, uint32_t val) /* append val's 4 bytes as stored in memory */
+{
+    return dbuf_put(s, (uint8_t *)&val, 4); /* status of dbuf_put() */
+}
+
+static inline bool dbuf_error(DynBuf *s) { /* true once dbuf_realloc() has failed */
+    return s->error;
+}
+
+/* return < 0 if error */
+int dbuf_realloc(DynBuf *s, size_t new_size)
+{
+    size_t size;
+    uint8_t *new_buf;
+    if (new_size > s->allocated_size) {
+        if (s->error)
+            return -1;
+        size = s->allocated_size * 3 / 2;
+        if (size > new_size)
+            new_size = size;
+        new_buf = s->realloc_func(s->opaque, s->buf, new_size);
+        if (!new_buf) {
+            s->error = true;
+            return -1;
+        }
+        s->buf = new_buf;
+        s->allocated_size = new_size;
+    }
+    return 0;
+}
+
+/* Release the storage and reset *s to all zeroes. */
+void dbuf_free(DynBuf *s)
+{
+    /* s->buf is NULL after a previous dbuf_free(), so a second call
+       does not invoke the allocator again */
+    if (s->buf != NULL)
+        s->realloc_func(s->opaque, s->buf, 0);
+    memset(s, 0, sizeof(*s));
+}
+
+static void *dbuf_default_realloc(void *opaque, void *ptr, size_t size) /* used when dbuf_init2() gets NULL */
+{
+    (void)opaque; /* unused */
+    return realloc(ptr, size);
+}
+
+/* Set up an empty buffer. A NULL realloc_func selects the default
+   allocator, which forwards to realloc(). */
+void dbuf_init2(DynBuf *s, void *opaque, DynBufReallocFunc *realloc_func)
+{
+    memset(s, 0, sizeof(*s));
+    s->opaque = opaque;
+    s->realloc_func = realloc_func ? realloc_func : dbuf_default_realloc;
+}
+
+int dbuf_put(DynBuf *s, const uint8_t *data, size_t len)
+{   /* Append len bytes of data to s; return 0, or -1 if s cannot grow. */
+    size_t end = s->size + len;
+    if (unlikely(end > s->allocated_size) && dbuf_realloc(s, end))
+        return -1;
+    if (len != 0) /* memcpy() with a NULL pointer is UB even for 0 bytes */
+        memcpy(s->buf + s->size, data, len);
+    s->size = end;
+    return 0;
+}
+
+int dbuf_put_self(DynBuf *s, size_t offset, size_t len)
+{   /* Append a copy of the len bytes at s->buf + offset; -1 if s cannot grow. */
+    size_t end = s->size + len;
+    if (unlikely(end > s->allocated_size) && dbuf_realloc(s, end))
+        return -1;
+    if (len != 0) /* memcpy() with a NULL pointer is UB even for 0 bytes */
+        memcpy(s->buf + s->size, s->buf + offset, len); /* s->buf re-read after growth */
+    s->size = end;
+    return 0;
+}
+
+int dbuf_putc(DynBuf *s, uint8_t c) /* append one byte; returns the dbuf_put() status */
+{
+    return dbuf_put(s, &c, 1);
+}
+
+/* Value (0..15) of the hexadecimal digit c, or -1 if c is not one. */
+static inline int from_hex(int c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+    if (c >= 'a' && c <= 'f')
+        return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F')
+        return c - 'A' + 10;
+    return -1;
+}
+
+#define UTF8_CHAR_LEN_MAX 6
+
+/* Encode c as UTF-8 (extended form, at most 31 bits). Return the number
+   of bytes stored in buf (1 to UTF8_CHAR_LEN_MAX), or 0 if c >= 2^31. */
+int unicode_to_utf8(uint8_t *buf, unsigned int c)
+{
+    static const uint8_t lead_marker[] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
+    uint8_t *out = buf;
+    int n_cont, shift;
+
+    if (c < 0x80) {
+        /* ASCII: a single byte */
+        *out++ = c;
+        return out - buf;
+    }
+    /* number of continuation bytes following the lead byte */
+    if (c < 0x800)
+        n_cont = 1;
+    else if (c < 0x10000)
+        n_cont = 2;
+    else if (c < 0x00200000)
+        n_cont = 3;
+    else if (c < 0x04000000)
+        n_cont = 4;
+    else if (c < 0x80000000)
+        n_cont = 5;
+    else
+        return 0;
+
+    /* the lead byte carries the top bits, then 6 bits per continuation byte */
+    shift = 6 * n_cont;
+    *out++ = (c >> shift) | lead_marker[n_cont - 1];
+    while (shift > 0) {
+        shift -= 6;
+        *out++ = ((c >> shift) & 0x3f) | 0x80;
+    }
+    return out - buf;
+}
+
+static const unsigned int utf8_min_code[5] = { /* smallest code point allowed for 1..5 continuation bytes */
+    0x80, 0x800, 0x10000, 0x00200000, 0x04000000,
+};
+
+static const unsigned char utf8_first_code_mask[5] = { /* lead byte payload mask for 1..5 continuation bytes */
+    0x1f, 0xf, 0x7, 0x3, 0x1,
+};
+
+/* decode one char from p (max_len >= 1 bytes available, sequences of up to
+   6 bytes); return it and set *pp after it, or -1 (*pp untouched) if error */
+int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
+{
+    int l, c, b, i; /* l = number of continuation bytes expected */
+
+    c = *p++;
+    if (c < 0x80) { /* ASCII: single byte */
+        *pp = p;
+        return c;
+    }
+    switch(c) { /* classify the lead byte */
+    case 0xc0: case 0xc1: case 0xc2: case 0xc3:
+    case 0xc4: case 0xc5: case 0xc6: case 0xc7:
+    case 0xc8: case 0xc9: case 0xca: case 0xcb:
+    case 0xcc: case 0xcd: case 0xce: case 0xcf:
+    case 0xd0: case 0xd1: case 0xd2: case 0xd3:
+    case 0xd4: case 0xd5: case 0xd6: case 0xd7:
+    case 0xd8: case 0xd9: case 0xda: case 0xdb:
+    case 0xdc: case 0xdd: case 0xde: case 0xdf:
+        l = 1;
+        break;
+    case 0xe0: case 0xe1: case 0xe2: case 0xe3:
+    case 0xe4: case 0xe5: case 0xe6: case 0xe7:
+    case 0xe8: case 0xe9: case 0xea: case 0xeb:
+    case 0xec: case 0xed: case 0xee: case 0xef:
+        l = 2;
+        break;
+    case 0xf0: case 0xf1: case 0xf2: case 0xf3:
+    case 0xf4: case 0xf5: case 0xf6: case 0xf7:
+        l = 3;
+        break;
+    case 0xf8: case 0xf9: case 0xfa: case 0xfb:
+        l = 4;
+        break;
+    case 0xfc: case 0xfd:
+        l = 5;
+        break;
+    default: /* 0x80..0xbf (continuation byte first) or 0xfe/0xff */
+        return -1;
+    }
+    /* check that we have enough characters */
+    if (l > (max_len - 1))
+        return -1;
+    c &= utf8_first_code_mask[l - 1]; /* keep the payload bits of the lead byte */
+    for(i = 0; i < l; i++) {
+        b = *p++;
+        if (b < 0x80 || b >= 0xc0) /* not of the form 10xxxxxx */
+            return -1;
+        c = (c << 6) | (b & 0x3f);
+    }
+    if (c < (int)utf8_min_code[l - 1]) /* value too small for this length (overlong form) */
+        return -1;
+    *pp = p;
+    return c;
+}
+
+/* quickjs/libunicode: */
+#include "unicode-table.h"
+
+#define LRE_CC_RES_LEN_MAX 3
+
+enum { /* values of the 4-bit 'type' field decoded in lre_case_conv() */
+    RUN_TYPE_U,
+    RUN_TYPE_L,
+    RUN_TYPE_UF,
+    RUN_TYPE_LF,
+    RUN_TYPE_UL,
+    RUN_TYPE_LSU,
+    RUN_TYPE_U2L_399_EXT2,
+    RUN_TYPE_UF_D20,
+    RUN_TYPE_UF_D1_EXT,
+    RUN_TYPE_U_EXT,
+    RUN_TYPE_LF_EXT,
+    RUN_TYPE_U_EXT2,
+    RUN_TYPE_L_EXT2,
+    RUN_TYPE_U_EXT3,
+};
+
+typedef struct {
+    int len; /* in points, always even */
+    int size; /* allocated capacity of 'points', in points */
+    uint32_t *points; /* points sorted by increasing value */
+    void *mem_opaque; /* passed back to realloc_func */
+    void *(*realloc_func)(void *opaque, void *ptr, size_t size); /* called with size 0 by cr_free() */
+} CharRange;
+
+typedef enum { /* set operation selector for cr_op() */
+    CR_OP_UNION,
+    CR_OP_INTER,
+    CR_OP_XOR,
+} CharRangeOpEnum;
+
+static void *cr_default_realloc(void *opaque, void *ptr, size_t size) /* used when cr_init() gets NULL */
+{
+    (void)opaque; /* unused */
+    return realloc(ptr, size);
+}
+
+void cr_init(CharRange *cr, void *mem_opaque, DynBufReallocFunc *realloc_func)
+{   /* start empty; a NULL realloc_func selects the realloc()-based default */
+    if (!realloc_func)
+        realloc_func = cr_default_realloc;
+    *cr = (CharRange){ .points = NULL, .mem_opaque = mem_opaque,
+                       .realloc_func = realloc_func };
+}
+
+void cr_free(CharRange *cr) /* release the point array; cr's fields are left unchanged */
+{
+    cr->realloc_func(cr->mem_opaque, cr->points, 0); /* size 0 = free request */
+}
+
+/* ensure capacity for at least 'size' points; return -1 if allocation fails */
+int cr_realloc(CharRange *cr, int size)
+{
+    uint32_t *pts;
+    int wanted;
+    if (size <= cr->size)
+        return 0;
+    wanted = max_int(size, cr->size * 3 / 2);
+    pts = cr->realloc_func(cr->mem_opaque, cr->points,
+                           wanted * sizeof(cr->points[0]));
+    if (pts == NULL)
+        return -1;
+    cr->points = pts;
+    cr->size = wanted;
+    return 0;
+}
+
+/* Normalize cr in place: drop empty intervals (start == end) and merge
+   intervals that touch (end of one == start of the next). cr->len is
+   updated; the allocation is left as is. */
+static void cr_compress(CharRange *cr)
+{
+    uint32_t *pt = cr->points;
+    const int n = cr->len;
+    int rd = 0, wr = 0, last;
+
+    while (rd + 1 < n) {
+        if (pt[rd] == pt[rd + 1]) {
+            rd += 2; /* empty interval */
+            continue;
+        }
+        /* extend over every following interval that starts exactly
+           where the previous one ends */
+        for (last = rd; last + 3 < n && pt[last + 1] == pt[last + 2]; )
+            last += 2;
+        /* store the merged interval */
+        pt[wr] = pt[rd];
+        pt[wr + 1] = pt[last + 1];
+        wr += 2;
+        rd = last + 2;
+    }
+
+    cr->len = wr;
+}
+
+/* Append point v, growing the array as needed; return -1 if out of memory. */
+static inline int cr_add_point(CharRange *cr, uint32_t v)
+{
+    if (cr->len >= cr->size && cr_realloc(cr, cr->len + 1))
+        return -1;
+    cr->points[cr->len] = v;
+    cr->len++;
+    return 0;
+}
+
+/* complement cr in place over [0, UINT32_MAX); return -1 if out of memory */
+int cr_invert(CharRange *cr)
+{
+    const int n = cr->len;
+    if (cr_realloc(cr, n + 2))
+        return -1;
+    memmove(&cr->points[1], &cr->points[0], n * sizeof(cr->points[0]));
+    cr->points[0] = 0;
+    cr->points[n + 1] = UINT32_MAX;
+    cr->len = n + 2;
+    cr_compress(cr);
+    return 0;
+}
+
+int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
+          const uint32_t *b_pt, int b_len, int op) /* op: CR_OP_xxx; -1 if out of memory */
+{
+    int a_idx, b_idx, is_in;
+    uint32_t v;
+    /* merge-walk of the two sorted point lists; results are appended to cr */
+    a_idx = 0;
+    b_idx = 0;
+    for(;;) {
+        /* get one more point from a or b in increasing order */
+        if (a_idx < a_len && b_idx < b_len) {
+            if (a_pt[a_idx] < b_pt[b_idx]) {
+                goto a_add;
+            } else if (a_pt[a_idx] == b_pt[b_idx]) {
+                v = a_pt[a_idx]; /* same point in both lists: consume both */
+                a_idx++;
+                b_idx++;
+            } else {
+                goto b_add;
+            }
+        } else if (a_idx < a_len) {
+        a_add:
+            v = a_pt[a_idx++];
+        } else if (b_idx < b_len) {
+        b_add:
+            v = b_pt[b_idx++];
+        } else {
+            break; /* both lists exhausted */
+        }
+        /* add the point if the in/out status changes */
+        switch(op) { /* an odd index means we are inside an interval of that list */
+        case CR_OP_UNION:
+            is_in = (a_idx & 1) | (b_idx & 1);
+            break;
+        case CR_OP_INTER:
+            is_in = (a_idx & 1) & (b_idx & 1);
+            break;
+        case CR_OP_XOR:
+            is_in = (a_idx & 1) ^ (b_idx & 1);
+            break;
+        default:
+            abort();
+        }
+        if (is_in != (cr->len & 1)) { /* odd cr->len: the result has an open interval */
+            if (cr_add_point(cr, v))
+                return -1;
+        }
+    }
+    cr_compress(cr); /* drop empty intervals, merge touching ones */
+    return 0;
+}
+
+/* cr = cr UNION b_pt[0..b_len); frees the old points; returns cr_op() status */
+int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len)
+{
+    CharRange old = *cr;
+    int status;
+    cr->points = NULL;
+    cr->len = cr->size = 0;
+    status = cr_op(cr, old.points, old.len, b_pt, b_len, CR_OP_UNION);
+    cr_free(&old);
+    return status;
+}
+
+/* Add the inclusive interval [c1, c2] to cr (stored with an exclusive
+   end point c2 + 1). Return the cr_union1() status. */
+static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2)
+{
+    const uint32_t interval[2] = { c1, c2 + 1 };
+    return cr_union1(cr, interval, 2);
+}
+
+/* Convert c and store the resulting code point(s) in res[]; return their
+   count (1 to 3). conv_type: 0 = to upper, 1 = to lower,
+   2 = case folding (= to lower with modifications).
+   ASCII is handled inline; other code points are looked up by binary
+   search in case_conv_table1. */
+int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
+{
+    if (c < 128) {
+        if (conv_type) {
+            if (c >= 'A' && c <= 'Z') {
+                c = c - 'A' + 'a';
+            }
+        } else {
+            if (c >= 'a' && c <= 'z') {
+                c = c - 'a' + 'A';
+            }
+        }
+    } else {
+        uint32_t v, code, data, type, len, a, is_lower;
+        int idx, idx_min, idx_max;
+        /* is_lower: 1 for "to lower" and for case folding, 0 for "to upper" */
+        is_lower = (conv_type != 0);
+        idx_min = 0;
+        idx_max = countof(case_conv_table1) - 1;
+        while (idx_min <= idx_max) {
+            idx = (unsigned)(idx_max + idx_min) / 2;
+            v = case_conv_table1[idx];
+            code = v >> (32 - 17); /* first code point of the run (top 17 bits) */
+            len = (v >> (32 - 17 - 7)) & 0x7f; /* run length (7 bits) */
+            if (c < code) {
+                idx_max = idx - 1;
+            } else if (c >= code + len) {
+                idx_min = idx + 1;
+            } else {
+                type = (v >> (32 - 17 - 7 - 4)) & 0xf; /* RUN_TYPE_xxx (4 bits) */
+                data = ((v & 0xf) << 8) | case_conv_table2[idx]; /* 12 bits: low nibble of v + table2 byte */
+                switch(type) {
+                case RUN_TYPE_U:
+                case RUN_TYPE_L:
+                case RUN_TYPE_UF:
+                case RUN_TYPE_LF:
+                    if ((uint32_t)conv_type == (type & 1) ||
+                        (type >= RUN_TYPE_UF && conv_type == 2)) {
+                        c = c - code + (case_conv_table1[data] >> (32 - 17)); /* data indexes the target run */
+                    }
+                    break;
+                case RUN_TYPE_UL:
+                    a = c - code;
+                    if ((a & 1) != (1 - is_lower))
+                        break;
+                    c = (a ^ 1) + code; /* swap with the neighbouring code point of the pair */
+                    break;
+                case RUN_TYPE_LSU:
+                    a = c - code;
+                    if (a == 1) {
+                        c += 2 * is_lower - 1;
+                    } else if (a == (1 - is_lower) * 2) {
+                        c += (2 * is_lower - 1) * 2;
+                    }
+                    break;
+                case RUN_TYPE_U2L_399_EXT2:
+                    if (!is_lower) {
+                        res[0] = c - code + case_conv_ext[data >> 6];
+                        res[1] = 0x399;
+                        return 2;
+                    } else {
+                        c = c - code + case_conv_ext[data & 0x3f];
+                    }
+                    break;
+                case RUN_TYPE_UF_D20:
+                    if (conv_type == 1)
+                        break;
+                    c = data + (conv_type == 2) * 0x20;
+                    break;
+                case RUN_TYPE_UF_D1_EXT:
+                    if (conv_type == 1)
+                        break;
+                    c = case_conv_ext[data] + (conv_type == 2);
+                    break;
+                case RUN_TYPE_U_EXT:
+                case RUN_TYPE_LF_EXT:
+                    if (is_lower != (type - RUN_TYPE_U_EXT))
+                        break;
+                    c = case_conv_ext[data];
+                    break;
+                case RUN_TYPE_U_EXT2:
+                case RUN_TYPE_L_EXT2:
+                    if ((uint32_t)conv_type != (type - RUN_TYPE_U_EXT2))
+                        break;
+                    res[0] = c - code + case_conv_ext[data >> 6]; /* two 6-bit indexes into case_conv_ext */
+                    res[1] = case_conv_ext[data & 0x3f];
+                    return 2;
+                default:
+                case RUN_TYPE_U_EXT3:
+                    if (conv_type != 0)
+                        break;
+                    res[0] = case_conv_ext[data >> 8]; /* three 4-bit indexes into case_conv_ext */
+                    res[1] = case_conv_ext[(data >> 4) & 0xf];
+                    res[2] = case_conv_ext[data & 0xf];
+                    return 3;
+                }
+                break; /* run found: leave the search loop */
+            }
+        }
+    }
+    res[0] = c; /* single result (c itself when no conversion applied) */
+    return 1;
+}
+
+/* quickjs/libregexp: */
+typedef enum { /* one REOP_xxx enumerator per DEF() entry of regexp-opcode.h */
+#define DEF(id, size) REOP_ ## id,
+#include "regexp-opcode.h"
+#undef DEF
+    REOP_COUNT, /* number of opcodes; sizes reopcode_info[] */
+} REOPCodeEnum;
+
+#define CAPTURE_COUNT_MAX 255
+#define STACK_SIZE_MAX 255
+
+/* unicode code points */
+#define CP_LS   0x2028
+#define CP_PS   0x2029
+
+#define TMP_BUF_SIZE 128
+
+typedef struct {
+    DynBuf byte_code; /* output written by the re_emit_xxx() helpers */
+    const uint8_t *buf_ptr;
+    const uint8_t *buf_end; /* compared against the read pointer to detect the end of input */
+    const uint8_t *buf_start;
+    int re_flags;
+    bool is_utf16; /* selects the "unicode regexp" escape and case rules */
+    bool ignore_case;
+    bool dotall;
+    int capture_count;
+    int total_capture_count; /* -1 = not computed yet */
+    int has_named_captures; /* -1 = don't know, 0 = no, 1 = yes */
+    void *mem_opaque; /* forwarded to cr_init() as the allocator context */
+    DynBuf group_names;
+    union {
+        char error_msg[TMP_BUF_SIZE]; /* filled in by re_parse_error() */
+        char tmp_buf[TMP_BUF_SIZE]; /* shares storage with error_msg */
+    } u;
+} REParseState;
+
+typedef struct {
+    uint8_t size; /* 'size' argument of the opcode's DEF() entry */
+} REOpCode;
+
+static const REOpCode reopcode_info[REOP_COUNT] = { /* same order as REOPCodeEnum */
+#define DEF(id, size) { size },
+#include "regexp-opcode.h"
+#undef DEF
+};
+
+#define RE_HEADER_FLAGS         0
+#define RE_HEADER_CAPTURE_COUNT 1
+#define RE_HEADER_STACK_SIZE    2
+
+#define RE_HEADER_LEN 7
+
+static inline int is_digit(int c) { /* non-zero for the ASCII digits '0'..'9' */
+    return c >= '0' && c <= '9';
+}
+
+/* insert 'len' bytes at position 'pos'. Return < 0 if error. */
+static int dbuf_insert(DynBuf *s, int pos, int len)
+{
+    if (dbuf_realloc(s, s->size + len))
+        return -1;
+    memmove(s->buf + pos + len, s->buf + pos, s->size - pos);
+    s->size += len;
+    return 0;
+}
+
+/* canonicalize with the specific JS regexp rules */
+static uint32_t lre_canonicalize(uint32_t c, bool is_utf16)
+{
+    uint32_t conv[LRE_CC_RES_LEN_MAX];
+
+    if (likely(c < 128)) {
+        /* ASCII: fold to lower case in unicode mode, to upper otherwise */
+        if (is_utf16) {
+            if (c >= 'A' && c <= 'Z')
+                c += 'a' - 'A';
+        } else {
+            if (c >= 'a' && c <= 'z')
+                c -= 'a' - 'A';
+        }
+        return c;
+    }
+    if (is_utf16) {
+        /* unicode regexp: case folding, first resulting code point only */
+        lre_case_conv(conv, c, 2);
+        return conv[0];
+    }
+    /* legacy regexp: to upper case if single char >= 128 */
+    if (lre_case_conv(conv, c, false) == 1 && conv[0] >= 128)
+        c = conv[0];
+    return c;
+}
+
+static const uint16_t char_range_d[] = { /* interval count, then (first, last + 1) pairs */
+    1,
+    0x0030, 0x0039 + 1,
+};
+
+/* TAB..CR, U+FEFF and the code point ranges for Zs,Zl or Zp property */
+static const uint16_t char_range_s[] = { /* same layout; scanned by lre_is_space() */
+    10,
+    0x0009, 0x000D + 1,
+    0x0020, 0x0020 + 1,
+    0x00A0, 0x00A0 + 1,
+    0x1680, 0x1680 + 1,
+    0x2000, 0x200A + 1,
+    /* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
+    /* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
+    0x2028, 0x2029 + 1,
+    0x202F, 0x202F + 1,
+    0x205F, 0x205F + 1,
+    0x3000, 0x3000 + 1,
+    /* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
+    0xFEFF, 0xFEFF + 1,
+};
+
+/* true if c lies in one of the (sorted) char_range_s intervals */
+bool lre_is_space(int c)
+{
+    const int n_intervals = (countof(char_range_s) - 1) / 2;
+    const uint16_t *iv = &char_range_s[1]; /* skip the leading count */
+    int k;
+    for (k = 0; k < n_intervals; k++, iv += 2) {
+        if (c < iv[0])
+            break; /* below this interval, hence below all later ones */
+        if (c < iv[1])
+            return true;
+    }
+    return false;
+}
+
+uint32_t const lre_id_start_table_ascii[4] = { /* 128-bit map: bit (c & 31) of word (c >> 5) */
+    /* $ A-Z _ a-z */
+    0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE
+};
+
+uint32_t const lre_id_continue_table_ascii[4] = { /* same layout, digits added */
+    /* $ 0-9 A-Z _ a-z */
+    0x00000000, 0x03FF0010, 0x87FFFFFE, 0x07FFFFFE
+};
+
+
+static const uint16_t char_range_w[] = { /* interval count, then (first, last + 1) pairs */
+    4,
+    0x0030, 0x0039 + 1, /* 0-9 */
+    0x0041, 0x005A + 1, /* A-Z */
+    0x005F, 0x005F + 1, /* _ */
+    0x0061, 0x007A + 1, /* a-z */
+};
+
+#define CLASS_RANGE_BASE 0x40000000
+
+typedef enum { /* value >> 1 indexes char_range_table; bit 0 set = inverted class */
+    CHAR_RANGE_d,
+    CHAR_RANGE_D,
+    CHAR_RANGE_s,
+    CHAR_RANGE_S,
+    CHAR_RANGE_w,
+    CHAR_RANGE_W,
+} CharRangeEnum;
+
+static const uint16_t *char_range_table[] = { /* indexed by CharRangeEnum >> 1 */
+    char_range_d,
+    char_range_s,
+    char_range_w,
+};
+
+/* Initialize cr with the class selected by c (a CharRangeEnum value):
+   c >> 1 picks the table and bit 0 requests the complement.
+   Return 0, or -1 (with cr freed) if memory runs out. */
+static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
+{
+    const uint16_t *tab = char_range_table[c >> 1];
+    const int n_points = 2 * tab[0];
+    int k;
+
+    cr_init(cr, s->mem_opaque, lre_realloc);
+    for (k = 1; k <= n_points; k++) {
+        if (cr_add_point(cr, tab[k]))
+            goto fail;
+    }
+
+    /* odd values are the inverted classes (CHAR_RANGE_D, _S, _W) */
+    if ((c & 1) && cr_invert(cr))
+        goto fail;
+    return 0;
+ fail:
+    cr_free(cr);
+    return -1;
+}
+
+/* Make cr case-insensitive for ASCII letters: add the upper case
+   counterpart of every part of cr that lies inside 'a'..'z'.
+   Return 0 on success, otherwise the failing cr_op()/cr_union1() status. */
+static int cr_canonicalize(CharRange *cr)
+{
+    static const uint32_t lower_az[2] = { 'a', 'z' + 1 };
+    CharRange lower;
+    int k, status;
+
+    cr_init(&lower, cr->mem_opaque, lre_realloc);
+    /* lower = the part of cr inside 'a'..'z' */
+    status = cr_op(&lower, cr->points, cr->len, lower_az, 2, CR_OP_INTER);
+    if (status == 0) {
+        /* convert to upper case */
+        /* XXX: the generic unicode case would be much more complicated
+           and not really useful */
+        for (k = 0; k < lower.len; k++)
+            lower.points[k] += 'A' - 'a';
+        /* Note: for simplicity we keep the lower case ranges */
+        status = cr_union1(cr, lower.points, lower.len);
+    }
+    cr_free(&lower);
+    return status;
+}
+
+static void re_emit_op(REParseState *s, int op) /* append a one-byte opcode */
+{
+    dbuf_putc(&s->byte_code, op); /* return value ignored; failures set byte_code.error */
+}
+
+/* emit opcode 'op' followed by 'val'; return the offset of the u32 value */
+static int re_emit_op_u32(REParseState *s, int op, uint32_t val)
+{
+    DynBuf *bc = &s->byte_code;
+    dbuf_putc(bc, op);
+    const int operand_pos = bc->size;
+    dbuf_put_u32(bc, val);
+    return operand_pos;
+}
+
+static int re_emit_goto(REParseState *s, int op, uint32_t val)
+{   /* operand = 'val' relative to the end of the u32; return the u32's offset */
+    DynBuf *bc = &s->byte_code;
+    dbuf_putc(bc, op);
+    const int operand_pos = bc->size;
+    dbuf_put_u32(bc, val - (operand_pos + 4));
+    return operand_pos;
+}
+
+static void re_emit_op_u8(REParseState *s, int op, uint32_t val) /* opcode + 1-byte operand */
+{
+    dbuf_putc(&s->byte_code, op);
+    dbuf_putc(&s->byte_code, val); /* val is narrowed to uint8_t by the call */
+}
+
+static void re_emit_op_u16(REParseState *s, int op, uint32_t val) /* opcode + 2-byte operand */
+{
+    dbuf_putc(&s->byte_code, op);
+    dbuf_put_u16(&s->byte_code, val); /* val is narrowed to uint16_t by the call */
+}
+
+static int __attribute__((format(printf, 2, 3))) re_parse_error(REParseState *s, const char *fmt, ...)
+{   /* format the message into s->u.error_msg (bounded by its size); always return -1 */
+    va_list ap;
+    va_start(ap, fmt);
+    vsnprintf(s->u.error_msg, sizeof(s->u.error_msg), fmt, ap);
+    va_end(ap);
+    return -1;
+}
+
+static int re_parse_out_of_memory(REParseState *s) /* records "out of memory"; returns -1 */
+{
+    return re_parse_error(s, "out of memory");
+}
+
+/* Parse the run of decimal digits at *pp and advance *pp past it.
+   If allow_overflow is false, return -1 in case of overflow (*pp is
+   not updated then). Otherwise return INT32_MAX. */
+static int parse_digits(const uint8_t **pp, bool allow_overflow)
+{
+    const uint8_t *cur = *pp;
+    uint64_t acc = 0;
+
+    /* acc is clamped below, so the 64-bit accumulator cannot wrap */
+    for (;;) {
+        const int digit = *cur - '0';
+        if (digit < 0 || digit > 9)
+            break;
+        acc = acc * 10 + digit;
+        /* INT32_MAX itself already counts as an overflow */
+        if (acc >= INT32_MAX) {
+            if (!allow_overflow)
+                return -1;
+            acc = INT32_MAX; /* saturate and keep consuming digits */
+        }
+        cur++;
+    }
+
+    *pp = cur;
+    return acc;
+}
+
+/* Consume character c at *pp. Return 0 and advance *pp on a match;
+   otherwise record a parse error and return its (negative) status. */
+static int re_parse_expect(REParseState *s, const uint8_t **pp, int c)
+{
+    if (**pp != c)
+        return re_parse_error(s, "expecting '%c'", c);
+
+    ++*pp;
+    return 0;
+}
+
+/* Parse an escape sequence, *pp points after the '\':
+   allow_utf16 value:
+   0 : no UTF-16 escapes allowed
+   1 : UTF-16 escapes allowed
+   2 : UTF-16 escapes allowed and escapes of surrogate pairs are
+   converted to a unicode character (unicode regexp case).
+
+   Return the unicode char and update *pp if recognized,
+   return -1 if malformed escape,
+   return -2 otherwise. */
+int lre_parse_escape(const uint8_t **pp, int allow_utf16)
+{
+    const uint8_t *p;
+    uint32_t c;
+
+    p = *pp;
+    c = *p++;
+    switch(c) {
+    case 'b':
+        c = '\b';
+        break;
+    case 'f':
+        c = '\f';
+        break;
+    case 'n':
+        c = '\n';
+        break;
+    case 'r':
+        c = '\r';
+        break;
+    case 't':
+        c = '\t';
+        break;
+    case 'v':
+        c = '\v';
+        break;
+    case 'x':
+    case 'u':
+        {
+            int h, n, i;
+            uint32_t c1;
+            /* braced form (one or more hex digits) only when allow_utf16 != 0 */
+            if (*p == '{' && allow_utf16) {
+                p++;
+                c = 0;
+                for(;;) {
+                    h = from_hex(*p++);
+                    if (h < 0)
+                        return -1;
+                    c = (c << 4) | h;
+                    if (c > 0x10FFFF) /* beyond the last unicode code point */
+                        return -1;
+                    if (*p == '}')
+                        break;
+                }
+                p++;
+            } else {
+                if (c == 'x') {
+                    n = 2; /* \xHH */
+                } else {
+                    n = 4; /* \uHHHH */
+                }
+
+                c = 0;
+                for(i = 0; i < n; i++) {
+                    h = from_hex(*p++);
+                    if (h < 0) {
+                        return -1;
+                    }
+                    c = (c << 4) | h;
+                }
+                if (c >= 0xd800 && c < 0xdc00 &&
+                    allow_utf16 == 2 && p[0] == '\\' && p[1] == 'u') {
+                    /* convert an escaped surrogate pair into a
+                       unicode char */
+                    c1 = 0;
+                    for(i = 0; i < 4; i++) {
+                        h = from_hex(p[2 + i]);
+                        if (h < 0)
+                            break;
+                        c1 = (c1 << 4) | h;
+                    }
+                    if (i == 4 && c1 >= 0xdc00 && c1 < 0xe000) {
+                        p += 6; /* skip the second \uHHHH */
+                        c = (((c & 0x3ff) << 10) | (c1 & 0x3ff)) + 0x10000;
+                    }
+                }
+            }
+        }
+        break;
+    case '0': case '1': case '2': case '3':
+    case '4': case '5': case '6': case '7':
+        c -= '0';
+        if (allow_utf16 == 2) {
+            /* only accept \0 not followed by digit */
+            if (c != 0 || is_digit(*p))
+                return -1;
+        } else {
+            /* parse a legacy octal sequence */
+            uint32_t v;
+            v = *p - '0'; /* unsigned: a byte below '0' wraps to a value > 7 */
+            if (v > 7)
+                break;
+            c = (c << 3) | v;
+            p++;
+            if (c >= 32) /* a third digit would make the value exceed 0377 */
+                break;
+            v = *p - '0';
+            if (v > 7)
+                break;
+            c = (c << 3) | v;
+            p++;
+        }
+        break;
+    default:
+        return -2; /* not an escape handled here; *pp is left unchanged */
+    }
+    *pp = p;
+    return c;
+}
+
+#ifdef CONFIG_ALL_UNICODE
+/* XXX: we use the same chars for name and value */
+static bool is_unicode_char(int c)
+{
+    if (c == '_' || (c >= '0' && c <= '9'))
+        return true;
+    return (c >= 'A' && c <= 'Z') ||
+           (c >= 'a' && c <= 'z');
+}
+
+/* Parse '{name}' or '{name=value}' after \p or \P and build the matching
+   code point set in cr (complemented when is_inv). Return 0 and advance
+   *pp past the '}' on success. On failure return -1 with the error
+   message set; any storage allocated for cr has been freed. */
+static int parse_unicode_property(REParseState *s, CharRange *cr,
+                                  const uint8_t **pp, bool is_inv)
+{
+    const uint8_t *p;
+    char name[64], value[64];
+    char *q;
+    bool script_ext;
+    int ret;
+
+    p = *pp;
+    if (*p != '{')
+        return re_parse_error(s, "expecting '{' after \\p");
+    p++;
+    q = name;
+    while (is_unicode_char(*p)) {
+        /* keep one byte free for the terminating NUL */
+        if ((size_t)(q - name) >= sizeof(name) - 1)
+            goto unknown_property_name;
+        *q++ = *p++;
+    }
+    *q = '\0';
+    q = value;
+    if (*p == '=') {
+        p++;
+        while (is_unicode_char(*p)) {
+            if ((size_t)(q - value) >= sizeof(value) - 1)
+                return re_parse_error(s, "unknown unicode property value");
+            *q++ = *p++;
+        }
+    }
+    *q = '\0';
+    if (*p != '}')
+        return re_parse_error(s, "expecting '}'");
+    p++;
+
+    if (!strcmp(name, "Script") || !strcmp(name, "sc")) {
+        script_ext = false;
+        goto do_script;
+    } else if (!strcmp(name, "Script_Extensions") || !strcmp(name, "scx")) {
+        script_ext = true;
+    do_script:
+        cr_init(cr, s->mem_opaque, lre_realloc);
+        ret = unicode_script(cr, value, script_ext);
+        if (ret) {
+            cr_free(cr);
+            if (ret == -2)
+                return re_parse_error(s, "unknown unicode script");
+            goto out_of_memory;
+        }
+    } else if (!strcmp(name, "General_Category") || !strcmp(name, "gc")) {
+        cr_init(cr, s->mem_opaque, lre_realloc);
+        ret = unicode_general_category(cr, value);
+        if (ret) {
+            cr_free(cr);
+            if (ret == -2)
+                return re_parse_error(s, "unknown unicode general category");
+            goto out_of_memory;
+        }
+    } else if (value[0] == '\0') {
+        /* lone name: try unicode_general_category(), then unicode_prop() */
+        cr_init(cr, s->mem_opaque, lre_realloc);
+        ret = unicode_general_category(cr, name);
+        if (ret == -1) {
+            cr_free(cr);
+            goto out_of_memory;
+        }
+        if (ret < 0) {
+            ret = unicode_prop(cr, name);
+            if (ret) {
+                cr_free(cr);
+                if (ret == -2)
+                    goto unknown_property_name;
+                goto out_of_memory;
+            }
+        }
+    } else {
+    unknown_property_name:
+        return re_parse_error(s, "unknown unicode property name");
+    }
+
+    if (is_inv && cr_invert(cr)) {
+        cr_free(cr);
+        goto out_of_memory; /* report the failure instead of a stale message */
+    }
+    *pp = p;
+    return 0;
+ out_of_memory:
+    return re_parse_out_of_memory(s);
+}
+#endif /* CONFIG_ALL_UNICODE */
+
+/* Parse a single class atom starting at '*pp': a literal character,
+   an escape sequence or a character class escape (\d \D \s \S \w \W,
+   plus \p and \P when CONFIG_ALL_UNICODE is defined and s->is_utf16
+   is set).
+   'inclass' is true when the atom is read inside '[...]'; in this
+   function it only widens the set of characters accepted after '\c'.
+   Return -1 if error otherwise the character or a class range
+   (CLASS_RANGE_BASE). In case of class range, 'cr' is
+   initialized (the callers in this file release it with cr_free()).
+   Otherwise, it is ignored. On success '*pp' is advanced past the
+   atom. */
+static int get_class_atom(REParseState *s, CharRange *cr,
+                          const uint8_t **pp, bool inclass)
+{
+    const uint8_t *p;
+    uint32_t c;
+    int ret;
+    
+    p = *pp;
+
+    c = *p;
+    switch(c) {
+    case '\\':
+        p++;
+        if (p >= s->buf_end)
+            goto unexpected_end;
+        /* 'c' is the character following the backslash and 'p' now
+           points just after it */
+        c = *p++;
+        switch(c) {
+        case 'd':
+            c = CHAR_RANGE_d;
+            goto class_range;
+        case 'D':
+            c = CHAR_RANGE_D;
+            goto class_range;
+        case 's':
+            c = CHAR_RANGE_s;
+            goto class_range;
+        case 'S':
+            c = CHAR_RANGE_S;
+            goto class_range;
+        case 'w':
+            c = CHAR_RANGE_w;
+            goto class_range;
+        case 'W':
+            c = CHAR_RANGE_W;
+        class_range:
+            /* 'c' holds a CHAR_RANGE_x identifier: build the
+               corresponding range in 'cr' */
+            if (cr_init_char_range(s, cr, c))
+                return -1;
+            c = CLASS_RANGE_BASE;
+            break;
+        case 'c':
+            /* control escape '\cX': the result is X & 0x1f. Digits
+               and '_' are only accepted inside a class in non
+               unicode mode */
+            c = *p;
+            if ((c >= 'a' && c <= 'z') ||
+                (c >= 'A' && c <= 'Z') ||
+                (((c >= '0' && c <= '9') || c == '_') &&
+                 inclass && !s->is_utf16)) {   /* Annex B.1.4 */
+                c &= 0x1f;
+                p++;
+            } else if (s->is_utf16) {
+                goto invalid_escape;
+            } else {
+                /* otherwise return '\' and 'c': 'p' is moved back
+                   onto the 'c' so that it is parsed as the next
+                   atom */
+                p--;
+                c = '\\';
+            }
+            break;
+#ifdef CONFIG_ALL_UNICODE
+        case 'p':
+        case 'P':
+            /* unicode property escape, only in unicode mode;
+               otherwise handled as a generic escape below */
+            if (s->is_utf16) {
+                if (parse_unicode_property(s, cr, &p, (c == 'P')))
+                    return -1;
+                c = CLASS_RANGE_BASE;
+                break;
+            }
+            /* fall thru */
+#endif
+        default:
+            /* move 'p' back onto the character following the
+               backslash before calling lre_parse_escape() */
+            p--;
+            ret = lre_parse_escape(&p, s->is_utf16 * 2);
+            if (ret >= 0) {
+                c = ret;
+            } else {
+                if (ret == -2 && *p != '\0' && strchr("^$\\.*+?()[]{}|/", *p)) {
+                    /* always valid to escape these characters */
+                    goto normal_char;
+                } else if (s->is_utf16) {
+                invalid_escape:
+                    return re_parse_error(s, "invalid escape sequence in regular expression");
+                } else {
+                    /* just ignore the '\' */
+                    goto normal_char;
+                }
+            }
+            break;
+        }
+        break;
+    case '\0':
+        /* a zero byte is only an error when it is the end of the
+           buffer; otherwise it is a normal character */
+        if (p >= s->buf_end) {
+        unexpected_end:
+            return re_parse_error(s, "unexpected end");
+        }
+        /* fall thru */
+    default:
+    normal_char:
+        /* normal char: 'c' is the byte at 'p' */
+        if (c >= 128) {
+            /* non ASCII byte: decode the whole code point */
+            c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
+            if ((unsigned)c > 0xffff && !s->is_utf16) {
+                /* XXX: should handle non BMP-1 code points */
+                return re_parse_error(s, "malformed unicode char");
+            }
+        } else {
+            p++;
+        }
+        break;
+    }
+    *pp = p;
+    return c;
+}
+
+/* Emit the opcode matching the character set 'cr'. 'cr->points' is
+   read as consecutive (start, end) pairs where 'end' is exclusive.
+   REOP_range (16 bit bounds) is used when the largest finite bound
+   fits in 16 bits, REOP_range32 otherwise. An empty set is emitted as
+   a REOP_char32 on the value -1, i.e. a match that is always false.
+   Return 0 if OK, -1 (through re_parse_error()) if there are too
+   many ranges. */
+static int re_emit_range(REParseState *s, const CharRange *cr)
+{
+    int pair_count, k;
+    uint32_t top;
+
+    pair_count = (unsigned)cr->len / 2;
+    if (pair_count >= 65535)
+        return re_parse_error(s, "too many ranges");
+
+    if (pair_count == 0) {
+        /* not sure it can really happen */
+        re_emit_op_u32(s, REOP_char32, -1);
+        return 0;
+    }
+
+    /* largest bound, ignoring a trailing UINT32_MAX ("infinity") */
+    top = cr->points[cr->len - 1];
+    if (top == UINT32_MAX)
+        top = cr->points[cr->len - 2];
+
+    if (top > 0xffff) {
+        re_emit_op_u16(s, REOP_range32, pair_count);
+        for(k = 0; k < cr->len; k += 2) {
+            dbuf_put_u32(&s->byte_code, cr->points[k]);
+            dbuf_put_u32(&s->byte_code, cr->points[k + 1] - 1);
+        }
+        return 0;
+    }
+
+    /* 16 bit ranges, with the convention that 0xffff = infinity */
+    re_emit_op_u16(s, REOP_range, pair_count);
+    for(k = 0; k < cr->len; k += 2) {
+        uint32_t upper = cr->points[k + 1] - 1;
+
+        dbuf_put_u16(&s->byte_code, cr->points[k]);
+        dbuf_put_u16(&s->byte_code,
+                     upper == UINT32_MAX - 1 ? 0xffff : upper);
+    }
+    return 0;
+}
+
+/* Parse a bracketed character class. '*pp' must point to the opening
+   '['. The atoms and ranges are accumulated in a local CharRange,
+   which is canonicalized when s->ignore_case is set, then inverted
+   when the class starts with '^', and finally emitted with
+   re_emit_range().
+   Return 0 and set '*pp' after the closing ']' if OK. Return -1 in
+   case of error (out of memory conditions are reported with
+   re_parse_out_of_memory()). */
+static int re_parse_char_class(REParseState *s, const uint8_t **pp)
+{
+    const uint8_t *p;
+    uint32_t c1, c2;
+    CharRange cr_s, *cr = &cr_s;     /* accumulated class */
+    CharRange cr1_s, *cr1 = &cr1_s;  /* set by get_class_atom() when it
+                                        returns a class range */
+    bool invert;
+    
+    cr_init(cr, s->mem_opaque, lre_realloc);
+    p = *pp;
+    p++;    /* skip '[' */
+    invert = false;
+    if (*p == '^') {
+        p++;
+        invert = true;
+    }
+    for(;;) {
+        if (*p == ']')
+            break;
+        c1 = get_class_atom(s, cr1, &p, true);
+        if ((int)c1 < 0)
+            goto fail;
+        if (*p == '-' && p[1] != ']') {
+            /* potential range 'c1-c2'; the second atom is parsed
+               with a temporary pointer so that 'p' stays on the '-'
+               when the range is rejected */
+            const uint8_t *p0 = p + 1;
+            if (c1 >= CLASS_RANGE_BASE) {
+                if (s->is_utf16) {
+                    cr_free(cr1);
+                    goto invalid_class_range;
+                }
+                /* Annex B: match '-' character */
+                goto class_atom;
+            }
+            c2 = get_class_atom(s, cr1, &p0, true);
+            if ((int)c2 < 0)
+                goto fail;
+            if (c2 >= CLASS_RANGE_BASE) {
+                /* the class range of c2 is dropped here: as 'p' was
+                   not advanced, the '-' and the second atom are
+                   parsed again by the next iterations */
+                cr_free(cr1);
+                if (s->is_utf16) {
+                    goto invalid_class_range;
+                }
+                /* Annex B: match '-' character */
+                goto class_atom;
+            }
+            p = p0;
+            if (c2 < c1) {
+            invalid_class_range:
+                re_parse_error(s, "invalid class range");
+                goto fail;
+            }
+            if (cr_union_interval(cr, c1, c2))
+                goto memory_error;
+        } else {
+        class_atom:
+            /* single atom: either a class range held in 'cr1' or the
+               single character c1 */
+            if (c1 >= CLASS_RANGE_BASE) {
+                int ret;
+                ret = cr_union1(cr, cr1->points, cr1->len);
+                cr_free(cr1);
+                if (ret)
+                    goto memory_error;
+            } else {
+                if (cr_union_interval(cr, c1, c1))
+                    goto memory_error;
+            }
+        }
+    }
+    if (s->ignore_case) {
+        if (cr_canonicalize(cr))
+            goto memory_error;
+    }
+    if (invert) {
+        if (cr_invert(cr))
+            goto memory_error;
+    }
+    if (re_emit_range(s, cr))
+        goto fail;
+    cr_free(cr);
+    p++;    /* skip ']' */
+    *pp = p;
+    return 0;
+ memory_error:
+    re_parse_out_of_memory(s);
+ fail:
+    cr_free(cr);
+    return -1;
+}
+
+/* Scan the opcodes in bc_buf[0 .. bc_buf_len - 1]. Return:
+   1 if the opcodes in bc_buf[] always advance the character pointer
+     (a character matching opcode is met before any opcode whose
+     effect cannot be predicted).
+   0 if the character pointer may not be advanced.
+   -1 if the code may depend on side effects of its previous
+     execution (it contains a back reference to a capture saved in
+     the same code).
+*/
+static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
+{
+    /* per capture flags: bit 0 = saved in this code, bit 1 = read by a
+       back reference in this code */
+    uint8_t capture_flags[CAPTURE_COUNT_MAX];
+    bool back_ref_seen = false;
+    int verdict = -2; /* -2 = not known yet */
+    int pos = 0, op, op_size, k;
+    uint32_t arg, stop;
+
+    memset(capture_flags, 0, sizeof(capture_flags));
+
+    while (pos < bc_buf_len) {
+        op = bc_buf[pos];
+        op_size = reopcode_info[op].size;
+        switch(op) {
+        case REOP_range:
+        case REOP_range32:
+            /* variable size opcodes: 4 or 8 bytes per range */
+            arg = get_u16(bc_buf + pos + 1);
+            op_size += arg * (op == REOP_range ? 4 : 8);
+            /* fall thru */
+        case REOP_char:
+        case REOP_char32:
+        case REOP_dot:
+        case REOP_any:
+            if (verdict == -2)
+                verdict = 1;
+            break;
+        case REOP_line_start:
+        case REOP_line_end:
+        case REOP_push_i32:
+        case REOP_push_char_pos:
+        case REOP_drop:
+        case REOP_word_boundary:
+        case REOP_not_word_boundary:
+        case REOP_prev:
+            /* these opcodes do not change the verdict */
+            break;
+        case REOP_save_start:
+        case REOP_save_end:
+            arg = bc_buf[pos + 1];
+            capture_flags[arg] |= 1;
+            break;
+        case REOP_save_reset:
+            /* NOTE(review): the upper bound is exclusive here whereas
+               re_parse_term() stores 'capture_count - 1' as second
+               operand - confirm that this is intended */
+            stop = bc_buf[pos + 2];
+            for(arg = bc_buf[pos + 1]; arg < stop; arg++)
+                capture_flags[arg] |= 1;
+            break;
+        case REOP_back_reference:
+        case REOP_backward_back_reference:
+            arg = bc_buf[pos + 1];
+            capture_flags[arg] |= 2;
+            back_ref_seen = true;
+            break;
+        default:
+            /* safe behavior: we cannot predict the outcome */
+            if (verdict == -2)
+                verdict = 0;
+            break;
+        }
+        pos += op_size;
+    }
+
+    if (back_ref_seen) {
+        /* check if there is a back reference which references a
+           capture made in the same code */
+        for(k = 0; k < CAPTURE_COUNT_MAX; k++) {
+            if (capture_flags[k] == (1 | 2))
+                return -1;
+        }
+    }
+    return verdict == -2 ? 0 : verdict;
+}
+
+/* Tell whether the atom in bc_buf[0 .. bc_buf_len - 1] only contains
+   character matching opcodes and line/word boundary assertions.
+   Return -1 if a simple quantifier cannot be used (any other opcode
+   is present). Otherwise return the number of characters in the
+   atom. */
+static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
+{
+    int pos, op, op_size, char_count;
+
+    char_count = 0;
+    for(pos = 0; pos < bc_buf_len; pos += op_size) {
+        op = bc_buf[pos];
+        op_size = reopcode_info[op].size;
+        if (op == REOP_range || op == REOP_range32) {
+            /* variable size opcodes: 4 or 8 bytes per range */
+            uint32_t range_count = get_u16(bc_buf + pos + 1);
+
+            op_size += range_count * (op == REOP_range ? 4 : 8);
+            char_count++;
+        } else if (op == REOP_char || op == REOP_char32 ||
+                   op == REOP_dot || op == REOP_any) {
+            char_count++;
+        } else if (op != REOP_line_start && op != REOP_line_end &&
+                   op != REOP_word_boundary &&
+                   op != REOP_not_word_boundary) {
+            return -1;
+        }
+    }
+    return char_count;
+}
+
+/* Parse a group name terminated by '>'. '*pp' is the first char
+   after '<'. The name is stored in 'buf' (size 'buf_size') as a zero
+   terminated UTF-8 string. A '\u' escape is accepted and decoded with
+   lre_parse_escape().
+   Return 0 and set '*pp' after the '>' if OK. Return -1 (and leave
+   '*pp' unchanged) if the name is empty, too long, or contains a
+   character which is not valid in an identifier. */
+static int re_parse_group_name(char *buf, int buf_size,
+                               const uint8_t **pp, bool is_utf16)
+{
+    const uint8_t *src = *pp;
+    char *dst = buf;
+    uint32_t cp;
+
+    while (*src != '>') {
+        cp = *src;
+        if (cp == '\\') {
+            src++;
+            if (*src != 'u')
+                return -1;
+            /* a negative error code becomes a large unsigned value
+               which is rejected by the range check below */
+            cp = lre_parse_escape(&src, is_utf16 * 2);
+        } else if (cp >= 128) {
+            cp = unicode_from_utf8(src, UTF8_CHAR_LEN_MAX, &src);
+        } else {
+            src++;
+        }
+        if (cp > 0x10FFFF)
+            return -1;
+        if (!(dst == buf ? lre_js_is_ident_first(cp)
+                         : lre_js_is_ident_next(cp)))
+            return -1;
+        /* keep room for the longest encoding and the final zero */
+        if ((dst - buf + UTF8_CHAR_LEN_MAX + 1) > buf_size)
+            return -1;
+        if (cp < 128)
+            *dst++ = cp;
+        else
+            dst += unicode_to_utf8((uint8_t*)dst, cp);
+    }
+    if (dst == buf)
+        return -1;
+    *dst = '\0';
+    *pp = src + 1;  /* skip '>' */
+    return 0;
+}
+
+/* Scan the whole pattern (s->buf_start .. s->buf_end) without
+   compiling it and number the capture groups in the order of their
+   opening parenthesis, starting at 1. Escaped characters and the
+   content of character classes are skipped.
+   '*phas_named_captures' is set to 1 if a potential named capture
+   '(?<' is met, 0 otherwise.
+   if capture_name = NULL: return the number of captures + 1.
+   Otherwise, return the capture index corresponding to capture_name
+   or -1 if none */
+static int re_parse_captures(REParseState *s, int *phas_named_captures,
+                             const char *capture_name)
+{
+    const uint8_t *p;
+    int capture_index;
+    char name[TMP_BUF_SIZE];
+
+    capture_index = 1;
+    *phas_named_captures = 0;
+    for (p = s->buf_start; p < s->buf_end; p++) {
+        switch (*p) {
+        case '(':
+            if (p[1] == '?') {
+                if (p[2] == '<' && p[3] != '=' && p[3] != '!') {
+                    *phas_named_captures = 1;
+                    /* potential named capture */
+                    if (capture_name) {
+                        /* parse with a temporary pointer so that 'p'
+                           is left untouched when the name is invalid:
+                           the scan then continues exactly as in the
+                           capture_name = NULL case */
+                        const uint8_t *name_end = p + 3;
+                        if (re_parse_group_name(name, sizeof(name),
+                                                &name_end,
+                                                s->is_utf16) == 0) {
+                            if (!strcmp(name, capture_name))
+                                return capture_index;
+                            /* 'name_end' is after the '>': resume on
+                               the '>' so that the loop increment
+                               lands on the next character instead of
+                               skipping it (it may be a '(', '[' or
+                               '\') */
+                            p = name_end - 1;
+                        }
+                    }
+                    capture_index++;
+                }
+            } else {
+                capture_index++;
+            }
+            break;
+        case '\\':
+            /* skip the escaped character */
+            p++;
+            break;
+        case '[':
+            /* skip the character class. As in re_parse_char_class(),
+               a ']' right after the '[' closes the class */
+            for (p++; p < s->buf_end && *p != ']'; p++) {
+                if (*p == '\\')
+                    p++;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    if (capture_name)
+        return -1;
+    else
+        return capture_index;
+}
+
+/* Return the total capture count of the pattern as computed by
+   re_parse_captures(). The result is cached in
+   s->total_capture_count (a negative value means "not computed
+   yet"); s->has_named_captures is updated by the same scan. */
+static int re_count_captures(REParseState *s)
+{
+    if (s->total_capture_count >= 0)
+        return s->total_capture_count;
+    s->total_capture_count =
+        re_parse_captures(s, &s->has_named_captures, NULL);
+    return s->total_capture_count;
+}
+
+/* Return true if the pattern contains a potential named capture.
+   The whole pattern is scanned with re_count_captures() when
+   s->has_named_captures is still negative (not known yet). */
+static bool re_has_named_captures(REParseState *s)
+{
+    if (s->has_named_captures < 0) {
+        (void)re_count_captures(s);
+    }
+    return s->has_named_captures != 0;
+}
+
+/* Look for 'name' in s->group_names, a sequence of zero terminated
+   strings with one entry per capture parsed so far (re_parse_term()
+   stores an empty string for unnamed captures).
+   Return the capture index (the first entry has index 1) or -1 if
+   the name is not found. */
+static int find_group_name(REParseState *s, const char *name)
+{
+    const char *entry = (char *)s->group_names.buf;
+    const char *limit = (char *)s->group_names.buf + s->group_names.size;
+    size_t wanted_len = strlen(name);
+    int index;
+
+    for (index = 1; entry < limit; index++) {
+        size_t entry_len = strlen(entry);
+
+        if (entry_len == wanted_len &&
+            memcmp(name, entry, wanted_len) == 0)
+            return index;
+        entry += entry_len + 1;  /* skip the string and its zero */
+    }
+    return -1;
+}
+
+static int re_parse_disjunction(REParseState *s, bool is_backward_dir);
+
+/* Parse one term starting at s->buf_ptr and append its bytecode to
+   s->byte_code. A term is either an assertion ('^', '$', '\b', '\B',
+   a lookahead or a lookbehind) or an atom ('.', group, back
+   reference, character class, character) optionally followed by a
+   quantifier ('*', '+', '?', '{n}', '{n,}', '{n,m}', with an
+   optional '?' suffix for the non greedy form).
+   When 'is_backward_dir' is true, REOP_prev is emitted before and
+   after each character matching opcode and the backward variants of
+   the capture and back reference opcodes are used.
+   'last_atom_start' is the bytecode offset of the atom a quantifier
+   applies to, or -1 if the term cannot be quantified.
+   Return 0 and update s->buf_ptr if OK, -1 if error. */
+static int re_parse_term(REParseState *s, bool is_backward_dir)
+{
+    const uint8_t *p;
+    int c, last_atom_start, quant_min, quant_max, last_capture_count;
+    bool greedy, add_zero_advance_check, is_neg, is_backward_lookahead;
+    CharRange cr_s, *cr = &cr_s;
+    
+    last_atom_start = -1;
+    last_capture_count = 0;
+    p = s->buf_ptr;
+    c = *p;
+    switch(c) {
+    case '^':
+        p++;
+        re_emit_op(s, REOP_line_start);
+        break;
+    case '$':
+        p++;
+        re_emit_op(s, REOP_line_end);
+        break;
+    case '.':
+        p++;
+        last_atom_start = s->byte_code.size;
+        last_capture_count = s->capture_count;
+        if (is_backward_dir)
+            re_emit_op(s, REOP_prev);
+        re_emit_op(s, s->dotall ? REOP_any : REOP_dot);
+        if (is_backward_dir)
+            re_emit_op(s, REOP_prev);
+        break;
+    case '{':
+        if (s->is_utf16) {
+            return re_parse_error(s, "syntax error");
+        } else if (!is_digit(p[1])) {
+            /* Annex B: we accept '{' not followed by digits as a
+               normal atom */
+            goto parse_class_atom;
+        } else {
+            const uint8_t *p1 = p + 1;
+            /* Annex B: error if it is like a repetition count */
+            parse_digits(&p1, true);
+            if (*p1 == ',') {
+                p1++;
+                if (is_digit(*p1)) {
+                    parse_digits(&p1, true);
+                }
+            }
+            if (*p1 != '}') {
+                goto parse_class_atom;
+            }
+        }
+        /* fall thru */
+    case '*':
+    case '+':
+    case '?':
+        return re_parse_error(s, "nothing to repeat");
+    case '(':
+        if (p[1] == '?') {
+            if (p[2] == ':') {
+                /* non capturing group */
+                p += 3;
+                last_atom_start = s->byte_code.size;
+                last_capture_count = s->capture_count;
+                s->buf_ptr = p;
+                if (re_parse_disjunction(s, is_backward_dir))
+                    return -1;
+                p = s->buf_ptr;
+                if (re_parse_expect(s, &p, ')'))
+                    return -1;
+            } else if ((p[2] == '=' || p[2] == '!')) {
+                is_neg = (p[2] == '!');
+                is_backward_lookahead = false;
+                p += 3;
+                goto lookahead;
+            } else if (p[2] == '<' &&
+                       (p[3] == '=' || p[3] == '!')) {
+                int pos;
+                is_neg = (p[3] == '!');
+                is_backward_lookahead = true;
+                p += 4;
+                /* lookahead */
+            lookahead:
+                /* Annex B allows lookahead to be used as an atom for
+                   the quantifiers */
+                if (!s->is_utf16 && !is_backward_lookahead)  {
+                    last_atom_start = s->byte_code.size;
+                    last_capture_count = s->capture_count;
+                }
+                /* the 32 bit operand is a placeholder which is
+                   patched below once the size of the body is known */
+                pos = re_emit_op_u32(s, REOP_lookahead + is_neg, 0);
+                s->buf_ptr = p;
+                if (re_parse_disjunction(s, is_backward_lookahead))
+                    return -1;
+                p = s->buf_ptr;
+                if (re_parse_expect(s, &p, ')'))
+                    return -1;
+                re_emit_op(s, REOP_match);
+                /* jump after the 'match' after the lookahead is successful */
+                if (dbuf_error(&s->byte_code))
+                    return -1;
+                put_u32(s->byte_code.buf + pos, s->byte_code.size - (pos + 4));
+            } else if (p[2] == '<') {
+                /* named capture group */
+                p += 3;
+                if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
+                                        &p, s->is_utf16)) {
+                    return re_parse_error(s, "invalid group name");
+                }
+                if (find_group_name(s, s->u.tmp_buf) > 0) {
+                    return re_parse_error(s, "duplicate group name");
+                }
+                /* group name with a trailing zero */
+                dbuf_put(&s->group_names, (uint8_t *)s->u.tmp_buf,
+                         strlen(s->u.tmp_buf) + 1);
+                s->has_named_captures = 1;
+                goto parse_capture;
+            } else {
+                return re_parse_error(s, "invalid group");
+            }
+        } else {
+            int capture_index;
+            p++;
+            /* capture without group name: an empty name is stored so
+               that s->group_names has one entry per capture */
+            dbuf_putc(&s->group_names, 0);
+        parse_capture:
+            if (s->capture_count >= CAPTURE_COUNT_MAX)
+                return re_parse_error(s, "too many captures");
+            last_atom_start = s->byte_code.size;
+            last_capture_count = s->capture_count;
+            capture_index = s->capture_count++;
+            re_emit_op_u8(s, REOP_save_start + is_backward_dir,
+                          capture_index);
+            
+            s->buf_ptr = p;
+            if (re_parse_disjunction(s, is_backward_dir))
+                return -1;
+            p = s->buf_ptr;
+            
+            re_emit_op_u8(s, REOP_save_start + 1 - is_backward_dir,
+                          capture_index);
+            
+            if (re_parse_expect(s, &p, ')'))
+                return -1;
+        }
+        break;
+    case '\\':
+        switch(p[1]) {
+        case 'b':
+        case 'B':
+            re_emit_op(s, REOP_word_boundary + (p[1] != 'b'));
+            p += 2;
+            break;
+        case 'k':
+            {
+                /* named back reference '\k<name>' */
+                const uint8_t *p1;
+                int dummy_res;
+                
+                p1 = p;
+                if (p1[2] != '<') {
+                    /* annex B: we tolerate invalid group names in non
+                       unicode mode if there is no named capture
+                       definition */
+                    if (s->is_utf16 || re_has_named_captures(s))
+                        return re_parse_error(s, "expecting group name");
+                    else
+                        goto parse_class_atom;
+                }
+                p1 += 3;
+                if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
+                                        &p1, s->is_utf16)) {
+                    if (s->is_utf16 || re_has_named_captures(s))
+                        return re_parse_error(s, "invalid group name");
+                    else
+                        goto parse_class_atom;
+                }
+                c = find_group_name(s, s->u.tmp_buf);
+                if (c < 0) {
+                    /* no capture name parsed before, try to look
+                       after (inefficient, but hopefully not common) */
+                    c = re_parse_captures(s, &dummy_res, s->u.tmp_buf);
+                    if (c < 0) {
+                        if (s->is_utf16 || re_has_named_captures(s))
+                            return re_parse_error(s, "group name not defined");
+                        else
+                            goto parse_class_atom;
+                    }
+                }
+                p = p1;
+            }
+            goto emit_back_reference;
+        case '0':
+            p += 2;
+            c = 0;
+            if (s->is_utf16) {
+                if (is_digit(*p)) {
+                    return re_parse_error(s, "invalid decimal escape in regular expression");
+                }
+            } else {
+                /* Annex B.1.4: accept legacy octal */
+                if (*p >= '0' && *p <= '7') {
+                    c = *p++ - '0';
+                    if (*p >= '0' && *p <= '7') {
+                        c = (c << 3) + *p++ - '0';
+                    }
+                }
+            }
+            goto normal_char;
+        case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8':
+        case '9': 
+            {
+                /* 'q' remembers the first digit so that the digits can
+                   be parsed again as a legacy octal escape */
+                const uint8_t *q = ++p;
+                
+                c = parse_digits(&p, false);
+                /* the whole pattern is only scanned for its capture
+                   count when the index is not a capture already
+                   parsed */
+                if (c < 0 || (c >= s->capture_count && c >= re_count_captures(s))) {
+                    if (!s->is_utf16) {
+                        /* Annex B.1.4: accept legacy octal */
+                        p = q;
+                        if (*p <= '7') {
+                            c = 0;
+                            if (*p <= '3')
+                                c = *p++ - '0';
+                            if (*p >= '0' && *p <= '7') {
+                                c = (c << 3) + *p++ - '0';
+                                if (*p >= '0' && *p <= '7') {
+                                    c = (c << 3) + *p++ - '0';
+                                }
+                            }
+                        } else {
+                            c = *p++;
+                        }
+                        goto normal_char;
+                    }
+                    return re_parse_error(s, "back reference out of range in regular expression");
+                }
+            emit_back_reference:
+                last_atom_start = s->byte_code.size;
+                last_capture_count = s->capture_count;
+                re_emit_op_u8(s, REOP_back_reference + is_backward_dir, c);
+            }
+            break;
+        default:
+            goto parse_class_atom;
+        }
+        break;
+    case '[':
+        last_atom_start = s->byte_code.size;
+        last_capture_count = s->capture_count;
+        if (is_backward_dir)
+            re_emit_op(s, REOP_prev);
+        if (re_parse_char_class(s, &p))
+            return -1;
+        if (is_backward_dir)
+            re_emit_op(s, REOP_prev);
+        break;
+    case ']':
+    case '}':
+        if (s->is_utf16)
+            return re_parse_error(s, "syntax error");
+        goto parse_class_atom;
+    default:
+    parse_class_atom:
+        c = get_class_atom(s, cr, &p, false);
+        if ((int)c < 0)
+            return -1;
+    normal_char:
+        /* 'c' is either a character or CLASS_RANGE_BASE, in which case
+           the range is in 'cr' */
+        last_atom_start = s->byte_code.size;
+        last_capture_count = s->capture_count;
+        if (is_backward_dir)
+            re_emit_op(s, REOP_prev);
+        if (c >= CLASS_RANGE_BASE) {
+            int ret;
+            /* Note: canonicalization is not needed */
+            ret = re_emit_range(s, cr);
+            cr_free(cr);
+            if (ret)
+                return -1;
+        } else {
+            if (s->ignore_case)
+                c = lre_canonicalize(c, s->is_utf16);
+            if (c <= 0xffff)
+                re_emit_op_u16(s, REOP_char, c);
+            else
+                re_emit_op_u32(s, REOP_char32, c);
+        }
+        if (is_backward_dir)
+            re_emit_op(s, REOP_prev);
+        break;
+    }
+
+    /* quantifier: only parsed when the term is an atom, whose
+       bytecode is s->byte_code.buf[last_atom_start ..] */
+    if (last_atom_start >= 0) {
+        c = *p;
+        switch(c) {
+        case '*':
+            p++;
+            quant_min = 0;
+            quant_max = INT32_MAX;
+            goto quantifier;
+        case '+':
+            p++;
+            quant_min = 1;
+            quant_max = INT32_MAX;
+            goto quantifier;
+        case '?':
+            p++;
+            quant_min = 0;
+            quant_max = 1;
+            goto quantifier;
+        case '{':
+            {
+                const uint8_t *p1 = p;
+                /* As an extension (see ES6 annex B), we accept '{' not
+                   followed by digits as a normal atom */
+                if (!is_digit(p[1])) {
+                    if (s->is_utf16)
+                        goto invalid_quant_count;
+                    break;
+                }
+                p++;
+                quant_min = parse_digits(&p, true);
+                quant_max = quant_min;
+                if (*p == ',') {
+                    p++;
+                    if (is_digit(*p)) {
+                        quant_max = parse_digits(&p, true);
+                        if (quant_max < quant_min) {
+                        invalid_quant_count:
+                            return re_parse_error(s, "invalid repetition count");
+                        }
+                    } else {
+                        quant_max = INT32_MAX; /* infinity */
+                    }
+                }
+                if (*p != '}' && !s->is_utf16) {
+                    /* Annex B: normal atom if invalid '{' syntax */
+                    p = p1;
+                    break;
+                }
+                if (re_parse_expect(s, &p, '}'))
+                    return -1;
+            }
+        quantifier:
+            greedy = true;
+            if (*p == '?') {
+                p++;
+                greedy = false;
+            }
+            /* always false here because of the enclosing test on
+               last_atom_start */
+            if (last_atom_start < 0) {
+                return re_parse_error(s, "nothing to repeat");
+            }
+            if (greedy) {
+                int len, pos;
+                
+                if (quant_max > 0) {
+                    /* specific optimization for simple quantifiers */
+                    if (dbuf_error(&s->byte_code))
+                        goto out_of_memory;
+                    len = re_is_simple_quantifier(s->byte_code.buf + last_atom_start,
+                                                 s->byte_code.size - last_atom_start);
+                    if (len > 0) {
+                        re_emit_op(s, REOP_match);
+                        
+                        /* 17 byte header inserted before the atom: the
+                           opcode followed by four 32 bit values (size
+                           of the atom including the 'match', quant_min,
+                           quant_max, number of characters) */
+                        if (dbuf_insert(&s->byte_code, last_atom_start, 17))
+                            goto out_of_memory;
+                        pos = last_atom_start;
+                        s->byte_code.buf[pos++] = REOP_simple_greedy_quant;
+                        put_u32(&s->byte_code.buf[pos],
+                                s->byte_code.size - last_atom_start - 17);
+                        pos += 4;
+                        put_u32(&s->byte_code.buf[pos], quant_min);
+                        pos += 4;
+                        put_u32(&s->byte_code.buf[pos], quant_max);
+                        pos += 4;
+                        put_u32(&s->byte_code.buf[pos], len);
+                        pos += 4;
+                        goto done;
+                    }
+                }
+                
+                if (dbuf_error(&s->byte_code))
+                    goto out_of_memory;
+                /* the zero advance check is only added when
+                   re_check_advance() returns 0 (not 1 and not -1) */
+                add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
+                                                           s->byte_code.size - last_atom_start) == 0);
+            } else {
+                add_zero_advance_check = false;
+            }
+            
+            {
+                int len, pos;
+                /* 'len' is the size of the atom before any insertion */
+                len = s->byte_code.size - last_atom_start;
+                if (quant_min == 0) {
+                    /* need to reset the capture in case the atom is
+                       not executed */
+                    if (last_capture_count != s->capture_count) {
+                        if (dbuf_insert(&s->byte_code, last_atom_start, 3))
+                            goto out_of_memory;
+                        s->byte_code.buf[last_atom_start++] = REOP_save_reset;
+                        s->byte_code.buf[last_atom_start++] = last_capture_count;
+                        s->byte_code.buf[last_atom_start++] = s->capture_count - 1;
+                    }
+                    if (quant_max == 0) {
+                        /* the atom is never executed: drop its bytecode */
+                        s->byte_code.size = last_atom_start;
+                    } else if (quant_max == 1) {
+                        if (dbuf_insert(&s->byte_code, last_atom_start, 5))
+                            goto out_of_memory;
+                        s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
+                            greedy;
+                        put_u32(s->byte_code.buf + last_atom_start + 1, len);
+                    } else if (quant_max == INT32_MAX) {
+                        if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
+                            goto out_of_memory;
+                        s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
+                            greedy;
+                        put_u32(s->byte_code.buf + last_atom_start + 1,
+                                len + 5 + add_zero_advance_check);
+                        if (add_zero_advance_check) {
+                            /* avoid infinite loop by stopping the
+                               recursion if no advance was made in the
+                               atom (only works if the atom has no
+                               side effect) */
+                            s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
+                            re_emit_goto(s, REOP_bne_char_pos, last_atom_start); 
+                        } else {
+                            re_emit_goto(s, REOP_goto, last_atom_start);
+                        }
+                    } else {
+                        /* {0,n}: 10 byte header = push_i32 (5 bytes)
+                           followed by the split (5 bytes) */
+                        if (dbuf_insert(&s->byte_code, last_atom_start, 10))
+                            goto out_of_memory;
+                        pos = last_atom_start;
+                        s->byte_code.buf[pos++] = REOP_push_i32;
+                        put_u32(s->byte_code.buf + pos, quant_max);
+                        pos += 4;
+                        s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
+                        put_u32(s->byte_code.buf + pos, len + 5);
+                        re_emit_goto(s, REOP_loop, last_atom_start + 5);
+                        re_emit_op(s, REOP_drop);
+                    }
+                } else if (quant_min == 1 && quant_max == INT32_MAX &&
+                           !add_zero_advance_check) {
+                    re_emit_goto(s, REOP_split_next_first - greedy,
+                                 last_atom_start);
+                } else {
+                    /* general case: the mandatory repetitions come
+                       first, followed by a copy of the atom for the
+                       optional ones */
+                    if (quant_min == 1) {
+                        /* nothing to add */
+                    } else {
+                        if (dbuf_insert(&s->byte_code, last_atom_start, 5))
+                            goto out_of_memory;
+                        s->byte_code.buf[last_atom_start] = REOP_push_i32;
+                        put_u32(s->byte_code.buf + last_atom_start + 1,
+                                quant_min);
+                        last_atom_start += 5;
+                        re_emit_goto(s, REOP_loop, last_atom_start);
+                        re_emit_op(s, REOP_drop);
+                    }
+                    if (quant_max == INT32_MAX) {
+                        pos = s->byte_code.size;
+                        re_emit_op_u32(s, REOP_split_goto_first + greedy,
+                                       len + 5 + add_zero_advance_check);
+                        if (add_zero_advance_check)
+                            re_emit_op(s, REOP_push_char_pos);
+                        /* copy the atom */
+                        dbuf_put_self(&s->byte_code, last_atom_start, len);
+                        if (add_zero_advance_check)
+                            re_emit_goto(s, REOP_bne_char_pos, pos);
+                        else
+                            re_emit_goto(s, REOP_goto, pos);
+                    } else if (quant_max > quant_min) {
+                        re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
+                        pos = s->byte_code.size;
+                        re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5);
+                        /* copy the atom */
+                        dbuf_put_self(&s->byte_code, last_atom_start, len);
+                        
+                        re_emit_goto(s, REOP_loop, pos);
+                        re_emit_op(s, REOP_drop);
+                    }
+                }
+                last_atom_start = -1;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+ done:
+    s->buf_ptr = p;
+    return 0;
+ out_of_memory:
+    return re_parse_out_of_memory(s);
+}
+
+/* Parse one alternative, i.e. a sequence of terms that stops at '|',
+   at ')' or at the end of the pattern. The bytecode of each term is
+   appended to s->byte_code. When 'is_backward_dir' is set, each newly
+   parsed term is moved in front of the terms emitted so far, so that
+   the terms of the alternative end up in reverse order.
+   Return 0 if OK, a non zero value if error. */
+static int re_parse_alternative(REParseState *s, bool is_backward_dir)
+{
+    const uint8_t *p;
+    int ret;
+    size_t start, term_start, end, term_size;
+
+    start = s->byte_code.size;
+    for(;;) {
+        p = s->buf_ptr;
+        if (p >= s->buf_end)
+            break;
+        if (*p == '|' || *p == ')')
+            break;
+        term_start = s->byte_code.size;
+        ret = re_parse_term(s, is_backward_dir);
+        if (ret)
+            return ret;
+        if (is_backward_dir) {
+            /* reverse the order of the terms (XXX: inefficient, but
+               speed is not really critical here) */
+            end = s->byte_code.size;
+            term_size = end - term_start;
+            /* reserve scratch room for a second copy of the term.
+               The failure is reported like at the other allocation
+               sites so that lre_compile() does not hand back an
+               empty error message. */
+            if (dbuf_realloc(&s->byte_code, end + term_size))
+                return re_parse_out_of_memory(s);
+            /* shift everything emitted since 'start' up by term_size:
+               the new term now sits at [end, end + term_size) */
+            memmove(s->byte_code.buf + start + term_size,
+                    s->byte_code.buf + start,
+                    end - start);
+            /* and copy it into the hole opened at 'start' */
+            memcpy(s->byte_code.buf + start, s->byte_code.buf + end,
+                   term_size);
+        }
+    }
+    return 0;
+}
+    
+/* Parse a disjunction: one alternative, optionally followed by more
+   alternatives separated by '|'. For every '|' a split is inserted in
+   front of the code emitted so far and a goto is appended after it so
+   that the left hand side jumps over the right hand side.
+   Return 0 if OK, -1 (or the value of re_parse_out_of_memory()) if
+   error. */
+static int re_parse_disjunction(REParseState *s, bool is_backward_dir)
+{
+    int head, lhs_len, goto_arg, skip;
+
+    head = s->byte_code.size;
+    if (re_parse_alternative(s, is_backward_dir) != 0)
+        return -1;
+
+    for(;;) {
+        if (*s->buf_ptr != '|')
+            return 0;
+        s->buf_ptr++;
+
+        /* size of everything emitted for the left hand side */
+        lhs_len = s->byte_code.size - head;
+
+        /* make room for the split (1 opcode byte + 4 operand bytes) in
+           front of the first alternative */
+        if (dbuf_insert(&s->byte_code, head, 5) != 0)
+            return re_parse_out_of_memory(s);
+        s->byte_code.buf[head] = REOP_split_next_first;
+        /* the extra 5 bytes account for the goto emitted just below */
+        put_u32(s->byte_code.buf + head + 1, lhs_len + 5);
+
+        /* goto with a placeholder offset, fixed once the right hand
+           side has been emitted */
+        goto_arg = re_emit_op_u32(s, REOP_goto, 0);
+
+        if (re_parse_alternative(s, is_backward_dir) != 0)
+            return -1;
+
+        /* the offset is relative to the end of the goto operand */
+        skip = s->byte_code.size - (goto_arg + 4);
+        put_u32(s->byte_code.buf + goto_arg, skip);
+    }
+}
+
+/* Compute the maximum depth reached by the execution stack of the
+   compiled regexp 'bc_buf' ('bc_buf_len' bytes, header included).
+   The opcodes are scanned once in address order: the control flow is
+   recursive so the analysis can be linear.
+   Return the maximum depth, or -1 if it is larger than
+   STACK_SIZE_MAX. */
+static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
+{
+    int stack_size, stack_size_max, pos, opcode, len;
+    uint32_t val;
+
+    stack_size = 0;
+    stack_size_max = 0;
+    /* skip the header: 'pos' is relative to the first opcode */
+    bc_buf += RE_HEADER_LEN;
+    bc_buf_len -= RE_HEADER_LEN;
+    pos = 0;
+    while (pos < bc_buf_len) {
+        opcode = bc_buf[pos];
+        /* validate the opcode before it is used as a table index */
+        assert(opcode < REOP_COUNT);
+        len = reopcode_info[opcode].size;
+        assert((pos + len) <= bc_buf_len);
+        switch(opcode) {
+        case REOP_push_i32:
+        case REOP_push_char_pos:
+            stack_size++;
+            if (stack_size > stack_size_max) {
+                if (stack_size > STACK_SIZE_MAX)
+                    return -1;
+                stack_size_max = stack_size;
+            }
+            break;
+        case REOP_drop:
+        case REOP_bne_char_pos:
+            assert(stack_size > 0);
+            stack_size--;
+            break;
+        case REOP_range:
+            /* variable length: 'val' entries of 4 bytes follow */
+            val = get_u16(bc_buf + pos + 1);
+            len += val * 4;
+            break;
+        case REOP_range32:
+            /* variable length: 'val' entries of 8 bytes follow */
+            val = get_u16(bc_buf + pos + 1);
+            len += val * 8;
+            break;
+        default:
+            /* no effect on the stack, fixed size */
+            break;
+        }
+        pos += len;
+    }
+    return stack_size_max;
+}
+
+/* Compile the regular expression 'buf' to bytecode.
+
+   'buf' must be a zero terminated UTF-8 string of length buf_len.
+   're_flags' is a combination of LRE_FLAG_x values; it is stored in
+   the first byte of the header. 'opaque' is the allocator context
+   given to the bytecode buffers.
+
+   Return the compiled bytecode and store its length in *plen. If
+   error, return NULL, set *plen to 0 and copy an error message to
+   'error_msg', a buffer of 'error_msg_size' bytes supplied by the
+   caller. If OK, 'error_msg' is set to the empty string.
+*/
+uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
+                     const char *buf, size_t buf_len, int re_flags,
+                     void *opaque)
+{
+    REParseState s_s, *s = &s_s;
+    int stack_size;
+    bool is_sticky;
+
+    memset(s, 0, sizeof(*s));
+    s->mem_opaque = opaque;
+    s->buf_ptr = (const uint8_t *)buf;
+    s->buf_end = s->buf_ptr + buf_len;
+    s->buf_start = s->buf_ptr;
+    s->re_flags = re_flags;
+    s->is_utf16 = ((re_flags & LRE_FLAG_UTF16) != 0);
+    is_sticky = ((re_flags & LRE_FLAG_STICKY) != 0);
+    s->ignore_case = ((re_flags & LRE_FLAG_IGNORECASE) != 0);
+    s->dotall = ((re_flags & LRE_FLAG_DOTALL) != 0);
+    s->capture_count = 1;
+    s->total_capture_count = -1;
+    s->has_named_captures = -1;
+
+    dbuf_init2(&s->byte_code, opaque, lre_realloc);
+    dbuf_init2(&s->group_names, opaque, lre_realloc);
+
+    dbuf_putc(&s->byte_code, re_flags); /* first element is the flags */
+    dbuf_putc(&s->byte_code, 0); /* second element is the number of captures */
+    dbuf_putc(&s->byte_code, 0); /* stack size */
+    dbuf_put_u32(&s->byte_code, 0); /* bytecode length */
+
+    if (!is_sticky) {
+        /* iterate thru all positions (about the same as .*?( ... ) )
+           .  We do it without an explicit loop so that lock step
+           thread execution will be possible in an optimized
+           implementation */
+        re_emit_op_u32(s, REOP_split_goto_first, 1 + 5);
+        re_emit_op(s, REOP_any);
+        re_emit_op_u32(s, REOP_goto, -(5 + 1 + 5));
+    }
+    re_emit_op_u8(s, REOP_save_start, 0);
+
+    if (re_parse_disjunction(s, false)) {
+    error:
+        /* common error exit: release both buffers and report the
+           message recorded in the parse state */
+        dbuf_free(&s->byte_code);
+        dbuf_free(&s->group_names);
+        pstrcpy(error_msg, error_msg_size, s->u.error_msg);
+        *plen = 0;
+        return NULL;
+    }
+
+    re_emit_op_u8(s, REOP_save_end, 0);
+
+    re_emit_op(s, REOP_match);
+
+    /* the disjunction also stops at an unbalanced ')' */
+    if (*s->buf_ptr != '\0') {
+        re_parse_error(s, "extraneous characters at the end");
+        goto error;
+    }
+
+    /* the emit helpers do not report failures themselves */
+    if (dbuf_error(&s->byte_code)) {
+        re_parse_out_of_memory(s);
+        goto error;
+    }
+
+    stack_size = compute_stack_size(s->byte_code.buf, s->byte_code.size);
+    if (stack_size < 0) {
+        re_parse_error(s, "too many imbricated quantifiers");
+        goto error;
+    }
+
+    /* fill the header fields reserved above */
+    s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count;
+    s->byte_code.buf[RE_HEADER_STACK_SIZE] = stack_size;
+    put_u32(s->byte_code.buf + 3, s->byte_code.size - RE_HEADER_LEN);
+
+    /* add the named groups if needed */
+    if (s->group_names.size > (size_t)(s->capture_count - 1)) {
+        dbuf_put(&s->byte_code, s->group_names.buf, s->group_names.size);
+        /* if the append failed, setting LRE_FLAG_NAMED_GROUPS would
+           make lre_get_groupnames() point past the end of the
+           bytecode */
+        if (dbuf_error(&s->byte_code)) {
+            re_parse_out_of_memory(s);
+            goto error;
+        }
+        s->byte_code.buf[RE_HEADER_FLAGS] |= LRE_FLAG_NAMED_GROUPS;
+    }
+    dbuf_free(&s->group_names);
+
+    error_msg[0] = '\0';
+    *plen = s->byte_code.size;
+    return s->byte_code.buf;
+}
+
+/* return true if 'c' is one of the line terminators: '\n', '\r',
+   CP_LS or CP_PS */
+static bool is_line_terminator(uint32_t c)
+{
+    if (c == '\n' || c == '\r')
+        return true;
+    return c == CP_LS || c == CP_PS;
+}
+
+/* return true if 'c' is an ASCII letter, an ASCII digit or '_' */
+static bool is_word_char(uint32_t c)
+{
+    if (c == '_')
+        return true;
+    if (c >= '0' && c <= '9')
+        return true;
+    if (c >= 'A' && c <= 'Z')
+        return true;
+    return c >= 'a' && c <= 'z';
+}
+
+/* Read the character at 'cptr' into 'c' and advance 'cptr' past it.
+   The macro reads a variable named 'cbuf_type' from the scope where it
+   is expanded: 0 reads one byte, any other value reads one 16 bit
+   unit. When cbuf_type is 2, a high surrogate that is followed (before
+   'cbuf_end') by a low surrogate is combined with it into a single
+   code point and both units are consumed.
+   The first unit is read without any bound check: the caller must
+   make sure that cptr < cbuf_end. */
+#define GET_CHAR(c, cptr, cbuf_end)                                     \
+    do {                                                                \
+        if (cbuf_type == 0) {                                           \
+            c = *cptr++;                                                \
+        } else {                                                        \
+            uint32_t __c1;                                              \
+            c = *(uint16_t *)cptr;                                      \
+            cptr += 2;                                                  \
+            if (c >= 0xd800 && c < 0xdc00 &&                            \
+                cbuf_type == 2 && cptr < cbuf_end) {                    \
+                __c1 = *(uint16_t *)cptr;                               \
+                if (__c1 >= 0xdc00 && __c1 < 0xe000) {                  \
+                    c = (((c & 0x3ff) << 10) | (__c1 & 0x3ff)) + 0x10000; \
+                    cptr += 2;                                          \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+    } while (0)
+
+/* Read the character at 'cptr' into 'c' without moving 'cptr'.
+   Same decoding rules as GET_CHAR: the variable 'cbuf_type' of the
+   expansion scope selects 8 bit units (0) or 16 bit units, and with
+   cbuf_type 2 a surrogate pair lying entirely before 'cbuf_end' is
+   returned as one code point.
+   The caller must make sure that cptr < cbuf_end. */
+#define PEEK_CHAR(c, cptr, cbuf_end)             \
+    do {                                         \
+        if (cbuf_type == 0) {                    \
+            c = cptr[0];                         \
+        } else {                                 \
+            uint32_t __c1;                                              \
+            c = ((uint16_t *)cptr)[0];                                  \
+            if (c >= 0xd800 && c < 0xdc00 &&                            \
+                cbuf_type == 2 && (cptr + 2) < cbuf_end) {              \
+                __c1 = ((uint16_t *)cptr)[1];                           \
+                if (__c1 >= 0xdc00 && __c1 < 0xe000) {                  \
+                    c = (((c & 0x3ff) << 10) | (__c1 & 0x3ff)) + 0x10000; \
+                }                                                       \
+            }                                                           \
+        }                                        \
+    } while (0)
+
+/* Read the character that ends just before 'cptr' into 'c' without
+   moving 'cptr'. The variable 'cbuf_type' of the expansion scope
+   selects 8 bit units (0) or 16 bit units; with cbuf_type 2, a low
+   surrogate preceded by a high surrogate that does not start before
+   'cbuf_start' is returned as one code point.
+   The caller must make sure that cptr > cbuf_start. */
+#define PEEK_PREV_CHAR(c, cptr, cbuf_start)                 \
+    do {                                         \
+        if (cbuf_type == 0) {                    \
+            c = cptr[-1];                        \
+        } else {                                 \
+            uint32_t __c1;                                              \
+            c = ((uint16_t *)cptr)[-1];                                 \
+            if (c >= 0xdc00 && c < 0xe000 &&                            \
+                cbuf_type == 2 && (cptr - 4) >= cbuf_start) {              \
+                __c1 = ((uint16_t *)cptr)[-2];                          \
+                if (__c1 >= 0xd800 && __c1 < 0xdc00 ) {                 \
+                    c = (((__c1 & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+    } while (0)
+
+/* Move 'cptr' back by one character and store that character in 'c'.
+   The variable 'cbuf_type' of the expansion scope selects 8 bit units
+   (0) or 16 bit units; with cbuf_type 2, a low surrogate preceded by a
+   high surrogate is read as one code point and 'cptr' is moved back
+   over both units, provided the pair does not start before
+   'cbuf_start'.
+   The caller must make sure that cptr > cbuf_start. */
+#define GET_PREV_CHAR(c, cptr, cbuf_start)       \
+    do {                                         \
+        if (cbuf_type == 0) {                    \
+            cptr--;                              \
+            c = cptr[0];                         \
+        } else {                                 \
+            uint32_t __c1;                                              \
+            cptr -= 2;                                                  \
+            c = ((uint16_t *)cptr)[0];                                 \
+            if (c >= 0xdc00 && c < 0xe000 &&                            \
+                cbuf_type == 2 && cptr > cbuf_start) {                  \
+                __c1 = ((uint16_t *)cptr)[-1];                          \
+                if (__c1 >= 0xd800 && __c1 < 0xdc00 ) {                 \
+                    cptr -= 2;                                          \
+                    c = (((__c1 & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+    } while (0)
+
+/* Move 'cptr' back by one character; the character is not returned.
+   The variable 'cbuf_type' of the expansion scope selects 8 bit units
+   (0) or 16 bit units; with cbuf_type 2, 'cptr' is moved back over a
+   whole surrogate pair when the pair does not start before
+   'cbuf_start'.
+   The units are inspected through a private temporary, so the macro
+   touches nothing in the expansion scope besides 'cptr'.
+   The caller must make sure that cptr > cbuf_start. */
+#define PREV_CHAR(cptr, cbuf_start)       \
+    do {                                  \
+        if (cbuf_type == 0) {             \
+            cptr--;                       \
+        } else {                          \
+            cptr -= 2;                          \
+            if (cbuf_type == 2) {                                       \
+                uint32_t __c1;                                          \
+                __c1 = ((uint16_t *)cptr)[0];                           \
+                if (__c1 >= 0xdc00 && __c1 < 0xe000 &&                  \
+                    cptr > cbuf_start) {                                \
+                    __c1 = ((uint16_t *)cptr)[-1];                      \
+                    if (__c1 >= 0xd800 && __c1 < 0xdc00)                \
+                        cptr -= 2;                                      \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+    } while (0)
+
+/* element type of the execution stack: an entry holds either the
+   counter pushed by REOP_push_i32 or the character position pushed by
+   REOP_push_char_pos */
+typedef uintptr_t StackInt;
+
+/* kind of a record saved on the state stack. The two lookahead values
+   must stay adjacent and in this order: the matcher computes them as
+   RE_EXEC_STATE_LOOKAHEAD + (opcode - REOP_lookahead). */
+typedef enum {
+    RE_EXEC_STATE_SPLIT, /* pushed by the REOP_split_x opcodes */
+    RE_EXEC_STATE_LOOKAHEAD, /* pushed by REOP_lookahead */
+    RE_EXEC_STATE_NEGATIVE_LOOKAHEAD, /* pushed by REOP_negative_lookahead */
+    RE_EXEC_STATE_GREEDY_QUANT, /* pushed by REOP_simple_greedy_quant */
+} REExecStateEnum;
+
+/* one record of the state stack, filled by push_state(). The records
+   are stored back to back, each one taking 'state_size' bytes (see
+   REExecContext). */
+typedef struct REExecState {
+    REExecStateEnum type : 8;
+    uint8_t stack_len; /* number of execution stack entries saved in buf[] */
+    size_t count; /* only used for RE_EXEC_STATE_GREEDY_QUANT */
+    const uint8_t *cptr; /* saved input position */
+    const uint8_t *pc; /* saved bytecode position */
+    /* 2 * capture_count capture pointers followed by 'stack_len'
+       execution stack entries */
+    void *buf[];
+} REExecState;
+
+/* state of one lre_exec() call, shared by push_state() and
+   lre_exec_backtrack() */
+typedef struct {
+    const uint8_t *cbuf; /* start of the input */
+    const uint8_t *cbuf_end; /* end of the input */
+    /* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16 */
+    int cbuf_type; 
+    int capture_count; /* read from the bytecode header */
+    int stack_size_max; /* read from the bytecode header */
+    bool multi_line;
+    bool ignore_case;
+    bool is_utf16;
+    void *opaque; /* used for stack overflow check */
+
+    size_t state_size; /* size in bytes of one REExecState record */
+    uint8_t *state_stack; /* array of records, grown by push_state() */
+    size_t state_stack_size; /* allocated size of state_stack, in records */
+    size_t state_stack_len; /* number of records in use */
+} REExecContext;
+
+/* Save a backtracking record on top of the state stack of 's'. The
+   record stores 'type', 'count', 'pc' and 'cptr' together with a copy
+   of the 2 * capture_count pointers of 'capture' and of the first
+   'stack_len' entries of 'stack'. The state stack is enlarged when it
+   is full.
+   Return 0 if OK, -1 if the state stack could not be enlarged. */
+static int push_state(REExecContext *s,
+                      uint8_t **capture,
+                      StackInt *stack, size_t stack_len,
+                      const uint8_t *pc, const uint8_t *cptr,
+                      REExecStateEnum type, size_t count)
+{
+    REExecState *rec;
+    StackInt *saved_stack;
+    size_t capture_slots, k;
+
+    if (unlikely((s->state_stack_len + 1) > s->state_stack_size)) {
+        /* grow by half, starting with at least 8 records */
+        size_t grown_size = s->state_stack_size * 3 / 2;
+        uint8_t *grown_stack;
+
+        if (grown_size < 8)
+            grown_size = 8;
+        grown_stack = lre_realloc(s->opaque, s->state_stack,
+                                  grown_size * s->state_size);
+        if (!grown_stack)
+            return -1;
+        s->state_stack_size = grown_size;
+        s->state_stack = grown_stack;
+    }
+
+    /* the new record goes right after the ones already in use */
+    rec = (REExecState *)(s->state_stack +
+                          s->state_stack_len * s->state_size);
+    s->state_stack_len++;
+
+    rec->type = type;
+    rec->count = count;
+    rec->stack_len = stack_len;
+    rec->cptr = cptr;
+    rec->pc = pc;
+
+    /* captures first ... */
+    capture_slots = 2 * s->capture_count;
+    for(k = 0; k < capture_slots; k++)
+        rec->buf[k] = capture[k];
+
+    /* ... then the live part of the execution stack */
+    saved_stack = (StackInt *)(rec->buf + capture_slots);
+    for(k = 0; k < stack_len; k++)
+        saved_stack[k] = stack[k];
+    return 0;
+}
+
+/* Run the bytecode starting at 'pc' on the input starting at 'cptr',
+   backtracking through the records saved with push_state().
+
+   'capture' holds 2 * s->capture_count start/end pointers. 'stack' is
+   the execution stack; its first 'stack_len' entries are in use.
+
+   If 'no_recurse' is false: return 1 if match, 0 if not match or -1 if
+   error.
+   If 'no_recurse' is true (used by REOP_simple_greedy_quant to match
+   one repetition of its atom): return the input position reached at
+   REOP_match cast to intptr_t, 0 at the first mismatch (the state
+   stack is not examined) or -1 if error. */
+static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
+                                   StackInt *stack, int stack_len,
+                                   const uint8_t *pc, const uint8_t *cptr,
+                                   bool no_recurse)
+{
+    int opcode, ret;
+    /* 'cbuf_type' is read implicitly by the GET_CHAR/PEEK_CHAR family
+       of macros */
+    int cbuf_type;
+    uint32_t val, c;
+    const uint8_t *cbuf_end;
+    
+    cbuf_type = s->cbuf_type;
+    cbuf_end = s->cbuf_end;
+
+    for(;;) {
+        //        printf("top=%p: pc=%d\n", th_list.top, (int)(pc - (bc_buf + RE_HEADER_LEN)));
+        opcode = *pc++;
+        switch(opcode) {
+        case REOP_match:
+            {
+                REExecState *rs;
+                if (no_recurse)
+                    return (intptr_t)cptr;
+                ret = 1;
+                goto recurse;
+                /* every failing opcode jumps here */
+            no_match:
+                if (no_recurse)
+                    return 0;
+                ret = 0;
+                /* unwind the state stack with the current result
+                   'ret' until a record tells where to resume (break
+                   out of the loop) or the stack is empty (final
+                   result) */
+            recurse:
+                for(;;) {
+                    if (s->state_stack_len == 0)
+                        return ret;
+                    rs = (REExecState *)(s->state_stack +
+                                         (s->state_stack_len - 1) * s->state_size);
+                    if (rs->type == RE_EXEC_STATE_SPLIT) {
+                        /* on failure resume at the other branch of the
+                           split; on success the record is just
+                           dropped below */
+                        if (!ret) {
+                        pop_state:
+                            /* restore the captures, then everything
+                               else */
+                            memcpy(capture, rs->buf,
+                                   sizeof(capture[0]) * 2 * s->capture_count);
+                        pop_state1:
+                            /* restore position and execution stack but
+                               keep the current captures */
+                            pc = rs->pc;
+                            cptr = rs->cptr;
+                            stack_len = rs->stack_len;
+                            memcpy(stack, rs->buf + 2 * s->capture_count,
+                                   stack_len * sizeof(stack[0]));
+                            s->state_stack_len--;
+                            break;
+                        }
+                    } else if (rs->type == RE_EXEC_STATE_GREEDY_QUANT) {
+                        /* on failure give back one repetition of the
+                           atom and retry what follows the quantifier */
+                        if (!ret) {
+                            uint32_t char_count, i;
+                            memcpy(capture, rs->buf,
+                                   sizeof(capture[0]) * 2 * s->capture_count);
+                            stack_len = rs->stack_len;
+                            memcpy(stack, rs->buf + 2 * s->capture_count,
+                                   stack_len * sizeof(stack[0]));
+                            /* rs->pc points to the first operand of
+                               the REOP_simple_greedy_quant opcode */
+                            pc = rs->pc;
+                            cptr = rs->cptr;
+                            /* go backward */
+                            char_count = get_u32(pc + 12);
+                            for(i = 0; i < char_count; i++) {
+                                PREV_CHAR(cptr, s->cbuf);
+                            }
+                            /* continue after the atom: 16 operand
+                               bytes plus the offset stored in the
+                               first operand */
+                            pc = (pc + 16) + (int)get_u32(pc);
+                            /* the record is kept for the next shorter
+                               attempt until 'count' reaches 0 */
+                            rs->cptr = cptr;
+                            rs->count--;
+                            if (rs->count == 0) {
+                                s->state_stack_len--;
+                            }
+                            break;
+                        }
+                    } else {
+                        /* lookahead record: translate the result of
+                           the lookahead body into the result of the
+                           assertion */
+                        ret = ((rs->type == RE_EXEC_STATE_LOOKAHEAD && ret) ||
+                               (rs->type == RE_EXEC_STATE_NEGATIVE_LOOKAHEAD && !ret));
+                        if (ret) {
+                            /* keep the capture in case of positive lookahead */
+                            if (rs->type == RE_EXEC_STATE_LOOKAHEAD)
+                                goto pop_state1;
+                            else
+                                goto pop_state;
+                        }
+                    }
+                    /* record not used: drop it and look at the next
+                       one */
+                    s->state_stack_len--;
+                }
+            }
+            break;
+        case REOP_char32:
+            val = get_u32(pc);
+            pc += 4;
+            goto test_char;
+        case REOP_char:
+            val = get_u16(pc);
+            pc += 2;
+        test_char:
+            if (cptr >= cbuf_end)
+                goto no_match;
+            GET_CHAR(c, cptr, cbuf_end);
+            if (s->ignore_case) {
+                c = lre_canonicalize(c, s->is_utf16);
+            }
+            if (val != c)
+                goto no_match;
+            break;
+        case REOP_split_goto_first:
+        case REOP_split_next_first:
+            {
+                const uint8_t *pc1;
+                
+                val = get_u32(pc);
+                pc += 4;
+                /* 'pc' is the branch tried first, 'pc1' the branch
+                   saved for backtracking */
+                if (opcode == REOP_split_next_first) {
+                    pc1 = pc + (int)val;
+                } else {
+                    pc1 = pc;
+                    pc = pc + (int)val;
+                }
+                ret = push_state(s, capture, stack, stack_len,
+                                 pc1, cptr, RE_EXEC_STATE_SPLIT, 0);
+                if (ret < 0)
+                    return -1;
+                break;
+            }
+        case REOP_lookahead:
+        case REOP_negative_lookahead:
+            val = get_u32(pc);
+            pc += 4;
+            /* the saved pc is where execution resumes once the
+               assertion holds; the body of the lookahead starts right
+               after the operand */
+            ret = push_state(s, capture, stack, stack_len,
+                             pc + (int)val, cptr,
+                             RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead,
+                             0);
+            if (ret < 0)
+                return -1;
+            break;
+            
+        case REOP_goto:
+            val = get_u32(pc);
+            pc += 4 + (int)val;
+            break;
+        case REOP_line_start:
+            if (cptr == s->cbuf)
+                break;
+            if (!s->multi_line)
+                goto no_match;
+            PEEK_PREV_CHAR(c, cptr, s->cbuf);
+            if (!is_line_terminator(c))
+                goto no_match;
+            break;
+        case REOP_line_end:
+            if (cptr == cbuf_end)
+                break;
+            if (!s->multi_line)
+                goto no_match;
+            PEEK_CHAR(c, cptr, cbuf_end);
+            if (!is_line_terminator(c))
+                goto no_match;
+            break;
+        case REOP_dot:
+            if (cptr == cbuf_end)
+                goto no_match;
+            GET_CHAR(c, cptr, cbuf_end);
+            if (is_line_terminator(c))
+                goto no_match;
+            break;
+        case REOP_any:
+            if (cptr == cbuf_end)
+                goto no_match;
+            GET_CHAR(c, cptr, cbuf_end);
+            break;
+        case REOP_save_start:
+        case REOP_save_end:
+            val = *pc++;
+            assert(val < (uint32_t)s->capture_count);
+            /* slot 2 * val is the start, slot 2 * val + 1 the end */
+            capture[2 * val + opcode - REOP_save_start] = (uint8_t *)cptr;
+            break;
+        case REOP_save_reset:
+            {
+                /* clear the captures val..val2 (inclusive) */
+                uint32_t val2;
+                val = pc[0];
+                val2 = pc[1];
+                pc += 2;
+                assert(val2 < (uint32_t)s->capture_count);
+                while (val <= val2) {
+                    capture[2 * val] = NULL;
+                    capture[2 * val + 1] = NULL;
+                    val++;
+                }
+            }
+            break;
+        case REOP_push_i32:
+            val = get_u32(pc);
+            pc += 4;
+            stack[stack_len++] = val;
+            break;
+        case REOP_drop:
+            stack_len--;
+            break;
+        case REOP_loop:
+            /* decrement the counter on top of the stack and jump while
+               it is not zero */
+            val = get_u32(pc);
+            pc += 4;
+            if (--stack[stack_len - 1] != 0) {
+                pc += (int)val;
+            }
+            break;
+        case REOP_push_char_pos:
+            stack[stack_len++] = (uintptr_t)cptr;
+            break;
+        case REOP_bne_char_pos:
+            /* pop the saved position and jump only if the input
+               position has changed since it was pushed */
+            val = get_u32(pc);
+            pc += 4;
+            if (stack[--stack_len] != (uintptr_t)cptr)
+                pc += (int)val;
+            break;
+        case REOP_word_boundary:
+        case REOP_not_word_boundary:
+            {
+                bool v1, v2;
+                /* char before */
+                if (cptr == s->cbuf) {
+                    v1 = false;
+                } else {
+                    PEEK_PREV_CHAR(c, cptr, s->cbuf);
+                    v1 = is_word_char(c);
+                }
+                /* current char */
+                if (cptr >= cbuf_end) {
+                    v2 = false;
+                } else {
+                    PEEK_CHAR(c, cptr, cbuf_end);
+                    v2 = is_word_char(c);
+                }
+                /* the last term is 0 for REOP_not_word_boundary; the
+                   xor inverts the test for the other opcode */
+                if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode))
+                    goto no_match;
+            }
+            break;
+        case REOP_back_reference:
+        case REOP_backward_back_reference:
+            {
+                const uint8_t *cptr1, *cptr1_end, *cptr1_start;
+                uint32_t c1, c2;
+                
+                val = *pc++;
+                if (val >= (uint32_t)s->capture_count)
+                    goto no_match;
+                cptr1_start = capture[2 * val];
+                cptr1_end = capture[2 * val + 1];
+                /* a capture that is not set matches without consuming
+                   any input */
+                if (!cptr1_start || !cptr1_end)
+                    break;
+                if (opcode == REOP_back_reference) {
+                    /* compare forward from the start of the capture */
+                    cptr1 = cptr1_start;
+                    while (cptr1 < cptr1_end) {
+                        if (cptr >= cbuf_end)
+                            goto no_match;
+                        GET_CHAR(c1, cptr1, cptr1_end);
+                        GET_CHAR(c2, cptr, cbuf_end);
+                        if (s->ignore_case) {
+                            c1 = lre_canonicalize(c1, s->is_utf16);
+                            c2 = lre_canonicalize(c2, s->is_utf16);
+                        }
+                        if (c1 != c2)
+                            goto no_match;
+                    }
+                } else {
+                    /* compare backward from the end of the capture */
+                    cptr1 = cptr1_end;
+                    while (cptr1 > cptr1_start) {
+                        if (cptr == s->cbuf)
+                            goto no_match;
+                        GET_PREV_CHAR(c1, cptr1, cptr1_start);
+                        GET_PREV_CHAR(c2, cptr, s->cbuf);
+                        if (s->ignore_case) {
+                            c1 = lre_canonicalize(c1, s->is_utf16);
+                            c2 = lre_canonicalize(c2, s->is_utf16);
+                        }
+                        if (c1 != c2)
+                            goto no_match;
+                    }
+                }
+            }
+            break;
+        case REOP_range:
+            {
+                /* operand: n, then n (low, high) pairs of 16 bit
+                   values searched by dichotomy */
+                int n;
+                uint32_t low, high, idx_min, idx_max, idx;
+                
+                n = get_u16(pc); /* n must be >= 1 */
+                pc += 2;
+                if (cptr >= cbuf_end)
+                    goto no_match;
+                GET_CHAR(c, cptr, cbuf_end);
+                if (s->ignore_case) {
+                    c = lre_canonicalize(c, s->is_utf16);
+                }
+                idx_min = 0;
+                low = get_u16(pc + 0 * 4);
+                if (c < low)
+                    goto no_match;
+                idx_max = n - 1;
+                high = get_u16(pc + idx_max * 4 + 2);
+                /* 0xffff in for last value means +infinity */
+                if (unlikely(c >= 0xffff) && high == 0xffff)
+                    goto range_match;
+                if (c > high)
+                    goto no_match;
+                /* c >= low of the first pair was checked above, so
+                   'idx - 1' is never evaluated with idx == 0 */
+                while (idx_min <= idx_max) {
+                    idx = (idx_min + idx_max) / 2;
+                    low = get_u16(pc + idx * 4);
+                    high = get_u16(pc + idx * 4 + 2);
+                    if (c < low)
+                        idx_max = idx - 1;
+                    else if (c > high)
+                        idx_min = idx + 1;
+                    else
+                        goto range_match;
+                }
+                goto no_match;
+            range_match:
+                /* skip the table */
+                pc += 4 * n;
+            }
+            break;
+        case REOP_range32:
+            {
+                /* same as REOP_range with (low, high) pairs of 32 bit
+                   values and no +infinity marker */
+                int n;
+                uint32_t low, high, idx_min, idx_max, idx;
+                
+                n = get_u16(pc); /* n must be >= 1 */
+                pc += 2;
+                if (cptr >= cbuf_end)
+                    goto no_match;
+                GET_CHAR(c, cptr, cbuf_end);
+                if (s->ignore_case) {
+                    c = lre_canonicalize(c, s->is_utf16);
+                }
+                idx_min = 0;
+                low = get_u32(pc + 0 * 8);
+                if (c < low)
+                    goto no_match;
+                idx_max = n - 1;
+                high = get_u32(pc + idx_max * 8 + 4);
+                if (c > high)
+                    goto no_match;
+                while (idx_min <= idx_max) {
+                    idx = (idx_min + idx_max) / 2;
+                    low = get_u32(pc + idx * 8);
+                    high = get_u32(pc + idx * 8 + 4);
+                    if (c < low)
+                        idx_max = idx - 1;
+                    else if (c > high)
+                        idx_min = idx + 1;
+                    else
+                        goto range32_match;
+                }
+                goto no_match;
+            range32_match:
+                pc += 8 * n;
+            }
+            break;
+        case REOP_prev:
+            /* go to the previous char */
+            if (cptr == s->cbuf)
+                goto no_match;
+            PREV_CHAR(cptr, s->cbuf);
+            break;
+        case REOP_simple_greedy_quant:
+            {
+                /* operands (4 x 32 bits): offset of the code following
+                   the atom, quant_min, quant_max and the value read
+                   as 'char_count' when backtracking. The atom follows
+                   the operands. */
+                uint32_t next_pos, quant_min, quant_max;
+                size_t q;
+                intptr_t res;
+                const uint8_t *pc1;
+                
+                next_pos = get_u32(pc);
+                quant_min = get_u32(pc + 4);
+                quant_max = get_u32(pc + 8);
+                pc += 16;
+                pc1 = pc;
+                pc += (int)next_pos;
+                
+                /* match the atom as many times as possible; each
+                   nested call matches one repetition and returns the
+                   new input position */
+                q = 0;
+                for(;;) {
+                    res = lre_exec_backtrack(s, capture, stack, stack_len,
+                                             pc1, cptr, true);
+                    if (res == -1)
+                        return res;
+                    if (!res)
+                        break;
+                    cptr = (uint8_t *)res;
+                    q++;
+                    /* INT32_MAX stands for "no upper bound" */
+                    if (q >= quant_max && quant_max != INT32_MAX)
+                        break;
+                }
+                if (q < quant_min)
+                    goto no_match;
+                if (q > quant_min) {
+                    /* will examine all matches down to quant_min */
+                    ret = push_state(s, capture, stack, stack_len,
+                                     pc1 - 16, cptr,
+                                     RE_EXEC_STATE_GREEDY_QUANT,
+                                     q - quant_min);
+                    if (ret < 0)
+                        return -1;
+                }
+            }
+            break;
+        default:
+            abort();
+        }
+    }
+}
+
+/* Execute the compiled regexp 'bc_buf' on the input 'cbuf'.
+
+   'capture' receives the start/end pointers of the captures; it must
+   have room for twice the capture count stored in the bytecode header
+   and is first reset to NULL.
+   'clen' is the length of the input in characters and 'cbuf_type'
+   gives their size: 0 = 8 bit chars, 1 = 16 bit chars. With 16 bit
+   chars and LRE_FLAG_UTF16 set in the regexp, surrogate pairs are
+   decoded.
+   'opaque' is passed to lre_realloc().
+
+   Return 1 if match, 0 if not match or -1 if error. cindex is the
+   starting position of the match and must be such as 0 <= cindex <=
+   clen. */
+int lre_exec(uint8_t **capture,
+             const uint8_t *bc_buf, const uint8_t *cbuf, int cindex, int clen,
+             int cbuf_type, void *opaque)
+{
+    REExecContext s_s, *s = &s_s;
+    int re_flags, i, alloca_size, ret;
+    StackInt *stack_buf;
+
+    re_flags = bc_buf[RE_HEADER_FLAGS];
+    s->multi_line = (re_flags & LRE_FLAG_MULTILINE) != 0;
+    s->ignore_case = (re_flags & LRE_FLAG_IGNORECASE) != 0;
+    s->is_utf16 = (re_flags & LRE_FLAG_UTF16) != 0;
+    s->capture_count = bc_buf[RE_HEADER_CAPTURE_COUNT];
+    s->stack_size_max = bc_buf[RE_HEADER_STACK_SIZE];
+    s->cbuf = cbuf;
+    /* cbuf_type is also the log2 of the character size in bytes */
+    s->cbuf_end = cbuf + (clen << cbuf_type);
+    s->cbuf_type = cbuf_type;
+    if (s->cbuf_type == 1 && s->is_utf16)
+        s->cbuf_type = 2;
+    s->opaque = opaque;
+
+    /* a saved state holds the fixed part, all the captures and the
+       largest possible execution stack */
+    s->state_size = sizeof(REExecState) +
+        s->capture_count * sizeof(capture[0]) * 2 +
+        s->stack_size_max * sizeof(stack_buf[0]);
+    s->state_stack = NULL;
+    s->state_stack_len = 0;
+    s->state_stack_size = 0;
+
+    for(i = 0; i < s->capture_count * 2; i++)
+        capture[i] = NULL;
+    alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
+    stack_buf = alloca(alloca_size);
+    ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN,
+                             cbuf + (cindex << cbuf_type), false);
+    /* Release the state stack. Nothing to do if no state was ever
+       pushed: lre_realloc(NULL, 0) could allocate a block that nobody
+       frees. */
+    if (s->state_stack)
+        lre_realloc(s->opaque, s->state_stack, 0);
+    return ret;
+}
+
+/* Return the capture count byte stored in the header of the compiled
+   regexp 'bc_buf'. */
+int lre_get_capture_count(const uint8_t *bc_buf)
+{
+    const uint8_t *count_field = bc_buf + RE_HEADER_CAPTURE_COUNT;
+
+    return *count_field;
+}
+
+/* Return the flags byte stored in the header of the compiled regexp
+   'bc_buf'. */
+int lre_get_flags(const uint8_t *bc_buf)
+{
+    const uint8_t *flags_field = bc_buf + RE_HEADER_FLAGS;
+
+    return *flags_field;
+}
+
+/* Locate the capture group names of the compiled regexp 'bc_buf'.
+   When LRE_FLAG_NAMED_GROUPS is set, the result points to
+   'capture_count - 1' zero terminated UTF-8 strings; otherwise there
+   are no group names and NULL is returned. */
+const char *lre_get_groupnames(const uint8_t *bc_buf)
+{
+    if (lre_get_flags(bc_buf) & LRE_FLAG_NAMED_GROUPS) {
+        /* 32 bit length read at offset 3; the names are located that
+           many bytes past offset 7 */
+        uint32_t code_len = get_u32(bc_buf + 3);
+
+        return (const char *)(bc_buf + 7 + code_len);
+    }
+    return NULL;
+}
+
+#ifdef TEST
+
+/* Stand-alone test driver: compile argv[1] as a regexp, run it on
+   argv[2] starting at index 0, then print the lre_exec() result and,
+   on a match, the byte offset of every capture slot ("<nil>" for an
+   unset slot).
+
+   lre_check_stack_overflow() and lre_realloc() are already defined
+   unconditionally at the top of this file, so they must not be defined
+   again here: a second definition is a redefinition error as soon as
+   the file is built with -DTEST. */
+int main(int argc, char **argv)
+{
+    int len, ret, i;
+    uint8_t *bc;
+    char error_msg[64];
+    uint8_t *capture[CAPTURE_COUNT_MAX * 2];
+    const uint8_t *input;
+    int input_len, capture_count;
+
+    if (argc < 3) {
+        printf("usage: %s regexp input\n", argv[0]);
+        exit(1);
+    }
+    bc = lre_compile(&len, error_msg, sizeof(error_msg), argv[1],
+                     strlen(argv[1]), 0, NULL);
+    if (!bc) {
+        fprintf(stderr, "error: %s\n", error_msg);
+        exit(1);
+    }
+
+    /* keep the input const: lre_exec() only reads it */
+    input = (const uint8_t *)argv[2];
+    input_len = strlen(argv[2]);
+
+    ret = lre_exec(capture, bc, input, 0, input_len, 0, NULL);
+    printf("ret=%d\n", ret);
+    if (ret == 1) {
+        capture_count = lre_get_capture_count(bc);
+        for(i = 0; i < 2 * capture_count; i++) {
+            const uint8_t *ptr = capture[i];
+
+            printf("%d: ", i);
+            if (!ptr)
+                printf("<nil>");
+            else
+                printf("%d", (int)(ptr - input)); /* %d matches the int cast */
+            printf("\n");
+        }
+    }
+    return 0;
+}
+#endif