changeset 195:2557900abd48

[project @ 2002-11-23 22:02:40 by bellard] optimized token strings - optimized token hashing
author bellard
date Sat, 23 Nov 2002 22:02:40 +0000
parents 9965c0f6200d
children 7dbacc008a81
files tcc.c
diffstat 1 files changed, 197 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/tcc.c	Sat Nov 23 18:15:17 2002 +0000
+++ b/tcc.c	Sat Nov 23 22:02:40 2002 +0000
@@ -80,6 +80,9 @@
 
 #define TOK_HASH_SIZE       2048 /* must be a power of two */
 #define TOK_ALLOC_INCR      512  /* must be a power of two */
+#define TOK_STR_ALLOC_INCR_BITS 6
+#define TOK_STR_ALLOC_INCR (1 << TOK_STR_ALLOC_INCR_BITS)
+#define TOK_MAX_SIZE        4 /* token max size in int unit when stored in string */
 
 /* token symbol management */
 typedef struct TokenSym {
@@ -238,6 +241,7 @@
 typedef struct TokenString {
     int *str;
     int len;
+    int allocated_len;
     int last_line_num;
 } TokenString;
 
@@ -1115,26 +1119,11 @@
         expect("lvalue");
 }
 
-TokenSym *tok_alloc(const char *str, int len)
-{
-    TokenSym *ts, **pts, **ptable;
+/* allocate a new token */
+static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
+{
+    TokenSym *ts, **ptable;
     int i;
-    unsigned int h;
-    
-    h = 1;
-    for(i=0;i<len;i++)
-        h = h * 263 +  ((unsigned char *)str)[i];
-    h &= (TOK_HASH_SIZE - 1);
-
-    pts = &hash_ident[h];
-    while (1) {
-        ts = *pts;
-        if (!ts)
-            break;
-        if (ts->len == len && !memcmp(ts->str, str, len))
-            return ts;
-        pts = &(ts->hash_next);
-    }
 
     if (tok_ident >= SYM_FIRST_ANOM) 
         error("memory full");
@@ -1163,6 +1152,33 @@
     return ts;
 }
 
+#define TOK_HASH_INIT 1
+#define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
+
+/* find a token and add it if not found */
+static TokenSym *tok_alloc(const char *str, int len)
+{
+    TokenSym *ts, **pts;
+    int i;
+    unsigned int h;
+    
+    h = TOK_HASH_INIT;
+    for(i=0;i<len;i++)
+        h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
+    h &= (TOK_HASH_SIZE - 1);
+
+    pts = &hash_ident[h];
+    for(;;) {
+        ts = *pts;
+        if (!ts)
+            break;
+        if (ts->len == len && !memcmp(ts->str, str, len))
+            return ts;
+        pts = &(ts->hash_next);
+    }
+    return tok_alloc_new(pts, str, len);
+}
+
 /* CString handling */
 
 static void cstr_realloc(CString *cstr, int new_size)
@@ -1904,6 +1920,7 @@
 {
     s->str = NULL;
     s->len = 0;
+    s->allocated_len = 0;
     s->last_line_num = -1;
 }
 
@@ -1915,52 +1932,114 @@
 
     p = str;
     for(;;) {
-        t = *p++;
+        t = *p;
+        /* NOTE: we test zero separately so that GCC can generate a
+           table for the following switch */
         if (t == 0)
             break;
-        if (t == TOK_STR || t == TOK_LSTR || t == TOK_PPNUM) {
+        switch(t) {
+        case TOK_CINT:
+        case TOK_CUINT:
+        case TOK_CCHAR:
+        case TOK_LCHAR:
+        case TOK_CFLOAT:
+        case TOK_LINENUM:
+            p += 2;
+            break;
+        case TOK_PPNUM:
+        case TOK_STR:
+        case TOK_LSTR:
             /* XXX: use a macro to be portable on 64 bit ? */
-            cstr = (CString *)(*p++);
+            cstr = (CString *)p[1];
             cstr_free(cstr);
             tcc_free(cstr);
-        } else {
-            p += tok_ext_size(t);
+            p += 2;
+            break;
+        case TOK_CDOUBLE:
+        case TOK_CLLONG:
+        case TOK_CULLONG:
+            p += 3;
+            break;
+        case TOK_CLDOUBLE:
+            p += 1 + (LDOUBLE_SIZE / 4);
+            break;
+        default:
+            p++;
+            break;
         }
     }
     tcc_free(str);
 }
 
+static int *tok_str_realloc(TokenString *s)
+{
+    int *str, len;
+
+    len = s->allocated_len + TOK_STR_ALLOC_INCR;
+    str = tcc_realloc(s->str, len * sizeof(int));
+    if (!str)
+        error("memory full");
+    s->allocated_len = len;
+    s->str = str;
+    return str;
+}
+
 static void tok_str_add(TokenString *s, int t)
 {
     int len, *str;
 
     len = s->len;
     str = s->str;
-    if ((len & 63) == 0) {
-        str = tcc_realloc(str, (len + 64) * sizeof(int));
-        if (!str)
-            return;
-        s->str = str;
-    }
+    if (len >= s->allocated_len)
+        str = tok_str_realloc(s);
     str[len++] = t;
     s->len = len;
 }
 
 static void tok_str_add2(TokenString *s, int t, CValue *cv)
 {
-    int n, i;
-    CValue cv1;
-
-    tok_str_add(s, t);
-    if (t == TOK_STR || t == TOK_LSTR || t == TOK_PPNUM) {
-        /* special case: need to duplicate string */
-        cv1.cstr = cstr_dup(cv->cstr);
-        tok_str_add(s, cv1.tab[0]);
-    } else {
-        n = tok_ext_size(t);
-        for(i=0;i<n;i++)
-            tok_str_add(s, cv->tab[i]);
-    }
+    int len, *str;
+
+    len = s->len;
+    str = s->str;
+
+    /* allocate space for worst case */
+    if (len + TOK_MAX_SIZE > s->allocated_len)
+        str = tok_str_realloc(s);
+    str[len++] = t;
+    switch(t) {
+    case TOK_CINT:
+    case TOK_CUINT:
+    case TOK_CCHAR:
+    case TOK_LCHAR:
+    case TOK_CFLOAT:
+    case TOK_LINENUM:
+        str[len++] = cv->tab[0];
+        break;
+    case TOK_PPNUM:
+    case TOK_STR:
+    case TOK_LSTR:
+        str[len++] = (int)cstr_dup(cv->cstr);
+        break;
+    case TOK_CDOUBLE:
+    case TOK_CLLONG:
+    case TOK_CULLONG:
+        str[len++] = cv->tab[0];
+        str[len++] = cv->tab[1];
+        break;
+    case TOK_CLDOUBLE:
+#if LDOUBLE_SIZE == 12
+        str[len++] = cv->tab[0];
+        str[len++] = cv->tab[1];
+        str[len++] = cv->tab[2];
+#else
+#error add long double size support
+#endif
+        break;
+    default:
+        break;
+    }
+    s->len = len;
 }
 
 /* add the current parse token in token string 's' */
@@ -1977,18 +2056,47 @@
     tok_str_add2(s, tok, &tokc);
 }
 
-/* get a token from an integer array and increment pointer accordingly */
-static int tok_get(int **tok_str, CValue *cv)
-{
-    int *p, t, n, i;
-
-    p = *tok_str;
-    t = *p++;
-    n = tok_ext_size(t);
-    for(i=0;i<n;i++)
-        cv->tab[i] = *p++;
-    *tok_str = p;
-    return t;
+#if LDOUBLE_SIZE == 12
+#define LDOUBLE_GET(p, cv)                      \
+        cv.tab[0] = p[0];                       \
+        cv.tab[1] = p[1];                       \
+        cv.tab[2] = p[2];
+#else
+#error add long double size support
+#endif
+
+
+/* get a token from an integer array and increment pointer
+   accordingly. we code it as a macro to avoid pointer aliasing. */
+#define TOK_GET(t, p, cv)                       \
+{                                               \
+    t = *p++;                                   \
+    switch(t) {                                 \
+    case TOK_CINT:                              \
+    case TOK_CUINT:                             \
+    case TOK_CCHAR:                             \
+    case TOK_LCHAR:                             \
+    case TOK_CFLOAT:                            \
+    case TOK_LINENUM:                           \
+    case TOK_STR:                               \
+    case TOK_LSTR:                              \
+    case TOK_PPNUM:                             \
+        cv.tab[0] = *p++;                       \
+        break;                                  \
+    case TOK_CDOUBLE:                           \
+    case TOK_CLLONG:                            \
+    case TOK_CULLONG:                           \
+        cv.tab[0] = p[0];                       \
+        cv.tab[1] = p[1];                       \
+        p += 2;                                 \
+        break;                                  \
+    case TOK_CLDOUBLE:                          \
+        LDOUBLE_GET(p, cv);                     \
+        p += LDOUBLE_SIZE / 4;                  \
+        break;                                  \
+    default:                                    \
+        break;                                  \
+    }                                           \
 }
 
 /* defines handling */
@@ -2059,7 +2167,7 @@
 }
 
 /* eval an expression for #if/#elif */
-int expr_preprocess(void)
+static int expr_preprocess(void)
 {
     int c, t;
     TokenString str;
@@ -2096,13 +2204,13 @@
 }
 
 #if defined(PARSE_DEBUG) || defined(PP_DEBUG)
-void tok_print(int *str)
+static void tok_print(int *str)
 {
     int t;
     CValue cval;
 
     while (1) {
-        t = tok_get(&str, &cval);
+        TOK_GET(t, str, cval);
         if (!t)
             break;
         printf(" %s", get_tok_str(t, &cval));
@@ -2876,6 +2984,7 @@
     int b, t, c;
     TokenSym *ts;
     uint8_t *p, *p1;
+    unsigned int h;
 
     p = file->buf_ptr;
  redo_no_start:
@@ -2944,10 +3053,10 @@
         break;
 
     case '\n':
-        file->line_num++;
         if (return_linefeed) {
             tok = TOK_LINEFEED;
         } else {
+            file->line_num++;
             tok_flags |= TOK_FLAG_BOL;
             p++;
             goto redo_no_start;
@@ -2989,16 +3098,35 @@
     case '_':
     parse_ident_fast:
         p1 = p;
+        h = TOK_HASH_INIT;
+        h = TOK_HASH_FUNC(h, c);
         p++;
         for(;;) {
             c = *p;
             if (!isid(c) && !isnum(c))
                 break;
+            h = TOK_HASH_FUNC(h, c);
             p++;
         }
         if (c != '\\') {
-            /* fast case : no stray found, so we have the full token */
-            ts = tok_alloc(p1, p - p1);
+            TokenSym **pts;
+            int len;
+
+            /* fast case : no stray found, so we have the full token
+               and we have already hashed it */
+            len = p - p1;
+            h &= (TOK_HASH_SIZE - 1);
+            pts = &hash_ident[h];
+            for(;;) {
+                ts = *pts;
+                if (!ts)
+                    break;
+                if (ts->len == len && !memcmp(ts->str, p1, len))
+                    goto token_found;
+                pts = &(ts->hash_next);
+            }
+            ts = tok_alloc_new(pts, p1, len);
+        token_found: ;
         } else {
             /* slower case */
             cstr_reset(&tokcstr);
@@ -3019,8 +3147,8 @@
         tok = ts->tok;
         break;
     case 'L':
-        c = p[1];
-        if (c != '\\' && c != '\'' && c != '\"') {
+        t = p[1];
+        if (t != '\\' && t != '\'' && t != '\"') {
             /* fast case */
             goto parse_ident_fast;
         } else {
@@ -3268,7 +3396,7 @@
     redo:
         tok = *macro_ptr;
         if (tok) {
-            tok = tok_get(&macro_ptr, &tokc);
+            TOK_GET(tok, macro_ptr, tokc);
             if (tok == TOK_LINENUM) {
                 file->line_num = tokc.i;
                 goto redo;
@@ -3291,12 +3419,12 @@
     tok_str_new(&str);
     last_tok = 0;
     while(1) {
-        t = tok_get(&macro_str, &cval);
+        TOK_GET(t, macro_str, cval);
         if (!t)
             break;
         if (t == '#') {
             /* stringize */
-            t = tok_get(&macro_str, &cval);
+            TOK_GET(t, macro_str, cval);
             if (!t)
                 break;
             s = sym_find2(args, t);
@@ -3307,7 +3435,7 @@
                 while (*st) {
                     if (notfirst)
                         cstr_ccat(&cstr, ' ');
-                    t = tok_get(&st, &cval);
+                    TOK_GET(t, st, cval);
                     cstr_cat(&cstr, get_tok_str(t, &cval));
                     notfirst = 1;
                 }
@@ -3348,7 +3476,7 @@
                         int t1;
                     add_var:
                         for(;;) {
-                            t1 = tok_get(&st, &cval);
+                            TOK_GET(t1, st, cval);
                             if (!t1)
                                 break;
                             tok_str_add2(&str, t1, &cval);
@@ -3392,7 +3520,7 @@
             macro_ptr1 = macro_ptr;
             t = *macro_ptr;
             if (t) {
-                t = tok_get(&macro_ptr, &cval);
+                TOK_GET(t, macro_ptr, cval);
 
                 /* We concatenate the two tokens if we have an
                    identifier or a preprocessing number */