changeset 217:d2e6c20b9f28

[project @ 2003-01-06 20:21:42 by bellard] asm support
author bellard
date Mon, 06 Jan 2003 20:21:42 +0000
parents 2d414573219f
children 3269834f8658
files i386-asm.c i386-asm.h tccasm.c
diffstat 3 files changed, 2265 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i386-asm.c	Mon Jan 06 20:21:42 2003 +0000
@@ -0,0 +1,1077 @@
+/*
+ *  i386 specific functions for TCC assembler
+ * 
+ *  Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define MAX_OPERANDS 3 /* maximum number of operands of one instruction */
+/*
+ * One entry of the instruction table (asm_instrs): the mnemonic token,
+ * the base opcode, the OPC_xxx encoding flags and the operand kinds
+ * accepted by this form of the instruction.
+ */
+typedef struct ASMInstr {
+    uint16_t sym; /* TOK_ASM_xxx token of the (first) mnemonic; 0 ends the table */
+    uint16_t opcode; /* base opcode; a non zero high byte is emitted first */
+    uint16_t instr_type; /* OPC_xxx flags; group number from bit OPC_GROUP_SHIFT up */
+#define OPC_JMP       0x01  /* jmp operand */
+#define OPC_B         0x02  /* only used with OPC_WL */
+#define OPC_WL        0x04  /* accepts w, l or no suffix */
+#define OPC_BWL       (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
+#define OPC_REG       0x08 /* register is added to opcode */
+#define OPC_MODRM     0x10 /* modrm encoding */
+#define OPC_FWAIT     0x20 /* add fwait opcode */
+#define OPC_TEST      0x40 /* test opcodes */
+#define OPC_SHIFT     0x80 /* shift opcodes */
+#define OPC_D16      0x0100 /* generate data16 prefix */
+#define OPC_ARITH    0x0200 /* arithmetic opcodes */
+#define OPC_SHORTJMP 0x0400 /* short jmp operand */
+#define OPC_FARITH   0x0800 /* FPU arithmetic opcodes */
+#define OPC_GROUP_SHIFT 13 /* bit position of the 3 bit group number in instr_type */
+
+/* to keep each operand type in a single byte, the table stores small
+   operand kind numbers (OPT_xxx) instead of bit masks; the only flag
+   that may be ored with them is OPT_EA */ 
+#define OPT_REG8  0 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_REG16 1 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_REG32 2 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_MMX   3 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_SSE   4 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_CR    5 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_TR    6 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_DB    7 /* warning: value is hardcoded from TOK_ASM_xxx */
+#define OPT_SEG   8
+#define OPT_ST    9
+#define OPT_IM8   10
+#define OPT_IM8S  11
+#define OPT_IM16  12
+#define OPT_IM32  13
+#define OPT_EAX   14 /* %al, %ax or %eax register */
+#define OPT_ST0   15 /* %st(0) register */
+#define OPT_CL    16 /* %cl register */
+#define OPT_DX    17 /* %dx register */
+#define OPT_ADDR  18 /* OP_EA with only offset */
+#define OPT_INDIR 19 /* *(expr) */
+
+/* composite types: expanded to several OP_xxx bits by asm_opcode() */ 
+#define OPT_COMPOSITE_FIRST   20
+#define OPT_IM       20 /* IM8 | IM16 | IM32 */
+#define OPT_REG      21 /* REG8 | REG16 | REG32 */ 
+#define OPT_REGW     22 /* REG16 | REG32 */
+#define OPT_IMW      23 /* IM16 | IM32 */ 
+
+/* can be ored with any OPT_xxx */
+#define OPT_EA    0x80
+
+    uint8_t nb_ops; /* number of operands of this form */
+    uint8_t op_type[MAX_OPERANDS]; /* see OPT_xxx (optionally ored with OPT_EA) */
+} ASMInstr;
+/*
+ * One parsed instruction operand, as filled in by parse_operand().
+ */
+typedef struct Operand {
+    uint32_t type; /* combination of the OP_xxx bits below */
+#define OP_REG8   (1 << OPT_REG8)
+#define OP_REG16  (1 << OPT_REG16)
+#define OP_REG32  (1 << OPT_REG32)
+#define OP_MMX    (1 << OPT_MMX)
+#define OP_SSE    (1 << OPT_SSE)
+#define OP_CR     (1 << OPT_CR)
+#define OP_TR     (1 << OPT_TR)
+#define OP_DB     (1 << OPT_DB)
+#define OP_SEG    (1 << OPT_SEG)
+#define OP_ST     (1 << OPT_ST)
+#define OP_IM8    (1 << OPT_IM8)
+#define OP_IM8S   (1 << OPT_IM8S)
+#define OP_IM16   (1 << OPT_IM16)
+#define OP_IM32   (1 << OPT_IM32)
+#define OP_EAX    (1 << OPT_EAX)
+#define OP_ST0    (1 << OPT_ST0)
+#define OP_CL     (1 << OPT_CL)
+#define OP_DX     (1 << OPT_DX)
+#define OP_ADDR   (1 << OPT_ADDR)
+#define OP_INDIR  (1 << OPT_INDIR)
+
+#define OP_EA     0x40000000 /* memory operand: address(reg,reg2,shift) */
+#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32) /* any 8, 16 or 32 bit register */
+#define OP_IM     OP_IM32 /* parse_operand() sets OP_IM32 on every immediate */
+    int8_t  reg; /* register, -1 if none */
+    int8_t  reg2; /* second register, -1 if none */
+    uint8_t shift; /* scale of reg2 as a shift count (0 to 3), see get_reg_shift() */
+    ExprValue e; /* value and optional symbol of the immediate or displacement */
+} Operand;
+/* size code (0 = byte, 1 = word, 2 = long) of a register operand,
+   indexed by its OP_REG8, OP_REG16 or OP_REG32 bit value (1, 2 or 4) */
+static const uint8_t reg_to_size[5] = {
+    [OP_REG8] = 0,
+    [OP_REG16] = 1,
+    [OP_REG32] = 2,
+};
+    
+#define WORD_PREFIX_OPCODE 0x66 /* data16 prefix emitted by asm_opcode() */
+
+#define NB_TEST_OPCODES 30 /* number of condition suffixes in test_bits[] */
+/* value added to the base opcode of an OPC_TEST instruction, indexed by
+   the distance between the mnemonic token and the token of the table
+   entry; each comment gives the corresponding condition suffix */
+static const uint8_t test_bits[NB_TEST_OPCODES] = {
+ 0x00, /* o */
+ 0x01, /* no */
+ 0x02, /* b */
+ 0x02, /* c */
+ 0x02, /* nae */
+ 0x03, /* nb */
+ 0x03, /* nc */
+ 0x03, /* ae */
+ 0x04, /* e */
+ 0x04, /* z */
+ 0x05, /* ne */
+ 0x05, /* nz */
+ 0x06, /* be */
+ 0x06, /* na */
+ 0x07, /* nbe */
+ 0x07, /* a */
+ 0x08, /* s */
+ 0x09, /* ns */
+ 0x0a, /* p */
+ 0x0a, /* pe */
+ 0x0b, /* np */
+ 0x0b, /* po */
+ 0x0c, /* l */
+ 0x0c, /* nge */
+ 0x0d, /* nl */
+ 0x0d, /* ge */
+ 0x0e, /* le */
+ 0x0e, /* ng */
+ 0x0f, /* nle */
+ 0x0f, /* g */
+};
+/* instruction table generated from i386-asm.h: the DEF_ASM_OP0 lines
+   expand to nothing here, every other DEF_ASM_xxx line gives one entry */
+static const ASMInstr asm_instrs[] = {
+#define ALT(x) x
+#define DEF_ASM_OP0(name, opcode)
+#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
+#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
+#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
+#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
+#include "i386-asm.h"
+
+    /* last operation: sym == 0 stops the table scan in asm_opcode() */
+    { 0, },
+};
+/* opcodes of the instructions without operands, generated from the
+   DEF_ASM_OP0 lines of i386-asm.h only; asm_opcode() indexes this
+   table with (token - TOK_ASM_pusha) */
+static const uint16_t op0_codes[] = {
+#define ALT(x)
+#define DEF_ASM_OP0(x, opcode) opcode,
+#define DEF_ASM_OP0L(name, opcode, group, instr_type)
+#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
+#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
+#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
+#include "i386-asm.h"
+};
+
+/* parse the scale factor of an indexed address and return it as a
+   shift count: 1, 2, 4 and 8 give 0, 1, 2 and 3. Any other value is
+   reported with expect() and 0 is returned. */
+static inline int get_reg_shift(TCCState *s1)
+{
+    int scale;
+
+    scale = asm_int_expr(s1);
+    if (scale == 1)
+        return 0;
+    if (scale == 2)
+        return 1;
+    if (scale == 4)
+        return 2;
+    if (scale == 8)
+        return 3;
+    expect("1, 2, 4 or 8 constant");
+    return 0;
+}
+
+/* parse '%' followed by a register token in the range TOK_ASM_eax to
+   TOK_ASM_edi and return its index relative to TOK_ASM_eax. Anything
+   else is reported with expect() and 0 is returned. */
+static int asm_parse_reg(void)
+{
+    int index;
+
+    if (tok == '%') {
+        next();
+        if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
+            index = tok - TOK_ASM_eax;
+            next();
+            return index;
+        }
+    }
+    expect("32 bit register");
+    return 0;
+}
+
+/* parse one operand starting at the current token and describe it in
+   'op'. Accepted forms: %register (including %st and %st(n)),
+   $immediate and address(reg,reg2,shift) with all variants. A leading
+   '*' adds OP_INDIR to the resulting type. */
+static void parse_operand(TCCState *s1, Operand *op)
+{
+    ExprValue expr;
+    int idx, indir_flag;
+    const char *digits;
+
+    indir_flag = 0;
+    if (tok == '*') {
+        next();
+        indir_flag = OP_INDIR;
+    }
+
+    if (tok == '%') {
+        next();
+        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
+            /* eight tokens per register class: the class number selects
+               the OP_xxx bit (WARNING: do not change constant order) */
+            idx = tok - TOK_ASM_al;
+            op->reg = idx & 7;
+            op->type = 1 << (idx >> 3);
+            /* flag the registers some instructions name explicitly */
+            if ((op->type & OP_REG) && op->reg == TREG_EAX)
+                op->type |= OP_EAX;
+            else if (op->type == OP_REG8 && op->reg == TREG_ECX)
+                op->type |= OP_CL;
+            else if (op->type == OP_REG16 && op->reg == TREG_EDX)
+                op->type |= OP_DX;
+        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
+            op->reg = tok - TOK_ASM_dr0;
+            op->type = OP_DB;
+        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
+            op->reg = tok - TOK_ASM_es;
+            op->type = OP_SEG;
+        } else if (tok == TOK_ASM_st) {
+            /* %st alone is %st(0) */
+            op->reg = 0;
+            op->type = OP_ST;
+            next();
+            if (tok == '(') {
+                next();
+                if (tok != TOK_PPNUM)
+                    goto reg_error;
+                /* the stack index must be a single digit from 0 to 7 */
+                digits = tokc.cstr->data;
+                idx = digits[0] - '0';
+                if ((unsigned)idx >= 8 || digits[1] != '\0')
+                    goto reg_error;
+                op->reg = idx;
+                next();
+                skip(')');
+            }
+            if (op->reg == 0)
+                op->type |= OP_ST0;
+            /* the token after the operand is already current */
+            goto no_skip;
+        } else {
+        reg_error:
+            error("unknown register");
+        }
+        next();
+    no_skip: ;
+    } else if (tok == '$') {
+        /* immediate: always OP_IM32, plus every narrower size the
+           value fits in when no symbol is involved */
+        next();
+        asm_expr(s1, &expr);
+        op->e.v = expr.v;
+        op->e.sym = expr.sym;
+        op->type = OP_IM32;
+        if (!op->e.sym) {
+            if (op->e.v == (uint8_t)op->e.v)
+                op->type |= OP_IM8;
+            if (op->e.v == (int8_t)op->e.v)
+                op->type |= OP_IM8S;
+            if (op->e.v == (uint16_t)op->e.v)
+                op->type |= OP_IM16;
+        }
+    } else {
+        /* memory operand: address(reg,reg2,shift) with all variants */
+        op->shift = 0;
+        op->reg2 = -1;
+        op->reg = -1;
+        op->type = OP_EA;
+        if (tok == '(') {
+            /* no displacement given */
+            op->e.v = 0;
+            op->e.sym = NULL;
+        } else {
+            asm_expr(s1, &expr);
+            op->e.v = expr.v;
+            op->e.sym = expr.sym;
+        }
+        if (tok == '(') {
+            next();
+            if (tok != ',') {
+                op->reg = asm_parse_reg();
+            }
+            if (tok == ',') {
+                next();
+                if (tok != ',') {
+                    op->reg2 = asm_parse_reg();
+                }
+                skip(',');
+                op->shift = get_reg_shift(s1);
+            }
+            skip(')');
+        }
+        /* no register at all: plain address */
+        if (op->reg == -1 && op->reg2 == -1)
+            op->type |= OP_ADDR;
+    }
+    op->type |= indir_flag;
+}
+
+/* output the 32 bit value of *pe at the current position, preceded by
+   a R_386_32 relocation when the expression refers to a symbol.
+   XXX: unify with C code output ? */
+static void gen_expr32(ExprValue *pe)
+{
+    Sym *target;
+
+    target = pe->sym;
+    if (target != NULL)
+        greloc(cur_text_section, target, ind, R_386_32);
+    gen_le32(pe->v);
+}
+
+/* output a 32 bit pc relative displacement for *pe.
+   XXX: unify with C code output ? */
+static void gen_disp32(ExprValue *pe)
+{
+    Sym *target;
+
+    target = pe->sym;
+    if (!target) {
+        /* no symbol: put an empty PC32 relocation */
+        put_elf_reloc(symtab_section, cur_text_section, 
+                      ind, R_386_PC32, 0);
+        gen_le32(pe->v - 4);
+        return;
+    }
+    if (target->r != cur_text_section->sh_num) {
+        /* symbol of another section: a relocation is needed */
+        greloc(cur_text_section, target, ind, R_386_PC32);
+        gen_le32(pe->v - 4);
+        return;
+    }
+    /* same section: we can output an absolute value. Note that the
+       TCC compiler behaves differently here because it always
+       outputs a relocation to ease (future) code elimination in the
+       linker */
+    gen_le32(pe->v + (long)target->next - ind - 4);
+}
+
+
+/* output v as two bytes, low byte first (each g() call is assumed to
+   emit the low 8 bits of its argument) */
+static void gen_le16(int v)
+{
+    int low, high;
+
+    low = v;
+    high = v >> 8;
+    g(low);
+    g(high);
+}
+
+/* generate the modrm byte for operand 'op', followed by the sib byte
+   and the displacement when they are needed. 'reg' is the value of
+   the reg field of the modrm byte (a register number or an opcode
+   group). 'op' is either a register operand or a memory operand. */
+static inline void asm_modrm(int reg, Operand *op)
+{
+    int mod, reg1, reg2, sib_base;
+
+    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
+        /* register direct */
+        g(0xc0 + (reg << 3) + op->reg);
+    } else if (op->reg == -1 && op->reg2 == -1) {
+        /* displacement only */
+        g(0x05 + (reg << 3));
+        gen_expr32(&op->e);
+    } else {
+        sib_base = op->reg;
+        /* first compute displacement encoding */
+        if (sib_base == -1) {
+            /* index register without base, e.g. (,%eax,4): this is
+               encoded as mod 00 with sib base 5 and is always
+               followed by a 32 bit displacement. Using op->reg (-1)
+               directly would corrupt the sib byte. */
+            sib_base = 5;
+            mod = 0x00;
+        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
+            /* no displacement; base 5 (%ebp) cannot use mod 00 because
+               that encoding means "displacement only" */
+            mod = 0x00;
+        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
+            mod = 0x40;
+        } else {
+            mod = 0x80;
+        }
+        /* compute if sib byte needed */
+        reg1 = op->reg;
+        if (op->reg2 != -1)
+            reg1 = 4;
+        g(mod + (reg << 3) + reg1);
+        if (reg1 == 4) {
+            /* add sib byte */
+            reg2 = op->reg2;
+            if (reg2 == -1)
+                reg2 = 4; /* indicate no index */
+            g((op->shift << 6) + (reg2 << 3) + sib_base);
+        }
+
+        /* add offset */
+        if (mod == 0x40) {
+            g(op->e.v);
+        } else if (mod == 0x80 || op->reg == -1) {
+            gen_expr32(&op->e);
+        }
+    }
+}
+
+/* assemble one instruction. 'opcode' is the token of the mnemonic; the
+   operands are parsed from the current token up to ';' or the end of
+   the line. The matching asm_instrs entry is searched, then the
+   prefixes, the opcode, the modrm/sib bytes and the constants are
+   output with g(). Instructions without operands that have no table
+   entry are taken from op0_codes. An error is reported when there are
+   too many operands, when no entry matches or when the operand size
+   cannot be inferred. */
+static void asm_opcode(TCCState *s1, int opcode)
+{
+    const ASMInstr *pa;
+    int i, modrm_index, reg, v, op1, is_short_jmp;
+    int nb_ops, s, ss;
+    Operand ops[MAX_OPERANDS], *pop;
+    int op_type[MAX_OPERANDS]; /* decoded op type */
+
+    /* get operands */
+    pop = ops;
+    nb_ops = 0;
+    for(;;) {
+        if (tok == ';' || tok == TOK_LINEFEED)
+            break;
+        if (nb_ops >= MAX_OPERANDS) {
+            error("incorrect number of operands");
+        }
+        parse_operand(s1, pop);
+        pop++;
+        nb_ops++;
+        if (tok != ',')
+            break;
+        next();
+    }
+
+    is_short_jmp = 0;
+    s = 0; /* avoid warning */
+    
+    /* optimize matching by using a lookup table (no hashing is needed
+       !) */
+    for(pa = asm_instrs; pa->sym != 0; pa++) {
+        /* s is the operand size: 0 = b, 1 = w, 2 = l, 3 = no suffix */
+        s = 0;
+        if (pa->instr_type & OPC_FARITH) {
+            v = opcode - pa->sym;
+            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
+                continue;
+        } else if (pa->instr_type & OPC_ARITH) {
+            if (!(opcode >= pa->sym && opcode < pa->sym + 8 * 4))
+                continue;
+            goto compute_size;
+        } else if (pa->instr_type & OPC_SHIFT) {
+            if (!(opcode >= pa->sym && opcode < pa->sym + 7 * 4))
+                continue;
+            goto compute_size;
+        } else if (pa->instr_type & OPC_TEST) {
+            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
+                continue;
+        } else if (pa->instr_type & OPC_B) {
+            if (!(opcode >= pa->sym && opcode <= pa->sym + 3))
+                continue;
+        compute_size:
+            s = (opcode - pa->sym) & 3;
+        } else if (pa->instr_type & OPC_WL) {
+            if (!(opcode >= pa->sym && opcode <= pa->sym + 2))
+                continue;
+            s = opcode - pa->sym + 1;
+        } else {
+            if (pa->sym != opcode)
+                continue;
+        }
+        if (pa->nb_ops != nb_ops)
+            continue;
+        /* now decode and check each operand */
+        for(i = 0; i < nb_ops; i++) {
+            int op1, op2;
+            op1 = pa->op_type[i];
+            op2 = op1 & 0x1f;
+            switch(op2) {
+            case OPT_IM:
+                v = OP_IM8 | OP_IM16 | OP_IM32;
+                break;
+            case OPT_REG:
+                v = OP_REG8 | OP_REG16 | OP_REG32;
+                break;
+            case OPT_REGW:
+                v = OP_REG16 | OP_REG32;
+                break;
+            case OPT_IMW:
+                v = OP_IM16 | OP_IM32;
+                break;
+            default:
+                v = 1 << op2;
+                break;
+            }
+            if (op1 & OPT_EA)
+                v |= OP_EA;
+            op_type[i] = v;
+            if ((ops[i].type & v) == 0)
+                goto next;
+        }
+        /* all is matching ! */
+        break;
+    next: ;
+    }
+    if (pa->sym == 0) {
+        if (opcode >= TOK_ASM_pusha && opcode <= TOK_ASM_emms) {
+            /* instruction without operands: one or two opcode bytes */
+            int b;
+            b = op0_codes[opcode - TOK_ASM_pusha];
+            if (b & 0xff00) 
+                g(b >> 8);
+            g(b);
+            return;
+        } else {
+            error("unknown opcode '%s'", 
+                  get_tok_str(opcode, NULL));
+        }
+    }
+    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
+    if (s == 3) {
+        for(i = 0; s == 3 && i < nb_ops; i++) {
+            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
+                s = reg_to_size[ops[i].type & OP_REG];
+        }
+        if (s == 3) {
+            error("cannot infer opcode suffix");
+        }
+    }
+
+    /* generate data16 prefix if needed */
+    ss = s;
+    if (s == 1 || (pa->instr_type & OPC_D16))
+        g(WORD_PREFIX_OPCODE);
+    else if (s == 2)
+        s = 1;
+    /* now generates the operation */
+    if (pa->instr_type & OPC_FWAIT)
+        g(0x9b);
+
+    v = pa->opcode;
+    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
+        /* kludge for imul $im, %reg: handle it as the three operand
+           form with the register used both as source and destination.
+           0x69 is the form with a full size immediate, 0x6b the one
+           with a sign extended 8 bit immediate. The decoded type must
+           be duplicated as well because op_type[2] is read below. */
+        nb_ops = 3;
+        ops[2] = ops[1];
+        op_type[2] = op_type[1];
+    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
+        v--; /* int $3 case */
+        nb_ops = 0;
+    } else if ((v == 0x06 || v == 0x07)) {
+        if (ops[0].reg >= 4) {
+            /* push/pop %fs or %gs */
+            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
+        } else {
+            v += ops[0].reg << 3;
+        }
+        nb_ops = 0;
+    } else if (v <= 0x05) {
+        /* arith case */
+        v += ((opcode - TOK_ASM_addb) >> 2) << 3;
+    } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
+        /* fpu arith case */
+        v += ((opcode - pa->sym) / 6) << 3;
+    }
+    if (pa->instr_type & OPC_REG) {
+        for(i = 0; i < nb_ops; i++) {
+            if (op_type[i] & (OP_REG | OP_ST)) {
+                v += ops[i].reg;
+                break;
+            }
+        }
+        /* mov $im, %reg case */
+        if (pa->opcode == 0xb0 && s >= 1)
+            v += 7;
+    }
+    if (pa->instr_type & OPC_B)
+        v += s;
+    if (pa->instr_type & OPC_TEST)
+        v += test_bits[opcode - pa->sym]; 
+    if (pa->instr_type & OPC_SHORTJMP) {
+        Sym *sym;
+        int jmp_disp;
+
+        /* see if we can really generate the jump with a byte offset */
+        sym = ops[0].e.sym;
+        if (!sym)
+            goto no_short_jump;
+        if (sym->r != cur_text_section->sh_num)
+            goto no_short_jump;
+        jmp_disp = ops[0].e.v + (long)sym->next - ind - 2;
+        if (jmp_disp == (int8_t)jmp_disp) {
+            /* OK to generate jump */
+            is_short_jmp = 1;
+            ops[0].e.v = jmp_disp;
+        } else {
+        no_short_jump:
+            if (pa->instr_type & OPC_JMP) {
+                /* long jump will be allowed. need to modify the
+                   opcode slightly */
+                if (v == 0xeb)
+                    v = 0xe9;
+                else 
+                    v += 0x0f10;
+            } else {
+                error("invalid displacement");
+            }
+        }
+    }
+    /* output the opcode: the high byte first when there is one */
+    op1 = v >> 8;
+    if (op1)
+        g(op1);
+    g(v);
+        
+    /* search which operand will used for modrm */
+    modrm_index = 0;
+    if (pa->instr_type & OPC_SHIFT) {
+        reg = (opcode - pa->sym) >> 2; 
+        if (reg == 6)
+            reg = 7;
+    } else if (pa->instr_type & OPC_ARITH) {
+        reg = (opcode - pa->sym) >> 2;
+    } else if (pa->instr_type & OPC_FARITH) {
+        reg = (opcode - pa->sym) / 6;
+    } else {
+        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
+    }
+    if (pa->instr_type & OPC_MODRM) {
+        /* first look for an ea operand */
+        for(i = 0;i < nb_ops; i++) {
+            if (op_type[i] & OP_EA)
+                goto modrm_found;
+        }
+        /* then if not found, a register or indirection (shift instructions) */
+        for(i = 0;i < nb_ops; i++) {
+            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
+                goto modrm_found;
+        }
+#ifdef ASM_DEBUG
+        error("bad op table");
+#endif      
+    modrm_found:
+        modrm_index = i;
+        /* if a register is used in another operand then it is
+           used instead of group */
+        for(i = 0;i < nb_ops; i++) {
+            v = op_type[i];
+            if (i != modrm_index && 
+                (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
+                reg = ops[i].reg;
+                break;
+            }
+        }
+
+        asm_modrm(reg, &ops[modrm_index]);
+    }
+
+    /* emit constants */
+    if (pa->opcode == 0x9a || pa->opcode == 0xea) {
+        /* ljmp or lcall kludge */
+        gen_expr32(&ops[1].e);
+        if (ops[0].e.sym)
+            error("cannot relocate");
+        gen_le16(ops[0].e.v);
+    } else {
+        for(i = 0;i < nb_ops; i++) {
+            v = op_type[i];
+            if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM8S | OP_ADDR)) {
+                /* if multiple sizes are given it means we must look
+                   at the op size */
+                if (v == (OP_IM8 | OP_IM16 | OP_IM32) ||
+                    v == (OP_IM16 | OP_IM32)) {
+                    if (ss == 0)
+                        v = OP_IM8;
+                    else if (ss == 1)
+                        v = OP_IM16;
+                    else
+                        v = OP_IM32;
+                }
+                if (v & (OP_IM8 | OP_IM8S)) {
+                    if (ops[i].e.sym)
+                        goto error_relocate;
+                    g(ops[i].e.v);
+                } else if (v & OP_IM16) {
+                    if (ops[i].e.sym) {
+                    error_relocate:
+                        error("cannot relocate");
+                    }
+                    gen_le16(ops[i].e.v);
+                } else {
+                    if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
+                        if (is_short_jmp)
+                            g(ops[i].e.v);
+                        else
+                            gen_disp32(&ops[i].e);
+                    } else {
+                        gen_expr32(&ops[i].e);
+                    }
+                }
+            }
+        }
+    }
+}
+
+#define NB_SAVED_REGS 3
+#define NB_ASM_REGS 8
+
+/* return the allocation priority of a constraint string: the highest
+   class number among its letters (0 for a single named register, 1
+   for 'q', 2 for 'r', 3 for constants and memory). Operands with the
+   lowest numbers are allocated first. An unknown letter is reported
+   with error() and counts as 0. */
+static inline int constraint_priority(const char *str)
+{
+    const char *p;
+    int highest, level, c;
+
+    highest = 0;
+    for(p = str; *p != '\0'; p++) {
+        c = *p;
+        if (c == 'a' || c == 'b' || c == 'c' || c == 'd' ||
+            c == 'S' || c == 'D') {
+            level = 0;
+        } else if (c == 'q') {
+            level = 1;
+        } else if (c == 'r') {
+            level = 2;
+        } else if (c == 'N' || c == 'M' || c == 'I' || c == 'i' ||
+                   c == 'm' || c == 'g') {
+            level = 3;
+        } else {
+            error("unknown constraint '%c'", c);
+            level = 0;
+        }
+        if (level > highest)
+            highest = level;
+    }
+    return highest;
+}
+/*
+ * Allocate registers for the operands of an asm statement.
+ *
+ * One call handles either the outputs (is_output != 0: operands 0 to
+ * nb_outputs - 1) or the inputs (operands nb_outputs to
+ * nb_operands1 - 1). For each handled operand the priority, ref_index
+ * and reg fields are set; reg stays -1 when no register is assigned.
+ * regs_allocated (NB_ASM_REGS bytes) is cleared and then receives the
+ * registers taken during this call. input_regs_allocated is only used
+ * for memory outputs whose value is of kind VT_LLOCAL. A constraint
+ * that cannot be satisfied is reported with error().
+ */
+static void asm_compute_constraints(uint8_t *regs_allocated,
+                                    ASMOperand *operands, 
+                                    int nb_operands1, int nb_outputs, 
+                                    int is_output,
+                                    uint8_t *input_regs_allocated)
+{
+    ASMOperand *op;
+    int sorted_op[MAX_ASM_OPERANDS];
+    int i, j, k, p1, p2, tmp, reg, c, base, nb_operands;
+    const char *str;
+    
+    if (is_output) {
+        base = 0;
+        nb_operands = nb_outputs;
+    } else {
+        base = nb_outputs;
+        nb_operands = nb_operands1 - nb_outputs;
+    }
+
+    /* compute constraint priority and evaluate references to output
+       constraints if input constraints */
+    for(i=0;i<nb_operands;i++) {
+        j = base + i;
+        op = &operands[j];
+        str = op->constraint;
+        op->ref_index = -1;
+        op->reg = -1;
+        if (!is_output && (isnum(*str) || *str == '[')) {
+            /* this is a reference to another constraint */
+            k = find_constraint(operands, nb_operands1, str, NULL);
+            if ((unsigned)k >= j) /* also true for a negative k */
+                error("invalid reference in constraint %d ('%s')",
+                      j, str);
+            op->ref_index = k;
+            str = operands[k].constraint;
+        }
+        while (*str == '=' || *str == '&' || *str == '+') /* skip the modifiers */
+            str++;
+        op->priority = constraint_priority(str);
+    }
+    
+    /* sort operands according to their priority (lowest value first) */
+    for(i=0;i<nb_operands;i++)
+        sorted_op[i] = base + i;
+    for(i=0;i<nb_operands - 1;i++) {
+        for(j=i+1;j<nb_operands;j++) {
+            p1 = operands[sorted_op[i]].priority; 
+            p2 = operands[sorted_op[j]].priority;
+            if (p2 < p1) {
+                tmp = sorted_op[i];
+                sorted_op[i] = sorted_op[j];
+                sorted_op[j] = tmp;
+            }
+        }
+    }
+
+    memset(regs_allocated, 0, NB_ASM_REGS);
+    regs_allocated[4] = 1; /* esp cannot be used */
+    regs_allocated[5] = 1; /* ebp cannot be used yet */
+    
+    /* allocate registers and generate corresponding asm moves */
+    for(i=0;i<nb_operands;i++) {
+        j = sorted_op[i];
+        op = &operands[j];
+        str = op->constraint;
+        
+        if (op->ref_index >= 0) {
+            str = operands[op->ref_index].constraint; /* use the constraint of the referenced operand */
+        }
+
+        while (*str == '=' || *str == '&' || *str == '+')
+            str++;
+    try_next: /* try the constraint letters one after the other */
+        c = *str++;
+        switch(c) {
+        case 'a':
+            reg = TREG_EAX;
+            goto alloc_reg;
+        case 'b':
+            reg = 3;
+            goto alloc_reg;
+        case 'c':
+            reg = TREG_ECX;
+            goto alloc_reg;
+        case 'd':
+            reg = TREG_EDX;
+            goto alloc_reg;
+        case 'S':
+            reg = 6;
+            goto alloc_reg;
+        case 'D':
+            reg = 7;
+        alloc_reg:
+            if (regs_allocated[reg]) /* already taken: try the next letter */
+                goto try_next;
+            goto reg_found;
+        case 'q':
+            /* eax, ebx, ecx or edx */
+            for(reg = 0; reg < 4; reg++) {
+                if (!regs_allocated[reg])
+                    goto reg_found;
+            }
+            goto try_next;
+        case 'r':
+            /* any general register */
+            for(reg = 0; reg < 8; reg++) {
+                if (!regs_allocated[reg])
+                    goto reg_found;
+            }
+            goto try_next;
+        reg_found:
+            /* now we can reload in the register */
+            op->reg = reg;
+            regs_allocated[reg] = 1;
+            break;
+        case 'i':
+            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
+                goto try_next;
+            break;
+        case 'I':
+        case 'N':
+        case 'M':
+            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
+                goto try_next;
+            break;
+        case 'm':
+        case 'g':
+            /* nothing special to do because the operand is
+               already in memory */
+            /* XXX: fix constant case */
+            if (is_output) {
+                /* if it is a reference to a memory zone, it must lie
+                   in a register, so we reserve the register in the
+                   input registers and a load will be generated
+                   later */
+                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
+                    /* any general register */
+                    for(reg = 0; reg < 8; reg++) {
+                        if (!input_regs_allocated[reg])
+                            goto reg_found1;
+                    }
+                    goto try_next;
+                reg_found1:
+                    /* now we can reload in the register */
+                    input_regs_allocated[reg] = 1;
+                    op->reg = reg;
+                }
+            }
+            break;
+        default: /* includes the end of the constraint string */
+            error("asm constraint %d ('%s') could not be satisfied", 
+                  j, op->constraint);
+            break;
+        }
+    }
+
+    /* print sorted constraints */
+#ifdef ASM_DEBUG
+    if (is_output)
+        printf("outputs=\n");
+    else
+        printf("inputs=\n");
+    for(i=0;i<nb_operands;i++) {
+        j = sorted_op[i];
+        op = &operands[j];
+        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n", 
+               j,                
+               op->id ? get_tok_str(op->id, NULL) : "", 
+               op->constraint,
+               op->vt->r,
+               op->reg);
+    }
+#endif
+}
+
+/* append to add_str the assembler text of the operand described by sv.
+   Constants give "$value" (the '$' is omitted for lvalues and for the
+   'c' and 'n' modifiers), locals give "offset(%ebp)", lvalues held in
+   a register give "(%reg)" and other values give "%reg".
+   'modifier' is the operand modifier character: 'n' negates the
+   printed constant, 'b', 'h' and 'w' select the low byte, high byte
+   and 16 bit name of a register. */
+static void subst_asm_operand(CString *add_str, 
+                              SValue *sv, int modifier)
+{
+    int r, reg, size, val;
+    char buf[64];
+
+    r = sv->r;
+    if ((r & VT_VALMASK) == VT_CONST) {
+        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
+            cstr_ccat(add_str, '$');
+        if (r & VT_SYM) {
+            cstr_cat(add_str, get_tok_str(sv->sym->v, NULL));
+            if (sv->c.i != 0) {
+                cstr_ccat(add_str, '+');
+            } else {
+                /* symbol alone: no offset to print */
+                return;
+            }
+        }
+        val = sv->c.i;
+        if (modifier == 'n')
+            val = -val;
+        /* print val and not sv->c.i, otherwise the negation requested
+           by the 'n' modifier is lost */
+        snprintf(buf, sizeof(buf), "%d", val);
+        cstr_cat(add_str, buf);
+    } else if ((r & VT_VALMASK) == VT_LOCAL) {
+        snprintf(buf, sizeof(buf), "%d(%%ebp)", sv->c.i);
+        cstr_cat(add_str, buf);
+    } else if (r & VT_LVAL) {
+        /* the address is held in a register */
+        reg = r & VT_VALMASK;
+        if (reg >= VT_CONST)
+            error("internal compiler error");
+        snprintf(buf, sizeof(buf), "(%%%s)", 
+                 get_tok_str(TOK_ASM_eax + reg, NULL));
+        cstr_cat(add_str, buf);
+    } else {
+        /* register case */
+        reg = r & VT_VALMASK;
+        if (reg >= VT_CONST)
+            error("internal compiler error");
+
+        /* choose register operand size */
+        if ((sv->type.t & VT_BTYPE) == VT_BYTE)
+            size = 1;
+        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
+            size = 2;
+        else
+            size = 4;
+        /* only the first four registers are used with a byte name */
+        if (size == 1 && reg >= 4)
+            size = 4;
+
+        if (modifier == 'b') {
+            if (reg >= 4)
+                error("cannot use byte register");
+            size = 1;
+        } else if (modifier == 'h') {
+            if (reg >= 4)
+                error("cannot use byte register");
+            size = -1; /* selects the high byte name below */
+        } else if (modifier == 'w') {
+            size = 2;
+        }
+
+        /* convert the register number to the token of its name */
+        switch(size) {
+        case -1:
+            reg = TOK_ASM_ah + reg;
+            break;
+        case 1:
+            reg = TOK_ASM_al + reg;
+            break;
+        case 2:
+            reg = TOK_ASM_ax + reg;
+            break;
+        default:
+            reg = TOK_ASM_eax + reg;
+            break;
+        }
+        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
+        cstr_cat(add_str, buf);
+    }
+}
+
+/* generate the code surrounding an asm statement. With is_output == 0
+   the prolog is output: save of the used registers listed in
+   reg_saved, then load of the operands that were given a register.
+   Otherwise the epilog is output: store of the output registers, then
+   restore of the saved registers in reverse order. clobber_regs holds
+   NB_ASM_REGS flags for the registers named in the clobber list. */
+static void asm_gen_code(ASMOperand *operands, int nb_operands, 
+                         int nb_outputs, int is_output,
+                         uint8_t *clobber_regs)
+{
+    static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
+    uint8_t used[NB_ASM_REGS];
+    ASMOperand *cur;
+    int n, r;
+
+    /* a register is used when it is clobbered or given to an operand */
+    memcpy(used, clobber_regs, sizeof(used));
+    for(n = 0; n < nb_operands; n++) {
+        cur = &operands[n];
+        if (cur->reg >= 0)
+            used[cur->reg] = 1;
+    }
+
+    if (is_output) {
+        /* epilog: generate save code for the outputs held in a
+           register, except the VT_LLOCAL ones */
+        for(n = 0; n < nb_outputs; n++) {
+            cur = &operands[n];
+            if (cur->reg < 0)
+                continue;
+            if ((cur->vt->r & VT_VALMASK) == VT_LLOCAL)
+                continue;
+            store(cur->reg, cur->vt);
+        }
+        /* generate reg restore code */
+        for(n = NB_SAVED_REGS - 1; n >= 0; n--) {
+            r = reg_saved[n];
+            if (used[r]) 
+                g(0x58 + r);
+        }
+        return;
+    }
+
+    /* prolog: generate reg save code */
+    for(n = 0; n < NB_SAVED_REGS; n++) {
+        r = reg_saved[n];
+        if (used[r]) 
+            g(0x50 + r);
+    }
+
+    /* generate load code for the inputs */
+    for(n = nb_outputs; n < nb_operands; n++) {
+        cur = &operands[n];
+        if (cur->reg >= 0)
+            load(cur->reg, cur->vt);
+    }
+
+    /* generate load code for output memory references: the value is
+       loaded as a VT_LOCAL one */
+    for(n = 0; n < nb_outputs; n++) {
+        cur = &operands[n];
+        if (cur->reg < 0)
+            continue;
+        if ((cur->vt->r & VT_VALMASK) == VT_LLOCAL) {
+            SValue tmp;
+            tmp = *cur->vt;
+            tmp.r = (tmp.r & ~VT_VALMASK) | VT_LOCAL;
+            load(cur->reg, &tmp);
+        }
+    }
+}
+
+/* record the clobber named 'str' in clobber_regs. "memory" and "cc"
+   are accepted and ignored; a 32 bit or 16 bit register name in the
+   range eax..edi or ax..di sets the flag of that register; any other
+   name is reported with error(). */
+static void asm_clobber(uint8_t *clobber_regs, const char *str)
+{
+    TokenSym *name;
+    int idx;
+
+    if (strcmp(str, "memory") == 0 || strcmp(str, "cc") == 0)
+        return;
+
+    name = tok_alloc(str, strlen(str));
+    idx = name->tok;
+    if (idx >= TOK_ASM_eax && idx <= TOK_ASM_edi)
+        idx -= TOK_ASM_eax;
+    else if (idx >= TOK_ASM_ax && idx <= TOK_ASM_di)
+        idx -= TOK_ASM_ax;
+    else
+        error("invalid clobber register '%s'", str);
+    clobber_regs[idx] = 1;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i386-asm.h	Mon Jan 06 20:21:42 2003 +0000
@@ -0,0 +1,444 @@
+     /* Instructions written without operand.  NOTE(review): DEF_ASM_OP0
+        is defined by the including file (not visible here); its second
+        argument is presumably the one or two byte opcode -- confirm.
+        Several mnemonics are alternate spellings that share one opcode
+        value: pushf/pushfl, popf/popfl, cbw/cbtw, cwde/cwtl, cwd/cwtd,
+        cdq/cltd.  The leading 0x66 of cbw/cwd is the value this table
+        also lists for the data16 prefix. */
+     DEF_ASM_OP0(pusha, 0x60) /* must be first OP0 */
+     DEF_ASM_OP0(popa, 0x61)
+     DEF_ASM_OP0(clc, 0xf8)
+     DEF_ASM_OP0(cld, 0xfc)
+     DEF_ASM_OP0(cli, 0xfa)
+     DEF_ASM_OP0(clts, 0x0f06)
+     DEF_ASM_OP0(cmc, 0xf5)
+     DEF_ASM_OP0(lahf, 0x9f)
+     DEF_ASM_OP0(sahf, 0x9e)
+     DEF_ASM_OP0(pushfl, 0x9c)
+     DEF_ASM_OP0(popfl, 0x9d)
+     DEF_ASM_OP0(pushf, 0x9c)
+     DEF_ASM_OP0(popf, 0x9d)
+     DEF_ASM_OP0(stc, 0xf9)
+     DEF_ASM_OP0(std, 0xfd)
+     DEF_ASM_OP0(sti, 0xfb)
+     DEF_ASM_OP0(aaa, 0x37)
+     DEF_ASM_OP0(aas, 0x3f)
+     DEF_ASM_OP0(daa, 0x27)
+     DEF_ASM_OP0(das, 0x2f)
+     DEF_ASM_OP0(aad, 0xd50a)
+     DEF_ASM_OP0(aam, 0xd40a)
+     DEF_ASM_OP0(cbw, 0x6698)
+     DEF_ASM_OP0(cwd, 0x6699)
+     DEF_ASM_OP0(cwde, 0x98)
+     DEF_ASM_OP0(cdq, 0x99)
+     DEF_ASM_OP0(cbtw, 0x6698)
+     DEF_ASM_OP0(cwtl, 0x98)
+     DEF_ASM_OP0(cwtd, 0x6699)
+     DEF_ASM_OP0(cltd, 0x99)
+     DEF_ASM_OP0(int3, 0xcc)
+     DEF_ASM_OP0(into, 0xce)
+     DEF_ASM_OP0(iret, 0xcf)
+     DEF_ASM_OP0(rsm, 0x0faa)
+     DEF_ASM_OP0(hlt, 0xf4)
+     DEF_ASM_OP0(wait, 0x9b)
+     DEF_ASM_OP0(nop, 0x90)
+     DEF_ASM_OP0(xlat, 0xd7)
+
+     /* strings: every instruction is listed under two spellings that
+        share the same opcode (e.g. cmpsb/scmpb).  OPC_BWL means that
+        the b, w, l or no-suffix forms are accepted. */
+ALT(DEF_ASM_OP0L(cmpsb, 0xa6, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(scmpb, 0xa6, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(insb, 0x6c, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(outsb, 0x6e, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(lodsb, 0xac, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(slodb, 0xac, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(movsb, 0xa4, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(smovb, 0xa4, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(scasb, 0xae, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(sscab, 0xae, 0, OPC_BWL))
+
+ALT(DEF_ASM_OP0L(stosb, 0xaa, 0, OPC_BWL))
+ALT(DEF_ASM_OP0L(sstob, 0xaa, 0, OPC_BWL))
+
+     /* bits */
+     
+     /* bsf/bsr, then bt/bts/btr/btc.  Each bt* mnemonic has two
+        alternatives: one whose first operand is OPT_REGW and one whose
+        first operand is OPT_IM8.  The OPT_IM8 alternatives all use
+        0x0fba and differ only in their third argument (4..7). */
+ALT(DEF_ASM_OP2(bsfw, 0x0fbc, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(bsrw, 0x0fbd, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA, OPT_REGW))
+
+ALT(DEF_ASM_OP2(btw, 0x0fa3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btw, 0x0fba, 4, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+ALT(DEF_ASM_OP2(btsw, 0x0fab, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btsw, 0x0fba, 5, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+ALT(DEF_ASM_OP2(btrw, 0x0fb3, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btrw, 0x0fba, 6, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+ALT(DEF_ASM_OP2(btcw, 0x0fbb, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP2(btcw, 0x0fba, 7, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW | OPT_EA))
+
+     /* prefixes: declared like the instructions without operand.
+        Several names share one value: aword/addr16, word/data16,
+        rep/repe/repz and repne/repnz. */
+     DEF_ASM_OP0(aword, 0x67)
+     DEF_ASM_OP0(addr16, 0x67)
+     DEF_ASM_OP0(word, 0x66)
+     DEF_ASM_OP0(data16, 0x66)
+     DEF_ASM_OP0(lock, 0xf0)
+     DEF_ASM_OP0(rep, 0xf3)
+     DEF_ASM_OP0(repe, 0xf3)
+     DEF_ASM_OP0(repz, 0xf3)
+     DEF_ASM_OP0(repne, 0xf2)
+     DEF_ASM_OP0(repnz, 0xf2)
+             
+     /* instructions without operand whose opcode value starts with 0x0f */
+     DEF_ASM_OP0(invd, 0x0f08)
+     DEF_ASM_OP0(wbinvd, 0x0f09)
+     DEF_ASM_OP0(cpuid, 0x0fa2)
+     DEF_ASM_OP0(wrmsr, 0x0f30)
+     DEF_ASM_OP0(rdtsc, 0x0f31)
+     DEF_ASM_OP0(rdmsr, 0x0f32)
+     DEF_ASM_OP0(rdpmc, 0x0f33)
+     DEF_ASM_OP0(ud2, 0x0f0b)
+
+     /* NOTE: we took the same order as gas opcode definition order */
+     /* Data movement: mov (with its OPT_SEG, OPT_CR, OPT_DB and OPT_TR
+        operand forms), movs??/movz?? extensions, push/pop, xchg,
+        in/out, lea and les/lds/lss/lfs/lgs.  The instruction type
+        flags are described next to the ASMInstr structure: OPC_MODRM
+        is the modrm encoding, OPC_REG adds the register to the opcode,
+        OPC_D16 generates a data16 prefix, and OPC_BWL / OPC_WL tell
+        which size suffixes are accepted. */
+ALT(DEF_ASM_OP2(movb, 0xa0, 0, OPC_BWL, OPT_ADDR, OPT_EAX))
+ALT(DEF_ASM_OP2(movb, 0xa2, 0, OPC_BWL, OPT_EAX, OPT_ADDR))
+ALT(DEF_ASM_OP2(movb, 0x88, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(movb, 0x8a, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+ALT(DEF_ASM_OP2(movb, 0xb0, 0, OPC_REG | OPC_BWL, OPT_IM, OPT_REG))
+ALT(DEF_ASM_OP2(movb, 0xc6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP2(movw, 0x8c, 0, OPC_MODRM | OPC_WL, OPT_SEG, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(movw, 0x8e, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_SEG))
+
+ALT(DEF_ASM_OP2(movw, 0x0f20, 0, OPC_MODRM | OPC_WL, OPT_CR, OPT_REG32))
+ALT(DEF_ASM_OP2(movw, 0x0f21, 0, OPC_MODRM | OPC_WL, OPT_DB, OPT_REG32))
+ALT(DEF_ASM_OP2(movw, 0x0f24, 0, OPC_MODRM | OPC_WL, OPT_TR, OPT_REG32))
+ALT(DEF_ASM_OP2(movw, 0x0f22, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_CR))
+ALT(DEF_ASM_OP2(movw, 0x0f23, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_DB))
+ALT(DEF_ASM_OP2(movw, 0x0f26, 0, OPC_MODRM | OPC_WL, OPT_REG32, OPT_TR))
+
+ALT(DEF_ASM_OP2(movsbl, 0x0fbe, 0, OPC_MODRM, OPT_REG8 | OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(movsbw, 0x0fbe, 0, OPC_MODRM | OPC_D16, OPT_REG8 | OPT_EA, OPT_REG16))
+ALT(DEF_ASM_OP2(movswl, 0x0fbf, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(movzbw, 0x0fb6, 0, OPC_MODRM | OPC_WL, OPT_REG8 | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(movzwl, 0x0fb7, 0, OPC_MODRM, OPT_REG16 | OPT_EA, OPT_REG32))
+
+ALT(DEF_ASM_OP1(pushw, 0x50, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(pushw, 0xff, 6, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP1(push, 0x6a, 0, 0, OPT_IM8S))
+ALT(DEF_ASM_OP1(push, 0x68, 0, 0, OPT_IM32))
+ALT(DEF_ASM_OP1(push, 0x06, 0, 0, OPT_SEG))
+
+ALT(DEF_ASM_OP1(popw, 0x58, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(popw, 0x8f, 0, OPC_MODRM | OPC_WL, OPT_REGW | OPT_EA))
+ALT(DEF_ASM_OP1(pop, 0x07, 0, 0, OPT_SEG))
+
+ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_REG, OPT_EAX))
+ALT(DEF_ASM_OP2(xchgw, 0x90, 0, OPC_REG | OPC_WL, OPT_EAX, OPT_REG))
+ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(xchgb, 0x86, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+
+ALT(DEF_ASM_OP2(inb, 0xe4, 0, OPC_BWL, OPT_IM8, OPT_EAX))
+ALT(DEF_ASM_OP1(inb, 0xe4, 0, OPC_BWL, OPT_IM8))
+ALT(DEF_ASM_OP2(inb, 0xec, 0, OPC_BWL, OPT_DX, OPT_EAX))
+ALT(DEF_ASM_OP1(inb, 0xec, 0, OPC_BWL, OPT_DX))
+
+ALT(DEF_ASM_OP2(outb, 0xe6, 0, OPC_BWL, OPT_EAX, OPT_IM8))
+ALT(DEF_ASM_OP1(outb, 0xe6, 0, OPC_BWL, OPT_IM8))
+ALT(DEF_ASM_OP2(outb, 0xee, 0, OPC_BWL, OPT_EAX, OPT_DX))
+ALT(DEF_ASM_OP1(outb, 0xee, 0, OPC_BWL, OPT_DX))
+
+ALT(DEF_ASM_OP2(leaw, 0x8d, 0, OPC_MODRM | OPC_WL, OPT_EA, OPT_REG))
+
+ALT(DEF_ASM_OP2(les, 0xc4, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lds, 0xc5, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lss, 0x0fb2, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lfs, 0x0fb4, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+ALT(DEF_ASM_OP2(lgs, 0x0fb5, 0, OPC_MODRM, OPT_EA, OPT_REG32))
+
+     /* arith: only add, test, inc/dec, not/neg, mul/imul and div/idiv
+        are spelled out here.  NOTE(review): the add entries carry
+        OPC_ARITH ("arithmetic opcodes"), presumably so that the other
+        arithmetic mnemonics can be derived from them by the assembler
+        -- confirm in the opcode lookup code. */
+ALT(DEF_ASM_OP2(addb, 0x00, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG)) /* XXX: use D bit ? */
+ALT(DEF_ASM_OP2(addb, 0x02, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+ALT(DEF_ASM_OP2(addb, 0x04, 0, OPC_ARITH | OPC_BWL, OPT_IM, OPT_EAX))
+ALT(DEF_ASM_OP2(addb, 0x80, 0, OPC_ARITH | OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(addw, 0x83, 0, OPC_ARITH | OPC_MODRM | OPC_WL, OPT_IM8S, OPT_EA | OPT_REG))
+
+ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_EA | OPT_REG, OPT_REG))
+ALT(DEF_ASM_OP2(testb, 0x84, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(testb, 0xa8, 0, OPC_BWL, OPT_IM, OPT_EAX))
+ALT(DEF_ASM_OP2(testb, 0xf6, 0, OPC_MODRM | OPC_BWL, OPT_IM, OPT_EA | OPT_REG))
+
+ALT(DEF_ASM_OP1(incw, 0x40, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(incb, 0xfe, 0, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP1(decw, 0x48, 0, OPC_REG | OPC_WL, OPT_REGW))
+ALT(DEF_ASM_OP1(decb, 0xfe, 1, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP1(notb, 0xf6, 2, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP1(negb, 0xf6, 3, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP1(mulb, 0xf6, 4, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP1(imulb, 0xf6, 5, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+
+ALT(DEF_ASM_OP2(imulw, 0x0faf, 0, OPC_MODRM | OPC_WL, OPT_REG | OPT_EA, OPT_REG))
+ALT(DEF_ASM_OP3(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(imulw, 0x6b, 0, OPC_MODRM | OPC_WL, OPT_IM8S, OPT_REGW))
+ALT(DEF_ASM_OP3(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW | OPT_EA, OPT_REGW))
+ALT(DEF_ASM_OP2(imulw, 0x69, 0, OPC_MODRM | OPC_WL, OPT_IMW, OPT_REGW))
+
+ALT(DEF_ASM_OP1(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP2(divb, 0xf6, 6, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX))
+ALT(DEF_ASM_OP1(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA))
+ALT(DEF_ASM_OP2(idivb, 0xf6, 7, OPC_MODRM | OPC_BWL, OPT_REG | OPT_EA, OPT_EAX))
+
+     /* shifts: rolb carries OPC_SHIFT ("shift opcodes"); NOTE(review):
+        presumably it is the template for the other rotate and shift
+        mnemonics -- confirm.  shld/shrd take an OPT_IM8 or OPT_CL
+        count; their 2 operand form reuses the OPT_CL opcode. */
+ALT(DEF_ASM_OP2(rolb, 0xc0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_IM8, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP2(rolb, 0xd2, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_CL, OPT_EA | OPT_REG))
+ALT(DEF_ASM_OP1(rolb, 0xd0, 0, OPC_MODRM | OPC_BWL | OPC_SHIFT, OPT_EA | OPT_REG))
+
+ALT(DEF_ASM_OP3(shldw, 0x0fa4, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP3(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP2(shldw, 0x0fa5, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP3(shrdw, 0x0fac, 0, OPC_MODRM | OPC_WL, OPT_IM8, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP3(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_CL, OPT_REGW, OPT_EA | OPT_REGW))
+ALT(DEF_ASM_OP2(shrdw, 0x0fad, 0, OPC_MODRM | OPC_WL, OPT_REGW, OPT_EA | OPT_REGW))
+
+     /* control transfer: call/jmp, lcall/ljmp, int, seto, enter/leave,
+        ret/lret, jo and the loop family.  OPC_JMP marks a jmp operand
+        and OPC_SHORTJMP a short jmp operand; seto and jo carry OPC_TEST
+        ("test opcodes"). */
+ALT(DEF_ASM_OP1(call, 0xff, 2, OPC_MODRM, OPT_INDIR))
+ALT(DEF_ASM_OP1(call, 0xe8, 0, OPC_JMP, OPT_ADDR))
+ALT(DEF_ASM_OP1(jmp, 0xff, 4, OPC_MODRM, OPT_INDIR))
+ALT(DEF_ASM_OP1(jmp, 0xeb, 0, OPC_SHORTJMP | OPC_JMP, OPT_ADDR))
+
+ALT(DEF_ASM_OP2(lcall, 0x9a, 0, 0, OPT_IM16, OPT_IM32))
+ALT(DEF_ASM_OP1(lcall, 0xff, 3, 0, OPT_EA))
+ALT(DEF_ASM_OP2(ljmp, 0xea, 0, 0, OPT_IM16, OPT_IM32))
+ALT(DEF_ASM_OP1(ljmp, 0xff, 5, 0, OPT_EA))
+
+ALT(DEF_ASM_OP1(int, 0xcd, 0, 0, OPT_IM8))
+ALT(DEF_ASM_OP1(seto, 0x0f90, 0, OPC_MODRM | OPC_TEST, OPT_REG8 | OPT_EA))
+    DEF_ASM_OP2(enter, 0xc8, 0, 0, OPT_IM16, OPT_IM8)
+    DEF_ASM_OP0(leave, 0xc9)
+    DEF_ASM_OP0(ret, 0xc3)
+ALT(DEF_ASM_OP1(ret, 0xc2, 0, 0, OPT_IM16))
+    DEF_ASM_OP0(lret, 0xcb)
+ALT(DEF_ASM_OP1(lret, 0xca, 0, 0, OPT_IM16))
+
+ALT(DEF_ASM_OP1(jo, 0x70, 0, OPC_SHORTJMP | OPC_JMP | OPC_TEST, OPT_ADDR))
+    DEF_ASM_OP1(loopne, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(loopnz, 0xe0, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(loope, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(loopz, 0xe1, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(loop, 0xe2, 0, OPC_SHORTJMP, OPT_ADDR)
+    DEF_ASM_OP1(jecxz, 0xe3, 0, OPC_SHORTJMP, OPT_ADDR)
+     
+     /* float */
+     /* specific fcomp handling */
+ALT(DEF_ASM_OP0L(fcomp, 0xd8d9, 0, 0))
+
+     /* fadd family: register forms (OPT_ST, optionally with OPT_ST0),
+        forms without operand, and memory forms (OPT_EA).
+        NOTE(review): OPC_FARITH is not described in the visible flag
+        list; presumably it plays for these entries the role OPC_ARITH
+        has for the integer ones -- confirm. */
+ALT(DEF_ASM_OP1(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST))
+ALT(DEF_ASM_OP2(fadd, 0xd8c0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0))
+ALT(DEF_ASM_OP0L(fadd, 0xdec1, 0, OPC_FARITH))
+ALT(DEF_ASM_OP1(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST))
+ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST, OPT_ST0))
+ALT(DEF_ASM_OP2(faddp, 0xdec0, 0, OPC_FARITH | OPC_REG, OPT_ST0, OPT_ST))
+ALT(DEF_ASM_OP0L(faddp, 0xdec1, 0, OPC_FARITH))
+ALT(DEF_ASM_OP1(fadds, 0xd8, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
+ALT(DEF_ASM_OP1(fiaddl, 0xda, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
+ALT(DEF_ASM_OP1(faddl, 0xdc, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
+ALT(DEF_ASM_OP1(fiadds, 0xde, 0, OPC_FARITH | OPC_MODRM, OPT_EA))
+
+     /* floating point instructions without operand: all use a two
+        byte opcode value except fwait, whose 0x9b is the value also
+        listed for wait */
+     DEF_ASM_OP0(fucompp, 0xdae9)
+     DEF_ASM_OP0(ftst, 0xd9e4)
+     DEF_ASM_OP0(fxam, 0xd9e5)
+     DEF_ASM_OP0(fld1, 0xd9e8)
+     DEF_ASM_OP0(fldl2t, 0xd9e9)
+     DEF_ASM_OP0(fldl2e, 0xd9ea)
+     DEF_ASM_OP0(fldpi, 0xd9eb)
+     DEF_ASM_OP0(fldlg2, 0xd9ec)
+     DEF_ASM_OP0(fldln2, 0xd9ed)
+     DEF_ASM_OP0(fldz, 0xd9ee)
+
+     DEF_ASM_OP0(f2xm1, 0xd9f0)
+     DEF_ASM_OP0(fyl2x, 0xd9f1)
+     DEF_ASM_OP0(fptan, 0xd9f2)
+     DEF_ASM_OP0(fpatan, 0xd9f3)
+     DEF_ASM_OP0(fxtract, 0xd9f4)
+     DEF_ASM_OP0(fprem1, 0xd9f5)
+     DEF_ASM_OP0(fdecstp, 0xd9f6)
+     DEF_ASM_OP0(fincstp, 0xd9f7)
+     DEF_ASM_OP0(fprem, 0xd9f8)
+     DEF_ASM_OP0(fyl2xp1, 0xd9f9)
+     DEF_ASM_OP0(fsqrt, 0xd9fa)
+     DEF_ASM_OP0(fsincos, 0xd9fb)
+     DEF_ASM_OP0(frndint, 0xd9fc)
+     DEF_ASM_OP0(fscale, 0xd9fd)
+     DEF_ASM_OP0(fsin, 0xd9fe)
+     DEF_ASM_OP0(fcos, 0xd9ff)
+     DEF_ASM_OP0(fchs, 0xd9e0)
+     DEF_ASM_OP0(fabs, 0xd9e1)
+     DEF_ASM_OP0(fninit, 0xdbe3)
+     DEF_ASM_OP0(fnclex, 0xdbe2)
+     DEF_ASM_OP0(fnop, 0xd9d0)
+     DEF_ASM_OP0(fwait, 0x9b)
+
+    /* fp load: register forms (OPT_ST with OPC_REG) first, then memory
+       forms (OPT_EA with OPC_MODRM); fildq and fildll are the same
+       entry under two names */
+    DEF_ASM_OP1(fld, 0xd9c0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fldl, 0xd9c0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(flds, 0xd9, 0, OPC_MODRM, OPT_EA)
+ALT(DEF_ASM_OP1(fldl, 0xdd, 0, OPC_MODRM, OPT_EA))
+    DEF_ASM_OP1(fildl, 0xdb, 0, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fildq, 0xdf, 5, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fildll, 0xdf, 5, OPC_MODRM,OPT_EA)
+    DEF_ASM_OP1(fldt, 0xdb, 5, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fbld, 0xdf, 4, OPC_MODRM, OPT_EA)
+    
+    /* fp store: register forms (OPT_ST with OPC_REG) and memory forms
+       (OPT_EA with OPC_MODRM); fistpq and fistpll are the same entry
+       under two names */
+    DEF_ASM_OP1(fst, 0xddd0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fstl, 0xddd0, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fsts, 0xd9, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fstps, 0xd9, 3, OPC_MODRM, OPT_EA)
+ALT(DEF_ASM_OP1(fstl, 0xdd, 2, OPC_MODRM, OPT_EA))
+    DEF_ASM_OP1(fstpl, 0xdd, 3, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fist, 0xdf, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistp, 0xdf, 3, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistl, 0xdb, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistpl, 0xdb, 3, OPC_MODRM, OPT_EA)
+
+    DEF_ASM_OP1(fstp, 0xddd8, 0, OPC_REG, OPT_ST)
+    DEF_ASM_OP1(fistpq, 0xdf, 7, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fistpll, 0xdf, 7, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fstpt, 0xdb, 7, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(fbstp, 0xdf, 6, OPC_MODRM, OPT_EA)
+
+    /* exchange */
+    DEF_ASM_OP0(fxch, 0xd9c9)
+ALT(DEF_ASM_OP1(fxch, 0xd9c8, 0, OPC_REG, OPT_ST))
+
+    /* misc FPU */
+    /* entries flagged OPC_FWAIT ("add fwait opcode") use the same
+       opcode as the matching fn* entry: finit/fninit, fstcw/fnstcw,
+       fstsw/fnstsw, fclex/fnclex, fstenv/fnstenv, fsave/fnsave */
+    DEF_ASM_OP1(fucom, 0xdde0, 0, OPC_REG, OPT_ST )
+    DEF_ASM_OP1(fucomp, 0xdde8, 0, OPC_REG, OPT_ST )
+
+    DEF_ASM_OP0L(finit, 0xdbe3, 0, OPC_FWAIT)
+    DEF_ASM_OP1(fldcw, 0xd9, 5, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fnstcw, 0xd9, 7, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fstcw, 0xd9, 7, OPC_MODRM | OPC_FWAIT, OPT_EA )
+    DEF_ASM_OP0(fnstsw, 0xdfe0)
+ALT(DEF_ASM_OP1(fnstsw, 0xdfe0, 0, 0, OPT_EAX ))
+ALT(DEF_ASM_OP1(fnstsw, 0xdd, 7, OPC_MODRM, OPT_EA ))
+    DEF_ASM_OP1(fstsw, 0xdfe0, 0, OPC_FWAIT, OPT_EAX )
+ALT(DEF_ASM_OP0L(fstsw, 0xdfe0, 0, OPC_FWAIT))
+ALT(DEF_ASM_OP1(fstsw, 0xdd, 7, OPC_MODRM | OPC_FWAIT, OPT_EA ))
+    DEF_ASM_OP0L(fclex, 0xdbe2, 0, OPC_FWAIT)
+    DEF_ASM_OP1(fnstenv, 0xd9, 6, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fstenv, 0xd9, 6, OPC_MODRM | OPC_FWAIT, OPT_EA )
+    DEF_ASM_OP1(fldenv, 0xd9, 4, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fnsave, 0xdd, 6, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(fsave, 0xdd, 6, OPC_MODRM | OPC_FWAIT, OPT_EA )
+    DEF_ASM_OP1(frstor, 0xdd, 4, OPC_MODRM, OPT_EA )
+    DEF_ASM_OP1(ffree, 0xddc0, 4, OPC_REG, OPT_ST )
+    DEF_ASM_OP1(ffreep, 0xdfc0, 4, OPC_REG, OPT_ST )
+
+    /* segments: the entries sharing opcode 0x0f00 or 0x0f01 are told
+       apart by their third argument */
+    DEF_ASM_OP2(arpl, 0x63, 0, OPC_MODRM, OPT_REG16, OPT_REG16 | OPT_EA)
+    DEF_ASM_OP2(lar, 0x0f02, 0, OPC_MODRM, OPT_REG32 | OPT_EA, OPT_REG32)
+    DEF_ASM_OP1(lgdt, 0x0f01, 2, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(lidt, 0x0f01, 3, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(lldt, 0x0f00, 2, OPC_MODRM, OPT_EA | OPT_REG)
+    DEF_ASM_OP1(lmsw, 0x0f01, 6, OPC_MODRM, OPT_EA | OPT_REG)
+ALT(DEF_ASM_OP2(lslw, 0x0f03, 0, OPC_MODRM | OPC_WL, OPT_EA | OPT_REG, OPT_REG))
+    DEF_ASM_OP1(ltr, 0x0f00, 3, OPC_MODRM, OPT_EA | OPT_REG)
+    DEF_ASM_OP1(sgdt, 0x0f01, 0, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(sidt, 0x0f01, 1, OPC_MODRM, OPT_EA)
+    DEF_ASM_OP1(sldt, 0x0f00, 0, OPC_MODRM, OPT_REG | OPT_EA)
+    DEF_ASM_OP1(smsw, 0x0f01, 4, OPC_MODRM, OPT_REG | OPT_EA)
+    DEF_ASM_OP1(str, 0x0f00, 1, OPC_MODRM, OPT_REG16| OPT_EA)
+    DEF_ASM_OP1(verr, 0x0f00, 4, OPC_MODRM, OPT_REG | OPT_EA)
+    DEF_ASM_OP1(verw, 0x0f00, 5, OPC_MODRM, OPT_REG | OPT_EA)
+
+    /* 486: bswap, xadd, cmpxchg and invlpg.  bound is listed in this
+       group as well, as boundl and as boundw; the latter carries
+       OPC_D16 (generate data16 prefix). */
+    DEF_ASM_OP1(bswap, 0x0fc8, 0, OPC_REG, OPT_REG32 )
+ALT(DEF_ASM_OP2(xaddb, 0x0fc0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA ))
+ALT(DEF_ASM_OP2(cmpxchgb, 0x0fb0, 0, OPC_MODRM | OPC_BWL, OPT_REG, OPT_REG | OPT_EA ))
+    DEF_ASM_OP1(invlpg, 0x0f01, 7, OPC_MODRM, OPT_EA )
+
+    DEF_ASM_OP2(boundl, 0x62, 0, OPC_MODRM, OPT_REG32, OPT_EA)
+    DEF_ASM_OP2(boundw, 0x62, 0, OPC_MODRM | OPC_D16, OPT_REG16, OPT_EA)
+
+    /* pentium */
+    DEF_ASM_OP1(cmpxchg8b, 0x0fc7, 1, OPC_MODRM, OPT_EA )
+    
+    /* pentium pro: cmovo carries OPC_TEST ("test opcodes") like seto
+       and jo; the fcmov* and f[u]comi[p] entries all take the operand
+       pair (OPT_ST, OPT_ST0) */
+    ALT(DEF_ASM_OP2(cmovo, 0x0f40, 0, OPC_MODRM | OPC_TEST, OPT_REG32 | OPT_EA, OPT_REG32))
+
+    DEF_ASM_OP2(fcmovb, 0xdac0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmove, 0xdac8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovbe, 0xdad0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovu, 0xdad8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovnb, 0xdbc0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovne, 0xdbc8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovnbe, 0xdbd0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcmovnu, 0xdbd8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+
+    DEF_ASM_OP2(fucomi, 0xdbe8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcomi, 0xdbf0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fucomip, 0xdfe8, 0, OPC_REG, OPT_ST, OPT_ST0 )
+    DEF_ASM_OP2(fcomip, 0xdff0, 0, OPC_REG, OPT_ST, OPT_ST0 )
+
+    /* mmx: most entries take (OPT_EA | OPT_MMX, OPT_MMX).  The shift
+       instructions have in addition an OPT_IM8 alternative that uses
+       0x0f71, 0x0f72 or 0x0f73 together with a distinct third
+       argument.  emms is the only entry without operand in this group
+       and is marked as the last OP0 of the table. */
+    DEF_ASM_OP0(emms, 0x0f77) /* must be last OP0 */
+    DEF_ASM_OP2(movd, 0x0f6e, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_MMX )
+ALT(DEF_ASM_OP2(movd, 0x0f7e, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_REG32 ))
+    DEF_ASM_OP2(movq, 0x0f6f, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(movq, 0x0f7f, 0, OPC_MODRM, OPT_MMX, OPT_EA | OPT_MMX ))
+    DEF_ASM_OP2(packssdw, 0x0f6b, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(packsswb, 0x0f63, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(packuswb, 0x0f67, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddb, 0x0ffc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddw, 0x0ffd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddd, 0x0ffe, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddsb, 0x0fec, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddsw, 0x0fed, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddusb, 0x0fdc, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(paddusw, 0x0fdd, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pand, 0x0fdb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pandn, 0x0fdf, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpeqb, 0x0f74, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpeqw, 0x0f75, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpeqd, 0x0f76, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpgtb, 0x0f64, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpgtw, 0x0f65, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pcmpgtd, 0x0f66, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pmaddwd, 0x0ff5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pmulhw, 0x0fe5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pmullw, 0x0fd5, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(por, 0x0feb, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psllw, 0x0ff1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psllw, 0x0f71, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(pslld, 0x0ff2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(pslld, 0x0f72, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psllq, 0x0ff3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psllq, 0x0f73, 6, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psraw, 0x0fe1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psraw, 0x0f71, 4, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrad, 0x0fe2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrad, 0x0f72, 4, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrlw, 0x0fd1, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrlw, 0x0f71, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrld, 0x0fd2, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrld, 0x0f72, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psrlq, 0x0fd3, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMX ))
+    DEF_ASM_OP2(psubb, 0x0ff8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubw, 0x0ff9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubd, 0x0ffa, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubsb, 0x0fe8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubsw, 0x0fe9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubusb, 0x0fd8, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(psubusw, 0x0fd9, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckhbw, 0x0f68, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckhwd, 0x0f69, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckhdq, 0x0f6a, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpcklbw, 0x0f60, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpcklwd, 0x0f61, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(punpckldq, 0x0f62, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+    DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMX, OPT_MMX )
+
+/* drop the table macros at the end of the table, so that whoever
+   includes this file again can supply fresh definitions */
+#undef ALT
+#undef DEF_ASM_OP0
+#undef DEF_ASM_OP0L
+#undef DEF_ASM_OP1
+#undef DEF_ASM_OP2
+#undef DEF_ASM_OP3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tccasm.c	Mon Jan 06 20:21:42 2003 +0000
@@ -0,0 +1,744 @@
+/*
+ *  GAS like assembler for TCC
+ * 
+ *  Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Return the token number standing for local label number 'n'.  The
+   label is registered through tok_alloc() under the name "L..<n>".
+   's1' is not used. */
+static int asm_get_local_label_name(TCCState *s1, unsigned int n)
+{
+    char name[64];
+    int len;
+
+    /* 64 bytes always hold "L.." plus the decimal digits of n, so the
+       value returned by snprintf() is the length of the string */
+    len = snprintf(name, sizeof(name), "L..%u", n);
+    return tok_alloc(name, len)->tok;
+}
+
+/* We do not use the C expression parser to handle symbols. Maybe the
+   C expression parser could be tweaked to do so. */
+
+/* Parse one unary term of an assembler expression into *pe.
+   Accepted forms:
+     - a number (TOK_PPNUM converted by strtol() with base 0);
+     - a number followed by 'b' or 'f': reference to a local label,
+       looked up backward or forward;
+     - unary '+', '-' or '~' applied to another unary term ('-' and
+       '~' are refused when the term is a label);
+     - an identifier, taken as a label and created when unknown.
+   On return pe->v holds the numeric part and pe->sym the label, or
+   NULL for a pure number.  Anything else is reported with error(). */
+static void asm_expr_unary(TCCState *s1, ExprValue *pe)
+{
+    Sym *lab;
+    int unop, num, lab_tok;
+    const char *end;
+
+    if (tok == TOK_PPNUM) {
+        end = tokc.cstr->data;
+        num = strtol(end, (char **)&end, 0);
+        if (*end == '\0') {
+            /* plain constant */
+            pe->v = num;
+            pe->sym = NULL;
+        } else if (*end == 'b') {
+            /* backward : find the last corresponding defined label */
+            lab_tok = asm_get_local_label_name(s1, num);
+            lab = label_find(lab_tok);
+            if (lab && lab->r == 0)
+                lab = lab->prev_tok; /* skip a pending forward reference */
+            if (!lab)
+                error("local label '%d' not found backward", num);
+            pe->v = 0;
+            pe->sym = lab;
+        } else if (*end == 'f') {
+            /* forward : reuse a still undefined label, otherwise (no
+               label yet, or the last one is defined) push a new one */
+            lab_tok = asm_get_local_label_name(s1, num);
+            lab = label_find(lab_tok);
+            if (!lab || lab->r) {
+                lab = label_push(&s1->asm_labels, lab_tok, 0);
+                lab->type.t = VT_STATIC | VT_VOID;
+            }
+            pe->v = 0;
+            pe->sym = lab;
+        } else {
+            error("invalid number syntax");
+        }
+        next();
+    } else if (tok == '+') {
+        next();
+        asm_expr_unary(s1, pe);
+    } else if (tok == '-' || tok == '~') {
+        unop = tok;
+        next();
+        asm_expr_unary(s1, pe);
+        if (pe->sym)
+            error("invalid operation with label");
+        pe->v = (unop == '-') ? -pe->v : ~pe->v;
+    } else if (tok >= TOK_IDENT) {
+        /* label case : if the label was not found, add one */
+        lab = label_find(tok);
+        if (!lab) {
+            lab = label_push(&s1->asm_labels, tok, 0);
+            /* NOTE: by default, the symbol is global */
+            lab->type.t = VT_VOID;
+        }
+        pe->v = 0;
+        pe->sym = lab;
+        next();
+    } else {
+        error("bad expression syntax [%s]", get_tok_str(tok, &tokc));
+    }
+}
+    
+/* Parse a chain of unary terms joined by '*', '/', '%', TOK_SHL or
+   TOK_SAR, evaluated from left to right into *pe.  Both sides of an
+   operator must be pure numbers: a label on either side is reported
+   with error(), and so is a zero divisor. */
+static void asm_expr_prod(TCCState *s1, ExprValue *pe)
+{
+    ExprValue rhs;
+    int oper;
+
+    asm_expr_unary(s1, pe);
+    while (tok == '*' || tok == '/' || tok == '%' ||
+           tok == TOK_SHL || tok == TOK_SAR) {
+        oper = tok;
+        next();
+        asm_expr_unary(s1, &rhs);
+        if (pe->sym || rhs.sym)
+            error("invalid operation with label");
+        if (oper == '*') {
+            pe->v *= rhs.v;
+        } else if (oper == '/') {
+            if (rhs.v == 0)
+                error("division by zero");
+            pe->v /= rhs.v;
+        } else if (oper == '%') {
+            if (rhs.v == 0)
+                error("division by zero");
+            pe->v %= rhs.v;
+        } else if (oper == TOK_SHL) {
+            pe->v <<= rhs.v;
+        } else {
+            /* TOK_SAR */
+            pe->v >>= rhs.v;
+        }
+    }
+}
+
+/* Parse a chain of product terms joined by '&', '|' or '^', evaluated
+   from left to right into *pe.  A label on either side of an operator
+   is reported with error(). */
+static void asm_expr_logic(TCCState *s1, ExprValue *pe)
+{
+    ExprValue rhs;
+    int oper;
+
+    asm_expr_prod(s1, pe);
+    while (tok == '&' || tok == '|' || tok == '^') {
+        oper = tok;
+        next();
+        asm_expr_prod(s1, &rhs);
+        if (pe->sym || rhs.sym)
+            error("invalid operation with label");
+        if (oper == '&')
+            pe->v &= rhs.v;
+        else if (oper == '|')
+            pe->v |= rhs.v;
+        else
+            pe->v ^= rhs.v;
+    }
+}
+
+/* Parse a chain of logic terms joined by '+' or '-' into *pe.
+   An expression keeps at most one symbol, hence:
+     num + num, sym + num, num + sym   are accepted;
+     sym + sym                         is refused;
+     num - num, sym - num              are accepted;
+     sym - sym                         is accepted when both are the
+        same symbol, or when both are defined in the same section (the
+        distance between them is then added to the value); the result
+        is a pure number;
+     num - sym                         is refused. */
+static inline void asm_expr_sum(TCCState *s1, ExprValue *pe)
+{
+    ExprValue rhs;
+    int oper;
+
+    asm_expr_logic(s1, pe);
+    while (tok == '+' || tok == '-') {
+        oper = tok;
+        next();
+        asm_expr_logic(s1, &rhs);
+        if (oper == '+') {
+            if (pe->sym != NULL && rhs.sym != NULL) {
+                error("invalid operation with label");
+            } else {
+                pe->v += rhs.v;
+                if (pe->sym == NULL)
+                    pe->sym = rhs.sym;
+            }
+        } else {
+            pe->v -= rhs.v;
+            /* NOTE: we are less powerful than gas in that case
+               because we store only one symbol in the expression */
+            if (rhs.sym != NULL) {
+                if (pe->sym == NULL) {
+                    error("invalid operation with label");
+                } else if (pe->sym != rhs.sym &&
+                           (pe->sym->r != rhs.sym->r || pe->sym->r == 0)) {
+                    /* distinct symbols, not both defined in one section */
+                    error("invalid operation with label");
+                } else {
+                    /* for two defined symbols of the same section, the
+                       difference of their stored offsets is used */
+                    if (pe->sym != rhs.sym)
+                        pe->v += (long)pe->sym->next - (long)rhs.sym->next;
+                    pe->sym = NULL; /* the symbols cancel out */
+                }
+            }
+        }
+    }
+}
+
+/* Parse a complete assembler expression into *pe.  This is only an
+   entry point: all the work is forwarded to asm_expr_sum(). */
+static void asm_expr(TCCState *s1, ExprValue *pe)
+{
+    asm_expr_sum(s1, pe);
+}
+
+/* Parse an expression that has to reduce to a plain number and return
+   its value.  When a symbol is left in the result, expect("constant")
+   is called. */
+static int asm_int_expr(TCCState *s1)
+{
+    ExprValue val;
+
+    asm_expr(s1, &val);
+    if (val.sym != NULL)
+        expect("constant");
+    return val.v;
+}
+
+/* NOTE: the same name space as C labels is used to avoid using too
+   much memory when storing labels in TokenStrings */
+/* Define 'label' at the current output position: the label symbol
+   receives the number of the current section in 'r' and the current
+   value of 'ind' in 'next'.
+   A label that is known but not yet defined (r == 0) is reused.
+   Defining an already defined label again is an error, except for
+   local labels (is_local != 0), for which a new symbol is pushed. */
+static void asm_new_label(TCCState *s1, int label, int is_local)
+{
+    Sym *sym = label_find(label);
+    int need_push = (sym == NULL);
+
+    if (sym != NULL && sym->r != 0) {
+        /* the label is already defined */
+        if (is_local) {
+            /* redefinition of local labels is possible */
+            need_push = 1;
+        } else {
+            error("assembler label '%s' already defined", 
+                  get_tok_str(label, NULL));
+        }
+    }
+    if (need_push) {
+        sym = label_push(&s1->asm_labels, label, 0);
+        sym->type.t = VT_STATIC | VT_VOID;
+    }
+    sym->r = cur_text_section->sh_num;
+    sym->next = (void *)ind;
+}
+
+/* Release every label of st->asm_labels.  Labels that were defined
+   (r != 0) are first handed to put_extern_sym() with their section and
+   stored offset; then the identifier's label slot is cleared and the
+   symbol is freed.  The list is left empty. */
+static void asm_free_labels(TCCState *st)
+{
+    Sym *cur, *older;
+
+    cur = st->asm_labels;
+    while (cur != NULL) {
+        older = cur->prev; /* read the link before 'cur' is freed */
+        /* define symbol value in object file */
+        if (cur->r != 0)
+            put_extern_sym(cur, st->sections[cur->r], (long)cur->next, 0);
+        /* remove label */
+        table_ident[cur->v - TOK_IDENT]->sym_label = NULL;
+        tcc_free(cur);
+        cur = older;
+    }
+    st->asm_labels = NULL;
+}
+
+/* Parse and execute one assembler directive.  The current token is
+   the leading '.'; the directive name is fetched with next().
+
+   Handled directives (data goes to cur_text_section at 'ind'):
+     .align n[,v]          pad with byte v (default 0) up to a multiple
+                           of n; n must be a power of two, and n == 0
+                           requests no padding
+     .skip n[,v]
+     .space n[,v]          emit n bytes of value v (default 0); n must
+                           not be negative
+     .byte e[,e...]        1 byte constants
+     .word/.short e[,...]  2 byte constants
+     .long/.int e[,...]    4 byte values emitted with gen_expr32()
+   In a SHT_NOBITS section nothing is written, only 'ind' advances.
+   Any other directive is reported with error(). */
+static void asm_parse_directive(TCCState *s1)
+{
+    int n, offset, v, size, tok1;
+    Section *sec;
+    uint8_t *ptr;
+
+    /* assembler directive */
+    next();
+    sec = cur_text_section;
+    switch(tok) {
+    case TOK_ASM_align:
+    case TOK_ASM_skip:
+    case TOK_ASM_space:
+        tok1 = tok;
+        next();
+        n = asm_int_expr(s1);
+        if (tok1 == TOK_ASM_align) {
+            if (n < 0 || (n & (n-1)) != 0)
+                error("alignment must be a positive power of two");
+            if (n == 0) {
+                /* zero passes the power of two test, but the mask
+                   computation below would give offset 0 and hence a
+                   negative size: handle it as "no alignment" */
+                size = 0;
+            } else {
+                offset = (ind + n - 1) & -n;
+                size = offset - ind;
+            }
+        } else {
+            /* a negative count would reach memset() as a huge size */
+            if (n < 0)
+                error("size must not be negative");
+            size = n;
+        }
+        /* optional fill value */
+        v = 0;
+        if (tok == ',') {
+            next();
+            v = asm_int_expr(s1);
+        }
+        if (sec->sh_type != SHT_NOBITS) {
+            sec->data_offset = ind;
+            ptr = section_ptr_add(sec, size);
+            memset(ptr, v, size);
+        }
+        ind += size;
+        break;
+    case TOK_ASM_byte:
+        size = 1;
+        goto asm_data;
+    case TOK_ASM_word:
+    case TOK_SHORT:
+        size = 2;
+        goto asm_data;
+    case TOK_LONG:
+    case TOK_INT:
+        size = 4;
+    asm_data:
+        next();
+        /* comma separated list of values */
+        for(;;) {
+            ExprValue e;
+            asm_expr(s1, &e);
+            if (sec->sh_type != SHT_NOBITS) {
+                if (size == 4) {
+                    gen_expr32(&e);
+                } else {
+                    /* 1 and 2 byte data cannot hold a symbol */
+                    if (e.sym)
+                        expect("constant");
+                    if (size == 1)
+                        g(e.v);
+                    else
+                        gen_le16(e.v);
+                }
+            } else {
+                ind += size;
+            }
+            if (tok != ',')
+                break;
+            next();
+        }
+        break;
+    default:
+        error("unknown assembler directive .%s", get_tok_str(tok, NULL));
+        break;
+    }
+}
+
+
+/* assemble a file: read tokens from the current input (the global
+   'file') until TOK_EOF and handle them one statement at a time.
+   A statement is a '#' comment, a '.' directive, or an instruction,
+   and may be preceded by any number of labels ('name:' or a decimal
+   local label 'N:'). Statements end with ';' or a linefeed.
+   If 'do_preprocess' is non-zero, PARSE_FLAG_PREPROCESS is set in
+   parse_flags. parse_flags and tok_flags are overwritten and not
+   restored here. asm_free_labels() is called once the input is
+   exhausted. Always returns 0. */
+static int tcc_assemble_internal(TCCState *s1, int do_preprocess)
+{
+    int opcode;
+
+#if 0
+    /* print stats about opcodes (disabled debugging aid: counts the
+       instructions per operand count and lists the distinct operand
+       types which have more than one bit set) */
+    {
+        const ASMInstr *pa;
+        int freq[4];
+        int op_vals[500];
+        int nb_op_vals, i, j;
+
+        nb_op_vals = 0;
+        memset(freq, 0, sizeof(freq));
+        for(pa = asm_instrs; pa->sym != 0; pa++) {
+            freq[pa->nb_ops]++;
+            for(i=0;i<pa->nb_ops;i++) {
+                for(j=0;j<nb_op_vals;j++) {
+                    if (pa->op_type[i] == op_vals[j])
+                        goto found;
+                }
+                op_vals[nb_op_vals++] = pa->op_type[i];
+            found: ;
+            }
+        }
+        for(i=0;i<nb_op_vals;i++) {
+            int v = op_vals[i];
+            if ((v & (v - 1)) != 0)
+                printf("%3d: %08x\n", i, v);
+        }
+        printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
+               sizeof(asm_instrs), sizeof(asm_instrs) / sizeof(ASMInstr),
+               freq[0], freq[1], freq[2], freq[3]);
+    }
+#endif
+
+    /* XXX: undefine C labels */
+
+    ch = file->buf_ptr[0]; /* load the first character of the input buffer */
+    tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
+    parse_flags = 0;
+    if (do_preprocess)
+        parse_flags |= PARSE_FLAG_PREPROCESS;
+    next(); /* fetch the first token */
+    for(;;) {
+        if (tok == TOK_EOF)
+            break;
+        parse_flags |= PARSE_FLAG_LINEFEED; /* XXX: suppress that hack */
+    redo:
+        if (tok == '#') {
+            /* horrible gas comment */
+            while (tok != TOK_LINEFEED)
+                next(); /* discard every token up to the end of the line */
+        } else if (tok == '.') {
+            asm_parse_directive(s1);
+        } else if (tok == TOK_PPNUM) {
+            const char *p;
+            int n;
+            p = tokc.cstr->data;
+            n = strtol(p, (char **)&p, 10); /* p is advanced past the digits parsed */
+            if (*p != '\0') /* leftover characters: not a plain decimal label */
+                expect("':'");
+            /* new local label */
+            asm_new_label(s1, asm_get_local_label_name(s1, n), 1);
+            next();
+            skip(':');
+            goto redo; /* something else may follow the label on this line */
+        } else if (tok >= TOK_IDENT) {
+            /* instruction or label */
+            opcode = tok;
+            next();
+            if (tok == ':') {
+                /* new label */
+                asm_new_label(s1, opcode, 0);
+                next();
+                goto redo; /* something else may follow the label on this line */
+            } else {
+                asm_opcode(s1, opcode);
+            }
+        }
+        /* end of line: every statement must be terminated by ';' or by
+           a linefeed token */
+        if (tok != ';' && tok != TOK_LINEFEED){
+            expect("end of line");
+        }
+        parse_flags &= ~PARSE_FLAG_LINEFEED; /* XXX: suppress that hack */
+        next();
+    }
+
+    asm_free_labels(s1);
+
+    return 0;
+}
+
+/* Assemble the current input file. Output starts in the text section
+   at its current data offset. Returns whatever
+   tcc_assemble_internal() returns. */
+static int tcc_assemble(TCCState *s1, int do_preprocess)
+{
+    int status;
+
+    preprocess_init(s1);
+
+    /* the text section is the default one: start emitting at its
+       current data offset */
+    cur_text_section = text_section;
+    ind = text_section->data_offset;
+
+    status = tcc_assemble_internal(s1, do_preprocess);
+
+    /* write the final output position back to the current section */
+    cur_text_section->data_offset = ind;
+    return status;
+}
+
+/********************************************************************/
+/* GCC inline asm support */
+
+/* Run the assembler over the 'len' bytes at 'str' from within the
+   current C compilation unit, with C preprocessing disabled.
+   NOTE: str[len] is overwritten with CH_EOB, so the buffer must be
+   writable and hold at least len + 1 bytes. The globals 'file',
+   'parse_flags' and 'macro_ptr' are saved before the call and
+   restored afterwards. */
+static void tcc_assemble_inline(TCCState *s1, char *str, int len)
+{
+    BufferedFile *prev_file, *mem_file;
+    int prev_parse_flags, *prev_macro_ptr;
+
+    /* remember the parser state which is replaced below */
+    prev_file = file;
+    prev_parse_flags = parse_flags;
+    prev_macro_ptr = macro_ptr;
+
+    /* wrap the string in a pseudo file without file descriptor */
+    mem_file = tcc_malloc(sizeof(BufferedFile));
+    memset(mem_file, 0, sizeof(BufferedFile));
+    mem_file->fd = -1;
+    mem_file->buf_ptr = str;
+    mem_file->buf_end = str + len;
+    str[len] = CH_EOB;
+    /* take over the name and the line number of the enclosing file
+       so that errors are correctly reported */
+    mem_file->line_num = prev_file->line_num;
+    pstrcpy(mem_file->filename, sizeof(mem_file->filename),
+            prev_file->filename);
+
+    /* assemble from the pseudo file, with no macro expansion pending */
+    file = mem_file;
+    macro_ptr = NULL;
+    tcc_assemble_internal(s1, 0);
+
+    /* put the saved state back and release the pseudo file */
+    parse_flags = prev_parse_flags;
+    macro_ptr = prev_macro_ptr;
+    file = prev_file;
+    tcc_free(mem_file);
+}
+
+/* find a constraint by its number or id (gcc 3 extended syntax).
+   'name' points to the operand reference: either a decimal operand
+   number or '[identifier]'. Return the index of the operand in
+   operands[], or -1 if not found. If pp is not NULL, *pp is set to
+   the char after the reference: after the digits for a number, after
+   the ']' for an id. If the ']' is missing, *pp points just after the
+   '['; if 'name' is neither form, *pp is 'name' itself. */
+static int find_constraint(ASMOperand *operands, int nb_operands,
+                           const char *name, const char **pp)
+{
+    int index;
+    TokenSym *ts;
+    const char *p;
+
+    if (isnum(*name)) {
+        index = 0;
+        while (isnum(*name)) {
+            /* once the value is out of range it stays out of range:
+               stop accumulating so that a long digit string can
+               neither overflow 'index' nor wrap around to a valid
+               operand number. The digits are still consumed. */
+            if (index < nb_operands)
+                index = (index * 10) + (*name) - '0';
+            name++;
+        }
+        if (index >= nb_operands)
+            index = -1;
+    } else if (*name == '[') {
+        name++;
+        p = strchr(name, ']');
+        if (p) {
+            /* compare token numbers instead of strings */
+            ts = tok_alloc(name, p - name);
+            for(index = 0; index < nb_operands; index++) {
+                if (operands[index].id == ts->tok)
+                    goto found;
+            }
+            index = -1;
+        found:
+            name = p + 1;
+        } else {
+            index = -1;
+        }
+    } else {
+        index = -1;
+    }
+    if (pp)
+        *pp = name;
+    return index;
+}
+
+/* Build in 'out_str' a copy of the asm template 'in_str' in which
+   every operand reference is replaced by the text produced by
+   subst_asm_operand():
+     %%          -> a single '%'
+     %N, %[id]   -> operand N / operand named id
+     %cN, %nN, %bN, %wN, %hN -> same, with the letter passed as modifier
+   The terminating '\0' of the template is copied too. An error is
+   raised when the reference after '%' matches no operand. */
+static void subst_asm_operands(ASMOperand *operands, int nb_operands,
+                               int nb_outputs,
+                               CString *out_str, CString *in_str)
+{
+    const char *p;
+    int ch, op_index, mod;
+    ASMOperand *operand;
+    SValue val;
+
+    cstr_new(out_str);
+    p = in_str->data;
+    do {
+        ch = *p++;
+        if (ch == '%' && *p == '%') {
+            /* escaped percent: keep only one of them */
+            p++;
+            cstr_ccat(out_str, ch);
+        } else if (ch == '%') {
+            /* optional one letter modifier before the reference */
+            switch (*p) {
+            case 'c':
+            case 'n':
+            case 'b':
+            case 'w':
+            case 'h':
+                mod = *p++;
+                break;
+            default:
+                mod = 0;
+                break;
+            }
+            op_index = find_constraint(operands, nb_operands, p, &p);
+            if (op_index < 0)
+                error("invalid operand reference after %%");
+            operand = &operands[op_index];
+            /* work on a copy of the operand value; when a register
+               was assigned to the operand, the copy refers to that
+               register instead */
+            val = *operand->vt;
+            if (operand->reg >= 0) {
+                val.r = operand->reg;
+                if ((operand->vt->r & VT_VALMASK) == VT_LLOCAL)
+                    val.r |= VT_LVAL;
+            }
+            subst_asm_operand(out_str, &val, mod);
+        } else {
+            /* ordinary character, including the final '\0' */
+            cstr_ccat(out_str, ch);
+        }
+    } while (ch != '\0');
+}
+
+
+/* Parse a comma separated list of asm operands, each of the form
+       [ '[' identifier ']' ] "constraint" '(' expression ')'
+   and append them to operands[] starting at index *nb_operands_ptr,
+   which is updated on return. The list is empty, and nothing is
+   consumed, when the current token is ':' (next section follows) or
+   ')' (end of the asm statement).
+   For each operand, the constraint string is copied into a buffer
+   obtained from tcc_malloc() and the value of the expression is left
+   on the value stack, op->vt pointing to it.
+   is_output: if non-zero, each expression is checked with
+   test_lvalue(); otherwise some lvalues are loaded with gv(). */
+static void parse_asm_operands(ASMOperand *operands, int *nb_operands_ptr,
+                               int is_output)
+{
+    ASMOperand *op;
+    int nb_operands;
+
+    /* an operand section may be empty: it then ends directly with the
+       ':' of the next section or with the closing ')' */
+    if (tok != ':' && tok != ')') {
+        nb_operands = *nb_operands_ptr;
+        for(;;) {
+            if (nb_operands >= MAX_ASM_OPERANDS)
+                error("too many asm operands");
+            op = &operands[nb_operands++];
+            /* optional symbolic name: [identifier] */
+            op->id = 0;
+            if (tok == '[') {
+                next();
+                if (tok < TOK_IDENT)
+                    expect("identifier");
+                op->id = tok;
+                next();
+                skip(']');
+            }
+            if (tok != TOK_STR)
+                expect("string constant");
+            op->constraint = tcc_malloc(tokc.cstr->size);
+            strcpy(op->constraint, tokc.cstr->data);
+            next();
+            skip('(');
+            gexpr();
+            if (is_output) {
+                test_lvalue();
+            } else {
+                /* we want to avoid LLOCAL case. note that it may come
+                   from register storage, so we need to convert (reg)
+                   case */
+                if ((vtop->r & VT_LVAL) &&
+                    ((vtop->r & VT_VALMASK) == VT_LLOCAL ||
+                     (vtop->r & VT_VALMASK) < VT_CONST)) {
+                    gv(RC_INT);
+                }
+            }
+            /* the value stays on the value stack */
+            op->vt = vtop;
+            skip(')');
+            if (tok == ',') {
+                next();
+            } else {
+                break;
+            }
+        }
+        *nb_operands_ptr = nb_operands;
+    }
+}
+
+/* parse the GCC asm() instruction:
+       asm [volatile] ( "template"
+                        [ : outputs [ : inputs [ : clobbers ] ] ] ) ;
+   Each of the three sections may be empty. The steps are: parse the
+   statement, save the registers, compute the operand constraints,
+   substitute the operands in the template, generate the operand
+   loads, assemble the resulting string with the internal assembler,
+   generate the output stores, and finally pop the operand values
+   from the value stack. On return the current token is the one
+   following the closing ')' (re-read with next() because the
+   assembler replaced it). */
+static void asm_instr(void)
+{
+    CString astr, astr1;
+    ASMOperand operands[MAX_ASM_OPERANDS];
+    int nb_outputs, nb_operands, i;
+    uint8_t input_regs_allocated[NB_ASM_REGS];
+    uint8_t output_regs_allocated[NB_ASM_REGS];
+    uint8_t clobber_regs[NB_ASM_REGS];
+
+    next();
+    /* since we always generate the asm() instruction, we can ignore
+       volatile */
+    if (tok == TOK_VOLATILE1 || tok == TOK_VOLATILE2 || tok == TOK_VOLATILE3) {
+        next();
+    }
+    skip('(');
+    /* read the string: adjacent string constants are concatenated */
+    if (tok != TOK_STR)
+        expect("string constant");
+    cstr_new(&astr);
+    while (tok == TOK_STR) {
+        /* XXX: add \0 handling too ? */
+        cstr_cat(&astr, tokc.cstr->data);
+        next();
+    }
+    cstr_ccat(&astr, '\0');
+    nb_operands = 0;
+    nb_outputs = 0;
+    memset(clobber_regs, 0, sizeof(clobber_regs));
+    if (tok == ':') {
+        next();
+        /* output args */
+        parse_asm_operands(operands, &nb_operands, 1);
+        nb_outputs = nb_operands;
+        if (tok == ':') {
+            next();
+            /* input args */
+            parse_asm_operands(operands, &nb_operands, 0);
+            if (tok == ':') {
+                /* clobber list */
+                /* XXX: handle registers */
+                next();
+                /* the clobber list may be empty, as in
+                   asm("" : : : ) */
+                if (tok != ')') {
+                    for(;;) {
+                        if (tok != TOK_STR)
+                            expect("string constant");
+                        asm_clobber(clobber_regs, tokc.cstr->data);
+                        next();
+                        if (tok == ',') {
+                            next();
+                        } else {
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    skip(')');
+    /* NOTE: we do not eat the ';' so that we can restore the current
+       token after the assembler parsing */
+    if (tok != ';')
+        expect("';'");
+
+    /* save all values in the memory */
+    save_regs(0);
+
+    /* compute constraints: first pass for the inputs, second pass for
+       the outputs, which is also given the registers allocated to
+       the inputs */
+    asm_compute_constraints(input_regs_allocated,
+                            operands, nb_operands, nb_outputs, 0,
+                            NULL);
+    asm_compute_constraints(output_regs_allocated,
+                            operands, nb_operands, nb_outputs, 1,
+                            input_regs_allocated);
+
+    /* substitute the operands in the asm string */
+#ifdef ASM_DEBUG
+    printf("asm: \"%s\"\n", (char *)astr.data);
+#endif
+    subst_asm_operands(operands, nb_operands, nb_outputs, &astr1, &astr);
+    cstr_free(&astr);
+#ifdef ASM_DEBUG
+    printf("subst_asm: \"%s\"\n", (char *)astr1.data);
+#endif
+
+    /* generate loads */
+    asm_gen_code(operands, nb_operands, nb_outputs, 0, clobber_regs);
+
+    /* assemble the string with tcc internal assembler. The length
+       excludes the final '\0', which the assembler overwrites with
+       its end of buffer marker */
+    tcc_assemble_inline(tcc_state, astr1.data, astr1.size - 1);
+
+    /* restore the current C token */
+    next();
+
+    /* store the output values if needed */
+    asm_gen_code(operands, nb_operands, nb_outputs, 1, clobber_regs);
+
+    /* free everything: one constraint string and one value stack
+       entry per operand */
+    for(i=0;i<nb_operands;i++) {
+        ASMOperand *op;
+        op = &operands[i];
+        tcc_free(op->constraint);
+        vpop();
+    }
+    cstr_free(&astr1);
+}
+