#!/bin/bash
#
# This script compiles snippets of x86 assembler to find the binary/machine code for those instructions.
# It does this for various register combinations and specific immediate/address values. The snippets of assembler
# are what a JIT compiler for a VM will need in order to translate VM instructions into native instructions,
# or what a simple assembler will need in order to convert and output native code.
#
# Scratch directory that holds the assembler input and the object file built from it.
mkdir -p tmp

# The output contains zero bytes as padding at the end of the file, so a sentinel
# instruction is appended after every snippet to tell padding zeros apart from zeros
# that may belong to the snippet's own encoding. A nop (0x90) is used for this:
#   - a tail of 0xAD, 0x90, 0x00, 0x00 means there are 2 padding bytes, so the last
#     3 bytes (padding plus the 0x90 of the nop) can be removed;
#   - a tail of 0xAD, 0x90 means there was no padding and only the nop is removed.
terminal_instruction="nop"

# Register table, one "<VM register number>:<x86-64 register name>" pair per entry.
# The stack pointer register can't be used as a general register, so it is left out;
# perhaps the VM should have 15 or fewer general purpose registers? With 15 it will
# probably translate fine to x64 and ARM, and to others such as MIPS, but the really
# low end is less certain. The ESP8266 seems to have a 106micro processor with
# 16 registers, so that should also be okay.
x86_regs=(
  0:rax 1:rcx 2:rdx 3:rbx
  4:rbp 5:rsi 6:rdi 7:r8
  8:r9 9:r10 10:r11 11:r12
  12:r13 13:r14 14:r15
)

# Immediate/address test values, one "<value used in the VM instruction>:<value used
# in the assembler>" pair per entry.
imm=(
  0xB4B5B6B7:0xB4B5B6B7
)
# Instruction tables. Every entry has the form "<VM opcode name>:<template>", where the
# template is GNU (AT&T syntax) assembler. Entries are single-quoted so that placeholders
# such as $I are stored literally and only filled in by the generation loops. A backslash
# in a template protects the following character while the template is expanded and does
# not appear in the generated assembler.
#
# 0 register operand instructions
ops0=(Nop:nop Ret:ret)
# 1 register operand instructions
ops1=(Push:push Pop:pop)
# 1 immediate operand instructions; $I stands for the immediate/address value
ops1i=(
'Jmp:jmp $I'
'Je:je $I'
'Jne:jne $I'
'Call:call $I'
# AT&T syntax marks an immediate operand with a leading '$'; a bare number is an absolute
# memory address. The template therefore needs a literal '$' (written \$) in front of the
# $I placeholder, otherwise MovI0 assembles to the same memory load as MovM0/MovC0.
'MovI0:mov \$$I,%rax'
'MovM0:mov \($I\),%rax' # TODO: might need to add segment/selector prefix
'MovC0:mov \($I\),%rax' # TODO: same
'Mov0M:mov %rax,\($I\)' # TODO: same
)
# 1 register and 1 immediate operand instructions
ops2i=(MovIR:mov)
# VM mov variants, for reference:
# MovRR, // reg -> reg
# MovMR, // mem[r] -> reg
# MovI0, // imm25 -> reg0
# MovM0, // mem[addr] -> reg0
# MovC0, // const[addr] -> reg0
# Mov0M, // reg0 -> mem[addr]
# MovRM, // reg -> mem[r]
# MovMM, // mem[r] -> mem[r]
# MovCM, // const[r] -> mem[r]
# MovIM, // imm20 -> mem[r]
#
# Tables emptied here are skipped by the generation loops; comment a line out to
# generate that group again.
ops0=()
ops1=()
# ops1i=()
ops2i=()
# 2 register operand instructions.
# Every entry has the form "<VM opcode name>:<template>". Templates are GNU (AT&T syntax)
# assembler, i.e. "op src,dst"; $S stands for the source register and $D for the
# destination register, and a multi-line template is a sequence of instructions.
# Entries are single-quoted so the placeholders are stored literally; a backslash protects
# the following character while the template is expanded and does not appear in the
# generated assembler.
ops2=(
'MovMM:push %rax
mov \($S\),%rax
mov %rax,\($D\)
pop %rax' # TODO: seg/sel
'MovCM:push %rax
mov \($S\),%rax
mov %rax,\($D\)
pop %rax' # TODO: seg/sel
'MovMR:mov \($S\),$D'
'MovRM:mov $S,\($D\)'
# div divides rdx:rax by its operand and leaves the quotient in rax and the remainder in
# rdx, so the dividend is first copied from $D into rax and the result copied back to $D.
# NOTE(review): when $D or $S is rax or rdx the save/restore and the xor interact with the
# operands (e.g. the final pops overwrite a result stored in rax/rdx) - these register
# combinations probably need dedicated sequences; TODO confirm.
'Div:push %rax
push %rdx
xor %rdx,%rdx
mov $D,%rax
div $S
mov %rax,$D
pop %rdx
pop %rax'
'Mod:push %rax
push %rdx
xor %rdx,%rdx
mov $D,%rax
div $S
mov %rdx,$D
pop %rdx
pop %rax'
'Not:mov $S,$D
not $D'
# Variable shifts take their count from cl, so the count is first copied from $S into rcx.
# NOTE(review): when $D is rcx the final pop overwrites the shifted value; TODO confirm
# how that combination should be handled.
'Shl:push %rcx
mov $S,%rcx
shl %cl,$D
pop %rcx'
'Shr:push %rcx
mov $S,%rcx
shr %cl,$D
pop %rcx'
'Cmp:cmp $S,$D'
'Or:or $S,$D'
'Xor:xor $S,$D'
'And:and $S,$D'
'MovRR:mov $S,$D'
# TODO:
# MovIR:"mov \$S,\$D"
'Add:add $S,$D'
'Sub:sub $S,$D'
'Mul:imul $S,$D'
)
# TODO: memory and immediate variations of mov
# TODO: control flow instructions
# TODO:
# MovIR, // imm20 -> reg
# MovRR, // reg -> reg
# MovMR, // mem[r] -> reg
# MovCR, // const[r] -> reg
# MovI0, // imm25 -> reg0
# MovM0, // mem[addr] -> reg0
# MovC0, // const[addr] -> reg0
# MovIM, // imm20 -> mem[r]
# MovRM, // reg -> mem[r]
# Mov0M, // reg0 -> mem[addr]
# MovMM, // mem[r] -> mem[r]
# MovCM, // const[r] -> mem[r]
# Ioctl,
# case MovIR: a_machine.m_registers[a_regA] = currentInstruction & 0xFFFFF; break;
# case MovMR: a_machine.m_registers[a_regA] = a_machine.m_memory[a_machine.m_registers[a_regB]]; break;
# case MovCR: a_machine.m_registers[a_regA] = a_machine.m_constants[a_machine.m_registers[a_regB]]; break;
#
# case MovI0: a_machine.m_registers[0] = currentInstruction & 0x1FFFFFF; break;
# case MovM0: a_machine.m_registers[0] = a_machine.m_memory[currentInstruction & 0x1FFFFFF]; break;
# case MovC0: a_machine.m_registers[0] = a_machine.m_constants[currentInstruction & 0x1FFFFFF]; break;
#
# case MovIM: a_machine.m_memory[a_machine.m_registers[a_regA]] = currentInstruction & 0xFFFFF; break;
# case MovRM: a_machine.m_memory[a_machine.m_registers[a_regA]] = a_machine.m_registers[a_regB]; break;
# case Mov0M: a_machine.m_memory[currentInstruction & 0x1FFFFFF] = a_machine.m_registers[0]; break;
# case MovMM: a_machine.m_memory[a_machine.m_registers[a_regA]] = a_machine.m_memory[a_machine.m_registers[a_regB]]; break;
# case MovCM: a_machine.m_memory[a_machine.m_registers[a_regA]] = a_machine.m_constants[a_machine.m_registers[a_regB]]; break;
#
# case Ioctl:
# a_machine.m_ioctls[currentInstruction & 0x1FFFFFF](a_machine.m_registers);
# break;
#######################################
# Assemble one snippet and print its machine code as an "opcodes[...]" table entry.
# The snippet, followed by the terminal instruction, is written to tmp/x86_opcode.asm
# and assembled to tmp/x86_opcode.o with gcc. Everything after the first 208 bytes of
# the object file is printed as a comma separated list of hex bytes, so the bytes of
# the terminal instruction and anything following it are still part of the output.
# Globals:
#   terminal_instruction (read) - instruction appended after the snippet
# Arguments:
#   $1 - assembler snippet in GNU (AT&T) syntax; may span several lines
#   $2 - VM instruction expression used as the index of the table entry
# Outputs:
#   Writes "opcodes[<$2>] = { <bytes> };" to stdout; diagnostics go to stderr.
# Returns:
#   Exits the whole script with status 1 when the snippet does not assemble.
#######################################
function generate {
  local gnu_x86_instr=${1}
  local vm_instr=${2}
  # Number of bytes skipped at the start of the object file.
  # NOTE(review): presumably the size of the object file header emitted by the
  # toolchain this was written for - confirm before running on another platform.
  local header_size=208
  local size
  local opcode

  printf '%s\n' "${gnu_x86_instr}" "${terminal_instruction}" > tmp/x86_opcode.asm

  if ! gcc -c tmp/x86_opcode.asm -o tmp/x86_opcode.o; then
    printf 'failed to assemble the snippet for %s:\n%s\n' "${vm_instr}" "${gnu_x86_instr}" >&2
    exit 1
  fi

  # wc -c is used for the file size because the option syntax of stat differs
  # between BSD and GNU.
  size=$(( $(wc -c < tmp/x86_opcode.o) - header_size ))
  # xxd -i indents its output by two spaces, which the final tail strips.
  opcode=$(tail -c "+$(( header_size + 1 ))" tmp/x86_opcode.o \
    | head -c "${size}" \
    | xxd -c 255 -i \
    | tail -c +3)
  # echo "opcodes[${vm_instr}] = { ${size}, { ${opcode} } };"
  echo "opcodes[${vm_instr}] = { ${opcode} };"
}
# Instructions without operands: the template is the complete instruction and every
# operand field of the VM instruction is 0.
for entry in "${ops0[@]}"; do
  generate "${entry##*:}" "MakeOp(${entry%%:*},0,0,0)"
done
# Instructions with one register operand: each mnemonic is assembled once per entry of
# the register table, and the VM register number goes into the first operand field.
for entry in "${ops1[@]}"; do
  vm_name="${entry%%:*}"
  mnemonic="${entry##*:}"
  for reg_pair in "${x86_regs[@]}"; do
    generate "${mnemonic} %${reg_pair##*:}" "MakeOp(${vm_name},${reg_pair%%:*},0,0)"
  done
done
# Instructions with one immediate operand: each template is filled in once per entry of
# the immediate table, and the VM side of the value goes into the last operand field.
for op in "${ops1i[@]}"; do
  op_vm="${op%%:*}"
  op_x86="${op##*:}"
  # The templates escape some characters with a backslash; drop the backslashes so that
  # only the assembler text remains.
  op_x86=${op_x86//\\/}
  for imm_pair in "${imm[@]}"; do
    imm_vm="${imm_pair%%:*}"
    imm_x86="${imm_pair##*:}"
    # Fill in the $I placeholder with plain text substitution instead of eval, so the
    # template is never re-parsed as shell code, word-split or glob-expanded.
    gnu_x86_instr=${op_x86//\$I/${imm_x86}}
    vm_instr="MakeOp(${op_vm},0,0,${imm_vm})"
    generate "${gnu_x86_instr}" "${vm_instr}"
  done
done
# Instructions with one register and one immediate operand: each mnemonic is assembled
# for every combination of destination register and immediate value.
for op in "${ops2i[@]}"; do
  op_vm="${op%%:*}"
  op_x86="${op##*:}"
  for regA in "${x86_regs[@]}"; do
    # dst
    regA_vm="${regA%%:*}"
    regA_x86="${regA##*:}"
    for src in "${imm[@]}"; do
      # src
      src_vm="${src%%:*}"
      src_x86="${src##*:}"
      # in GNU order: op src,dst
      # AT&T syntax needs a '$' in front of an immediate; without it the number is
      # assembled as an absolute memory address.
      gnu_x86_instr="${op_x86} \$${src_x86},%${regA_x86}"
      # in Intel order: op dst,src
      vm_instr="MakeOp(${op_vm},${regA_vm},0,${src_vm})"
      generate "${gnu_x86_instr}" "${vm_instr}"
    done
  done
done
# Instructions with two register operands: each template is filled in for every
# combination of destination register (regA) and source register (regB).
for op in "${ops2[@]}"; do
  op_vm="${op%%:*}"
  op_x86="${op##*:}"
  # The templates escape some characters with a backslash; drop the backslashes so that
  # only the assembler text remains.
  op_x86=${op_x86//\\/}
  for regA in "${x86_regs[@]}"; do
    # dst
    regA_vm="${regA%%:*}"
    regA_x86="${regA##*:}"
    D="%${regA_x86}"
    for regB in "${x86_regs[@]}"; do
      # src
      regB_vm="${regB%%:*}"
      regB_x86="${regB##*:}"
      S="%${regB_x86}"
      # in GNU order: op src,dst
      # Fill in the $S and $D placeholders with plain text substitution. Unlike an
      # unquoted "eval echo", this keeps the line breaks of multi-line templates, so
      # every instruction stays on its own line in the assembler input.
      gnu_x86_instr=${op_x86//\$S/${S}}
      gnu_x86_instr=${gnu_x86_instr//\$D/${D}}
      # in Intel order: op dst,src
      vm_instr="MakeOp(${op_vm},${regA_vm},${regB_vm},0)"
      generate "${gnu_x86_instr}" "${vm_instr}"
    done
  done
done
# Remove the scratch files of the last assembler run; the tmp directory itself is kept.
for scratch_file in tmp/x86_opcode.asm tmp/x86_opcode.o; do
  rm "${scratch_file}"
done