#!/bin/bash

#
# This script compiles snippets of x86 assembler to find what the binary/machine code is for those instructions.
# It does this for various register combinations and specific immediate/address values. The snippets of assembler
# are what will be required by a JIT compiler for a VM to translate VM instructions into native instructions,
# or for a simple assembler to be able to convert and output native code.
#


# Scratch directory for the generated assembler source and object file.
mkdir -p tmp

# The dumped bytes can end in zero padding, and a zero byte may also be a
# genuine part of an instruction. A sentinel instruction placed after each
# snippet makes the two cases distinguishable: nop encodes as 0x90, so a dump
# ending in 0xAD, 0x90, 0x00, 0x00 has two padding bytes (drop the last three
# bytes, sentinel included), while one ending in 0xAD, 0x90 has no padding
# (drop only the sentinel).
terminal_instruction='nop'

# VM register number paired with the x86-64 register it maps to, written as
# <vm number>:<x86 name>. The stack pointer cannot serve as a general purpose
# register, which leaves 15 here; so perhaps the VM should have 15 or fewer
# general purpose registers. That count should translate easily to x64 and
# ARM, and probably to others such as MIPS; the very low end is less certain,
# although the ESP8266's 106micro core has 16 registers, so that is okay too.
x86_regs=(
  0:rax   1:rcx   2:rdx   3:rbx   4:rbp
  5:rsi   6:rdi   7:r8    8:r9    9:r10
  10:r11  11:r12  12:r13  13:r14  14:r15
)

# Immediate/address test values, written as <vm operand>:<x86 operand>.
imm=(
  0xB4B5B6B7:0xB4B5B6B7
)

# Every entry in the tables below has the form <VM opcode name>:<x86 text>.

# 0 register operand instructions
ops0=(Nop:nop Ret:ret)

# 1 register operand instructions
ops1=(Push:push Pop:pop)

# 1 immediate operand instructions
#
# The x86 text is a template that gets expanded with `eval echo` while the
# shell variable I holds the immediate/address value. Anything the shell would
# otherwise interpret must therefore be backslash-escaped inside the template:
# \( and \) for parentheses, \$ for a literal dollar sign. The templates are
# single-quoted so that they are stored exactly as written.
ops1i=(
       Jmp:'jmp $I'
       Je:'je $I'
       Jne:'jne $I'
       Call:'call $I'
       # AT&T syntax needs a literal '$' in front of an immediate operand;
       # without it the assembler reads the value as an absolute address and
       # MovI0 would come out as the same memory load as MovM0.
       MovI0:'mov \$$I,%rax'
       MovM0:'mov \($I\),%rax'            # TODO: might need to add segment/selector prefix
       MovC0:'mov \($I\),%rax'            # TODO: same
       Mov0M:'mov %rax,\($I\)'            # TODO: same
      )

# 1 register and 1 immediate operand instructions
ops2i=(MovIR:mov)


#  MovRR,  // reg         -> reg
#  MovMR,  // mem[r]      -> reg
#  MovI0,  // imm25       -> reg0
#  MovM0,  // mem[addr]   -> reg0
#  MovC0,  // const[addr] -> reg0
#  Mov0M,  // reg0        -> mem[addr]
#  MovRM,  // reg         -> mem[r]
#  MovMM,  // mem[r]      -> mem[r]
#  MovCM,  // const[r]    -> mem[r]

#  MovIM,  // imm20       -> mem[r]

# Empty the tables of the opcode groups that should be skipped on this run;
# the loops over an emptied table simply generate nothing. The ops1i group is
# left enabled.
ops0=(); ops1=(); ops2i=()
# ops1i=()

# 2 register operand instructions
#
# Entries have the form <VM opcode name>:<x86 template>. The template is
# expanded with `eval echo` while the shell variables S and D hold the source
# and destination register (including the leading '%'). Operands are written
# in GNU/AT&T order: op src,dst.
#
# Because of the eval, characters the shell would interpret are escaped in the
# template: \( \) for parentheses and \; for the ';' that separates the
# individual statements of a multi-instruction sequence on one assembler line.
#
# NOTE(review): the sequences that save and restore scratch registers are
# still wrong when an operand is one of those scratch registers. For example a
# destination of %rax or %rdx in Div/Mod, or of %rcx in Shl/Shr, has its
# result overwritten by the closing pop, and a Div/Mod source of %rax or %rdx
# is clobbered before the div executes. These combinations need dedicated
# sequences.
ops2=(
       MovMM:'push %rax\;mov \($S\),%rax\;mov %rax,\($D\)\;pop %rax'   # TODO: seg/sel
       MovCM:'push %rax\;mov \($S\),%rax\;mov %rax,\($D\)\;pop %rax'   # TODO: seg/sel

       MovMR:'mov \($S\),$D'
       MovRM:'mov $S,\($D\)'

       # div divides rdx:rax by its operand, leaving the quotient in rax and
       # the remainder in rdx: load D into rax, clear rdx, divide by S, then
       # copy the wanted half of the result back into D.
       Div:'push %rax\;push %rdx\;xor %rdx,%rdx\;mov $D,%rax\;div $S\;mov %rax,$D\;pop %rdx\;pop %rax'
       Mod:'push %rax\;push %rdx\;xor %rdx,%rdx\;mov $D,%rax\;div $S\;mov %rdx,$D\;pop %rdx\;pop %rax'
       Not:'mov $S,$D\;not $D'
       # Variable shift counts are taken from cl, so S is copied into rcx first.
       Shl:'push %rcx\;mov $S,%rcx\;shl %cl,$D\;pop %rcx'
       Shr:'push %rcx\;mov $S,%rcx\;shr %cl,$D\;pop %rcx'
       Cmp:'cmp $S,$D'
       Or:'or $S,$D'
       Xor:'xor $S,$D'
       And:'and $S,$D'
       MovRR:'mov $S,$D'


       # TODO:
       # MovIR:"mov \$S,\$D"

       Add:'add $S,$D'
       Sub:'sub $S,$D'
       Mul:'imul $S,$D'
     )

# TODO: memory and immediate variations of mov
# TODO: control flow instructions
# TODO: 

#   MovIR,  // imm20       -> reg
#   MovRR,  // reg         -> reg
#   MovMR,  // mem[r]      -> reg
#   MovCR,  // const[r]    -> reg
#   MovI0,  // imm25       -> reg0
#   MovM0,  // mem[addr]   -> reg0
#   MovC0,  // const[addr] -> reg0
#   MovIM,  // imm20       -> mem[r]
#   MovRM,  // reg         -> mem[r]
#   Mov0M,  // reg0        -> mem[addr]
#   MovMM,  // mem[r]      -> mem[r]
#   MovCM,  // const[r]    -> mem[r]
#   Ioctl,

#       case MovIR: a_machine.m_registers[a_regA]  =  currentInstruction & 0xFFFFF;                          break;
#       case MovMR: a_machine.m_registers[a_regA]  =  a_machine.m_memory[a_machine.m_registers[a_regB]];     break;
#       case MovCR: a_machine.m_registers[a_regA]  =  a_machine.m_constants[a_machine.m_registers[a_regB]];  break;
# 
#       case MovI0: a_machine.m_registers[0]       =  currentInstruction & 0x1FFFFFF;                        break;
#       case MovM0: a_machine.m_registers[0]       =  a_machine.m_memory[currentInstruction & 0x1FFFFFF];    break;
#       case MovC0: a_machine.m_registers[0]       =  a_machine.m_constants[currentInstruction & 0x1FFFFFF]; break;
# 
#       case MovIM: a_machine.m_memory[a_machine.m_registers[a_regA]]  = currentInstruction & 0xFFFFF;                         break;
#       case MovRM: a_machine.m_memory[a_machine.m_registers[a_regA]]  = a_machine.m_registers[a_regB];                        break;
#       case Mov0M: a_machine.m_memory[currentInstruction & 0x1FFFFFF] = a_machine.m_registers[0];                             break;
#       case MovMM: a_machine.m_memory[a_machine.m_registers[a_regA]]  = a_machine.m_memory[a_machine.m_registers[a_regB]];    break;
#       case MovCM: a_machine.m_memory[a_machine.m_registers[a_regA]]  = a_machine.m_constants[a_machine.m_registers[a_regB]]; break;
# 
#       case Ioctl:
#                   a_machine.m_ioctls[currentInstruction & 0x1FFFFFF](a_machine.m_registers);
#                   break;


#######################################
# Assemble one snippet and print its machine code as a C-style table entry.
# Globals:
#   terminal_instruction (read) - instruction appended after the snippet
# Arguments:
#   $1 - assembler text of the snippet (GNU/AT&T syntax)
#   $2 - expression used as the index of the emitted opcodes[] entry
# Outputs:
#   stdout: opcodes[<$2>] = { <comma separated 0x.. bytes> };
#   Overwrites tmp/x86_opcode.asm and tmp/x86_opcode.o.
# Exits:
#   Terminates the whole script with status 1 when the snippet cannot be
#   assembled or the object file holds no bytes past the skipped header.
#######################################
function generate {
  local gnu_x86_instr=${1}
  local vm_instr=${2}
  local asm_file=tmp/x86_opcode.asm
  local obj_file=tmp/x86_opcode.o
  # Number of bytes at the start of the object file that are skipped.
  # NOTE(review): presumably the size of the object file headers written by
  # the toolchain this was developed with - confirm for other toolchains.
  local header_size=208
  local size
  local opcode

  # The snippet followed by the terminal instruction, one per line.
  printf '%s\n' "${gnu_x86_instr}" "${terminal_instruction}" > "${asm_file}"

  if ! gcc -c "${asm_file}" -o "${obj_file}"; then
    printf 'generate: failed to assemble "%s" for %s\n' \
      "${gnu_x86_instr}" "${vm_instr}" >&2
    exit 1
  fi

  # wc -c is used for the file size because the option syntax of stat differs
  # between BSD and GNU; the shell arithmetic ignores any padding wc prints.
  size=$(( $(wc -c < "${obj_file}") - header_size ))
  if (( size <= 0 )); then
    printf 'generate: %s holds no data after the first %d bytes\n' \
      "${obj_file}" "${header_size}" >&2
    exit 1
  fi

  # Dump everything after the header as "0x.., 0x.., ..."; the final tail
  # strips the two-space indent that xxd -i puts in front of the list.
  opcode=$(tail -c "+$(( header_size + 1 ))" "${obj_file}" \
    | head -c "${size}" \
    | xxd -c 255 -i \
    | tail -c +3)
  # echo "opcodes[${vm_instr}] = { ${size}, { ${opcode} } };"
  echo "opcodes[${vm_instr}] = { ${opcode} };"
}


# Instructions without operands: one table entry per opcode.
for entry in "${ops0[@]}"; do
  # entry is <VM opcode name>:<x86 mnemonic>
  generate "${entry##*:}" "MakeOp(${entry%%:*},0,0,0)"
done


# Instructions with a single register operand: one table entry for every
# opcode/register combination.
for entry in "${ops1[@]}"; do
  vm_name="${entry%%:*}"
  mnemonic="${entry##*:}"
  for pair in "${x86_regs[@]}"; do
    # pair is <VM register number>:<x86 register name>
    generate "${mnemonic} %${pair##*:}" "MakeOp(${vm_name},${pair%%:*},0,0)"
  done
done


# Instructions with a single immediate/address operand: one table entry for
# every opcode/value combination.
for entry in "${ops1i[@]}"; do
  vm_name="${entry%%:*}"
  template="${entry##*:}"
  for value in "${imm[@]}"; do
    # The templates refer to the operand as $I, so it has to be set before the
    # template is expanded through eval.
    I="${value##*:}"
    generate "$(eval echo ${template})" "MakeOp(${vm_name},0,0,${value%%:*})"
  done
done


# Instructions with a destination register and an immediate source: one table
# entry for every opcode/register/value combination.
for op in "${ops2i[@]}"; do
  op_vm="${op%%:*}"
  op_x86="${op##*:}"
  for regA in "${x86_regs[@]}"; do
    # dst
    regA_vm="${regA%%:*}"
    regA_x86="${regA##*:}"
    for src in "${imm[@]}"; do
      # src
      src_vm="${src%%:*}"
      src_x86="${src##*:}"

      # in GNU order:    op src,dst
      # The immediate needs a literal '$' prefix; without it the assembler
      # treats the value as an absolute memory address and emits a load.
      gnu_x86_instr="${op_x86} \$${src_x86},%${regA_x86}"

      # in Intel order:  op dst,src
      vm_instr="MakeOp(${op_vm},${regA_vm},0,${src_vm})"

      generate "${gnu_x86_instr}" "${vm_instr}"
    done
  done
done


# Instructions with two register operands: one table entry for every
# opcode/destination/source combination.
for entry in "${ops2[@]}"; do
  vm_name="${entry%%:*}"
  template="${entry##*:}"
  for dst in "${x86_regs[@]}"; do
    for src in "${x86_regs[@]}"; do
      # The templates name their operands $S (source) and $D (destination),
      # written in GNU order (op src,dst); both must be set before the
      # template is expanded through eval.
      S="%${src##*:}"
      D="%${dst##*:}"

      # MakeOp takes the operands in Intel order: destination, then source.
      generate "$(eval echo ${template})" \
               "MakeOp(${vm_name},${dst%%:*},${src%%:*},0)"
    done
  done
done


# Remove the scratch files left behind by the last generated snippet.
for scratch in tmp/x86_opcode.asm tmp/x86_opcode.o; do
  rm "${scratch}"
done