Newer
Older
Import / research / other / simpVM / src / assembler.c
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


// Section identifiers. TEXT..STACK are used as indices into sectionData[]
// and sectionSize[]. UNDEFINED is not a real section: parse() stores it in
// Label.section to mark a placeholder entry for a label that has been
// jumped to but not declared yet.
#define TEXT	0
#define RODATA	1
#define DATA	2
#define STACK	3
#define UNDEFINED 4


// Instruction opcodes. Each one fits in 4 bits and is emitted with
// outputNibble(). In parse(), SHIFT, CALL, JMP, POP, PUSHval and PUSHref
// are followed by an operand written with outputDWord(); the others have
// no operand.
#define PUSH1	0x0
#define ADD	0x1
#define SUB	0x2
#define MUL	0x3
#define DIV	0x4
#define AND	0x5
#define OR	0x6
#define XOR	0x7
#define RET	0x8
#define SHIFT	0x9 // r/l rot/shf s/u val/immed (5bits)  // type8
#define CALL	0xA // func
#define JMP	0xB // cond4 addr
#define POP	0xC // mem32
#define PUSHval	0xD // *mem32   (emitted for "push *label")
#define PUSHref	0xE // mem32    (emitted for "push label")
#define NOP	0xF

/*
  To deal with labels that are declared after where they are first used,
  a list is needed of where to fix up the already parsed code, rewriting
  each placeholder with the address the label really has. Instead of an
  explicit list, the list is kept in the output itself. When a jmp to a
  label that has not been added yet is found, we add the label, record in
  the label table the location of that jmp's address operand, and store
  the jmp as going to zero in the output. The next time a jmp to that
  label is found, we write to the output what is in the label table, and
  then update the label table with this jmp's location.
  When we finally get the declaration of the label, we look up the label
  table, find it is there, and walk the locations, starting with the
  location in the label table to find the first place to fix up, and then
  use its old value to find the next place and so on until the next location
  is zero. Then we update the label table with the label's real location.
*/
/*
  One entry of the label table.
*/
typedef struct {
    // heap-allocated copy of the label name (text up to ':' or end of line)
    char	    *string;
    // byte offset of the label within its section; while the entry is a
    // placeholder (section == UNDEFINED) this is instead the location of
    // the most recent jmp operand waiting to be fixed up
    unsigned int    offset;
    // TEXT, RODATA, DATA or STACK; UNDEFINED marks a placeholder entry
    int		    section;
} Label;


// Output buffer of each section, allocated by init()
unsigned char *sectionData[4];
// Number of bytes emitted so far into each section
unsigned int sectionSize[4];
// Section that emitted data and newly added labels are attributed to
int currentSection = TEXT;
// Every label seen so far, declared or placeholder
Label labelTable[1024];
// Number of entries of labelTable in use
int labelCount = 0;
// Output file name, replaced by the argument of -o
char *output = "a.out";
// Input file name, taken from the first non-option command line argument
char *input = 0;
// Set by -v; makes parse() print section sizes and every output byte
int verbose = 0;


/*
  Search the label table for a label by name.

  label points at the start of the name. The name ends at the first ':',
  newline or end of string, so both a declaration line ("foo:\n") and an
  instruction operand ("foo\n") can be passed in directly.

  Returns a pointer to the matching labelTable entry, or NULL when no
  entry has that name.
*/
Label *findLabel(char *label)
{
    size_t len = 0;
    int i;

    // Measure the name in place rather than copying it into a fixed-size
    // temporary, so arbitrarily long names cannot overflow anything.
    while (label[len] != '\0' && label[len] != '\n' && label[len] != ':')
        len++;

    for (i = 0; i < labelCount; i++) {
        const char *known = labelTable[i].string;

        // The first len characters must match and the stored name must end
        // right there. strncmp stops at a NUL in known, so reading
        // known[len] is only reached when known has at least len characters.
        if (strncmp(known, label, len) == 0 && known[len] == '\0')
            return &labelTable[i];
    }
    return NULL;
}

void addLabel(char *label)
{
    Label *l = findLabel(label);
    if ( !l ) {
      printf("adding label: %s\n", label);
      int len = strlen(label);
      char *copy = malloc(len + 1);
      labelTable[labelCount].string = copy;
      labelTable[labelCount].section = currentSection;
      labelTable[labelCount].offset = sectionSize[currentSection];
      while (*label != ':' && *label != '\0' && *label != '\n')
          *copy++ = *label++;
      if (*label != ':')
          printf("Syntax error, expecting label but got %s instead\n", label);
      *copy = '\0';
      labelCount++;
      printf("added label: %s\n", labelTable[labelCount-1].string);
      return;
    }
    // Case where label already in the label table because it was used
    // before it was declared and we need to fixup the previous output
    unsigned int prev = *((unsigned int*)&(sectionData[currentSection][l->offset]));
    printf("label: %s, prev: %i, current %i\n", label, prev, sectionSize[currentSection]);
    *((unsigned int*)&(sectionData[currentSection][l->offset])) = sectionSize[currentSection];
    l->offset = sectionSize[currentSection];
    while ( prev ) {
      unsigned int current = prev;
      prev = sectionData[currentSection][current];
      *((unsigned int*)&(sectionData[currentSection][current])) = sectionSize[currentSection];
    }
}


/*
  Look a label up by name without ever creating it.

  Returns the labelTable entry for the name, or 0 when the name is not in
  the table. A missing label is deliberately not added here: each
  instruction handles that case itself, because it needs the address of
  where the operand will be output.
*/
Label *lookupLabel(char *label)
{
    return findLabel(label);
}


/*
  Allocate the output buffer of every section and reset the section sizes.

  Each buffer is 65536 * 4 bytes and starts out zero-filled. Exits the
  program when a buffer cannot be allocated, since every later output
  routine writes into these buffers unconditionally.
*/
void init()
{
    int i;

    for (i = 0; i < 4; i++) {
        sectionData[i] = calloc(65536 * 4, 1);
        if (!sectionData[i]) {
            printf("Error: out of memory allocating section %i\n", i);
            exit(-1);
        }
        sectionSize[i] = 0;
    }
}

// Non-zero while the byte at the end of the current section holds only
// its low nibble and is still waiting for the high one.
int outputHalf = 0;

/*
  Append a 4-bit value to the current section.

  Two nibbles are packed per byte: the first call stores b as the byte at
  the current end of the section, the second call ORs its value into the
  upper four bits and only then advances the section size.
*/
void outputNibble(unsigned char b)
{
    unsigned char *slot = &sectionData[currentSection][sectionSize[currentSection]];

    if (!outputHalf) {
        // first nibble of a fresh byte
        *slot = b;
        outputHalf = 1;
        return;
    }

    // second nibble completes the byte
    *slot |= (b << 4);
    sectionSize[currentSection]++;
    outputHalf = 0;
}

/*
void outputByte(unsigned char b)
{
    if (outputHalf) {
	sectionSize[currentSection]++;
	outputHalf = 0;
    }
    sectionData[currentSection][sectionSize[currentSection]] = b;
    sectionSize[currentSection]++;
}
*/

/*
    Append an unsigned int to the current section in host byte order.

    If a byte is half filled by outputNibble(), that byte is closed first
    so the value starts on a byte boundary.

    Byte order will be same when compiler and vm are used on same machine
    Byte order might not be same if compiler and vm used on different machines
*/
void outputDWord(unsigned int b)
{
    if (outputHalf) {
        sectionSize[currentSection]++;
        outputHalf = 0;
    }
    // The destination can sit at any byte offset, so copy the bytes rather
    // than storing through a cast pointer (which may be misaligned).
    memcpy(&sectionData[currentSection][sectionSize[currentSection]], &b, sizeof b);
    // operands occupy 4 bytes in the output (unsigned int assumed 32 bits)
    sectionSize[currentSection] += 4;
}

int parse()
{
    FILE *file = fopen(input, "ro");
    char buffer[1024];
    char filename[128];

    while (fgets(buffer, 1024, file)) {
	// ';' is a comment, ignore
	if (buffer[0] != ';' && buffer[0] != '\n') {
	    if (!isspace(buffer[0])) {
		// it is a label
		addLabel(buffer);
	    } else if (buffer[0] == '\t') {
		// and instruction
		if (buffer[1] == '.') {
		    // meta instruction
		    if (!strncmp(buffer,"\t.file ",7)) {
			// rest of buffer is file name,  XXX TODO deal with quotes
			int i = 7;
			while (buffer[i] != '\n') {
			    filename[i-7] = buffer[i];
			    i++;
			}
			filename[i-7] = '\0';
		    } else if (!strncmp(buffer,"\t.text",6)) {
			currentSection = TEXT;
		    } else if (!strncmp(buffer,"\t.data",6)) {
			currentSection = DATA;
		    } else if (!strncmp(buffer,"\t.section",9)) {
			if (!strncmp(buffer,"\t.section .rodata",17)) {
			    currentSection = RODATA;
			} else if (!strncmp(buffer,"\t.section .data",15)) {
			    currentSection = DATA;
			} else if (!strncmp(buffer,"\t.section .stack",17)) {
			    currentSection = STACK;
			} else if (!strncmp(buffer,"\t.section .text",15)) {
			    currentSection = TEXT;
			}
		    } else if (!strncmp(buffer,"\t.long ",7)) {
			outputDWord(atoi(buffer + 7));
		    } else if (!strncmp(buffer,"\t.string ",9)) {
			int i = 9; // XXX TODO deal with quotes
			int t = 0;
			char *dst = (char*)&sectionData[currentSection][sectionSize[currentSection]];
			while (buffer[i] != '\n') {
			    if ( buffer[i] != '\"' ) {
				if ( buffer[i] != '\\' ) {
				    *dst++ = buffer[i];
				    t++;
				} else {
				    i++;
				    if (buffer[i] == '0') *dst++ = '\0';
				    else if (buffer[i] == 'a') *dst++ = '\a';
				    else if (buffer[i] == 'b') *dst++ = '\b';
				    else if (buffer[i] == 't') *dst++ = '\t';
				    else if (buffer[i] == 'n') *dst++ = '\n';
				    else if (buffer[i] == 'v') *dst++ = '\v';
				    else if (buffer[i] == 'f') *dst++ = '\f';
				    else if (buffer[i] == 'r') *dst++ = '\r';
				    else if (buffer[i] == '\\') *dst++ = '\\';
				    else *dst++ = buffer[i];
				    t++;
				}
			    }
			    i++;
			}
			*dst++ = '\0';
			t++;
			sectionSize[currentSection] += t;
		    }
		} else {

		    if (currentSection != TEXT) {
			printf("Warning: expecting an instruction but we are not in a code section\n");
		    }

		    // real instruction
		    if (!strcmp(buffer, "\tpush1\n")) {
			outputNibble(PUSH1);
#define NEXT_INST_CASE(str, val) \
		    } else if (!strcmp(buffer, str)) {\
			outputNibble(val);
			NEXT_INST_CASE("\tadd\n",ADD)
			NEXT_INST_CASE("\tsub\n",SUB)
			NEXT_INST_CASE("\tmul\n",MUL)
			NEXT_INST_CASE("\tdiv\n",DIV)
			NEXT_INST_CASE("\tand\n",AND)
			NEXT_INST_CASE("\tor\n",OR)
			NEXT_INST_CASE("\txor\n",XOR)
			NEXT_INST_CASE("\tret\n",RET)
		    } else if (!strncmp(buffer, "\tshift ", 7)) {
			outputNibble(SHIFT);
			outputDWord(atoi(buffer + 7));
		    } else if (!strncmp(buffer, "\tcall ", 6)) {
			Label *lab = lookupLabel(buffer+6);
			// Call probably can be just like jmp
			if (lab && lab->section != TEXT) {
			    printf("segmentation violation, attempting to execute non-code data\n");
			    exit(-1);
			}
			outputNibble(CALL);
			outputDWord(lab ? lab->offset : 0);
		    } else if (!strncmp(buffer, "\tjmp ", 5)) {
			Label *lab = lookupLabel(buffer+5);
			outputNibble(JMP);
			outputDWord(lab ? lab->offset : 0);
			if (!lab) {
			    addLabel(buffer+5);
			    lab = &labelTable[labelCount-1];
    			    lab->section = UNDEFINED; // sentinel value to say this is a placeholder entry
			    //printf("jump label not found\n");
			}
			if (lab->section != TEXT && lab->section != UNDEFINED) {
			    printf("segmentation violation, attempting to execute non-code data\n");
			    exit(-1);
			}
			// outputDWord(atoi(buffer + 5)); // XXX
			if (lab->section == UNDEFINED) {
      			    lab->offset = sectionSize[currentSection]-4;
			}
		    } else if (!strncmp(buffer, "\tpop ", 5)) {
			Label *lab = lookupLabel(buffer+5);
			//if (lab && (lab->section == TEXT || lab->section == RODATA)) {
			if (lab && lab->section == TEXT) {
			    printf("segmentation violation, attempting to write to read only data area\n");
			    exit(-1);
			}
			outputNibble(POP);
			outputDWord(lab ? lab->offset : 0);
		    } else if (!strncmp(buffer, "\tpush ", 6)) {
			int i = 6;
			if (buffer[i] == '*')
			    i++;
			Label *lab = lookupLabel(buffer+i);
			if (lab && lab->section == TEXT) {
			    printf("segmentation violation, attempting to read code area\n");
			    exit(-1);
			}
			outputNibble((i==6)?PUSHref:PUSHval);
			outputDWord(lab ? lab->offset : 0);
		    } else if (!strncmp(buffer, "\tnop", 4)) {
			outputNibble(NOP);
		    } else {
			printf("unknown instr: %s\n", buffer);
		    }
		}
	    } else {
		// error
		printf("invalid line: %s\n", buffer);
	    }
	}
    }


    // print out data
    int i, j;
    FILE *outfile = fopen(output, "w");
    for (i = 0; i < 4; i++) {
	if (verbose)
	    printf("size[%i] = %i\n", i, sectionSize[i]);
	fwrite(&sectionSize[i], 4, 1, outfile);
    }
    for (i = 0; i < 4; i++) {
	for (j = 0; j < sectionSize[i]; j++) {
	    if (verbose)
		printf("data[%i][%i] = %i\n", i, j, sectionData[i][j]);
	    fwrite(&sectionData[i][j], 1, 1, outfile);
	}
    }

    return 0;
}


/*
  Command line entry point.

  Usage: [-o outfile] [-v] infile
    -o outfile   write the result to outfile instead of "a.out"
    -v           verbose output
  Any argument that does not start with '-' is taken as the input file
  name; the last one given wins.

  Returns the result of parse(), or -1 when the arguments are unusable.
*/
int main(int argc, char *argv[])
{
    int i;

    for (i = 1; i < argc; i++) {
        if (argv[i][0] == '-') {
            switch (argv[i][1]) {
                case 'o': // output
                    // the file name is the next argument; make sure it exists
                    if (i + 1 >= argc) {
                        printf("Option -o requires a file name.\n");
                        return -1;
                    }
                    output = argv[++i];
                    break;
                case 'v': // verbose
                    verbose = 1;
                    break;
                default:
                    printf("Ignoring unknown option: %s\n", argv[i]);
                    break;
            }
        } else {
            // unmatched option is input filename
            input = argv[i];
        }
    }

    if (!input) {
        printf("No input files.\n");
        return -1;
    }

    init();

    return parse();
}