#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Section identifiers. TEXT, RODATA, DATA and STACK are used as indices
// into sectionData[] / sectionSize[] and as the value of currentSection.
#define TEXT 0
#define RODATA 1
#define DATA 2
#define STACK 3
// Not a real section: marks a label-table entry that parse() created for a
// jmp to a label that has not been declared yet (a placeholder entry).
#define UNDEFINED 4
// Instruction opcodes. parse() emits each opcode with outputNibble(), so
// every value fits in 4 bits. SHIFT, CALL, JMP, POP, PUSHval and PUSHref
// are followed in the output by an operand written with outputDWord().
#define PUSH1 0x0
#define ADD 0x1
#define SUB 0x2
#define MUL 0x3
#define DIV 0x4
#define AND 0x5
#define OR 0x6
#define XOR 0x7
#define RET 0x8
#define SHIFT 0x9 // r/l rot/shf s/u val/immed (5bits) // type8
#define CALL 0xA // func
#define JMP 0xB // cond4 addr
#define POP 0xC // mem32
#define PUSHval 0xD // *mem32
#define PUSHref 0xE // mem32
#define NOP 0xF
/*
To deal with labels that are declared after the place where they are first
used, a list is needed of where to fix up the already parsed code, so that
each placeholder can be rewritten with the address where the label really
is. Instead of an explicit list, the list is kept in the output itself.
When a jmp to a label that has not been added yet is found, we add the
label, store the location of the jmp in the label table, and store the jmp
as going to zero in the output. The next time a jmp to that label is found,
we write to the output what is in the label table, and then update the
label table with this jmp's location. When we finally get the declaration
of the label, we look it up in the label table, find it is there, and walk
the locations: we start with the location in the label table to find the
first place to fix up, then use its old value to find the next place, and
so on until the next location is zero. Then we update the label table with
the label's real location.
*/
// One entry of the label table.
typedef struct {
char *string; // heap-allocated copy of the label name (without the ':')
unsigned int offset; // byte offset within its section; for a placeholder
                     // entry, the location of the most recent jmp operand
int section; // TEXT/RODATA/DATA/STACK, or UNDEFINED for a placeholder
} Label;
// Assembler state shared by every function in this file.
unsigned char *sectionData[4]; // per-section output buffers, allocated by init()
unsigned int sectionSize[4]; // number of bytes emitted so far in each section
int currentSection = TEXT; // section that output and new labels go to
Label labelTable[1024]; // fixed-capacity label table
int labelCount = 0; // number of entries of labelTable in use
char *output = "a.out"; // output file name; main() replaces it on -o
char *input = 0; // input file name; main() sets it from the command line
int verbose = 0; // set to 1 by -v; parse() then prints what it writes
/*
 * Search the label table for a label name.
 *
 * label: text that starts with the name; the name ends at the first ':',
 *        newline or end of string, so a raw source line or an instruction
 *        operand can be passed directly.
 *
 * Returns a pointer to the matching labelTable entry, or NULL when the
 * name is not in the table.
 *
 * The name is compared in place, so there is no limit on its length
 * (the previous fixed 128-byte scratch buffer could be overrun).
 */
Label *findLabel(char *label)
{
    size_t nameLen = strcspn(label, "\n:");
    int i;

    for (i = 0; i < labelCount; i++) {
        const char *known = labelTable[i].string;

        if (strlen(known) == nameLen && memcmp(known, label, nameLen) == 0)
            return &labelTable[i];
    }
    return NULL;
}
void addLabel(char *label)
{
Label *l = findLabel(label);
if ( !l ) {
printf("adding label: %s\n", label);
int len = strlen(label);
char *copy = malloc(len + 1);
labelTable[labelCount].string = copy;
labelTable[labelCount].section = currentSection;
labelTable[labelCount].offset = sectionSize[currentSection];
while (*label != ':' && *label != '\0' && *label != '\n')
*copy++ = *label++;
if (*label != ':')
printf("Syntax error, expecting label but got %s instead\n", label);
*copy = '\0';
labelCount++;
printf("added label: %s\n", labelTable[labelCount-1].string);
return;
}
// Case where label already in the label table because it was used
// before it was declared and we need to fixup the previous output
unsigned int prev = *((unsigned int*)&(sectionData[currentSection][l->offset]));
printf("label: %s, prev: %i, current %i\n", label, prev, sectionSize[currentSection]);
*((unsigned int*)&(sectionData[currentSection][l->offset])) = sectionSize[currentSection];
l->offset = sectionSize[currentSection];
while ( prev ) {
unsigned int current = prev;
prev = sectionData[currentSection][current];
*((unsigned int*)&(sectionData[currentSection][current])) = sectionSize[currentSection];
}
}
/*
 * Resolve a label name used as an instruction operand.
 *
 * Returns the labelTable entry for the name, or 0 when it is unknown.
 * An unknown label is deliberately NOT added here: each instruction
 * handler deals with that case itself, because it needs the address at
 * which the operand is going to be output.
 */
Label *lookupLabel(char *label)
{
    return findLabel(label);
}
/*
 * Allocate the output buffer of every section and reset the section sizes.
 *
 * Each buffer is 65536 * 4 bytes and is zero-filled, so bytes that are
 * skipped over rather than written are deterministic in the output file.
 * Exits the program when an allocation fails.
 */
void init()
{
    int i;

    for (i = 0; i < 4; i++) {
        sectionData[i] = calloc(65536 * 4, 1);
        if (!sectionData[i]) {
            fprintf(stderr, "out of memory allocating section %d\n", i);
            exit(-1);
        }
        sectionSize[i] = 0;
    }
}
// Non-zero while the low nibble of the current output byte has been
// written and its high nibble is still pending.
int outputHalf = 0;

/*
 * Append a 4-bit value to the current section.
 *
 * Two consecutive calls fill one byte: the first stores b as the byte
 * (low nibble), the second ORs b into the high nibble and advances the
 * section size by one.
 */
void outputNibble(unsigned char b)
{
    unsigned char *slot = &sectionData[currentSection][sectionSize[currentSection]];

    if (!outputHalf) {
        // first half: start a fresh byte
        *slot = b;
        outputHalf = 1;
        return;
    }

    // second half: complete the byte and move on
    *slot |= (b << 4);
    sectionSize[currentSection]++;
    outputHalf = 0;
}
/*
void outputByte(unsigned char b)
{
if (outputHalf) {
sectionSize[currentSection]++;
outputHalf = 0;
}
sectionData[currentSection][sectionSize[currentSection]] = b;
sectionSize[currentSection]++;
}
*/
/*
Byte order will be same when compiler and vm are used on same machine
Byte order might not be same if compiler and vm used on different machines
*/
void outputDWord(unsigned int b)
{
if (outputHalf) {
sectionSize[currentSection]++;
outputHalf = 0;
}
// cross-platform alignment issue here
*((unsigned int *)§ionData[currentSection][sectionSize[currentSection]]) = b;
sectionSize[currentSection] += 4;
}
int parse()
{
FILE *file = fopen(input, "ro");
char buffer[1024];
char filename[128];
while (fgets(buffer, 1024, file)) {
// ';' is a comment, ignore
if (buffer[0] != ';' && buffer[0] != '\n') {
if (!isspace(buffer[0])) {
// it is a label
addLabel(buffer);
} else if (buffer[0] == '\t') {
// and instruction
if (buffer[1] == '.') {
// meta instruction
if (!strncmp(buffer,"\t.file ",7)) {
// rest of buffer is file name, XXX TODO deal with quotes
int i = 7;
while (buffer[i] != '\n') {
filename[i-7] = buffer[i];
i++;
}
filename[i-7] = '\0';
} else if (!strncmp(buffer,"\t.text",6)) {
currentSection = TEXT;
} else if (!strncmp(buffer,"\t.data",6)) {
currentSection = DATA;
} else if (!strncmp(buffer,"\t.section",9)) {
if (!strncmp(buffer,"\t.section .rodata",17)) {
currentSection = RODATA;
} else if (!strncmp(buffer,"\t.section .data",15)) {
currentSection = DATA;
} else if (!strncmp(buffer,"\t.section .stack",17)) {
currentSection = STACK;
} else if (!strncmp(buffer,"\t.section .text",15)) {
currentSection = TEXT;
}
} else if (!strncmp(buffer,"\t.long ",7)) {
outputDWord(atoi(buffer + 7));
} else if (!strncmp(buffer,"\t.string ",9)) {
int i = 9; // XXX TODO deal with quotes
int t = 0;
char *dst = (char*)§ionData[currentSection][sectionSize[currentSection]];
while (buffer[i] != '\n') {
if ( buffer[i] != '\"' ) {
if ( buffer[i] != '\\' ) {
*dst++ = buffer[i];
t++;
} else {
i++;
if (buffer[i] == '0') *dst++ = '\0';
else if (buffer[i] == 'a') *dst++ = '\a';
else if (buffer[i] == 'b') *dst++ = '\b';
else if (buffer[i] == 't') *dst++ = '\t';
else if (buffer[i] == 'n') *dst++ = '\n';
else if (buffer[i] == 'v') *dst++ = '\v';
else if (buffer[i] == 'f') *dst++ = '\f';
else if (buffer[i] == 'r') *dst++ = '\r';
else if (buffer[i] == '\\') *dst++ = '\\';
else *dst++ = buffer[i];
t++;
}
}
i++;
}
*dst++ = '\0';
t++;
sectionSize[currentSection] += t;
}
} else {
if (currentSection != TEXT) {
printf("Warning: expecting an instruction but we are not in a code section\n");
}
// real instruction
if (!strcmp(buffer, "\tpush1\n")) {
outputNibble(PUSH1);
#define NEXT_INST_CASE(str, val) \
} else if (!strcmp(buffer, str)) {\
outputNibble(val);
NEXT_INST_CASE("\tadd\n",ADD)
NEXT_INST_CASE("\tsub\n",SUB)
NEXT_INST_CASE("\tmul\n",MUL)
NEXT_INST_CASE("\tdiv\n",DIV)
NEXT_INST_CASE("\tand\n",AND)
NEXT_INST_CASE("\tor\n",OR)
NEXT_INST_CASE("\txor\n",XOR)
NEXT_INST_CASE("\tret\n",RET)
} else if (!strncmp(buffer, "\tshift ", 7)) {
outputNibble(SHIFT);
outputDWord(atoi(buffer + 7));
} else if (!strncmp(buffer, "\tcall ", 6)) {
Label *lab = lookupLabel(buffer+6);
// Call probably can be just like jmp
if (lab && lab->section != TEXT) {
printf("segmentation violation, attempting to execute non-code data\n");
exit(-1);
}
outputNibble(CALL);
outputDWord(lab ? lab->offset : 0);
} else if (!strncmp(buffer, "\tjmp ", 5)) {
Label *lab = lookupLabel(buffer+5);
outputNibble(JMP);
outputDWord(lab ? lab->offset : 0);
if (!lab) {
addLabel(buffer+5);
lab = &labelTable[labelCount-1];
lab->section = UNDEFINED; // sentinel value to say this is a placeholder entry
//printf("jump label not found\n");
}
if (lab->section != TEXT && lab->section != UNDEFINED) {
printf("segmentation violation, attempting to execute non-code data\n");
exit(-1);
}
// outputDWord(atoi(buffer + 5)); // XXX
if (lab->section == UNDEFINED) {
lab->offset = sectionSize[currentSection]-4;
}
} else if (!strncmp(buffer, "\tpop ", 5)) {
Label *lab = lookupLabel(buffer+5);
//if (lab && (lab->section == TEXT || lab->section == RODATA)) {
if (lab && lab->section == TEXT) {
printf("segmentation violation, attempting to write to read only data area\n");
exit(-1);
}
outputNibble(POP);
outputDWord(lab ? lab->offset : 0);
} else if (!strncmp(buffer, "\tpush ", 6)) {
int i = 6;
if (buffer[i] == '*')
i++;
Label *lab = lookupLabel(buffer+i);
if (lab && lab->section == TEXT) {
printf("segmentation violation, attempting to read code area\n");
exit(-1);
}
outputNibble((i==6)?PUSHref:PUSHval);
outputDWord(lab ? lab->offset : 0);
} else if (!strncmp(buffer, "\tnop", 4)) {
outputNibble(NOP);
} else {
printf("unknown instr: %s\n", buffer);
}
}
} else {
// error
printf("invalid line: %s\n", buffer);
}
}
}
// print out data
int i, j;
FILE *outfile = fopen(output, "w");
for (i = 0; i < 4; i++) {
if (verbose)
printf("size[%i] = %i\n", i, sectionSize[i]);
fwrite(§ionSize[i], 4, 1, outfile);
}
for (i = 0; i < 4; i++) {
for (j = 0; j < sectionSize[i]; j++) {
if (verbose)
printf("data[%i][%i] = %i\n", i, j, sectionData[i][j]);
fwrite(§ionData[i][j], 1, 1, outfile);
}
}
return 0;
}
/*
 * Command-line entry point.
 *
 * Options:
 *   -o FILE   write the assembled image to FILE (default "a.out")
 *   -v        verbose: print section sizes and bytes while writing
 * Any argument that does not start with '-' is taken as the input file
 * name; if several are given the last one wins.
 *
 * Returns -1 when no input file was given or -o lacks its argument,
 * otherwise the result of parse().
 */
int main(int argc, char *argv[])
{
    int i;

    for (i = 1; i < argc; i++) {
        if (argv[i][0] != '-') {
            // unmatched option is input filename
            input = argv[i];
            continue;
        }
        switch (argv[i][1]) {
        case 'o': // output
            if (i + 1 >= argc) {
                // without this check output would become argv[argc] (NULL)
                printf("Option -o requires a file name.\n");
                return -1;
            }
            output = argv[++i];
            break;
        case 'v': // verbose
            verbose = 1;
            break;
        default:
            // unrecognised options are ignored
            break;
        }
    }

    if (!input) {
        printf("No input files.\n");
        return -1;
    }

    init();
    return parse();
}