Mein Mikroprozessor Klassenlehrer gab uns einen Auftrag und sagte:
"Schreiben Sie einen Assembler in C." - Mein geliebter Professor
Es kam mir ein bisschen unlogisch vor.
Wenn ich mich nicht irre, ist die Assemblersprache der erste Schritt vom Maschinencode zum Erlernen übergeordneter Sprachen. Ich meine, C ist eine höhere Sprache als Assembly. Was bringt es also, einen Assembler in C zu schreiben? Was machten sie in der Vergangenheit, während die C-Sprache fehlte? Haben sie Assembler in Maschinencode geschrieben?
Es macht für mich keinen Sinn, einen Maschinencode-Übersetzer für eine niedrigere Sprache in einer höheren Sprache zu schreiben.
Angenommen, wir haben eine brandneue Mikroprozessorarchitektur erstellt, für die es nicht einmal einen C-Compiler gibt. Wird unser in C geschriebener Assembler die neue Architektur simulieren können? Ich meine, wird es nutzlos sein oder nicht?
Übrigens ist mir bekannt, dass GNU Assembler und Netwide Assembler in C geschrieben wurden. Ich frage mich auch, warum sie in C geschrieben wurden.
Zuletzt ist dies der Beispielquellcode für einen einfachen Assembler, den uns unser Professor gegeben hat:
// to compile, gcc assembler.c -o assembler
// No error check is provided.
// Variable names cannot start with 0-9.
// hexadecimals are twos complement.
// first address of the code section is zero, data section follows the code section.
//fout tables are formed: jump table, ldi table, label table and variable table.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//Converts a hexadecimal string to integer.
int hex2int( char* hex)
{
int result=0;
while ((*hex)!='\0')
{
if (('0'<=(*hex))&&((*hex)<='9'))
result = result*16 + (*hex) -'0';
else if (('a'<=(*hex))&&((*hex)<='f'))
result = result*16 + (*hex) -'a'+10;
else if (('A'<=(*hex))&&((*hex)<='F'))
result = result*16 + (*hex) -'A'+10;
hex++;
}
return(result);
}
main()
{
FILE *fp;
char line[100];
char *token = NULL;
char *op1, *op2, *op3, *label;
char ch;
int chch;
int program[1000];
int counter=0; //holds the address of the machine code instruction
// A label is a symbol which mark a location in a program. In the example
// program above, the string "lpp", "loop" and "lp1" are labels.
struct label
{
int location;
char *label;
};
struct label labeltable[50]; //there can be 50 labels at most in our programs
int nooflabels = 0; //number of labels encountered during assembly.
// Jump instructions cannot be assembled readily because we may not know the value of
// the label when we encountered a jump instruction. This happens if the label used by
// that jump instruction appear below that jump instruction. This is the situation
// with the label "loop" in the example program above. Hence, the location of jump
// instructions must be stored.
struct jumpinstruction
{
int location;
char *label;
};
struct jumpinstruction jumptable[100]; //There can be at most 100 jumps
int noofjumps=0; //number of jumps encountered during assembly.
// The list of variables in .data section and their locations.
struct variable
{
int location;
char *name;
};
struct variable variabletable[50]; //There can be 50 varables at most.
int noofvariables = 0;
//Variables and labels are used by ldi instructions.
//The memory for the variables are traditionally allocated at the end of the code section.
//Hence their addresses are not known when we assemble a ldi instruction. Also, the value of
//a label may not be known when we encounter a ldi instruction which uses that label.
//Hence, the location of the ldi instructions must be kept, and these instructions must be
//modified when we discover the address of the label or variable that it uses.
struct ldiinstruction
{
int location;
char *name;
};
struct ldiinstruction lditable[100];
int noofldis=0;
fp = fopen("name_of_program","r");
if (fp != NULL)
{
while(fgets(line,sizeof line,fp)!= NULL) //skip till .code section
{
token=strtok(line,"\n\t\r ");
if (strcmp(token,".code")==0 )
break;
}
while(fgets(line,sizeof line,fp)!= NULL)
{
token=strtok(line,"\n\t\r "); //get the instruction mnemonic or label
//======================================== FIRST PASS ======================================================
while (token)
{
if (strcmp(token,"ldi")==0) //---------------LDI INSTRUCTION--------------------
{
op1 = strtok(NULL,"\n\t\r "); //get the 1st operand of ldi, which is the register that ldi loads
op2 = strtok(NULL,"\n\t\r "); //get the 2nd operand of ldi, which is the data that is to be loaded
program[counter]=0x1000+hex2int(op1); //generate the first 16-bit of the ldi instruction
counter++; //move to the second 16-bit of the ldi instruction
if ((op2[0]=='0')&&(op2[1]=='x')) //if the 2nd operand is twos complement hexadecimal
program[counter]=hex2int(op2+2)&0xffff; //convert it to integer and form the second 16-bit
else if (( (op2[0])=='-') || ((op2[0]>='0')&&(op2[0]<='9'))) //if the 2nd operand is decimal
program[counter]=atoi(op2)&0xffff; //convert it to integer and form the second 16-bit
else //if the second operand is not decimal or hexadecimal, it is a laber or a variable.
{ //in this case, the 2nd 16-bits of the ldi instruction cannot be generated.
lditable[noofldis].location = counter; //record the location of this 2nd 16-bit
op1=(char*)malloc(sizeof(op2)); //and the name of the label/variable that it must contain
strcpy(op1,op2); //in the lditable array.
lditable[noofldis].name = op1;
noofldis++;
}
counter++; //skip to the next memory location
}
else if (strcmp(token,"ld")==0) //------------LD INSTRUCTION---------------------
{
op1 = strtok(NULL,"\n\t\r "); //get the 1st operand of ld, which is the destination register
op2 = strtok(NULL,"\n\t\r "); //get the 2nd operand of ld, which is the source register
ch = (op1[0]-48)| ((op2[0]-48) << 3); //form bits 11-0 of machine code. 48 is ASCII value of '0'
program[counter]=0x2000+((ch)&0x00ff); //form the instruction and write it to memory
counter++; //skip to the next empty location in memory
}
else if (strcmp(token,"st")==0) //-------------ST INSTRUCTION--------------------
{
//to be added
}
else if (strcmp(token,"jz")==0) //------------- CONDITIONAL JUMP ------------------
{
//to be added
}
else if (strcmp(token,"jmp")==0) //-------------- JUMP -----------------------------
{
op1 = strtok(NULL,"\n\t\r "); //read the label
jumptable[noofjumps].location = counter; //write the jz instruction's location into the jumptable
op2=(char*)malloc(sizeof(op1)); //allocate space for the label
strcpy(op2,op1); //copy the label into the allocated space
jumptable[noofjumps].label=op2; //point to the label from the jumptable
noofjumps++; //skip to the next empty location in jumptable
program[counter]=0x5000; //write the incomplete instruction (just opcode) to memory
counter++; //skip to the next empty location in memory.
}
else if (strcmp(token,"add")==0) //----------------- ADD -------------------------------
{
op1 = strtok(NULL,"\n\t\r ");
op2 = strtok(NULL,"\n\t\r ");
op3 = strtok(NULL,"\n\t\r ");
chch = (op1[0]-48)| ((op2[0]-48)<<3)|((op3[0]-48)<<6);
program[counter]=0x7000+((chch)&0x00ff);
counter++;
}
else if (strcmp(token,"sub")==0)
{
//to be added
}
else if (strcmp(token,"and")==0)
{
//to be added
}
else if (strcmp(token,"or")==0)
{
//to be added
}
else if (strcmp(token,"xor")==0)
{
//to be added
}
else if (strcmp(token,"not")==0)
{
op1 = strtok(NULL,"\n\t\r ");
op2 = strtok(NULL,"\n\t\r ");
ch = (op1[0]-48)| ((op2[0]-48)<<3);
program[counter]=0x7500+((ch)&0x00ff);
counter++;
}
else if (strcmp(token,"mov")==0)
{
//to be added
}
else if (strcmp(token,"inc")==0)
{
op1 = strtok(NULL,"\n\t\r ");
ch = (op1[0]-48)| ((op1[0]-48)<<3);
program[counter]=0x7700+((ch)&0x00ff);
counter++;
}
else if (strcmp(token,"dec")==0)
{
//to be added
}
else //------WHAT IS ENCOUNTERED IS NOT AN INSTRUCTION BUT A LABEL. UPDATE THE LABEL TABLE--------
{
labeltable[nooflabels].location = counter; //buraya bir counter koy. error check
op1=(char*)malloc(sizeof(token));
strcpy(op1,token);
labeltable[nooflabels].label=op1;
nooflabels++;
}
token = strtok(NULL,",\n\t\r ");
}
}
//================================= SECOND PASS ==============================
//supply the address fields of the jump and jz instructions from the
int i,j;
for (i=0; i<noofjumps;i++) //for all jump/jz instructions
{
j=0;
while ( strcmp(jumptable[i].label , labeltable[j].label) != 0 ) //if the label for this jump/jz does not match with the
j++; // jth label in the labeltable, check the next label..
program[jumptable[i].location] +=(labeltable[j].location-jumptable[i].location-1)&0x0fff; //copy the jump address into memory.
}
// search for the start of the .data segment
rewind(fp);
while(fgets(line,sizeof line,fp)!= NULL) //skip till .data, if no .data, also ok.
{
token=strtok(line,"\n\t\r ");
if (strcmp(token,".data")==0 )
break;
}
// process the .data segment and generate the variabletable[] array.
int dataarea=0;
while(fgets(line,sizeof line,fp)!= NULL)
{
token=strtok(line,"\n\t\r ");
if (strcmp(token,".code")==0 ) //go till the .code segment
break;
else if (token[strlen(token)-1]==':')
{
token[strlen(token)-1]='\0'; //will not cause memory leak, as we do not do malloc
variabletable[noofvariables].location=counter+dataarea;
op1=(char*)malloc(sizeof(token));
strcpy(op1,token);
variabletable[noofvariables].name=op1;
token = strtok(NULL,",\n\t\r ");
if (token==NULL)
program[counter+dataarea]=0;
else if (strcmp(token, ".space")==0)
{
token=strtok(NULL,"\n\t\r ");
dataarea+=atoi(token);
}
else if((token[0]=='0')&&(token[1]=='x'))
program[counter+dataarea]=hex2int(token+2)&0xffff;
else if (( (token[0])=='-') || ('0'<=(token[0])&&(token[0]<='9')) )
program[counter+dataarea]=atoi(token)&0xffff;
noofvariables++;
dataarea++;
}
}
// supply the address fields for the ldi instructions from the variable table
for( i=0; i<noofldis;i++)
{
j=0;
while ((j<noofvariables)&&( strcmp( lditable[i].name , variabletable[j].name)!=0 ))
j++;
if (j<noofvariables)
program[lditable[i].location] = variabletable[j].location;
}
// supply the address fields for the ldi instructions from the label table
for( i=0; i<noofldis;i++)
{
j=0;
while ((j<nooflabels)&&( strcmp( lditable[i].name , labeltable[j].label)!=0 ))
j++;
if (j<nooflabels){
program[lditable[i].location] = (labeltable[j].location)&0x0fff;
printf("%d %d %d\n", i, j, (labeltable[j].location));
}
}
//display the resulting tables
printf("LABEL TABLE\n");
for (i=0;i<nooflabels;i++)
printf("%d %s\n", labeltable[i].location, labeltable[i].label);
printf("\n");
printf("JUMP TABLE\n");
for (i=0;i<noofjumps;i++)
printf("%d %s\n", jumptable[i].location, jumptable[i].label);
printf("\n");
printf("VARIABLE TABLE\n");
for (i=0;i<noofvariables;i++)
printf("%d %s\n", variabletable[i].location, variabletable[i].name);
printf("\n");
printf("LDI INSTRUCTIONS\n");
for (i=0;i<noofldis;i++)
printf("%d %s\n", lditable[i].location, lditable[i].name);
printf("\n");
fclose(fp);
fp = fopen("RAM","w");
fprintf(fp,"v2.0 raw\n");
for (i=0;i<counter+dataarea;i++)
fprintf(fp,"%04x\n",program[i]);
}
}