Developing a stack-based virtual machine and a compiler for it (Part I)

It so happened that for the past 18 years I have not had to write any C/C++. At work we used Java, and because of the positions I held, my activities were more related to business: negotiations, corporate sales, building production operations, and structuring investment deals. In my free time I wanted to restore my skills, to stretch the part of my brain that I had not exercised for all those 18 years and, naturally, to start from the very basics. All that remained was to come up with a task.





, 70-80 , - (, ) . "", (Go, Kotlin ) .



32-bit C , . Computer Science . , , , . . . .





, :





CPU: a 32-bit stack-based processor with the registers IP (Instruction Pointer) and SP (Stack Pointer); it operates on 32-bit integer words (`__int32`).





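RAM: 65536 32-bit words in a single address space that holds the program code (code/text), the data (data, heap), and the stack (stack). The stack starts at the top of memory and grows downward.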





:





  // Machine word: every memory cell, register, and instruction is one 32-bit
  // signed integer. `__int32` is a Microsoft-specific keyword (a synonym for
  // `int` on MSVC); plain `int` keeps the same type there and also builds with
  // GCC/Clang. The assertion guards the 32-bit assumption on other platforms.
  using WORD = int;
  static_assert(sizeof(WORD) == 4, "WORD must be exactly 32 bits wide");

  // Instruction word layout: the low 8 bits select the operation, bits 9..11
  // select the operand type.
  constexpr WORD OP_CODE_MASK = 0b00000000000000000000000011111111;
  constexpr WORD OP_TYPE_MASK = 0b00000000000000000000111000000000;

  // Stack operations
  constexpr WORD OP_HALT      = 0b00000000000000000000000000000000;
  constexpr WORD OP_CONST     = 0b00000000000000000000000000000001;
  constexpr WORD OP_PUSH      = 0b00000000000000000000000000000010;
  constexpr WORD OP_POP       = 0b00000000000000000000000000000011;

  // Arithmetic operations
  constexpr WORD OP_INC       = 0b00000000000000000000000000000100;
  constexpr WORD OP_DEC       = 0b00000000000000000000000000000101;
  constexpr WORD OP_ADD       = 0b00000000000000000000000000000110;
  constexpr WORD OP_SUB       = 0b00000000000000000000000000000111;
  constexpr WORD OP_MUL       = 0b00000000000000000000000000001000;
  constexpr WORD OP_DIV       = 0b00000000000000000000000000001001;

  // Bitwise operations
  constexpr WORD OP_AND       = 0b00000000000000000000000000001010;
  constexpr WORD OP_OR        = 0b00000000000000000000000000001011;
  constexpr WORD OP_XOR       = 0b00000000000000000000000000001100;
  constexpr WORD OP_NOT       = 0b00000000000000000000000000001101;
  constexpr WORD OP_SHL       = 0b00000000000000000000000000001110;
  constexpr WORD OP_SHR       = 0b00000000000000000000000000001111;

  // Flow control operations (value 0b10000 is not assigned to any opcode)
  constexpr WORD OP_JMP       = 0b00000000000000000000000000010001;
  constexpr WORD OP_CMPJE     = 0b00000000000000000000000000010010;
  constexpr WORD OP_CMPJNE    = 0b00000000000000000000000000010011;
  constexpr WORD OP_CMPJG     = 0b00000000000000000000000000010100;
  constexpr WORD OP_CMPJGE    = 0b00000000000000000000000000010101;
  constexpr WORD OP_CMPJL     = 0b00000000000000000000000000010110;
  constexpr WORD OP_CMPJLE    = 0b00000000000000000000000000010111;

  // Stack duplication, procedure calls, and system calls
  constexpr WORD OP_DUP       = 0b00000000000000000000000000011000;
  constexpr WORD OP_CALL      = 0b00000000000000000000000000011001;
  constexpr WORD OP_RET       = 0b00000000000000000000000000011010;
  constexpr WORD OP_SYSCALL   = 0b00000000000000000000000000011011;

  // Reserved for future instructions
  constexpr WORD OP_RESERVED1 = 0b00000000000000000000000000011100;
  constexpr WORD OP_RESERVED2 = 0b00000000000000000000000000011101;
  constexpr WORD OP_RESERVED3 = 0b00000000000000000000000000011110;
  constexpr WORD OP_RESERVED4 = 0b00000000000000000000000000011111;

  // Size of the VM memory, in 32-bit words.
  constexpr WORD MAX_MEMORY = 65536;
      
      



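Of the 32 bits of an instruction word, the low 8 bits hold the operation code (opcode), 1 bit is an immediate flag, and 3 bits encode the operand type (byte, short, int, long, char, float, double, and so on).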





// Stack-based virtual machine runtime: owns the VM memory and registers and
// interprets the image that has been loaded into that memory.
class VMRuntime {
	public:
		VMRuntime();                                // Constructor
		~VMRuntime();                               // Destructor
		bool loadImage(void* image, size_t size);   // Load executable image (presumably returns true on success - TODO confirm)
		void run();                                 // Runs image from address 0 until OP_HALT or an unknown opcode
		WORD readWord(WORD address);                // Read WORD from memory
		void writeWord(WORD address, WORD value);   // Write WORD to memory 
		WORD getMaxAddress();                       // Get max address in 32-bit words
		WORD getIP();                               // Get Instruction Pointer address
		WORD getSP();                               // Get Stack Pointer address
	private:
		WORD  memory[MAX_MEMORY];                   // Random access memory array (code, data, and stack share it)
		WORD  ip;                                   // Instruction pointer: index of the next word run() will fetch
		WORD  sp;                                   // Stack pointer: index of the current top-of-stack word
		WORD  fp;                                   // Frame pointer (not referenced by run() as shown)
		void systemCall(WORD n);                    // System call dispatcher; n is the call number
		void printState();                          // Print current VM state
	};
      
      



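The public interface lets you load an executable image (loadImage), run it (run), read and write memory (readWord, writeWord), and query the IP and SP registers. The private helpers are printState (prints the current state of the machine) and systemCall, which dispatches system calls (a kind of API for the programs running in the VM).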





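The core of the runtime is the run method: a loop that fetches the next opcode, selects the matching case in a switch, and executes it. Execution stops at HALT.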





// Executes the loaded image until OP_HALT or a runtime error.
//
// Execution always starts at address 0 with an empty stack. The stack lives at
// the top of `memory` and grows downward: a push is `memory[--sp] = v`, a pop
// is `v = memory[sp++]`. Instructions that take an operand store it in the
// word that follows the opcode.
//
// For every two-operand instruction the right-hand operand `b` is on top of the
// stack and the left-hand operand `a` is below it, so `b` is popped first.
//
// No bounds checks are performed on ip, sp, or operand addresses, so a
// malformed image can index outside the memory array.
void VMRuntime::run() {
	WORD a, b;
	WORD opcode;

	ip = 0;
	sp = MAX_MEMORY - 1;

	while (1) {

		opcode = memory[ip++];

		switch (opcode) {
		//------------------------------------------------------------------------
		// STACK OPERATIONS
		//------------------------------------------------------------------------
		case OP_CONST:                       // push the immediate operand
			memory[--sp] = memory[ip++];
			break;
		case OP_PUSH:                        // push the word stored at the operand address
			memory[--sp] = memory[memory[ip++]];
			break;
		case OP_POP:                         // pop into the word at the operand address
			memory[memory[ip++]] = memory[sp++];
			break;
		case OP_DUP:                         // duplicate the top of the stack
			a = memory[sp];
			memory[--sp] = a;
			break;
		//------------------------------------------------------------------------
		// ARITHMETIC OPERATIONS
		//------------------------------------------------------------------------
		case OP_INC:
			memory[sp]++;
			break;
		case OP_DEC:
			memory[sp]--;
			break;
		case OP_ADD:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a + b;
			break;
		case OP_SUB:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a - b;
			break;
		case OP_MUL:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a * b;
			break;
		case OP_DIV:
			b = memory[sp++];
			a = memory[sp++];
			if (b == 0) {
				// Integer division by zero is undefined behaviour in C++;
				// stop the machine with a diagnostic instead.
				std::cout << "Runtime error - division by zero" << std::endl;
				printState();
				return;
			}
			memory[--sp] = a / b;
			break;
		//------------------------------------------------------------------------
		// BITWISE OPERATIONS
		//------------------------------------------------------------------------
		case OP_AND:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a & b;
			break;
		case OP_OR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a | b;
			break;
		case OP_XOR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a ^ b;
			break;
		case OP_NOT:
			a = memory[sp++];
			memory[--sp] = ~a;
			break;
		case OP_SHL:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a << b;
			break;
		case OP_SHR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a >> b;
			break;
		//------------------------------------------------------------------------
		// FLOW CONTROL OPERATIONS
		// Each conditional jump pops both operands; the operand word holds the
		// jump target and is skipped when the condition is false.
		//------------------------------------------------------------------------
		case OP_JMP:
			ip = memory[ip];
			break;
		case OP_CMPJE:
			b = memory[sp++];
			a = memory[sp++];
			if (a == b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJNE:
			b = memory[sp++];
			a = memory[sp++];
			if (a != b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJG:
			b = memory[sp++];
			a = memory[sp++];
			if (a > b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJGE:
			// Pop b first, like every other comparison; popping a first would
			// swap the operands and turn this test into "a <= b".
			b = memory[sp++];
			a = memory[sp++];
			if (a >= b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJL:
			b = memory[sp++];
			a = memory[sp++];
			if (a < b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJLE:
			b = memory[sp++];
			a = memory[sp++];
			if (a <= b) ip = memory[ip]; else ip++;
			break;
		//------------------------------------------------------------------------
		// PROCEDURE CALL OPERATIONS
		//------------------------------------------------------------------------
		case OP_CALL:
			a = memory[ip++];        // target address; ip now points past the operand
			memory[--sp] = ip;       // push the return address
			ip = a;                  // jump to the procedure
			break;
		case OP_RET:
			ip = memory[sp++];       // pop the return address
			break;
		case OP_SYSCALL:
			a = memory[ip++];        // system call number
			systemCall(a);
			break;
		case OP_HALT:
			printState();
			return;
		default:
			std::cout << "Runtime error - unknown opcode=" << opcode << std::endl;
			printState();
			return;
		}
	}
}
  
  // Dispatches system call number `n`.
  //
  // Only one system call is implemented:
  //   0x20 - pops a word address from the stack and prints the NUL-terminated
  //          C string stored at that address to standard output.
  // An out-of-range string address or an unknown call number is reported on
  // standard output; the stack keeps whatever state it had at that point.
  void VMRuntime::systemCall(WORD n) {
    switch (n) {
    case 0x20: {  // print C style string
      const WORD ptr = memory[sp++];
      if (ptr < 0 || ptr >= MAX_MEMORY) {
        std::cout << "Runtime error - invalid string address=" << ptr << std::endl;
        break;
      }
      // The string is stored as raw bytes packed into consecutive words.
      // Scan for the terminator, but never read past the end of VM memory.
      const char* text = reinterpret_cast<const char*>(&memory[ptr]);
      const char* limit = reinterpret_cast<const char*>(memory + MAX_MEMORY);
      const char* end = text;
      while (end != limit && *end != '\0') {
        ++end;
      }
      std::cout.write(text, end - text);
      break;
    }
    default:
      std::cout << "Runtime error - unknown system call=" << n << std::endl;
      break;
    }
  }
      
      



, , , ( , ).





. , , "" .





// Builder for an executable image: a word-addressed memory buffer that is
// filled with instructions and data and then handed to the VM via
// getImage() / getImageSize().
class VMImage {
	public:
		VMImage();
		~VMImage();
		void clear();                                             // Reset the image (exact effect not visible here)
		WORD setEmitPointer(WORD address);                        // Move the emit pointer to `address`; meaning of the return value not visible here
		WORD getEmitPointer();                                    // Current emit pointer
		WORD emit(WORD opcode);                                   // Emit an instruction without operand; presumably returns the address it was written at - TODO confirm
		WORD emit(WORD opcode, WORD operand);                     // Emit an instruction followed by its operand word; return value as above
		WORD readWord(WORD address);                              // Read WORD from the image
		void writeWord(WORD address, WORD value);                 // Write WORD to the image
		WORD writeData(WORD address, void* data, size_t length);  // Copy `length` units of `data` to `address` (presumably bytes - TODO confirm)
		void* getImage();                                         // Pointer to the image contents, as passed to VMRuntime::loadImage
		size_t getImageSize();                                    // Size of the image, as passed to VMRuntime::loadImage
		void dissasemble();                                       // Disassemble the image (output format not visible here)
	private:
		WORD memory[MAX_MEMORY];                                  // Image contents
		WORD imageSize;                                           // Size of the image
		WORD ep;                                                  // Emit pointer

	};
      
      



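Let's write a test program that prints "Hello, world from VM!" 10 times, using a variable, a loop, and a function call. In a (so far hypothetical) assembler for the virtual machine it would look like this: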





start:                           // program entry point, address [0]
push   iVar                      // push the value of iVar onto the stack
dec                              // decrement the top of the stack
call   fn                        // call the function fn
dup                              // duplicate the top of the stack (Top Of Stack)
pop    iVar                      // pop the duplicate back into iVar
const  0                         // push the constant 0
cmpjg  start                     // if iVar > 0, jump to start:
halt                             // stop the machine


fn:                              // function, address [64]
const  myStr                     // push the address of the string
syscall 0x20                     // system call 0x20: print the C string whose address is on the stack
ret                              // return to the caller


dataSeg:                         // data segment, address [128]
iVar = 10                       
myStr = "Hello, world from VM!\n"
      
      



For now I am too lazy to write an assembler for the virtual machine just for this task, because the goal is a high-level language that will be compiled directly into virtual machine instructions. To write this program into an image that the virtual machine can execute, we will use the VMImage class:





// Fills `img` with the demo program: a loop that decrements iVar from 10 and
// calls fn() on every iteration; fn() prints "Hello, world from VM!" through
// system call 0x20.
//
// Layout (word addresses): main code at 0, fn() at 64, data segment at 128.
// `img` must point to a valid VMImage.
void createExecutableImage(VMImage* img) {

	const WORD dataSeg = 128;                   // Data segment starts at 128
	const WORD fn = 64;                         // Function fn() starts at 64

	const WORD iVar = dataSeg;                  // Loop counter variable
	const WORD myStr = dataSeg + 1;             // String data follows the counter

	// A mutable array is used because writeData() takes void*, and a string
	// literal (const char[]) does not convert to void* in standard C++.
	// sizeof counts the terminating NUL, so no hand-counted length is needed.
	char message[] = "Hello, world from VM!\n";

	img->writeWord(iVar, 10);
	img->writeData(myStr, message, sizeof(message));

	const WORD start = img->emit(OP_PUSH, iVar); // stack <- [iVar] (operand 1)
	img->emit(OP_DEC);                          // stack[top]--  (operand 1 decrement)
	img->emit(OP_CALL, fn);                     // Call function fn()
	img->emit(OP_DUP);                          // duplicate stack top (operand 1 duplicate)
	img->emit(OP_POP, iVar);                    // stack -> [iVar] (pop operand 1 duplicate to iVar)
	img->emit(OP_CONST, 0);                     // push const 0 (operand 2)
	img->emit(OP_CMPJG, start);                 // if (operand1 > operand2) jump to addr
	img->emit(OP_HALT);                         // end of program

	img->setEmitPointer(fn);                    // Function fn()
	img->emit(OP_CONST, myStr);                 // Push constant string address
	img->emit(OP_SYSCALL, 0x20);                // Call system call 0x20, to print C style string to standard output
	img->emit(OP_RET);                          // Return

}
      
      



And then we will start the execution of our image in a virtual machine, measuring the time:





// Builds the demo image, runs it in a fresh VM, and prints the wall-clock time
// spent inside run(), in seconds.
//
// Returns 0 after a run, 1 if the image could not be loaded.
int main() {
	VMImage* img = new VMImage();
	createExecutableImage(img);

	VMRuntime* vm = new VMRuntime();
	int exitCode = 0;

	// NOTE(review): assumes loadImage() returns true on success - confirm
	// against its definition.
	if (vm->loadImage(img->getImage(), img->getImageSize())) {
		const auto start = std::chrono::high_resolution_clock::now();

		vm->run();

		const auto end = std::chrono::high_resolution_clock::now();

		// duration<double> with the default period is already in seconds, so
		// no manual nanosecond-to-second division is needed.
		const std::chrono::duration<double> elapsed = end - start;
		std::cout << "EXECUTION TIME: " << elapsed.count() << "s" << std::endl;
	} else {
		std::cerr << "Failed to load executable image" << std::endl;
		exitCode = 1;
	}

	delete vm;
	delete img;
	return exitCode;
}
      
      



We get in the console:





Hooray! Cool! Stack operations, arithmetic, conditional jump instructions, and function calls all work! This is encouraging. It looks like I will keep developing this project...








All Articles