Place FORTH in 512 bytes

CONNECTION OF WORDS through a dictionary
CONNECTION OF WORDS through a dictionary

Original text June 10, 2021 - 38 min read





Software is full of its dependencies when you look deep enough. Compilers written in the language in which they are compiled are the most obvious, but not the only example. To compile the kernel, we need a working kernel. Linkers, build systems, shells. Even text editors if you want to write code and not just download it. How to break this cycle? 1 Since the problem of bootstrapping first came to my attention, I have become interested in this unique area of ​​software engineering. Not out of fear that someone would try to implement an attack on trust , but simply as an interesting challenge.





11 years ago, vanjos72 posted on Reddit what he calls a thought experiment: What if you were locked in a room with an IBM PC that doesn't have an operating system? What is the minimum amount of software you would need to get started to get back to working comfortably?





It just so happens that I have had a lot of free time lately, so I decided to do it more than just a thought experiment. Alas, my computer was not equipped with switches on the front panel, so some programs must already be present on the computer ...





, , . BIOS escape- alt+numpad, .2 , , . 14 :





6a00    push word 0
07      pop es
fd      std
bf1e7c  mov di, buffer+16 ; Adjust to taste. Beware of fenceposting.
        input_loop:
b400    mov ah, 0
cd16    int 0x16
aa      stosb
ebf9    jmp short input_loop
        buffer:

      
      



. , , BIOS , , , .3, . 510 ?





. , DooM- , BASIC, , , bootOS:





bootOS - , . , . .





, , . , , , , .





, . , , , BASIC, . , , . , FORTH. Miniforth GitHub, .





FORTH 504 . , . , , , , 24 , ! .





FORTH

- - FORTH, .





FORTH - , . , , + . , Miniforth, - .s, .





ο»Ώ1 2 3 + .s  <enter>

<2> 1 5 ok

      
      



: ok -





: ;. :





 : double dup + ; <enter>

ok  3 double  .  <enter>

6 

      
      



double, , dup +. dup, , FORTH . :





42 dup .s <enter>

<2> 42 42 ok

      
      



. , , Miniforth .





, :





dup ( a -- a a ) 

swap ( a b -- b a )

      
      



-- - , . -- , . .





FORTH , , , . , , FORTH, , :





DOUBLE:
    call DUP
    call PLUS
    ret

      
      



x86 , ( ). , push pop, , call. -, :





DOUBLE:
    dw DUP
    dw PLUS

      
      



: . SI, lodsw :





DUP:
    pop ax
    push ax
    push ax

    lodsw
    jmp ax

PLUS:
    pop ax
    pop bx 
    add ax, bx
    push ax

    lodsw
    jmp ax

      
      



, NEXT:





%macro NEXT 0
    lodsw
    jmp ax
%endmacro

      
      



, , . .





, ? . BP . 16- x86 [bp]. - [bp+imm8], , bp , , . di . , 4 .





, , . , stosw.





DOUBLE:
    call DOCOL
    dw DUP
    dw PLUS
    dw EXIT

DOCOL:            ;   "do colon word".
    xchg ax, si ;     mov ax, si ,

              ;     .
                ; ax -   ,   mov  -  .
    stosw
    pop si ;  ,   call .
    NEXT

EXIT:
    dec di
    dec di
    mov si, [di]
    NEXT

      
      



, Miniforth, , - BX. push pop :





PLUS:
    pop ax
    add bx, ax
    NEXT

DROP:
    pop bx
    NEXT

DUP:
      push  bx 
      NEXT

      
      



. , , : DOUBLE 2 \ ;? LIT, , , :





DOUBLE:
      call DOCOL
      dw LIT, 2
      dw MULT
      dw EXIT

LIT:
      push  bx 
      lodsw
      xchg  bx ,  ax 
      NEXT

      
      



FORTH , . . , FORTH - , . , LATEST.





:





F_IMMEDIATE equ 0x80
F_HIDDEN    equ 0x40
F_LENMASK   equ 0x1f

      
      



IMMEDIATE, , . , ;. HIDDEN, . , FORTH, ( RECURSE , , ). , , , : ;.





, , 512 . FORTH , , - NEXT.





ο»Ώ





ad      lodsw
ffe0    jmp  ax 

      
      



, . , . , , , , : NEXT , .





, 0xff NEXT, , 0xff. 19 , .4





0x90, , nop, . . , , 0x90 , . 0xff, , .





, LATEST LATEST, , . , , . :, .





;       DI.

MakeLink:
      mov  ax ,  di 
      xchg [LATEST],  ax    ; AX     , 

                  ;  LATEST  DI   .

      stosw
      ret

      
      



, , , , , . ,





     jmp short .after
.write:
      stosb
.after:



3c      db 0x3c ;  stosb ,     AL 
      .write:
aa      stosb

      
      



, - .write, stosb, cmp al, 0xaa. cmp al, mov . - , .





, "". . , , stosb, , , , dec di, :





SPECIAL_BYTE equ 0xff

      mov  si , CompressedData
      mov  di , CompressedBegin
      mov  cx , COMPRESSED_SIZE
.decompress:
      lodsb
      stosb
      cmp  al , SPECIAL_BYTE
      jnz short .not_special
      dec  di 
      mov  ax , 0xffad ; lodsw / jmp ax
      stosw
      mov  al , 0xe0
      stosb
      call MakeLink
.not_special:
      loop .decompress

      
      



. , , , , . , org SPECIAL_BYTE, , , yasm .





boot.s:137: error: program origin redefined

      
      



, . , :





%macro compression_sentinel 0
      db SPECIAL_BYTE
      dd 0xdeadbeef
%endmacro

      
      



, o , SPECIAL_BYTE .





. :





\1. 7C00 - , .





\2. , 7E00.





\3. , yasm .





, , . , compression_sentinel:





%assign savings 0

%macro compression_sentinel 0
%assign savings savings+4
      db SPECIAL_BYTE
      dd 0xdeadbeef
%endmacro

      
      



:





CompressedData:
      times COMPRESSED_SIZE db 0xcc

CompressedBegin:
; ...
CompressedEnd:

COMPRESSED_SIZE equ CompressedEnd - CompressedBegin - savings

      
      



Python:





SPECIAL_BYTE =  b'\xff'
SENTINEL = SPECIAL_BYTE +  b '\xef\xbe\xad\xde'

with open('raw.bin', 'rb') as f:
      data = f.read()

output_offset = data.index( b '\xcc' \  20)
chunks = data[output_offset:].lstrip( b '\xcc').split(SENTINEL)

assert SPECIAL_BYTE not in chunks[0]
compressed =  bytearray (chunks[0])

for chunk in chunks[1:]:
      assert SPECIAL_BYTE not in chunk
      compressed.extend(SPECIAL_BYTE)
      compressed.extend(chunk)

\# ,        ,  .

\#   .

assert  b '\xcc' \  len(compressed) in data
assert  b '\xcc' \  (len(compressed) + 1) not in data

output = data[:output_offset] + compressed

print(len(output), 'bytes used')
output +=  b '\x00' \  (510 - len(output))
output +=  b '\x55\xaa'

with open('boot.bin', 'wb') as f:
      f.write(output)

      
      



, 1:





output +=  b '\x00' \  512
output += open('test.fth', 'rb').read().replace( b '\n',  b ' ')
output +=  b ' ' \  (2048 - len(output))

with open('test.img', 'wb') as f:
      f.write(output)

      
      



compression_sentinel defcode, . ( ), , , , :





; defcode PLUS, "+"
; defcode SEMI, ";", F_IMMEDIATE
%macro defcode 2-3 0
      compression_sentinel
%strlen namelength %2
      db %3 | namelength, %2
%1:
%endmacro

      
      



. , , defcode:





defcode PLUS, "+"
      pop  ax 
      add  bx ,  ax 

defcode MINUS, "-"
      pop  ax 
      sub  ax ,  bx 
      xchg  bx ,  ax 

defcode PEEK, "@"
      ; ...

      
      



DOCOL, EXIT LIT NEXT. , . , EXIT LIT F_HIDDEN, :





CompressedBegin:





DOCOL:
      xchg  ax ,  si 
      stosw
      pop  si  ; grab the pointer pushed by  call 
      compression_sentinel

LIT:
      push  bx 
      lodsw
      xchg  bx ,  ax 
      compression_sentinel

EXIT:
      dec  di 
      dec  di 
      mov  si , [ di ]

defcode PLUS, "+"
      ; ...

      
      



?

, :





be3412    mov  si , 0x1234
8b363412  mov  si , [0x1234]

      
      



Miniforth . , , , , FORTH. - , . . , , . , 2 , :





      org 0x7c00

      jmp 0:start
stack:
      dw HERE
      dw BASE
      dw STATE
      dw LATEST
start:
      ; ...
      mov  sp , stack

      
      



, , . - , .





, , . , .





      jmp 0:start
      ; ...
start:
      push  cs 
      push  cs 
      push  cs 
      pop  ds 
      pop  es 
      pop  ss 
      mov  sp , stack
      cld

      
      



. -, . , bootBASIC Γ’ :





ο»Ώ

31c0    xor  ax ,  ax   ; through AX - 8 bytes
8ed8    mov  ds ,  ax 
8ec0    mov  es ,  ax 
8ed0    mov  ss ,  ax 

0e      push  cs      ; through the stack - 6 bytes
0e      push  cs 
0e      push  cs 
1f      pop  ds 
07      pop  es 
17      pop  ss 

      
      



-, , pop ss mov sp, , SP . , , , 2 , cli/sti, . , x86. 2B x86:





SS POP5 . ( ). ESP (POP ESP)6 , .





, . , DL, BIOS, . load ( ) :





      mov [DRIVE_NUMBER],  dl 
      push  dx  ; for FORTH code

      
      



- FORTH, . " " , , , NEXT, DOCOL, EXIT LIT.





FORTH ,





  • REFILL ( ),





    • WORD ( ),





    • FIND ( ),





    • NUMBER ( ).





Miniforth . , . , WORD >NUMBER , . , , .





, BX DI , , . . .





ReadLine, . , . 0x500, BDA. FORTH , NULL-, . InputPtr, , , .





InputBuf equ 0x500 InputPtr equ 0xa02 ; dw





ReadLine:
      mov  di , InputBuf
      mov [InputPtr],  di 
.loop:
      mov  ah , 0
      int 0x16
      cmp  al , 0x0d
      je short .enter
      stosb
      cmp  al , 0x08
      jne short .write
      dec  di 
      cmp  di , InputBuf ; underflow check
      je short .loop
      dec  di 
.write:
      call PutChar
      jmp short .loop
.enter:
      call PutChar
      mov  al , 0x0a
      int 0x10
      xchg  ax ,  bx  ; write the null terminator by using the BX = 0 from PutChar
      stosb
InterpreterLoop:
      call ParseWord ; returns length in CX. Zero implies no more input.
      jcxz short ReadLine

      
      



BIOS . "TELETYPE OUTPUT", , backspace newline.





ο»Ώ





PutChar:
      xor  bx ,  bx 
      mov  ah , 0x0e
      int 0x10
      ret

      
      



. , "" CRLF (CR LF ). , backspace , . , , \b \b ( , ). .





, " " , BIOS BP, . , .





, . ( , ), . , ParseWord : ( ).





, , , . , , , , , .





:





; returns
; DX = pointer to string
; CX = string length
; BX = numeric value
; clobbers SI and BP
ParseWord:
      mov  si , [InputPtr]
      ; repe scasb ,    ,      - SCASB  ;  DI  SI :( - scasb
      ; uses DI instead of SI :(
.skiploop:
      mov  dx ,  si  ;        , DX    
      lodsb
      cmp  al , " "
      je short .skiploop

      
      



, . , . , , lodsb.





AL . , lodsb .





      xor  cx ,  cx 
      xor  bx ,  bx 
.takeloop:
      and  al , ~0x20
      jz short Return ; jump to a borrowed  ret  from some other routine

      
      



, . , , , .





, :





      inc  cx 
      sub  al , "0" &~0x20
      cmp  al , 9
      jbe .digit_ok
      sub  al , "A" - ("0" &~0x20) - 10
.digit_ok
      cbw

      
      



cbw - , , mov ah, 0. , imul, , mul. DX .7





, 2 .8





      ; imul bx, bx, <BASE>  yasm       ... 
      db 0x69, 0xdb
BASE equ $
      dw 16
      add  bx ,  ax  ; add the new digit

      
      



, . , , , , , .





   ο»Ώ   mov [InputPtr],  si 
      lodsb
      jmp short .takeloop

      
      



, . , , . F_HIDDEN , . , , . , F_IMMEDIATE AL, . .





ο»Ώ





    InterpreterLoop:
          call ParseWord
          jcxz short ReadLine
    
    ;     ..
    ; SI =  
    ; DX =  
    ; CX =  
    ;    BX,     
    LATEST equ $+1
          mov  si , 0
    .find:
          lodsw
          push  ax  ;     
          lodsb
          xor  al ,  cl  ;   ,  AL   
          test  al , F_HIDDEN | F_LENMASK
          jnz short .next
    
          mov  di ,  dx 
          push  cx 
          repe cmpsb
          pop  cx 
          je short .found
    .next:
          pop  si 
          or  si ,  si 
          jnz short .find
    
          ;      ,    .
          ; ...

.found:
      pop  bx  ;     
      ;      , SI    ,  AL  

;  F_IMMEDIATE

      
      



: , .





?

:





  • -





  • -





, , , . immediate or, 0:





ο»Ώ





      ;    , SI    ,  AL  

;  F_IMMEDIATE STATE
STATE equ $+1
      or  al , 1
      xchg  ax ,  si  ;         AX
      jz short .compile

      ;  
      ; ...

      
      



, , : ;, [ ] , . - :





 : third-foo [ foos 3 cells + ] literal @ ;

      
      



STATE 1, inc dec. , , :





defcode LBRACK, "[", F_IMMEDIATE
      inc byte[STATE]

defcode RBRACK, "]"
      dec byte[STATE]

      
      



, BX DI, SI , NEXT .executed:





;   RetSP ο»Ώ

RetSP equ $+1
      mov  di , RS0
      pop  bx 
      mov  si , .return
      jmp  ax 
.return:
      dw .executed
.executed:
      mov [RetSP],  di 
      push  bx 
      jmp short InterpreterLoop

      
      



F_IMMEDIATE, . , , . , . AH, ?





.find:
      lodsw
      push  ax  ;      
      lodsb
      xor  al ,  cl  ;   ,  AL   
      test  al , F_HIDDEN | F_LENMASK
      jnz short .next

      mov  di ,  dx 
      push  cx 
      repe cmpsb
      pop  cx 
      je short .found
.next:
      pop  si 
      or  si ,  si 
      jnz short .find

      ; AH = ?

      
      



? AH , , , , NULL, . STATE, - :





      ;  .    -    ,  ,    

;   . 

      push  bx 
      cmp byte[STATE],  ah 
      jnz short InterpreterLoop

      ;   . ; ...

      
      



HERE. . , , COMMA, FORTH, , - ,.





COMMA:
HERE equ $+1
      mov [CompressedEnd],  ax 
      add word[HERE], 2
      ret

      
      



, . , Γ’ :





      ; ,  . 
      mov  ax , LIT
      call COMMA
      pop  ax 
.compile:
      call COMMA
      jmp short InterpreterLoop

      
      



: ;. :. ParseWord BX SI, . , , HERE DI, . , . , pusha.





defcode COLON, ":"
      pusha
      mov  di , [HERE]
      call MakeLink    ; link field
      call ParseWord
      mov  ax ,  cx 
      stosb            ; length field
      mov  si ,  dx 
      rep movsb        ; name field

      mov  al , 0xe8     ; call
      stosb



   ;   mov si, dx rep movsb ;   mov al, 0xe8 

;  stosb ;    ( ) - (ip   ) 

;  DOCOL - (di + 2) = DOCOL - 2 - di 

ο»Ώ

     mov  ax , DOCOL - 2
      sub  ax ,  di 
      stosw
      mov [HERE],  di 
      popa
      jmp short RBRACK ; enter compilation mode
   ;    

      
      



; . EXIT :





defcode SEMI, ";", F_IMMEDIATE
      mov  ax , EXIT
      call COMMA
      jmp short LBRACK

      
      



, , . , NEXT ? , " ". : ; - , NEXT.





, , . , FORTH : 1 , , 16 64 . load ( blknum -- ) .





0 LBA 0 1, 1 LBA 2 3 . , 0 MBR, LBA 1 , .





BIOS int 0x13 / ah = 0x02 CHS, EDD (ah = 0x42). , , .





EDD, , :





      db 0x10 ;  
      db 0    ; 
      dw sector_count
      dw buffer_offset, buffer_segment
      dq LBA

      
      



. , , . " , , - :





>  
>  
> DiskPacket:       db 0x10, 0 .count:       dw 2 .buffer:

      
      



; , ,





;





CompressedData: times COMPRESSED_SIZE db 0xcc





, . , , . InputPtr. :





c706020a0006    mov word[InputPtr], BlockBuf

      
      



AX :





ο»Ώb80006          mov  ax , BlockBuf
a3020a          mov [InputPtr],  ax 

      
      



, 1 :





defcode LOAD, "load"
      pusha
      mov  di , DiskPacket.buffer
      mov  ax , BlockBuf
      mov word[InputPtr],  ax 
      stosw

      
      



(0000) LBA ( 00). :





31c0    xor  ax ,  ax     ; LBA zeroes
ab      stosw         ; segment
d1e3    shl  bx , 1     ; LBA data
93      xchg  ax ,  bx    ; LBA data
ab      stosw         ; LBA data
93      xchg  ax ,  bx    ; segment
ab      stosw         ; LBA zeroes
ab      stosw         ; LBA zeroes
ab      stosw         ; LBA zeroes

      
      



, LBA 5 . xor ax, ax , stosw xchg ax, bx. , 2 ( , ). , , LBA, .





AX , :





      mov [BlockBuf.end],  al 

      
      



BIOS. , , , , .





DRIVE_NUMBER equ $+1
      mov  dl , 0
      mov  ah , 0x42
      mov  si , DiskPacket
      int 0x13
      jc short $
      popa
      pop  bx 

      
      



u. , . , , . "", . , , .





defcode UDOT, "u."
    xchg  ax ,  bx 
    push " " - "0"
.split:
      xor  dx ,  dx 
      div word[BASE]
      push  dx 
      or  ax ,  ax 
      jnz .split
.print:
      pop  ax 
      add  al , "0"
      cmp  al , "9"
      jbe .got_digit
      add  al , "A" - "0" - 10
.got_digit:
      call PutChar
      cmp  al , " "
      jne short .print
      pop  bx 

      
      



s:

s: - , , , . . : , , , .





, SI, SI, . xchg, [InputPtr] :





;;     buf. 

   defcode LINE, "s:" ; ( buf -- buf+len )
      xchg  si , [InputPtr]
.copy:
      lodsb
      mov [ bx ],  al 
      inc  bx 
      or  al ,  al 
      jnz short .copy
.done:
      dec  bx 
      dec  si 
      xchg  si , [InputPtr]

      
      



BX. DI , DI . . , , s: . , .





Miniforth - , , , . , , . , . , FORTH, .





, +, . +, -, , - , - ο»Ώ: negate 0 swap - ; : + negate - ;.





, . ! , [bx]:





defcode PEEK, "@" ; ( addr -- val )
    mov bx, [bx]

defcode POKE, "!" ; ( val addr -- )
    pop word [bx]
    pop bx

      
      



, :





ο»Ώdefcode CPEEK, "c@" ; ( addr -- ch )
      movzx  bx , byte[ bx ]

defcode CPOKE, "c!" ; ( ch addr -- )
      pop  ax 
      mov [bx],  al 
      pop  bx 

      
      



. dup



drop



, swap



, .





defcode DUP, "dup" ; ( a -- a a )
      push  bx 

defcode DROP, "drop" ; ( a -- )
      pop  bx 

defcode SWAP, "swap" ; ( a b -- b a )
      pop  ax 
      push  bx 
      xchg  ax ,  bx 

      
      



>r



r>



, (, , ). . , dup



, drop



swap



, .9





defcode TO_R, ">r"
      xchg  ax ,  bx 
      stosw
      pop  bx 

defcode FROM_R, "r>"
      dec  di 
      dec  di 
      push  bx 
      mov  bx , [ di ]

      
      



, emit



. bootstrap, .





defcode EMIT, "emit"
      xchg  bx ,  ax 
      call PutChar
      pop  bx 

      
      



, . , , - , , , . - s:



.





, . Miniforth :





Miniforth . ( , , , ), RSS- Twitter, .





1





, , .





2





, , x86, ASCII. .





3 , , , . !





4





, NEXT



.





5





pop ss, mov



.





6





, SDM pop esp



. 6.8.3 (" ") 3A , SP



. , , , , , . . , , , , SS



. , .





7





, , imul



, , . mul



, ...





8





, , BASE



- ? , u.



, , , div word[BASE]



, 16-.





9





PICK



, . , ( < > -- < >)



, . . 10





10





Seriously though, try to devise an unmistakable strategy for turning a stack effect into a sequence of words that implement it. This is a good problem.





github.com/NieDzejkob








All Articles