Original text June 10, 2021 - 38 min read
Software is full of its dependencies when you look deep enough. Compilers written in the language in which they are compiled are the most obvious, but not the only example. To compile the kernel, we need a working kernel. Linkers, build systems, shells. Even text editors if you want to write code and not just download it. How to break this cycle? 1 Since the problem of bootstrapping first came to my attention, I have become interested in this unique area of ββsoftware engineering. Not out of fear that someone would try to implement an attack on trust , but simply as an interesting challenge.
11 years ago, vanjos72 posted on Reddit what he calls a thought experiment: What if you were locked in a room with an IBM PC that doesn't have an operating system? What is the minimum amount of software you would need to get started to get back to working comfortably?
It just so happens that I have had a lot of free time lately, so I decided to do it more than just a thought experiment. Alas, my computer was not equipped with switches on the front panel, so some programs must already be present on the computer ...
, , . BIOS escape- alt+numpad, .2 , , . 14 :
6a00 push word 0
07 pop es
fd std
bf1e7c mov di, buffer+16 ; Adjust to taste. Beware of fenceposting.
input_loop:
b400 mov ah, 0
cd16 int 0x16
aa stosb
ebf9 jmp short input_loop
buffer:
. , , BIOS , , , .3, . 510 ?
. , DooM- , BASIC, , , bootOS:
bootOS - , . , . .
, , . , , , , .
, . , , , BASIC, . , , . , FORTH. Miniforth GitHub, .
FORTH 504 . , . , , , , 24 , ! .
FORTH
- - FORTH, .
FORTH - , . , , + . , Miniforth, - .s, .
ο»Ώ1 2 3 + .s <enter>
<2> 1 5 ok
: ok -
: ;. :
: double dup + ; <enter>
ok 3 double . <enter>
6
double, , dup +. dup, , FORTH . :
42 dup .s <enter>
<2> 42 42 ok
. , , Miniforth .
, :
dup ( a -- a a )
swap ( a b -- b a )
-- - , . -- , . .
FORTH , , , . , , FORTH, , :
DOUBLE:
call DUP
call PLUS
ret
x86 , ( ). , push pop, , call. -, :
DOUBLE:
dw DUP
dw PLUS
: . SI, lodsw :
DUP:
pop ax
push ax
push ax
lodsw
jmp ax
PLUS:
pop ax
pop bx
add ax, bx
push ax
lodsw
jmp ax
, NEXT:
%macro NEXT 0
lodsw
jmp ax
%endmacro
, , . .
, ? . BP . 16- x86 [bp]. - [bp+imm8], , bp , , . di . , 4 .
, , . , stosw.
DOUBLE:
call DOCOL
dw DUP
dw PLUS
dw EXIT
DOCOL: ; "do colon word".
xchg ax, si ; mov ax, si ,
; .
; ax - , mov - .
stosw
pop si ; , call .
NEXT
EXIT:
dec di
dec di
mov si, [di]
NEXT
, Miniforth, , - BX. push pop :
PLUS:
pop ax
add bx, ax
NEXT
DROP:
pop bx
NEXT
DUP:
push bx
NEXT
. , , : DOUBLE 2 \ ;? LIT, , , :
DOUBLE:
call DOCOL
dw LIT, 2
dw MULT
dw EXIT
LIT:
push bx
lodsw
xchg bx , ax
NEXT
FORTH , . . , FORTH - , . , LATEST.
:
F_IMMEDIATE equ 0x80
F_HIDDEN equ 0x40
F_LENMASK equ 0x1f
IMMEDIATE, , . , ;. HIDDEN, . , FORTH, ( RECURSE , , ). , , , : ;.
, , 512 . FORTH , , - NEXT.
ο»Ώ
ad lodsw
ffe0 jmp ax
, . , . , , , , : NEXT , .
, 0xff NEXT, , 0xff. 19 , .4
0x90, , nop, . . , , 0x90 , . 0xff, , .
, LATEST LATEST, , . , , . :, .
; DI.
MakeLink:
mov ax , di
xchg [LATEST], ax ; AX ,
; LATEST DI .
stosw
ret
, , , , , . ,
jmp short .after
.write:
stosb
.after:
3c db 0x3c ; stosb , AL
.write:
aa stosb
, - .write, stosb, cmp al, 0xaa. cmp al, mov . - , .
, "". . , , stosb, , , , dec di, :
SPECIAL_BYTE equ 0xff
mov si , CompressedData
mov di , CompressedBegin
mov cx , COMPRESSED_SIZE
.decompress:
lodsb
stosb
cmp al , SPECIAL_BYTE
jnz short .not_special
dec di
mov ax , 0xffad ; lodsw / jmp ax
stosw
mov al , 0xe0
stosb
call MakeLink
.not_special:
loop .decompress
. , , , , . , org SPECIAL_BYTE, , , yasm .
boot.s:137: error: program origin redefined
, . , :
%macro compression_sentinel 0
db SPECIAL_BYTE
dd 0xdeadbeef
%endmacro
, o , SPECIAL_BYTE .
. :
\1. 7C00 - , .
\2. , 7E00.
\3. , yasm .
, , . , compression_sentinel:
%assign savings 0
%macro compression_sentinel 0
%assign savings savings+4
db SPECIAL_BYTE
dd 0xdeadbeef
%endmacro
:
CompressedData:
times COMPRESSED_SIZE db 0xcc
CompressedBegin:
; ...
CompressedEnd:
COMPRESSED_SIZE equ CompressedEnd - CompressedBegin - savings
Python:
SPECIAL_BYTE = b'\xff'
SENTINEL = SPECIAL_BYTE + b '\xef\xbe\xad\xde'
with open('raw.bin', 'rb') as f:
data = f.read()
output_offset = data.index( b '\xcc' \ 20)
chunks = data[output_offset:].lstrip( b '\xcc').split(SENTINEL)
assert SPECIAL_BYTE not in chunks[0]
compressed = bytearray (chunks[0])
for chunk in chunks[1:]:
assert SPECIAL_BYTE not in chunk
compressed.extend(SPECIAL_BYTE)
compressed.extend(chunk)
\# , , .
\# .
assert b '\xcc' \ len(compressed) in data
assert b '\xcc' \ (len(compressed) + 1) not in data
output = data[:output_offset] + compressed
print(len(output), 'bytes used')
output += b '\x00' \ (510 - len(output))
output += b '\x55\xaa'
with open('boot.bin', 'wb') as f:
f.write(output)
, 1:
output += b '\x00' \ 512
output += open('test.fth', 'rb').read().replace( b '\n', b ' ')
output += b ' ' \ (2048 - len(output))
with open('test.img', 'wb') as f:
f.write(output)
compression_sentinel defcode, . ( ), , , , :
; defcode PLUS, "+"
; defcode SEMI, ";", F_IMMEDIATE
%macro defcode 2-3 0
compression_sentinel
%strlen namelength %2
db %3 | namelength, %2
%1:
%endmacro
. , , defcode:
defcode PLUS, "+"
pop ax
add bx , ax
defcode MINUS, "-"
pop ax
sub ax , bx
xchg bx , ax
defcode PEEK, "@"
; ...
DOCOL, EXIT LIT NEXT. , . , EXIT LIT F_HIDDEN, :
CompressedBegin:
DOCOL:
xchg ax , si
stosw
pop si ; grab the pointer pushed by call
compression_sentinel
LIT:
push bx
lodsw
xchg bx , ax
compression_sentinel
EXIT:
dec di
dec di
mov si , [ di ]
defcode PLUS, "+"
; ...
?
, :
be3412 mov si , 0x1234
8b363412 mov si , [0x1234]
Miniforth . , , , , FORTH. - , . . , , . , 2 , :
org 0x7c00
jmp 0:start
stack:
dw HERE
dw BASE
dw STATE
dw LATEST
start:
; ...
mov sp , stack
, , . - , .
jmp 0:start
; ...
start:
push cs
push cs
push cs
pop ds
pop es
pop ss
mov sp , stack
cld
. -, . , bootBASIC Γ’ :
ο»Ώ
31c0 xor ax , ax ; through AX - 8 bytes
8ed8 mov ds , ax
8ec0 mov es , ax
8ed0 mov ss , ax
0e push cs ; through the stack - 6 bytes
0e push cs
0e push cs
1f pop ds
07 pop es
17 pop ss
-, , pop ss mov sp, , SP . , , , 2 , cli/sti, . , x86. 2B x86:
SS POP5 . ( ). ESP (POP ESP)6 , .
, . , DL, BIOS, . load ( ) :
mov [DRIVE_NUMBER], dl
push dx ; for FORTH code
- FORTH, . " " , , , NEXT, DOCOL, EXIT LIT.
FORTH ,
REFILL ( ),
WORD ( ),
FIND ( ),
NUMBER ( ).
Miniforth . , . , WORD >NUMBER , . , , .
, BX DI , , . . .
ReadLine, . , . 0x500, BDA. FORTH , NULL-, . InputPtr, , , .
InputBuf equ 0x500 InputPtr equ 0xa02 ; dw
ReadLine:
mov di , InputBuf
mov [InputPtr], di
.loop:
mov ah , 0
int 0x16
cmp al , 0x0d
je short .enter
stosb
cmp al , 0x08
jne short .write
dec di
cmp di , InputBuf ; underflow check
je short .loop
dec di
.write:
call PutChar
jmp short .loop
.enter:
call PutChar
mov al , 0x0a
int 0x10
xchg ax , bx ; write the null terminator by using the BX = 0 from PutChar
stosb
InterpreterLoop:
call ParseWord ; returns length in CX. Zero implies no more input.
jcxz short ReadLine
BIOS . "TELETYPE OUTPUT", , backspace newline.
ο»Ώ
PutChar:
xor bx , bx
mov ah , 0x0e
int 0x10
ret
. , "" CRLF (CR LF ). , backspace , . , , \b \b ( , ). .
, " " , BIOS BP, . , .
, . ( , ), . , ParseWord : ( ).
, , , . , , , , , .
:
; returns
; DX = pointer to string
; CX = string length
; BX = numeric value
; clobbers SI and BP
ParseWord:
mov si , [InputPtr]
; repe scasb , , - SCASB ; DI SI :( - scasb
; uses DI instead of SI :(
.skiploop:
mov dx , si ; , DX
lodsb
cmp al , " "
je short .skiploop
, . , . , , lodsb.
AL . , lodsb .
xor cx , cx
xor bx , bx
.takeloop:
and al , ~0x20
jz short Return ; jump to a borrowed ret from some other routine
, . , , , .
, :
inc cx
sub al , "0" &~0x20
cmp al , 9
jbe .digit_ok
sub al , "A" - ("0" &~0x20) - 10
.digit_ok
cbw
cbw - , , mov ah, 0. , imul, , mul. DX .7
, 2 .8
; imul bx, bx, <BASE> yasm ...
db 0x69, 0xdb
BASE equ $
dw 16
add bx , ax ; add the new digit
, . , , , , , .
ο»Ώ mov [InputPtr], si
lodsb
jmp short .takeloop
, . , , . F_HIDDEN , . , , . , F_IMMEDIATE AL, . .
ο»Ώ
InterpreterLoop:
call ParseWord
jcxz short ReadLine
; ..
; SI =
; DX =
; CX =
; BX,
LATEST equ $+1
mov si , 0
.find:
lodsw
push ax ;
lodsb
xor al , cl ; , AL
test al , F_HIDDEN | F_LENMASK
jnz short .next
mov di , dx
push cx
repe cmpsb
pop cx
je short .found
.next:
pop si
or si , si
jnz short .find
; , .
; ...
.found:
pop bx ;
; , SI , AL
; F_IMMEDIATE
: , .
?
:
-
-
, , , . immediate or, 0:
ο»Ώ
; , SI , AL
; F_IMMEDIATE STATE
STATE equ $+1
or al , 1
xchg ax , si ; AX
jz short .compile
;
; ...
, , : ;, [ ] , . - :
: third-foo [ foos 3 cells + ] literal @ ;
STATE 1, inc dec. , , :
defcode LBRACK, "[", F_IMMEDIATE
inc byte[STATE]
defcode RBRACK, "]"
dec byte[STATE]
, BX DI, SI , NEXT .executed:
; RetSP ο»Ώ
RetSP equ $+1
mov di , RS0
pop bx
mov si , .return
jmp ax
.return:
dw .executed
.executed:
mov [RetSP], di
push bx
jmp short InterpreterLoop
F_IMMEDIATE, . , , . , . AH, ?
.find:
lodsw
push ax ;
lodsb
xor al , cl ; , AL
test al , F_HIDDEN | F_LENMASK
jnz short .next
mov di , dx
push cx
repe cmpsb
pop cx
je short .found
.next:
pop si
or si , si
jnz short .find
; AH = ?
? AH , , , , NULL, . STATE, - :
; . - , , ; . push bx cmp byte[STATE], ah jnz short InterpreterLoop ; . ; ...
HERE. . , , COMMA, FORTH, , - ,.
COMMA:
HERE equ $+1
mov [CompressedEnd], ax
add word[HERE], 2
ret
, . , Γ’ :
; , .
mov ax , LIT
call COMMA
pop ax
.compile:
call COMMA
jmp short InterpreterLoop
: ;. :. ParseWord BX SI, . , , HERE DI, . , . , pusha.
defcode COLON, ":"
pusha
mov di , [HERE]
call MakeLink ; link field
call ParseWord
mov ax , cx
stosb ; length field
mov si , dx
rep movsb ; name field
mov al , 0xe8 ; call
stosb
; mov si, dx rep movsb ; mov al, 0xe8
; stosb ; ( ) - (ip )
; DOCOL - (di + 2) = DOCOL - 2 - di
ο»Ώ
mov ax , DOCOL - 2
sub ax , di
stosw
mov [HERE], di
popa
jmp short RBRACK ; enter compilation mode
;
; . EXIT :
defcode SEMI, ";", F_IMMEDIATE
mov ax , EXIT
call COMMA
jmp short LBRACK
, , . , NEXT ? , " ". : ; - , NEXT.
, , . , FORTH : 1 , , 16 64 . load ( blknum -- ) .
0 LBA 0 1, 1 LBA 2 3 . , 0 MBR, LBA 1 , .
BIOS int 0x13 / ah = 0x02 CHS, EDD (ah = 0x42). , , .
EDD, , :
db 0x10 ;
db 0 ;
dw sector_count
dw buffer_offset, buffer_segment
dq LBA
. , , . " , , - :
>
>
> DiskPacket: db 0x10, 0 .count: dw 2 .buffer:
; , ,
;
CompressedData: times COMPRESSED_SIZE db 0xcc
, . , , . InputPtr. :
c706020a0006 mov word[InputPtr], BlockBuf
AX :
ο»Ώb80006 mov ax , BlockBuf
a3020a mov [InputPtr], ax
, 1 :
defcode LOAD, "load"
pusha
mov di , DiskPacket.buffer
mov ax , BlockBuf
mov word[InputPtr], ax
stosw
(0000) LBA ( 00). :
31c0 xor ax , ax ; LBA zeroes
ab stosw ; segment
d1e3 shl bx , 1 ; LBA data
93 xchg ax , bx ; LBA data
ab stosw ; LBA data
93 xchg ax , bx ; segment
ab stosw ; LBA zeroes
ab stosw ; LBA zeroes
ab stosw ; LBA zeroes
, LBA 5 . xor ax, ax , stosw xchg ax, bx. , 2 ( , ). , , LBA, .
AX , :
mov [BlockBuf.end], al
BIOS. , , , , .
DRIVE_NUMBER equ $+1
mov dl , 0
mov ah , 0x42
mov si , DiskPacket
int 0x13
jc short $
popa
pop bx
u. , . , , . "", . , , .
defcode UDOT, "u."
xchg ax , bx
push " " - "0"
.split:
xor dx , dx
div word[BASE]
push dx
or ax , ax
jnz .split
.print:
pop ax
add al , "0"
cmp al , "9"
jbe .got_digit
add al , "A" - "0" - 10
.got_digit:
call PutChar
cmp al , " "
jne short .print
pop bx
s:
s: - , , , . . : , , , .
, SI, SI, . xchg, [InputPtr] :
;; buf.
defcode LINE, "s:" ; ( buf -- buf+len )
xchg si , [InputPtr]
.copy:
lodsb
mov [ bx ], al
inc bx
or al , al
jnz short .copy
.done:
dec bx
dec si
xchg si , [InputPtr]
BX. DI , DI . . , , s: . , .
Miniforth - , , , . , , . , . , FORTH, .
, +, . +, -, , - , - ο»Ώ: negate 0 swap - ; : + negate - ;.
, . ! , [bx]:
defcode PEEK, "@" ; ( addr -- val )
mov bx, [bx]
defcode POKE, "!" ; ( val addr -- )
pop word [bx]
pop bx
, :
ο»Ώdefcode CPEEK, "c@" ; ( addr -- ch )
movzx bx , byte[ bx ]
defcode CPOKE, "c!" ; ( ch addr -- )
pop ax
mov [bx], al
pop bx
. dup
drop
, swap
, .
defcode DUP, "dup" ; ( a -- a a )
push bx
defcode DROP, "drop" ; ( a -- )
pop bx
defcode SWAP, "swap" ; ( a b -- b a )
pop ax
push bx
xchg ax , bx
>r
r>
, (, , ). . , dup
, drop
swap
, .9
defcode TO_R, ">r"
xchg ax , bx
stosw
pop bx
defcode FROM_R, "r>"
dec di
dec di
push bx
mov bx , [ di ]
, emit
. bootstrap, .
defcode EMIT, "emit"
xchg bx , ax
call PutChar
pop bx
, . , , - , , , . - s:
.
, . Miniforth :
Miniforth . ( , , , ), RSS- Twitter, .
1
, , .
2
3 , , , . !
4
, NEXT
.
5
pop ss, mov
.
6
, SDM pop esp
. 6.8.3 (" ") 3A , SP
. , , , , , . . , , , , SS
. , .
7
, , imul
, , . mul
, ...
8
, , BASE
- ? , u.
, , , div word[BASE]
, 16-.
9
PICK
, . , ( < > -- < >)
, . . 10
10
Seriously though, try to devise an unmistakable strategy for turning a stack effect into a sequence of words that implement it. This is a good problem.