Every person who is fond of programming is obliged to write his own version of the solution to this problem. I decided to be no exception.
In accordance with, x64 software conventions
we will assume that the number to be converted is located in XMM0
.
We will use x64
bit code for x32
bit addressing. This way of addressing allows you to take advantage of both dialects.
Save the stack value and create a paragraph-aligned data point to improve performance:
;
mov r9d, esp
lea r8d,[r9d - 70h]
and r8d, 0FFFFFFF0h
mov esp, r8d
We prepare it by FPU
freeing it from data and set the increased accuracy and rounding to zero:
fsave [esp]
finit
mov dword ptr[esp - dword], 037F0F7Fh
fldcw [esp - dword]
We overload the number from XMM0
to FPU
:
movd qword ptr[esp - xmmword], xmm0
fld qword ptr[esp - xmmword]
Find the decimal order of the Number:
fld st(0)
fxtract
fldl2t
fst st(1)
fdivr st(0),st(2)
frndint
Set rounding to the nearest number:
fldcw [esp - word]
We preserve the order of the Number and find the decimal order of the Multiplier for converting the significant digits of the Number to the integer part:
fist dword ptr[esp - dword]
movzx edx, word ptr[esp - dword]
mov dword ptr[esp - dword], 10h
fisubr dword ptr[esp - dword]
Find the decimal Multiplier and multiply it by the Number:
fmulp st(1),st(0)
fst st(1)
frndint
fsub st(1),st(0)
fld1
fscale
fstp st(1)
fmulp st(2),st(0)
f2xm1
fld1
faddp st(1),st(0)
fmulp st(1),st(0)
frndint
We reload the resulting number from FPU
into registers AX
and XMM0
in the size of the first 2 and 8 subsequent bytes, respectively. When loading 8 bytes into a register, XMM0
we simultaneously change the order of the bytes by pre-aligning the stack pointer by the paragraph:
fbstp tbyte ptr[esp - xmmword]
mov ax, word ptr[esp - qword]
pshuflw xmm0, xmmword ptr[esp - xmmword], 00011011b
We restore the state FPU
:
frstor [esp]
0
:
punpcklbw xmm0, xmm0
pshuflw xmm0, xmm0, 10110001b
pshufhw xmm0, xmm0, 10110001b
:
mov dword ptr[esp], 0FF00FF0h
pshufd xmm1, xmmword ptr[esp], 0
pand xmm0, xmm1
psrlw xmm1, 4
movdqa xmm2, xmm1
pand xmm1, xmm0
psrlw xmm1, 4
pandn xmm2, xmm0
paddb xmm1, xmm2
:
pxor xmm0, xmm0
pcmpeqb xmm0, xmm1
:
mov dword ptr[esp], 30303030h
pshufd xmm2, xmmword ptr[esp], 0
paddb xmm1, xmm2
:
mov byte ptr[esp],'-'
btr ax, 0Fh
adc esp, 0
add ax,'.0'
mov word ptr[esp], ax
0
:
movdqu xmmword ptr[esp + word], xmm1
pmovmskb ecx, xmm0
bsf ecx, ecx
add esp, ecx
:
mov ecx,(word + dword)
mov eax, edx
neg dx
jnc @f
cmovns eax, edx
setns dh
:
cmp ax, 0Ah
sbb ecx, ecx
mov dl, 0Ah
div dl
cmp al, 0Ah
sbb ecx, 0
shl eax, 8
shr ax, 8
div dl
add eax, 303030h
lea edx,[edx * 2 + 2B51h]
mov dword ptr[esp + word + ecx + word], eax
mov word ptr[esp + word], dx
EAX
ECX
:
@@: lea ecx,[esp + ecx + qword] sub ecx, r8d mov eax,ecx
XMM1
XMM2
:
movdqa xmm1, xmmword ptr[r8d]
movdqa xmm2, xmmword ptr[r8d + xmmword]
:
mov esp, r9d
.
/ . x64 software conventions
.
- .
- , .
- .
- SIMD
FPU
.
, - .