Statistics

Members: 1925
News: 293
Web Links: 1
Visitors: 3823170

Who's Online

We have 1 guest online
Damn Vulnerable Linux
Damn Vulnerable Linux (DVL) is a Linux-based (modified Damn Small Linux) tool for IT-Security & IT-Anti-Security and Attack & Defense. [CLICK HERE FOR MORE INFO!]

Featured Conference Video

T16-Recon2006-Joe_Stewart-OllyBonE.gif OllyBone - Semi-Automatic Unpacking on IA-32. View the conference video here!
Home
Converting Strings to Numbers
User Rating: / 0
PoorBest 
Written by Chris Dragan   


Many programs require user input, which often consists of numbers. High-level languages provide library functions for this purpose, for example sscanf() in C. In assembly, however, everything has to be done by hand, even under Windows (with the exception of edit controls, which offer the GetDlgItemInt() function).

 

 

My last project required a flexible function for reading numbers stored as strings. Out of that project came a handy function which handles most of the common number formats.

The function expects esi register to point at a string, which is a number. The string can have one of the following forms:

10        decimal integer
10D       decimal integer
1010B     binary integer
AH        hexadecimal integer (does not require leading zero)
0XA       hexadecimal integer
$A        hexadecimal integer
12Q       octal integer
12O       octal integer
10F       float
10.0      float
10.0F     float
1.0E+1F   float
1.E+1     float

The string is required to have all letters (hex digits, number type specifiers) uppercase. If a number is to contain lowercase letters, it has to be converted before calling the function.

The function returns the number type in eax:

  • 0 if the number is invalid,
  • 1 if the number is a dword integer,
  • 2 if the number is a qword integer and
  • 3 if the number is a float.

The number is returned in edx (dword), ecx:edx (qword) or st(0) (float). The number will be a qword integer if it exceeds the 0xFFFFFFFF boundary. Also notice that the number is assumed to be positive: a '-' before the number is not accepted and has to be handled externally.

Floating point conversion is done using multiplication, not by means of the fbld instruction. This is because the fbld instruction limits numbers to 19 characters, whereas the function can accept longer numbers as long as their value is not too large or too small.

And here is the function. It was written (and tested) in TASM's ideal mode, but it can be easily ported to MASM or NASM. The function preserves all registers but eax, ecx and edx, which are used for return value.

; chkfpu - leave through the given label when st(0) is not a usable value.
; fxam classifies st(0) into the FPU condition bits; fstsw/sahf then copy
; condition bit C0 into the carry flag, so the jump is taken whenever fxam
; set C0 (the classes NaN, infinity and empty register).
; Clobbers ax and the CPU flags.

macro     chkfpu on_error
          fxam                          ; classify st(0)
          fstsw   ax                    ; status word -> ax
          sahf                          ; C0 -> CF
          jc      on_error
endm

proc ConvertNumber uses edi

;-----------------------------------------------------------------------
; ConvertNumber - convert a zero-terminated numeric string to a number
;
; Syntax: TASM ideal mode, 32-bit code.
; In:     esi -> zero-terminated string, all letters uppercase
; Out:    eax = 0  invalid number (FPU reinitialised with fninit)
;         eax = 1  dword integer in edx (ecx = 0)
;         eax = 2  qword integer in ecx:edx
;         eax = 3  float in st(0)
; Clobb:  eax, ecx, edx, flags; the x87 state on the float and invalid
;         paths. edi is saved and restored through the 'uses' clause.
; Note:   a trailing type suffix (H, B, D, Q, O, F) is overwritten with
;         a zero byte in the caller's buffer.
;-----------------------------------------------------------------------

;---------------- Identify number format

                    ; Search for the terminating zero
                    mov     edi, esi
                    or      ecx, -1
                    xor     eax, eax
                    cld
                    repne scasb                 ; ecx = -(length + 2)
                    ; Move back to the last character
                    dec     edi
                    dec     edi
                    ; Is there anything at all?
                    cmp     esi, edi
                    ja      __invalid
                    ; Identify C-style and Pascal-style hexadecimals
                    cmp     [byte esi+1], 'X'
                    je      _chex
                    cmp     [byte esi], '$'
                    je      __pas_hex
                    ; Identify other types using the last character
                    movzx   eax, [byte edi]
                    cmp     eax, 'H'
                    je      __asm_hex
                    cmp     eax, 'B'
                    je      __binary
                    cmp     eax, 'D'
                    je      __decimal
                    cmp     eax, 'Q'
                    je      __octal
                    cmp     eax, 'O'
                    je      __octal
                    cmp     eax, 'F'
                    je      __float_clr
                    ; No suffix: a decimal point distinguishes float from integer
                    not     ecx
                    dec     ecx                 ; ecx = string length
                    mov     eax, '.'
                    mov     edi, esi
                    repne scasb
                    je      __float
                    ; Not found: edi now points at the terminator, fall through

;---------------- Process decimal integer

                    ; Prepare: drop the 'D' suffix (or rewrite the terminator)
__decimal:          mov     [byte edi], 0
                    mov     edi, esi
                    xor     eax, eax
                    ; Get a digit
__next_decimal:     movzx   ecx, [byte edi]
                    inc     edi
                    xor     edx, edx
                    ; Zero ends the string (edx = 0 marks a dword result)
                    test    ecx, ecx
                    jz      __finito
                    ; Multiply the already loaded part by ten
                    add     edx, 10
                    mul     edx
                    ; If an overflow occurs - the number is a quadword
                    jo      __decimal_qword
                    ; Check digit validity
                    sub     ecx, '0'
                    jc      __invalid
                    cmp     ecx, 9
                    ja      __invalid
                    ; Add the digit
                    add     eax, ecx
                    ; Next digit, or continue as a quadword if carry occurs
                    jnc     __next_decimal
                    jmp     __decimal_carry

;---------------- Decimal (appears to be greater than 0FFFF_FFFFh)

                    ; Check digit validity
__decimal_qword:    sub     ecx, '0'
                    jc      __invalid
                    cmp     ecx, 9
                    ja      __invalid
                    ; Add the digit (qword addition)
                    add     eax, ecx
__decimal_carry:    adc     edx, 0
                    ; A carry out of the high dword means the value does
                    ; not fit in 64 bits
                    jc      __invalid
                    ; Load next digit
                    movzx   ecx, [byte edi]
                    inc     edi
                    ; Check for ending zero
                    test    ecx, ecx
                    jz      __finito
                    ; Multiply high part by 10 (low part parked on the stack)
                    push    eax
                    mov     eax, edx
                    mov     edx, 10
                    mul     edx
                    ; Number too large if an overflow occurs
                    jo      __decimal_overflow
                    ; Multiply low part by 10 ([esp] now holds high*10)
                    xchg    eax, [esp]
                    mov     edx, 10
                    mul     edx
                    ; Join high parts
                    add     edx, [esp]
                    ; Number too large if carry
                    jc      __decimal_overflow
                    ; Next digit
                    add     esp, 4
                    jmp     __decimal_qword
                    ; Handle overflow: discard the parked dword first
__decimal_overflow: pop     eax
                    jmp     __invalid

;---------------- Process hexadecimal integer

                    ; Was Pascal-style hex (leading '$')
__pas_hex:          lea     edi, [esi+1]
                    jmp     __hex
                    ; Was C-style hex (leading '0X')
_chex:              cmp     [byte esi], '0'
                    jne     __invalid
                    lea     edi, [esi+2]
                    jmp     __hex
                    ; Was asm-style hex (ending with 'H'): drop the suffix
__asm_hex:          mov     [byte edi], 0
                    mov     edi, esi
                    ; Clear what will become the number (edx:eax)
__hex:              xor     eax, eax
                    xor     edx, edx
                    ; Get a digit
__get_hex:          movzx   ecx, [byte edi]
                    inc     edi
                    ; Zero ends the string
                    test    ecx, ecx
                    jz      __finito
                    ; Number too large if the most significant nibble of edx
                    ; is nonzero
                    cmp     edx, 0FFFFFFFh
                    ja      __invalid
                    ; Multiply the already converted part by 16
                    shld    edx, eax, 4
                    add     eax, eax            ; *2 here, *8 in the lea below
                    ; Convert ASCII to digit
                    sub     ecx, '0'
                    jc      __invalid
                    cmp     ecx, 9
                    jna     __hex_ok
                    sub     ecx, 7              ; 'A'-'0'-7 = 10
                    cmp     ecx, 9
                    jna     __invalid           ; characters between '9' and 'A'
                    cmp     ecx, 15
                    ja      __invalid
                    ; Add the digit
__hex_ok:           lea     eax, [eax*8+ecx]
                    jmp     __get_hex

;---------------- Return integer

                    ; edx:eax holds the value; return it as ecx:edx and set
                    ; eax = 1 when the high dword is zero, otherwise 2
__finito:           mov     ecx, edx
                    mov     edx, eax
                    cmp     ecx, 1              ; CF = 1 only when ecx = 0
                    sbb     eax, eax            ; eax = -CF
                    add     eax, 2
                    ret

;---------------- Process binary integer

                    ; Prepare: drop the 'B' suffix
__binary:           mov     [byte edi], 0
                    xor     eax, eax
                    xor     edx, edx
                    mov     edi, esi
                    ; Get a digit
__get_binary:       movzx   ecx, [byte edi]
                    inc     edi
                    ; Zero ends the string
                    test    ecx, ecx
                    jz      __finito
                    ; Shift everything left and add the digit
                    shr     ecx, 1              ; CF = low bit of the character
                    adc     eax, eax
                    adc     edx, edx
                    jc      __invalid           ; does not fit in 64 bits
                    ; Check digit validity and get next digit if OK
                    cmp     ecx, '0' shr 1
                    jne     __invalid
                    jmp     __get_binary

;---------------- Process octal integer

                    ; Prepare: drop the 'Q'/'O' suffix
__octal:            mov     [byte edi], 0
                    xor     eax, eax
                    xor     edx, edx
                    mov     edi, esi
                    ; Get a digit
__get_octal:        movzx   ecx, [byte edi]
                    inc     edi
                    ; Zero ends the string
                    test    ecx, ecx
                    jz      __finito
                    ; Check if there is room for another digit
                    cmp     edx, 1FFFFFFFh
                    ja      __invalid
                    ; Multiply the already converted part by 8
                    shld    edx, eax, 3
                    ; Convert ASCII to number
                    sub     ecx, '0'
                    jc      __invalid
                    cmp     ecx, 7
                    ja      __invalid
                    ; Add the digit
                    lea     eax, [eax*8+ecx]
                    jmp     __get_octal

;---------------- Invalid number

__invalid:          fninit
                    xor     eax, eax
                    ret

;---------------- Process integer part of a float

                    ; Drop the 'F' suffix
__float_clr:        mov     [byte edi], 0
                    ; Prepare (st0=0, st1=10)
__float:            finit
                    ; Control word 033Fh: all six exception mask bits set,
                    ; 64-bit precision. With exceptions masked, bad results
                    ; become infinity/NaN, which chkfpu then detects.
                    push    033Fh
                    fldcw   [word esp]
                    push    10
                    fild    [dword esp]
                    add     esp, 8
                    fldz
                    mov     edi, esi
                    ; Get a digit
__get_integer:      movzx   ecx, [byte edi]
                    inc     edi
                    ; Zero ends the string
                    test    ecx, ecx
                    jz      __float_ready
                    ; Decimal point starts the fraction part
                    cmp     ecx, '.'
                    je      __float_fraction
                    ; Multiply the already converted part by 10
                    fmul    st, st(1)
                    chkfpu  __invalid
                    ; Convert ASCII to number
                    sub     ecx, '0'
                    jc      __invalid
                    cmp     ecx, 9
                    ja      __invalid
                    ; Add the digit
                    push    ecx
                    fiadd   [dword esp]
                    add     esp, 4
                    chkfpu  __invalid
                    jmp     __get_integer

;---------------- Process fractional part of a float

                    ; Prepare (st0=0, st1=1, st2=num, st3=10)
__float_fraction:   fld1
                    fldz
                    ; Get a digit
__get_fraction:     movzx   ecx, [byte edi]
                    inc     edi
                    ; Zero ends the string
                    test    ecx, ecx
                    jz      __fraction_ready
                    ; E starts exponent
                    cmp     ecx, 'E'
                    je      __fraction_ready
                    ; Multiply the already converted part by 10
                    fmul    st, st(3)
                    ; Multiply the divisor by 10
                    fxch    st(1)
                    fmul    st, st(3)
                    fxch    st(1)
                    ; Check both the fraction and the divisor
                    chkfpu  __invalid
                    fxch    st(1)
                    chkfpu  __invalid
                    fxch    st(1)
                    ; Convert ASCII to number
                    sub     ecx, '0'
                    jc      __invalid
                    cmp     ecx, 9
                    ja      __invalid
                    ; Add the digit
                    push    ecx
                    fiadd   [dword esp]
                    add     esp, 4
                    chkfpu  __invalid
                    jmp     __get_fraction

;---------------- Process exponent part of a float

                    ; Divide the fraction by the divisor
__fraction_ready:   fdivrp  st(1), st
                    ; Add fraction to integer (st0=num, st1=10)
                    faddp   st(1), st
                    ; E indicates start of exponent
                    cmp     ecx, 'E'
                    jne     __float_ready
                    ; Prepare (st0=0, st1=num, st2=10)
                    fldz
                    ; Sign of the exponent: edx = 0 positive, -1 negative
                    xor     edx, edx
                    cmp     [byte edi], '-'
                    jne     __no_minus
                    not     edx
                    inc     edi
                    ; Only one sign character is allowed
                    jmp     __get_exponent
__no_minus:         cmp     [byte edi], '+'
                    jne     __get_exponent
                    inc     edi
                    ; Get a digit
__get_exponent:     movzx   ecx, [byte edi]
                    inc     edi
                    ; Zero ends the string
                    test    ecx, ecx
                    jz      __exponent_ready
                    ; Multiply the already converted part by 10
                    fmul    st, st(2)
                    chkfpu  __invalid
                    ; Convert ASCII to number
                    sub     ecx, '0'
                    jc      __invalid
                    cmp     ecx, 9
                    ja      __invalid
                    ; Add the digit
                    push    ecx
                    fiadd   [dword esp]
                    add     esp, 4
                    chkfpu  __invalid
                    jmp     __get_exponent
                    ; Multiply by 10**exp (** is a power operation),
                    ; computed as 10**x = 2**(x*log2(10))
__exponent_ready:   test    edx, edx
                    jz      __positive_exp
                    fchs
__positive_exp:     fldl2t                      ; st0 = log2(10)
                    fmulp   st(1), st           ; x = exp * log2(10)
                    fld     st                  ; duplicate x
                    frndint                     ; st0 = int(x)
                    fsub    st(1), st           ; st1 = x - int(x)
                    fld1
                    fscale                      ; st0 = 2**int(x)
                    fstp    st(1)               ; drop int(x)
                    fxch    st(1)               ; st0 = x - int(x)
                    f2xm1                       ; st0 = 2**(x - int(x)) - 1
                    fld1
                    faddp   st(1), st           ; st0 = 2**(x - int(x))
                    fmulp   st(1), st           ; st0 = 2**x = 10**exp
                    fmulp   st(1), st           ; st0 = num * 10**exp
                    ; Return float (st0=result, st1=10)
__float_ready:      chkfpu  __invalid
                    fstp    st(1)               ; drop the constant 10
                    mov     eax, 3
                    ret

endp

And that is it. The function is not meant to work as fast as possible and was not optimized, but it does the task it has to do.