3076 Assignment 1: Designing a Float8 type

In this assignment you will construct a Float8 type, containing one field data, an 8-bit UInt8:

In [18]:
import Base: show, bits, exponent, significand, +, -, *, /, Float64

# Float8: an 8-bit floating-point number stored in a single byte.
#
# Reading `data` from the most significant bit down: 1 sign bit, 3 exponent
# bits holding the unsigned integer q, and 4 significand bits b1..b4, which
# together represent ±2^(q-S) * (1.b1b2b3b4)_2.
#
# Declared `immutable` rather than `type`: a number is a plain value, and with
# a mutable `type` the default `==` falls back to object identity, so two
# Float8s built from the same byte would not compare equal.
immutable Float8
    data::UInt8
end

For this type, we will interpret the 8 bits in data as: 1 sign bit, 3 exponent bits and 4 significand bits. That is, numbers will be represented by

$$x = \pm 2^{q-S}*(1.b_1b_2b_3b_4)_2$$

where $S = 3$, $q$ is an unsigned 3-bit integer in the 2nd through 4th bits and $b_1b_2b_3b_4$ are in the last 4 bits.

We will not implement Inf, NaN or subnormal numbers. We will however implement both $±0$.

We will use a globally defined constant S to represent $S$:

In [19]:
# Exponent shift: a stored 3-bit exponent q encodes the power q - S.
# Written as a hexadecimal literal, which is a UInt8.
const S = 0x03
Out[19]:
0x03

We will use the bits function to get access to the bits as a string. The following function defines bits for a Float8:

In [20]:
# Bit string of a Float8: the bit string of its underlying `data` byte,
# so character 1 is the sign bit and characters 5 to 8 are the significand bits.
bits(x::Float8) = bits(x.data)
Out[20]:
bits (generic function with 6 methods)

Exercise 1

(a) Complete the following exponent function, that returns $q-S$ (as an integer).

(b) Check that

exponent(Float8(UInt8(123)))

returns 4.

In [28]:
# Scratch check: parse an 8-character string of binary ones as a UInt8.
print(parse(UInt8,"11111111",2))
255
In [30]:
# Scratch check: parse a minus sign followed by seven binary ones as an Int8.
print(parse(Int8,"-1111111",2))
-127
In [37]:
# Return the unbiased exponent q - S of `x` as a signed Int, where q is the
# unsigned integer stored in bits 2 through 4 of `x`.
function exponent(x::Float8)
    q = parse(Int, bits(x)[2:4], 2)
    # Convert S explicitly so the subtraction is carried out in Int. A mixed
    # Int8 - UInt8 subtraction depends on the integer promotion rules of the
    # Julia version in use; where it promotes to UInt8 the result wraps around
    # instead of going negative whenever q < S.
    return q - Int(S)
end


# Spot checks of exponent(::Float8) on hand-written bit patterns; the trailing
# comment on each line is the expected value.
@show exponent(Float8(parse(UInt8,"11111111",2)))  # 4
@show exponent(Float8(parse(UInt8,"10011111",2)))  # -2
@show exponent(Float8(parse(UInt8,"11101101",2)))  # 3
@show exponent(Float8(parse(UInt8,"01110000",2)))  # 4
exponent(Float8(parse(UInt8,"11111111",2))) = 4
exponent(Float8(parse(UInt8,"10011111",2))) = -2
exponent(Float8(parse(UInt8,"11101101",2))) = 3
exponent(Float8(parse(UInt8,"01110000",2))) = 4
Out[37]:
4

Exercise 2

(a) Complete the following significand function, that returns the significand of a Float8. Don't forget to incorporate the sign bit.

(b) Add comments explaining the purpose of the first 4 lines.

(c) Check that

significand(Float8(UInt8(123)))

returns 1.6875.

In [41]:
# Return the signed significand of `x` as a Float64.
#
# The bytes 0 and 128 are reserved for +0 and -0 and are answered directly.
# For every other byte the result is ±(1.b1b2b3b4)_2, with the sign taken from
# the first bit and b1..b4 from the last four bits.
function significand(x::Float8)
    # Reserved encodings of the two signed zeros.
    x.data == 0 && return 0.0
    x.data == 128 && return -0.0

    bitstr = bits(x)
    sgn = bitstr[1] == '1' ? -1 : 1
    # Prepend the implicit leading 1 and read the five bits as an integer, then
    # scale by 2^-4 so the binary point sits right after that leading 1.
    mantissa = parse(Int, "1" * bitstr[5:8], 2)
    return sgn * 2.0^(-4) * mantissa
end

# Spot checks of significand(::Float8) on hand-written bit patterns; the
# trailing comment on each line is the expected value.
@show significand(Float8(parse(UInt8,"11111111",2)))  #-1.9375
@show significand(Float8(parse(UInt8,"00011011",2)))  #1.6875
@show significand(Float8(parse(UInt8,"11101101",2)))  #-1.8125
@show significand(Float8(parse(UInt8,"01110000",2)))  #1.0
significand(Float8(parse(UInt8,"11111111",2))) = -1.9375
significand(Float8(parse(UInt8,"00011011",2))) = 1.6875
significand(Float8(parse(UInt8,"11101101",2))) = -1.8125
significand(Float8(parse(UInt8,"01110000",2))) = 1.0
Out[41]:
1.0

Exercise 3

(a) Use exponent and significand to complete the definition of Float64(x::Float8), which converts a Float8 to a Float64

(b) Check that

Float8(UInt8(123))

now displays as 27.0f8.

In [42]:
# Convert a Float8 to the Float64 with the same value, computed as
# 2^exponent(x) * significand(x).
function Float64(x::Float8)
    scale = 2.0^exponent(x)
    return scale * significand(x)
end


# Display a Float8 as its Float64 value immediately followed by the suffix "f8".
function show(io::IO, x::Float8)
    print(io, Float64(x), "f8")
end


# Spot checks of Float64(::Float8) on hand-written bit patterns; the trailing
# comment on each line is the expected value.
@show Float64(Float8(parse(UInt8,"11111111",2)))  #-31.0
@show Float64(Float8(parse(UInt8,"00011011",2)))  #0.421875
@show Float64(Float8(parse(UInt8,"11101101",2)))  #-14.5
@show Float64(Float8(parse(UInt8,"01110000",2)))  #16.0
Float64(Float8(parse(UInt8,"11111111",2))) = -31.0
Float64(Float8(parse(UInt8,"00011011",2))) = 0.421875
Float64(Float8(parse(UInt8,"11101101",2))) = -14.5
Float64(Float8(parse(UInt8,"01110000",2))) = 16.0
Out[42]:
16.0

Exercise 4

(a) Complete the following chop_to_8_bits function that returns a string for normal numbers containing the 8-bits for the Float8 representation. For this question, you can simply chop the significand bits of a Float64. (Recall that a Float64 has 1 sign bit, 11 exponent bits and 52 significand bits.)

(b) Add comments explaining the definition of Float8(::Float64).

(c) Check that

Float8(1.25)

returns 1.25f8.

(d) Explain why

Float8(1.3)

returns the same number.

In [58]:
# Return the 8-character bit string (1 sign bit, 3 exponent bits, 4 significand
# bits) of the Float8 obtained from `x` by chopping: only the first 4 of the
# Float64 significand bits are kept and the rest are discarded.
#
# Throws an ArgumentError when the exponent of `x` does not fit the 3-bit
# exponent field. Without this check only the low 3 bits of q are kept, so an
# out-of-range value silently turns into a number with an unrelated exponent.
#
# NOTE: a result with q = 0 and significand bits 0000 is the same byte that
# significand(::Float8) treats as +0 / -0, so values that chop to ±0.125 read
# back as ±0.
function chop_to_8_bits(x::Float64)
    # signbit inspects the sign bit directly.
    signstr = signbit(x) ? "1" : "0"

    # Biased exponent to store; Int(S) keeps the arithmetic in Int.
    q = exponent(x) + Int(S)
    if q < 0 || q > 7
        throw(ArgumentError("exponent of $x does not fit the 3-bit Float8 exponent field"))
    end
    expstr = bits(q)[end-2:end]  # low 3 bits of q

    # Float64 layout: bit 1 is the sign, bits 2-12 the exponent, and the
    # significand starts at bit 13; keep its first 4 bits.
    sigstr = bits(x)[13:16]

    return signstr * expstr * sigstr
end




# Construct a Float8 from a Float64.
#
# The two signed zeros are mapped by hand to their reserved bytes (0 for +0.0,
# 128 for -0.0). `===` is used for the comparison because `0.0 == -0.0` is true
# and would not tell the two zeros apart. Any other value is encoded by
# chop_to_8_bits, and the resulting bit string is parsed back into the byte.
function Float8(x::Float64)
    x === 0.0 && return Float8(UInt8(0))
    x === -0.0 && return Float8(UInt8(128))
    return Float8(parse(UInt8, chop_to_8_bits(x), 2))
end


# Spot checks of Float8(::Float64); the trailing comment on each line is the
# expected displayed value after chopping.
@show Float8(1.32)   # 1.3125f8
@show Float8(10.32)  # 10.0f8
@show Float8(12.5)   # 12.5f8
@show Float8(-20.5)  # -20.0f8
Float8(1.32) = 1.3125f8
Float8(10.32) = 10.0f8
Float8(12.5) = 12.5f8
Float8(-20.5) = -20.0f8
Out[58]:
-20.0f8

Exercise 5

Complete the following function that negates a Float8:

In [62]:
# Negate a Float8 by flipping its sign bit (the first character of the bit
# string) while leaving the exponent and significand bits untouched.
function -(x::Float8)
    bitstr = bits(x)
    flipped = bitstr[1] == '1' ? "0" : "1"
    return Float8(parse(UInt8, flipped * bitstr[2:end], 2))
end

# Check unary minus on the Float8 built from 25.0.
-(Float8(25.0))
Out[62]:
-25.0f8

Exercise 6

(a) Complete the following algebra operations, ensuring that each one returns a Float8. You can use Float64(x::Float8) and Float8(x::Float64) to use the inbuilt Float64 arithmetic.

(b) Check that

Float8(1.25)+Float8(2.25)

returns 3.5f8

In [66]:
# Add two Float8s: the sum is formed in Float64 arithmetic and converted back
# with Float8(::Float64).
function +(x::Float8, y::Float8)
    total = Float64(x) + Float64(y)
    return Float8(total)
end

# Multiply two Float8s: the product is formed in Float64 arithmetic and
# converted back with Float8(::Float64).
function *(x::Float8, y::Float8)
    product = Float64(x) * Float64(y)
    return Float8(product)
end

# Divide two Float8s: the quotient is formed in Float64 arithmetic and
# converted back with Float8(::Float64).
function /(x::Float8, y::Float8)
    quotient = Float64(x) / Float64(y)
    return Float8(quotient)
end

# Subtract two Float8s: the difference is formed in Float64 arithmetic and
# converted back with Float8(::Float64).
function -(x::Float8, y::Float8)
    difference = Float64(x) - Float64(y)
    return Float8(difference)
end

# Check addition: 1.3 and 15.0 are each converted to Float8 and then summed.
Float8(1.3)+Float8(15.)
Out[66]:
16.0f8

Exercise 7

(a) Implement the following routine round_to_8_bits that rounds to the nearest Float8, rather than chops.

(b) Check that

Float8(parse(UInt8,round_to_8_bits(1.3),2))

returns 1.3125f8.

In [73]:
# Return the 8-character bit string (1 sign bit, 3 exponent bits, 4 significand
# bits) of the Float8 nearest to `x`.
#
# The first 4 Float64 significand bits are kept and incremented when the 48
# discarded bits amount to strictly more than half a unit in the last kept
# place. An exact tie is not rounded up, i.e. it is chopped.
#
# Throws an ArgumentError when the exponent, after rounding, does not fit the
# 3-bit exponent field.
function round_to_8_bits(x::Float64)
    # signbit inspects the sign bit directly.
    signstr = signbit(x) ? "1" : "0"

    # Biased exponent to store; Int(S) keeps the arithmetic in Int.
    q = exponent(x) + Int(S)

    # Float64 layout: bit 1 is the sign, bits 2-12 the exponent, and the
    # significand starts at bit 13.
    xbits = bits(x)
    sig = parse(Int, xbits[13:16], 2)          # the 4 significand bits we keep
    discarded = parse(UInt64, xbits[17:end], 2)  # the 48 bits we drop

    # 2^47 is exactly half of the 48 discarded bits' range.
    if discarded > 2^47
        sig += 1
    end

    # Rounding 1.1111 up gives 10.0000 = 2 * 1.0000: the fraction bits become
    # 0000 and the exponent grows by one. The exponent string must therefore be
    # built only after this carry has been applied.
    if sig == 16
        sig = 0
        q += 1
    end

    if q < 0 || q > 7
        throw(ArgumentError("exponent of $x does not fit the 3-bit Float8 exponent field"))
    end

    return signstr * bits(q)[end-2:end] * bits(sig)[end-3:end]
end


# Spot checks of round_to_8_bits: each result string is parsed back into a
# byte and wrapped in a Float8; the trailing comment is the expected display.
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^5),2))        # 1.0f8
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^6),2))        # 1.0f8
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^5+1/2^6),2))  # 1.0625f8

# Inputs scaled by 3 and 4, so the exponent differs from the checks above.
@show Float8(parse(UInt8,round_to_8_bits(3*(1.0+1/2^5)),2))    # 3.125f8
@show Float8(parse(UInt8,round_to_8_bits(4*(1.0+1/2^6)),2))    # 4.0f8
# NOTE: the next line repeats the third check above unchanged.
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^5+1/2^6),2))  # 1.0625f8
Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 5),2)) = 1.0f8
Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 6),2)) = 1.0f8
Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 5 + 1 / 2 ^ 6),2)) = 1.0625f8
Float8(parse(UInt8,round_to_8_bits(3 * (1.0 + 1 / 2 ^ 5)),2)) = 3.125f8
Float8(parse(UInt8,round_to_8_bits(4 * (1.0 + 1 / 2 ^ 6)),2)) = 4.0f8
Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 5 + 1 / 2 ^ 6),2)) = 1.0625f8
Out[73]:
1.0625f8
In [ ]: