# 3076 Assignment 1: Designing a Float8 type

In this assignment you will construct a Float8 type, containing one field data, an 8-bit UInt8:

In [18]:
import Base: show, bits, exponent, significand, +, -, *, /, Float64

# An 8-bit floating-point number stored in a single byte.
# `data` holds, from most to least significant bit: 1 sign bit, 3 exponent
# bits and 4 significand bits.
# Declared `immutable` so that a Float8 behaves as a plain value: two Float8s
# holding the same byte compare equal, which a mutable `type` does not give
# by default (mutable objects are compared by identity).
immutable Float8
    data::UInt8
end


For this type, we will interpret the 8 bits in data as: 1 sign bit, 3 exponent bits and 4 significand bits. That is, numbers will be represented by

$$x = \pm 2^{q-S}*(1.b_1b_2b_3b_4)_2$$

where $S = 3$, $q$ is an unsigned 3-bit integer in the 2nd through 4th bits and $b_1b_2b_3b_4$ are in the last 4 bits.

We will not implement Inf, NaN or subnormal numbers. We will however implement both $±0$.

We will use a globally defined constant S to represent $S$:

In [19]:
# Exponent shift: a stored 3-bit exponent field q represents the power q - S.
const S = 0x03

Out[19]:
0x03

We will use the bits function to get access to the individual bits. The following definition extends bits to a Float8:

In [20]:
# Bit string of a Float8: simply the bit string of its underlying byte.
bits(x::Float8) = bits(x.data)

Out[20]:
bits (generic function with 6 methods)

Exercise 1

(a) Complete the following exponent function, that returns $q-S$ (as an integer).

(b) Check that

exponent(Float8(UInt8(123)))


returns 4.

In [28]:
# Scratch check: eight 1-bits parsed in base 2 as a UInt8 print as 255.
parse(UInt8,"11111111",2) |> print

255
In [30]:
# Scratch check: a leading minus sign is accepted when parsing as Int8; prints -127.
parse(Int8,"-1111111",2) |> print

-127
In [37]:
# Unbiased exponent of a Float8: the 3-bit field q (bits 2-4, counted from the
# most significant bit) minus the shift S, returned as an Int.
function exponent(x::Float8)
    # Shift the four significand bits away, then mask off the sign bit.
    q = (x.data >> 4) & 0x07
    # Widen both operands to Int before subtracting so that q < S yields a
    # negative number rather than depending on how small signed and unsigned
    # integers are promoted.
    return Int(q) - Int(S)
end

# Expected results: the exponent field q is bits 2-4 and the value shown is q - S (S = 3).
@show exponent(Float8(parse(UInt8,"11111111",2)))  # 4   (q = 111 = 7)
@show exponent(Float8(parse(UInt8,"10011111",2)))  # -2  (q = 001 = 1)
@show exponent(Float8(parse(UInt8,"11101101",2)))  # 3   (q = 110 = 6)
@show exponent(Float8(parse(UInt8,"01110000",2)))  # 4   (q = 111 = 7)

exponent(Float8(parse(UInt8,"11111111",2))) = 4
exponent(Float8(parse(UInt8,"10011111",2))) = -2
exponent(Float8(parse(UInt8,"11101101",2))) = 3
exponent(Float8(parse(UInt8,"01110000",2))) = 4

Out[37]:
4

Exercise 2

(a) Complete the following significand function, that returns the significand of a Float8. Don't forget to incorporate the sign bit.

(b) Check that

significand(Float8(UInt8(123)))


returns 1.6875.

In [41]:
# Signed significand of a Float8 as a Float64.
# For non-zero values this is ±(1.b1b2b3b4) in binary, i.e. a magnitude in
# [1, 2) built from the low four bits, negated when the sign bit is set.
# The two bytes 0 and 128 are reserved for +0.0 and -0.0 respectively.
function significand(x::Float8)
    raw = x.data
    raw == 0x00 && return 0.0
    raw == 0x80 && return -0.0

    # Implicit leading 1 (worth 16/16) plus the four stored fraction bits.
    magnitude = (16 + Int(raw & 0x0f)) / 16
    return (raw & 0x80) == 0 ? magnitude : -magnitude
end

# Expected results: ±(1.b1b2b3b4) in binary, negative when the leading (sign) bit is 1.
@show significand(Float8(parse(UInt8,"11111111",2)))  # -1.9375  (sign 1, 1.1111)
@show significand(Float8(parse(UInt8,"00011011",2)))  # 1.6875   (sign 0, 1.1011)
@show significand(Float8(parse(UInt8,"11101101",2)))  # -1.8125  (sign 1, 1.1101)
@show significand(Float8(parse(UInt8,"01110000",2)))  # 1.0      (sign 0, 1.0000)

significand(Float8(parse(UInt8,"11111111",2))) = -1.9375
significand(Float8(parse(UInt8,"00011011",2))) = 1.6875
significand(Float8(parse(UInt8,"11101101",2))) = -1.8125
significand(Float8(parse(UInt8,"01110000",2))) = 1.0

Out[41]:
1.0

Exercise 3

(a) Use exponent and significand to complete the definition of Float64(x::Float8), which converts a Float8 to a Float64

(b) Check that

Float8(UInt8(123))


now displays as 27.0f8.

In [42]:
# Convert a Float8 to the Float64 it represents: the signed significand
# scaled by 2 raised to the unbiased exponent.
Float64(x::Float8) = significand(x) * 2.0^exponent(x)

# Display a Float8 as its Float64 value followed by the suffix "f8".
show(io::IO, x::Float8) = print(io, Float64(x), "f8")

# Expected results: significand * 2^(q - S) for each bit pattern.
@show Float64(Float8(parse(UInt8,"11111111",2)))  # -31.0     (-1.9375 * 2^4)
@show Float64(Float8(parse(UInt8,"00011011",2)))  # 0.421875  (1.6875 * 2^-2)
@show Float64(Float8(parse(UInt8,"11101101",2)))  # -14.5     (-1.8125 * 2^3)
@show Float64(Float8(parse(UInt8,"01110000",2)))  # 16.0      (1.0 * 2^4)

Float64(Float8(parse(UInt8,"11111111",2))) = -31.0
Float64(Float8(parse(UInt8,"00011011",2))) = 0.421875
Float64(Float8(parse(UInt8,"11101101",2))) = -14.5
Float64(Float8(parse(UInt8,"01110000",2))) = 16.0

Out[42]:
16.0

Exercise 4

(a) Complete the following chop_to_8_bits function that, for normal numbers, returns a string containing the 8 bits of the Float8 representation. For this question, you can simply chop the significand bits of a Float64. (Recall that a Float64 has 1 sign bit, 11 exponent bits and 52 significand bits.)

(b) Add comments explaining the definition of Float8(::Float64).

(c) Check that

Float8(1.25)


returns 1.25f8.

(d) Explain why

Float8(1.3)


returns the same number.

In [58]:
# Build the 8-character bit string of the Float8 obtained by chopping
# (truncating) the significand of a normal Float64 `x` down to 4 bits.
# Layout of the result: 1 sign bit, 3 bits holding q = exponent(x) + S, then
# the first 4 stored significand bits of `x`.
# Throws an ArgumentError when q does not fit in the 3-bit exponent field.
function chop_to_8_bits(x::Float64)
    sgn = x ≥ 0 ? "0" : "1"

    # Biased exponent; it must be written into the result, otherwise the
    # string is only 5 characters long and decodes with the wrong exponent.
    q = exponent(x) + S
    0 ≤ q ≤ 7 || throw(ArgumentError("$x is outside the range representable by Float8"))

    # bits(x) is 1 sign bit, 11 exponent bits, then the significand from index 13.
    return sgn * bits(UInt8(q))[end-2:end] * bits(x)[13:16]
end

# Convert a Float64 to a Float8.
# The signed zeros are handled first because they have dedicated byte patterns
# (0 for +0.0, 128 for -0.0); `===` is used so that 0.0 and -0.0 are told apart.
# Every other value is turned into an 8-character bit string by
# chop_to_8_bits, which is then parsed in base 2 into the underlying byte.
function Float8(x::Float64)
    x === 0.0 && return Float8(UInt8(0))
    x === -0.0 && return Float8(UInt8(128))
    return Float8(parse(UInt8, chop_to_8_bits(x), 2))
end

# Expected results: chopping keeps only the first four significand bits, so
# each value is truncated toward zero to the nearest representable Float8.
@show Float8(1.32)   # 1.3125f8
@show Float8(10.32)  # 10.0f8
@show Float8(12.5)   # 12.5f8  (exactly representable)
@show Float8(-20.5)  # -20.0f8

Float8(1.32) = 1.3125f8
Float8(10.32) = 10.0f8
Float8(12.5) = 12.5f8
Float8(-20.5) = -20.0f8

Out[58]:
-20.0f8

Exercise 5

Complete the following function that negates a Float8:

In [62]:
# Negate a Float8 by flipping its sign bit (the most significant bit of the
# byte); the exponent and significand bits are left untouched.
function -(x::Float8)
    raw = x.data
    flipped = (raw & 0x80) == 0 ? (raw | 0x80) : (raw & 0x7f)
    return Float8(flipped)
end

# Negation demo: the result is displayed as -25.0f8.
-Float8(25.0)

Out[62]:
-25.0f8

Exercise 6

(a) Complete the following algebra operations, ensuring that each one returns a Float8. You can use Float64(x::Float8) and Float8(x::Float64) to use the inbuilt Float64 arithmetic.

(b) Check that

Float8(1.25)+Float8(2.25)


returns 3.5f8

In [66]:
# Add two Float8s: compute the sum in Float64 and convert it back to Float8.
+(x::Float8, y::Float8) = Float8(Float64(x) + Float64(y))

# Multiply two Float8s: compute the product in Float64 and convert it back to Float8.
*(x::Float8, y::Float8) = Float8(Float64(x) * Float64(y))

# Divide two Float8s: compute the quotient in Float64 and convert it back to Float8.
/(x::Float8, y::Float8) = Float8(Float64(x) / Float64(y))

# Subtract two Float8s: compute the difference in Float64 and convert it back to Float8.
-(x::Float8, y::Float8) = Float8(Float64(x) - Float64(y))

# Addition demo: the result is displayed as 16.0f8.
Float8(1.3) + Float8(15.0)

Out[66]:
16.0f8

Exercise 7

(a) Implement the following routine round_to_8_bits that rounds to the nearest Float8, rather than chops.

(b) Check that

Float8(parse(UInt8,round_to_8_bits(1.3),2))


returns 1.3125f8.

In [73]:
# Build the 8-character bit string of the Float8 nearest to a normal Float64 `x`.
# Layout of the result: 1 sign bit, 3 bits holding q = exponent(x) + S, then
# 4 significand bits obtained by rounding the 52 stored significand bits of `x`.
# A value exactly halfway between two Float8s is not rounded up (the magnitude
# is chopped), because the comparison below is strict.
# Throws an ArgumentError when q, after rounding, does not fit in 3 bits.
function round_to_8_bits(x::Float64)
    sgn = x ≥ 0 ? "0" : "1"
    b = bits(x)

    q = exponent(x) + S
    # First four stored significand bits (the leading 1 is implicit).
    sig = parse(Int, b[13:16], 2)

    # The 48 discarded bits; 2^47 corresponds to exactly half a unit in the last kept place.
    if parse(UInt64, b[17:end], 2) > 2^47
        sig += 1
    end

    # Carry out of the fraction: 1.1111 rounded up is 10.0000 = 1.0000 * 2,
    # so the fraction bits become zero and the exponent grows by one.
    if sig ≥ 2^4
        q += 1
        sig = 0
    end

    0 ≤ q ≤ 7 || throw(ArgumentError("$x is outside the range representable by Float8"))

    return sgn * bits(UInt8(q))[end-2:end] * bits(UInt8(sig))[end-3:end]
end

# Expected results. Neighbouring Float8 values near 1 are 1/2^4 apart, so
# 1/2^5 is exactly half a step (a tie, not rounded up) and 1/2^6 a quarter step.
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^5),2))        # 1.0f8     (tie: stays at 1.0)
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^6),2))        # 1.0f8     (below half: rounds down)
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^5+1/2^6),2))  # 1.0625f8  (above half: rounds up)

@show Float8(parse(UInt8,round_to_8_bits(3*(1.0+1/2^5)),2))    # 3.125f8   (3.09375 rounds up)
@show Float8(parse(UInt8,round_to_8_bits(4*(1.0+1/2^6)),2))    # 4.0f8     (4.0625 rounds down)
@show Float8(parse(UInt8,round_to_8_bits(1.0+1/2^5+1/2^6),2))  # 1.0625f8  (same input as above)

Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 5),2)) = 1.0f8
Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 6),2)) = 1.0f8
Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 5 + 1 / 2 ^ 6),2)) = 1.0625f8
Float8(parse(UInt8,round_to_8_bits(3 * (1.0 + 1 / 2 ^ 5)),2)) = 3.125f8
Float8(parse(UInt8,round_to_8_bits(4 * (1.0 + 1 / 2 ^ 6)),2)) = 4.0f8
Float8(parse(UInt8,round_to_8_bits(1.0 + 1 / 2 ^ 5 + 1 / 2 ^ 6),2)) = 1.0625f8

Out[73]:
1.0625f8
In [ ]: