The main goal of this project is to create a class that represents our inventory and causes efficient data retrieval and processing using the knowledge of 'Time complexity' and 'Space Complexity' used by different algorithms.
The methods in that class will implement the queries that we want to answer about our inventory, using which we will answer some business questions.
Through the project we will use the time module to check the time required for each of our queries.
We own a laptop store and we need to answer a few business questions related to our inventory. We can get the original dataset here.
Let's see the description of the rows in our dataset :
Name | Description |
---|---|
ID |
A unique identifier for the laptop. |
Company |
The name of the company that produces the laptop. |
Product |
The name of the laptop. |
TypeName |
The type of laptop. |
Inches |
The size of the screen in inches. |
ScreenResolution |
The resolution of the screen. |
CPU |
The laptop CPU. |
RAM |
The amount of RAM in the laptop. |
Memory |
The size of the hard drive. |
GPU |
The graphics card name. |
OpSys |
The name of the operating system. |
Weight |
The laptop weight. |
Price |
The price of the laptop. |
First, let's get the dataset :
#importing the required libraries:
import csv
from csv import reader
import time
with open('laptops.csv', encoding='utf8') as file:
read_file = csv.reader(file)
data = list(read_file)
header = data[:1]
rows = data[1:]
print("\033[1m" + 'Header for the data :' + "\033[0m")
print(header)
print('\n')
print("\033[1m" + 'First 3 rows in our dataset:' + "\033[0m")
for _ in range(3):
print(rows[_])
Header for the data : [['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']] First 3 rows in our dataset: ['6571244', 'Apple', 'MacBook Pro', 'Ultrabook', '13.3', 'IPS Panel Retina Display 2560x1600', 'Intel Core i5 2.3GHz', '8GB', '128GB SSD', 'Intel Iris Plus Graphics 640', 'macOS', '1.37kg', '1339'] ['7287764', 'Apple', 'Macbook Air', 'Ultrabook', '13.3', '1440x900', 'Intel Core i5 1.8GHz', '8GB', '128GB Flash Storage', 'Intel HD Graphics 6000', 'macOS', '1.34kg', '898'] ['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', '575']
# creating a class for the inventory with a constructor :
class Inventory():
def __init__(self,csv_filename):
with open(csv_filename, encoding='utf8') as file:
read_file = csv.reader(file)
data = list(read_file)
## assigning thr header and rows:
self.header = data[:1]
self.rows = data[1:]
## converting the price into integer datatype:
for row in rows:
row[-1] = int(row[-1])
# testing the creaed class on the laptops.csv :
inventory = Inventory('laptops.csv')
print("\033[1m" + 'Header for the data :' + "\033[0m")
print(inventory.header)
print('\n')
print("\033[1m" + 'Number of rows in the data :' + "\033[0m")
print(len(inventory.rows))
Header for the data : [['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']] Number of rows in the data : 1303
id
:¶class Inventory():
def __init__(self,csv_filename):
with open(csv_filename, encoding='utf8') as file:
read_file = csv.reader(file)
data = list(read_file)
self.header = data[:1]
self.rows = data[1:]
for row in rows:
row[-1] = int(row[-1])
# creating a function to get the laptop details using the id :
def get_laptop_from_id(self,id_laptop):
for row in self.rows:
if row[0] == id_laptop:
return row
return None
3362737
: Exists in our dataset3362736
: Doesn't exist in our dataset# Testing our updated funtion :
inventory = Inventory('laptops.csv')
start = time.time()
print("\033[1m" + 'The data for laptop with id-3362737 :' + "\033[0m")
print(inventory.get_laptop_from_id('3362737'))
print('\n')
print("\033[1m" + 'The data for laptop with id-3362736 :' + "\033[0m")
print(inventory.get_laptop_from_id('3362736'))
print('\n')
end = time.time()
time_taken = end-start
print("\033[1m" + 'The total time(seconds) taken for the query to process : ' +
str(time_taken) + "\033[0m")
The data for laptop with id-3362737 : ['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', '575'] The data for laptop with id-3362736 : None The total time(seconds) taken for the query to process : 0.0006456375122070312
We know that *Lists* have a time complexity of *O(N), with N as the number of rows. In order to reduce the time complexity we can use Sets or Dictionaries* which have Constant time look ups, enabling faster data retrieval.
class Inventory():
def __init__(self,csv_filename):
with open(csv_filename, encoding='utf8') as file:
read_file = csv.reader(file)
data = list(read_file)
self.header = data[:1]
self.rows = data[1:]
for row in self.rows:
row[-1] = int(row[-1])
# creating a dictionary to store the values of 'id' as keys:
self.id_to_row = {}
for row in self.rows:
self.id_to_row[row[0]] = row
def get_laptop_from_id(self,id_laptop):
for row in self.rows:
if row[0] == id_laptop:
return row
return None
# creating a method using the dictionary to get values:
def get_laptop_from_id_fast(self,id_laptop):
if id_laptop in self.id_to_row:
return self.id_to_row[id_laptop]
else:
return None
3362737
: Exists in our dataset3362736
: Doesn't exist in our dataset# Testing our updated funtion :
inventory = Inventory('laptops.csv')
start = time.time()
print("\033[1m" + 'The data for laptop with id-3362737 :' + "\033[0m")
print(inventory.get_laptop_from_id_fast('3362737'))
print('\n')
print("\033[1m" + 'The data for laptop with id-3362736 :' + "\033[0m")
print(inventory.get_laptop_from_id_fast('3362736'))
print('\n')
end = time.time()
time_taken = end-start
print("\033[1m" + 'The total time(seconds) taken for the query to process : ' +
str(time_taken) + "\033[0m")
The data for laptop with id-3362737 : ['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', 575] The data for laptop with id-3362736 : None The total time(seconds) taken for the query to process : 0.0007631778717041016
Now that we have both the methods, one using lists with the other using dictionaries, let's compare their performance by using both the methods to find the same number of random ids from the dataset :
# To generate a list of random ids:
import random
ids = [str(random.randint(1000000,9999999)) for _ in range(10000)]
inventory = Inventory('laptops.csv')
## without using the dictionary
total_time_no_dict = 0
for i in ids:
start = time.time()
inventory.get_laptop_from_id(i)
end = time.time()
total_time_no_dict += (end-start)
## with using the dictionary
total_time_dict = 0
for i in ids:
start = time.time()
inventory.get_laptop_from_id_fast(i)
end = time.time()
total_time_dict += (end-start)
print("\033[1m" + 'Time taken(seconds) by using lists : ' + str(total_time_no_dict) +"\033[0m")
print("\033[1m" + 'Time taken(seconds) by using a dictionary: ' + str(total_time_dict) +"\033[0m")
print("\033[1m" + 'Time advantage(x times) by using a dictionary for 10,000 values: ' +
str(total_time_no_dict/total_time_dict) +"\033[0m")
Time taken(seconds) by using lists : 0.968533992767334 Time taken(seconds) by using a dictionary: 0.004564046859741211 Time advantage(x times) by using a dictionary for 10,000 values: 212.2094760486862
*Sometimes, the store offers a promotion where a gift card is issued. A customer can use the gift to buy up to two laptops. To avoid having to keep track of what was already spent, the gift card has a single time usage. This means that, even if there is leftover money, it cannot be used anymore.*
eg. If there are three laptops in the inventory with prices : 1339 USD, 898 USD, and 575 USD and the gift card issued is worth 2500 USD. Since a customer can buy, at most, two laptops with a gift card, the maximum they can spend is 2237 USD (1339 USD plus 898 USD). Therefore, they might feel cheated because, no matter how they spend their gift card, they cannot spend the full 2500 USD.
*Since we don't want to make a customer feel cheated, so whenever a gift card is isued, there should be at least one way to spend it in full.*
In the Inventory class writing a function that, given a dollar amount, checks whether it is possible to spend precisely that amount by purchasing up to two laptops.
class Inventory():
def __init__(self,csv_filename):
with open(csv_filename, encoding='utf8') as file:
read_file = csv.reader(file)
data = list(read_file)
self.header = data[:1]
self.rows = data[1:]
for row in self.rows:
row[-1] = int(row[-1])
self.id_to_row = {}
for row in self.rows:
self.id_to_row[row[0]] = row
def get_laptop_from_id(self,id_laptop):
for row in self.rows:
if row[0] == id_laptop:
return row
return None
def get_laptop_from_id_fast(self,id_laptop):
if id_laptop in self.id_to_row:
return self.id_to_row[id_laptop]
else:
return None
## the required function
def check_promotion_dollars(self,dollars):
## to check if there is a laptop with price exactly equal to the value of the gift card
for row in self.rows:
if row[-1] == dollars:
return True
## to check if there are two laptops with the sum of the prices equal to the value of the gift card
for i in self.rows:
for j in self.rows:
if (i+j) == dollars:
return True
return False
# Testing the created class :
inventory = Inventory('laptops.csv')
start = time.time()
print(inventory.check_promotion_dollars(1000))
print(inventory.check_promotion_dollars(442))
end = time.time()
time_taken = end-start
print("\033[1m" + 'The total time(seconds) taken for the query to process : ' +
str(time_taken) + "\033[0m")
True
False
The total time(seconds) taken for the query to process : 0.26534271240234375
We know that *Lists* have a time complexity of *O(N), with the query running 2 nested for loops, the time complexity becomes O(N^2).* In order to reduce the time complexity let's use *Sets* to store the prices for Constant Time Lookups
class Inventory():
def __init__(self,csv_filename):
with open(csv_filename, encoding='utf8') as file:
read_file = csv.reader(file)
data = list(read_file)
self.header = data[:1]
self.rows = data[1:]
for row in self.rows:
row[-1] = int(row[-1])
self.id_to_row = {}
for row in self.rows:
self.id_to_row[row[0]] = row
# creating a set to store all the prices
self.prices = set()
for row in self.rows:
self.prices.add(row[-1])
def get_laptop_from_id(self,id_laptop):
for row in self.rows:
if row[0] == id_laptop:
return row
return None
def get_laptop_from_id_fast(self,id_laptop):
if id_laptop in self.id_to_row:
return self.id_to_row[id_laptop]
else:
return None
def check_promotion_dollars(self,dollars):
for row in self.rows:
if row[-1] == dollars:
return True
for i in self.rows:
for j in self.rows:
if (i[-1]+j[-1]) == dollars:
return True
return False
## creating a method using sets to get the laptop prices and their required sums:
def check_promotion_dollars_fast(self,dollars):
## to check if the price exists in the set
if dollars in self.prices:
return True
## to check if (dollars-price) exists in the set:
for price in self.prices:
if (dollars-price) in self.prices:
return True
return False
Let's test our updated class to check the prices for :
## Testing the created function :
inventory = Inventory('laptops.csv')
start = time.time()
print(inventory.check_promotion_dollars_fast(1000))
print(inventory.check_promotion_dollars_fast(442))
end = time.time()
time_taken = end-start
print("\033[1m" + 'The total time(seconds) taken for the query to process : ' +
str(time_taken) + "\033[0m")
True
False
The total time(seconds) taken for the query to process : 0.00029540061950683594
Now that we have both the methods, one using *lists* with the other using *sets*, let's compare their performance by using both the methods to find the same number of random prices from the dataset :
# To generate a random list of 100 prices:
prices = [random.randint(100,5000) for _ in range(100)]
inventory = Inventory('laptops.csv')
## Without using sets:
total_time_no_set = 0
for price in prices:
start = time.time()
inventory.check_promotion_dollars(price)
end = time.time()
total_time_no_set += (end-start)
## Using sets:
total_time_set = 0
for price in prices:
start = time.time()
inventory.check_promotion_dollars_fast(price)
end = time.time()
total_time_set += (end-start)
print("\033[1m" + 'Time taken(seconds) by using lists : ' + str(total_time_no_set) +"\033[0m")
print("\033[1m" + 'Time taken(seconds) by using a set: ' + str(total_time_set) +"\033[0m")
print("\033[1m" + 'Time advantage(x times) by using a set for 100 values: ' +
str(total_time_no_set/total_time_set) +"\033[0m")
Time taken(seconds) by using lists : 1.308384895324707 Time taken(seconds) by using a set: 0.0005381107330322266 Time advantage(x times) by using a set for 100 values: 2431.441736818786
The function 'find_laptops' needs to sort all the laptop prices and use binary search to find the laptop with the given price, then return the laptops with prices less than the given price.
class Inventory():
def __init__(self,csv_filename):
with open(csv_filename, encoding='utf8') as file:
read_file = csv.reader(file)
data = list(read_file)
self.header = data[:1]
self.rows = data[1:]
for row in self.rows:
row[-1] = int(row[-1])
self.id_to_row = {}
for row in self.rows:
self.id_to_row[row[0]] = row
self.prices = set()
for row in self.rows:
self.prices.add(row[-1])
## Sorting rows by the price:
self.rows_by_price = sorted(self.rows, key=lambda row:row[-1])
def get_laptop_from_id(self,id_laptop):
for row in self.rows:
if row[0] == id_laptop:
return row
return None
def get_laptop_from_id_fast(self,id_laptop):
if id_laptop in self.id_to_row:
return self.id_to_row[id_laptop]
else:
return None
def check_promotion_dollars(self,dollars):
for row in self.rows:
if row[-1] == dollars:
return True
for i in self.rows:
for j in self.rows:
if (i[-1]+j[-1]) == dollars:
return True
return False
def check_promotion_dollars_fast(self,dollars):
if dollars in self.prices:
return True
for price in self.prices:
if (dollars-price) in self.prices:
return True
return False
## a function to give the index of the laptop whose price is just greater than listed:
def find_first_laptop_more_expensive(self,target_price):
range_start = 0
range_end = len(self.rows_by_price)-1
while range_start < range_end:
range_middle = (range_start + range_end)//2
price = self.rows_by_price[range_middle][-1]
if target_price < price :
range_end = range_middle
else:
range_start = range_middle + 1
## if all the laptops are below target price
if self.rows_by_price[range_start][-1] <= target_price:
return ('All laptops are below the given budget.')
## Number of laptops under the given budget:
return ('Number of laptops available : ' + str(range_start))
Let's test our updated class to check the prices for :
# checking our function:
inventory = Inventory('laptops.csv')
start = time.time()
print(inventory.find_first_laptop_more_expensive(1000))
print(inventory.find_first_laptop_more_expensive(10000))
end = time.time()
time_taken = end-start
print('\n')
print("\033[1m" + 'Time taken(seconds) to find : ' + str(time_taken) +"\033[0m")
Number of laptops available : 683
All laptops are below the given budget.
Time taken(seconds) to find : 0.00027298927307128906
In the end, using the *'Time and Space complexity' and pre-processing* we developed a class that efficiently enabled :