This project is about identifying profitable profiles for the App Store and Google Play Markets
### Google Play data set ###
opened_file = open('googleplaystore.csv')
from csv import reader
read_file = reader(opened_file)
android = list(read_file)
android_header = android[0]
android = android[1:]
### App Store data set ###
opened_file = open('AppleStore.csv')
read_file = reader(opened_file)
ios = list(read_file)
ios_header =ios[0]
ios = ios[2:]
def explore_data(dataset, start, end, rows_and_columns=False):
dataset_slice = dataset[start:end]
for row in dataset_slice:
print(row)
print('\n') # adds a new (empty) line after each row
if rows_and_columns:
print('Number of rows:', len(dataset))
print('Number of columns:', len(dataset[0]))
print(android_header)
print('\n')
explore_data(android, 0, 3, True)
['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] ['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] Number of rows: 10841 Number of columns: 13
print(android[10472]) # incorrect row
print('\n')
print(android_header) # header
print('\n')
print(android[0]) # correct row
del(android[10472])
print(android[10472]) # deleted row
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'] ['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] ['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['osmino Wi-Fi: free WiFi', 'TOOLS', '4.2', '134203', '4.1M', '10,000,000+', 'Free', '0', 'Everyone', 'Tools', 'August 7, 2018', '6.06.14', '4.4 and up']
### Google Play data set has duplicates, so let's get rid of em! ###
for app in android:
name = app[0]
if name == 'Instagram':
print(app)
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
### I'm not going to remove duplicates randomly.
### Rather, I'm going to delete all except the first one found ###
seen_duplicates = []
unique_apps = []
for app in android:
name == app[0]
if name in unique_apps:
seen_duplicates.append(name)
else:
unique_apps.append(name)
print('Number of duplicates:', len(seen_duplicates))
print('\n')
print('Number of unique', len(unique_apps))
print('Examples of duplicate apps:', seen_duplicates[:15] )
Number of duplicates: 10839 Number of unique 1 Examples of duplicate apps: ['iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology', 'iHoroscope - 2018 Daily Horoscope & Astrology']
### Removing duplicate entries and store
### separate lists for new cleaned data set and
### just app names for detecting duplicates ###
reviews_max = {}
for app in android[1:]:
name = app[0]
n_reviews = float(app[3])
if (name in reviews_max) and (reviews_max[name] < n_reviews):
reviews_max[name] = n_reviews
if (name not in reviews_max):
reviews_max[name] = n_reviews
print('Expected length:', len(android) - 1181)
print('Actual length:', len(reviews_max))
android_clean = []
already_added = []
for app in android[1:]:
name = app[0]
n_reviews = float(app[3])
if n_reviews == reviews_max[name] and name not in already_added:
android_clean.append(app)
already_added.append(name)
Expected length: 9659 Actual length: 9658
### exploring android_clean data set to ensure it displays as expected ###
explore_data(android_clean, 0, 3, True)
['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] ['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up'] ['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Everyone', 'Art & Design;Creativity', 'June 20, 2018', '1.1', '4.4 and up'] Number of rows: 9658 Number of columns: 13
## adding function that takes a string and determines if there is any
## character that doesn't belong to the set of common English characters
## if there are more than 3 chars that fall outside the ASCII range (0-127)
## it is determined to be non-english
def english_only(language):
count = 0
for char in language:
if(ord(char) > 127):
count += 1;
if count > 3:
return False;
else:
return True;
print(english_only('Instagram'))
print(english_only('爱奇艺PPS -《欢乐颂2》电视剧热播'))
print(english_only('Docs To Go™ Free Office Suite'))
print(english_only('Instachat 😜'))
True False True True
## separate Android and iOS apps and find out how many of each we have ###
android_english = []
ios_english = []
for app in android_clean:
name = app[0]
if english_only(name):
android_english.append(app)
for app in ios:
name = app[1];
if english_only(name):
ios_english.append(app)
explore_data(android_english, 0, 3, True)
print('\n')
explore_data(ios_english, 0, 3, True)
['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] ['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up'] ['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Everyone', 'Art & Design;Creativity', 'June 20, 2018', '1.1', '4.4 and up'] Number of rows: 9613 Number of columns: 13 ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] ['420009108', 'Temple Run', '65921024', 'USD', '0.0', '1724546', '3842', '4.5', '4.0', '1.6.2', '9+', 'Games', '40', '5', '1', '1'] Number of rows: 6182 Number of columns: 16
## isolating the free android and iOS apps ###
for app in android_clean:
name = app[0]
if(english_only):
android_english.append(name)
for app in ios:
name = app[1];
if(english_only):
ios_english.append(name)
### We want to find and app profile that fits both the App Store and Google Play because by analyzing apps that are successful on both, we have a way to measure the threshold of entry
### in order to be/remain competitive on those platforms ###
explore_data(android_english, 0, 3, True)
['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] ['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up'] ['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Everyone', 'Art & Design;Creativity', 'June 20, 2018', '1.1', '4.4 and up'] Number of rows: 19271 Number of columns: 13
### function to generate frequency tables to show percentages ###
def freq_table(dataset, index):
table = {}
total = 0
for row in dataset:
total += 1
value = row[index]
if value in table:
table[value] += 1
else:
table[value] = 1
table_percentages = {}
for key in table:
percentage = (table[key] / total) * 100
table_percentages[key] = percentage
return table_percentages
### function to display the percentages in desc ###
def display_table(dataset, index):
table = freq_table(dataset, index)
table_display = []
for key in table:
key_val_as_tuple = (table[key], key)
table_display.append(key_val_as_tuple)
table_sorted = sorted(table_display, reverse = True)
for entry in table_sorted:
print(entry[1], ':', entry[0])
display_table(ios_final, -5)