from csv import reader
### The Google Play data set ###
opened_file = open('googleplaystore.csv')
read_file = reader(opened_file)
android = list(read_file)
android_header = android[0]
android = android[1:]
### The App Store data set ###
opened_file = open('AppleStore.csv')
read_file = reader(opened_file)
ios = list(read_file)
ios_header = ios[0]
ios = ios[1:]
def explore_data(dataset, start, end, rows_and_columns=False):
dataset_slice = dataset[start:end]
for row in dataset_slice:
print(row)
print('\n') # adds a new (empty) line between rows
if rows_and_columns:
print('Number of rows:', len(dataset))
print('Number of columns:', len(dataset[0]))
print(android_header)
print('\n')
explore_data(android,0,3, True)
['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] ['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] Number of rows: 10841 Number of columns: 13
print(ios_header)
print('\n')
explore_data(ios,0,3,True)
['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic'] ['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'] ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] Number of rows: 7197 Number of columns: 16
for row in android:
header_length = len(android_header)
row_length = len(row)
if row_length != header_length:
print(row)
print(android.index(row))
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'] 10472
print(android[10472]) # incorrect row
print('\n')
print(android_header) # header
print('\n')
print(android[0]) # a correct row
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'] ['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] ['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']
print(len(android))
del android[10472] # don't run del statement more than once
print(len(android))
print(android[10472])
10841 10840 ['osmino Wi-Fi: free WiFi', 'TOOLS', '4.2', '134203', '4.1M', '10,000,000+', 'Free', '0', 'Everyone', 'Tools', 'August 7, 2018', '6.06.14', '4.4 and up']
for app in android:
name = app[0]
if name == 'Insightly CRM':
print(app)
if name == 'Instagram':
print('\n')
print(app)
['Insightly CRM', 'BUSINESS', '3.8', '1383', '51M', '100,000+', 'Free', '0', 'Everyone', 'Business', 'July 12, 2018', '3.24.1', '5.0 and up'] ['Insightly CRM', 'BUSINESS', '3.8', '1383', '51M', '100,000+', 'Free', '0', 'Everyone', 'Business', 'July 12, 2018', '3.24.1', '5.0 and up'] ['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
Here two examples are taken, one app is Insightly CRM and another is Instagram. The first app is just a duplicate data with no change in any cell or value.
But the Instagram app has four data all having different reviews (look for the fourth column). So one can create a code that will accept the data with most recent data that is the data with highest number of reviews.
duplicate_apps = [] # store duplicate data
unique_apps = [] # store unique data
for app in android:
name = app[0] # 0 is the index value of first column
if name in unique_apps:
duplicate_apps.append(name)
else:
unique_apps.append(name)
print('Number of duplicate apps:', len(duplicate_apps)) # count the number of duplicates
print('\n')
print('Some of duplicate apps are:', *duplicate_apps[:15], sep ='\n')
Number of duplicate apps: 1181 Some of duplicate apps are: Quick PDF Scanner + OCR FREE Box Google My Business ZOOM Cloud Meetings join.me - Simple Meetings Box Zenefits Google Ads Google My Business Slack FreshBooks Classic Insightly CRM QuickBooks Accounting: Invoicing & Expenses HipChat - Chat Built for Teams Xero Accounting Software
Set a criterion to remove the duplicates in a subtle way with at least one argument and not in a random fashion.
reviews_max={}
for app_name in android:
name = app_name[0]
n_reviews = float(app_name[3])
if name in reviews_max and reviews_max[name] < n_reviews:
reviews_max[name] = n_reviews
if name not in reviews_max:
reviews_max[name] = n_reviews
print(len(reviews_max))
9659
We have 9659 rows in our dataset.
android_clean = [] # store new cleaned data set
already_added = [] # just store app names
for app in android:
name = app[0]
n_reviews = float(app[3])
if (reviews_max[name] == n_reviews) and (name not in already_added):
android_clean.append(app)
already_added.append(name) # make sure this is inside the if block
explore_data(android_clean, 0, 3, True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] ['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up'] Number of rows: 9659 Number of columns: 13
Now we will create a function that finds out non-English apps.
def check_non_english(string):
non_ascii=0
for i in string:
if ord(i)>127:
non_ascii +=1
if non_ascii >3:
return False
else:
return True
print(check_non_english('Instachat 😜'))
print(check_non_english('Docs To Go™ Free Office Suite'))
print(check_non_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))
print(check_non_english('Instagram'))
True True False True
In here, first we started with a function that returns true or false based on if there is one character that is non-ascii. But due to this apps like 'Docs To Go™ Free Office Suite' dot removed. So ,we tweaked the code a little bit and now we check for three characters. If there are more than three characters that fall outside the ASCII range (0 - 127), then the function should identify the string as non-English.
android_english=[]
ios_english=[]
for i in android_clean:
name_character= i[0]
if check_non_english(name_character):
android_english.append(i)
for i in ios:
name_character= i[1]
if check_non_english(name_character):
ios_english.append(i)
explore_data(android_english, 0, 3, True)
print('\n')
explore_data(ios_english, 0, 3, True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] ['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up'] Number of rows: 9614 Number of columns: 13 ['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'] ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] Number of rows: 6183 Number of columns: 16
Now, after cleaning we have 9614 android apps and 6183 ios apps.
Now we would be doing final cleaning by Isolating the free apps.
android_final=[]
ios_final=[]
for i in android_english:
name = i[7]
if name == '0':
android_final.append(i)
for i in ios_english:
name = i[4]
if name == '0.0':
ios_final.append(i)
print(*android_final[:5], sep = '\n')
print(len(android_final))
print(*ios_final[:5], sep = '\n')
print(len(ios_final))
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] ['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up'] ['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Everyone', 'Art & Design;Creativity', 'June 20, 2018', '1.1', '4.4 and up'] ['Paper flowers instructions', 'ART_AND_DESIGN', '4.4', '167', '5.6M', '50,000+', 'Free', '0', 'Everyone', 'Art & Design', 'March 26, 2017', '1.0', '2.3 and up'] 8864 ['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'] ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] ['420009108', 'Temple Run', '65921024', 'USD', '0.0', '1724546', '3842', '4.5', '4.0', '1.6.2', '9+', 'Games', '40', '5', '1', '1'] ['284035177', 'Pandora - Music & Radio', '130242560', 'USD', '0.0', '1126879', '3594', '4.0', '4.5', '8.4.1', '12+', 'Music', '37', '4', '1', '1'] 3222
After cleaning the data we are left with 8864 Android apps and 3222 iOS apps for analysis.
Now our next mission is to determine the kinds of apps that are likely to attract more users because our revenue is highly influenced by the number of people using our apps.
To minimize risks and overhead, our validation strategy for an app idea is comprised of three steps:
For this, one can inspect both data sets and identify the columns to generate frequency tables to find out what are the most common genres in each market.
def freq_table(dataset, index):
table = {}
total = 0
for row in dataset:
total += 1
value = row[index]
if value in table:
table[value] += 1
else:
table[value] = 1
table_percentages = {}
for key in table:
percentage = (table[key] / total) * 100
table_percentages[key] = percentage
return table_percentages
# display_table function prints the entries of the frequency table in descending order
def display_table(dataset, index):
table = freq_table(dataset, index)
table_display = []
for key in table:
key_val_as_tuple = (table[key], key)
table_display.append(key_val_as_tuple)
table_sorted = sorted(table_display, reverse = True)
for entry in table_sorted:
print(entry[1], ':', entry[0])
display_table(ios_final, 11)
Games : 58.16263190564867 Entertainment : 7.883302296710118 Photo & Video : 4.9658597144630665 Education : 3.662321539416512 Social Networking : 3.2898820608317814 Shopping : 2.60707635009311 Utilities : 2.5139664804469275 Sports : 2.1415270018621975 Music : 2.0484171322160147 Health & Fitness : 2.0173805090006205 Productivity : 1.7380509000620732 Lifestyle : 1.5828677839851024 News : 1.3345747982619491 Travel : 1.2414649286157666 Finance : 1.1173184357541899 Weather : 0.8690254500310366 Food & Drink : 0.8069522036002483 Reference : 0.5586592178770949 Business : 0.5276225946617008 Book : 0.4345127250155183 Navigation : 0.186219739292365 Medical : 0.186219739292365 Catalogs : 0.12414649286157665
Here, we can see Games cover the major portion. Followed by Entertainment, Photo & Video, Education and Social Networking. In total these top 5 apps covers 75% of free English apps. This means people download more leisure apps than learning or lifestyle apps.
display_table(android_final, 1) # Category column
FAMILY : 18.907942238267147 GAME : 9.724729241877256 TOOLS : 8.461191335740072 BUSINESS : 4.591606498194946 LIFESTYLE : 3.9034296028880866 PRODUCTIVITY : 3.892148014440433 FINANCE : 3.7003610108303246 MEDICAL : 3.531137184115524 SPORTS : 3.395758122743682 PERSONALIZATION : 3.3167870036101084 COMMUNICATION : 3.2378158844765346 HEALTH_AND_FITNESS : 3.0798736462093865 PHOTOGRAPHY : 2.944494584837545 NEWS_AND_MAGAZINES : 2.7978339350180503 SOCIAL : 2.6624548736462095 TRAVEL_AND_LOCAL : 2.33528880866426 SHOPPING : 2.2450361010830324 BOOKS_AND_REFERENCE : 2.1435018050541514 DATING : 1.861462093862816 VIDEO_PLAYERS : 1.7937725631768955 MAPS_AND_NAVIGATION : 1.3989169675090252 FOOD_AND_DRINK : 1.2409747292418771 EDUCATION : 1.1620036101083033 ENTERTAINMENT : 0.9589350180505415 LIBRARIES_AND_DEMO : 0.9363718411552346 AUTO_AND_VEHICLES : 0.9250902527075812 HOUSE_AND_HOME : 0.8235559566787004 WEATHER : 0.8009927797833934 EVENTS : 0.7107400722021661 PARENTING : 0.6543321299638989 ART_AND_DESIGN : 0.6430505415162455 COMICS : 0.6204873646209386 BEAUTY : 0.5979241877256317
In Android's category column, if we see the division some what divided in such a manner that Top 5 covers only 42%. Family category also contains games. So, in here also majoy portion is covered by games. Few people download apps related to beauty, education, shopping etc.
There is also genres column in the dataset. Lets see how the apps are divided in that part.
display_table(android_final, -4) # Genres column
Tools : 8.449909747292418 Entertainment : 6.069494584837545 Education : 5.347472924187725 Business : 4.591606498194946 Productivity : 3.892148014440433 Lifestyle : 3.892148014440433 Finance : 3.7003610108303246 Medical : 3.531137184115524 Sports : 3.463447653429603 Personalization : 3.3167870036101084 Communication : 3.2378158844765346 Action : 3.1024368231046933 Health & Fitness : 3.0798736462093865 Photography : 2.944494584837545 News & Magazines : 2.7978339350180503 Social : 2.6624548736462095 Travel & Local : 2.3240072202166067 Shopping : 2.2450361010830324 Books & Reference : 2.1435018050541514 Simulation : 2.0419675090252705 Dating : 1.861462093862816 Arcade : 1.8501805054151623 Video Players & Editors : 1.7712093862815883 Casual : 1.7599277978339352 Maps & Navigation : 1.3989169675090252 Food & Drink : 1.2409747292418771 Puzzle : 1.128158844765343 Racing : 0.9927797833935018 Role Playing : 0.9363718411552346 Libraries & Demo : 0.9363718411552346 Auto & Vehicles : 0.9250902527075812 Strategy : 0.9138086642599278 House & Home : 0.8235559566787004 Weather : 0.8009927797833934 Events : 0.7107400722021661 Adventure : 0.6768953068592057 Comics : 0.6092057761732852 Beauty : 0.5979241877256317 Art & Design : 0.5979241877256317 Parenting : 0.4963898916967509 Card : 0.45126353790613716 Casino : 0.42870036101083037 Trivia : 0.41741877256317694 Educational;Education : 0.39485559566787 Board : 0.3835740072202166 Educational : 0.3722924187725632 Education;Education : 0.33844765342960287 Word : 0.2594765342960289 Casual;Pretend Play : 0.236913357400722 Music : 0.2030685920577617 Racing;Action & Adventure : 0.16922382671480143 Puzzle;Brain Games : 0.16922382671480143 Entertainment;Music & Video : 0.16922382671480143 Casual;Brain Games : 0.13537906137184114 Casual;Action & Adventure : 0.13537906137184114 Arcade;Action & Adventure : 0.12409747292418773 Action;Action & Adventure : 0.10153429602888085 Educational;Pretend Play : 0.09025270758122744 Simulation;Action & Adventure : 0.078971119133574 Parenting;Education : 0.078971119133574 Entertainment;Brain Games : 0.078971119133574 Board;Brain Games : 0.078971119133574 Parenting;Music & Video : 0.06768953068592057 Educational;Brain Games : 0.06768953068592057 Casual;Creativity : 0.06768953068592057 Art & Design;Creativity : 0.06768953068592057 Education;Pretend Play : 0.056407942238267145 Role Playing;Pretend Play : 0.04512635379061372 Education;Creativity : 0.04512635379061372 Role Playing;Action & Adventure : 0.033844765342960284 Puzzle;Action & Adventure : 0.033844765342960284 Entertainment;Creativity : 0.033844765342960284 Entertainment;Action & Adventure : 0.033844765342960284 Educational;Creativity : 0.033844765342960284 Educational;Action & Adventure : 0.033844765342960284 Education;Music & Video : 0.033844765342960284 Education;Brain Games : 0.033844765342960284 Education;Action & Adventure : 0.033844765342960284 Adventure;Action & Adventure : 0.033844765342960284 Video Players & Editors;Music & Video : 0.02256317689530686 Sports;Action & Adventure : 0.02256317689530686 Simulation;Pretend Play : 0.02256317689530686 Puzzle;Creativity : 0.02256317689530686 Music;Music & Video : 0.02256317689530686 Entertainment;Pretend Play : 0.02256317689530686 Casual;Education : 0.02256317689530686 Board;Action & Adventure : 0.02256317689530686 Video Players & Editors;Creativity : 0.01128158844765343 Trivia;Education : 0.01128158844765343 Travel & Local;Action & Adventure : 0.01128158844765343 Tools;Education : 0.01128158844765343 Strategy;Education : 0.01128158844765343 Strategy;Creativity : 0.01128158844765343 Strategy;Action & Adventure : 0.01128158844765343 Simulation;Education : 0.01128158844765343 Role Playing;Brain Games : 0.01128158844765343 Racing;Pretend Play : 0.01128158844765343 Puzzle;Education : 0.01128158844765343 Parenting;Brain Games : 0.01128158844765343 Music & Audio;Music & Video : 0.01128158844765343 Lifestyle;Pretend Play : 0.01128158844765343 Lifestyle;Education : 0.01128158844765343 Health & Fitness;Education : 0.01128158844765343 Health & Fitness;Action & Adventure : 0.01128158844765343 Entertainment;Education : 0.01128158844765343 Communication;Creativity : 0.01128158844765343 Comics;Creativity : 0.01128158844765343 Casual;Music & Video : 0.01128158844765343 Card;Action & Adventure : 0.01128158844765343 Books & Reference;Education : 0.01128158844765343 Art & Design;Pretend Play : 0.01128158844765343 Art & Design;Action & Adventure : 0.01128158844765343 Arcade;Pretend Play : 0.01128158844765343 Adventure;Education : 0.01128158844765343
Here, one can see that this column is divided more categorically. So, we would utilize Caterogy column for further analysis as this data is more specific and refined.
One way to find out what genres are the most popular (have the most users) is to calculate the average number of installs for each app genre. For the Google Play data set, we can find this information in the Installs column, but this information is missing for the App Store data set. As a workaround, we'll take the total number of user ratings as a proxy, which we can find in the rating_count_tot app.
import operator
common_genres_ios = freq_table(ios_final, -5)
unsort_it = {}
for genre in common_genres_ios:
total = 0 # store the sum of user ratings specific to each genre.
len_genre = 0 # store the number of apps specific to each genre.
for app in ios_final:
genre_app = app[-5]
if genre_app == genre:
total += float(app[5])
len_genre +=1
avg_num = total / len_genre
unsort_it[genre] = avg_num
print (genre, ':', "{:.2f}".format(avg_num))
print ('\n')
print ('After sorting')
print ('-------------')
# Sort the dictionary values into a list of tuples and assign it to a new variable
sort_it = sorted(unsort_it.items(), key=operator.itemgetter(1), reverse=True)
for item in sort_it:
print(item)
Social Networking : 71548.35 Photo & Video : 28441.54 Games : 22788.67 Music : 57326.53 Reference : 74942.11 Health & Fitness : 23298.02 Weather : 52279.89 Utilities : 18684.46 Travel : 28243.80 Shopping : 26919.69 News : 21248.02 Navigation : 86090.33 Lifestyle : 16485.76 Entertainment : 14029.83 Food & Drink : 33333.92 Sports : 23008.90 Book : 39758.50 Finance : 31467.94 Education : 7003.98 Productivity : 21028.41 Business : 7491.12 Catalogs : 4004.00 Medical : 612.00 After sorting ------------- ('Navigation', 86090.33333333333) ('Reference', 74942.11111111111) ('Social Networking', 71548.34905660378) ('Music', 57326.530303030304) ('Weather', 52279.892857142855) ('Book', 39758.5) ('Food & Drink', 33333.92307692308) ('Finance', 31467.944444444445) ('Photo & Video', 28441.54375) ('Travel', 28243.8) ('Shopping', 26919.690476190477) ('Health & Fitness', 23298.015384615384) ('Sports', 23008.898550724636) ('Games', 22788.6696905016) ('News', 21248.023255813954) ('Productivity', 21028.410714285714) ('Utilities', 18684.456790123455) ('Lifestyle', 16485.764705882353) ('Entertainment', 14029.830708661417) ('Business', 7491.117647058823) ('Education', 7003.983050847458) ('Catalogs', 4004.0) ('Medical', 612.0)
Navigation app has the highest number of user reviews followed by Reference. We can see in the below code that Waze and Google Maps in the navigation elevates the ratings.
Similarly Bible and Dictionary elevate the ratings of Reference.
for app in ios_final:
if app[-5] == 'Navigation':
print(app[1], ':', app[5]) # print name and number of ratings
print ('\n')
for app in ios_final:
if app[-5] == 'Reference':
print (app[1], ':', app[5])
Waze - GPS Navigation, Maps & Real-time Traffic : 345046 Google Maps - Navigation & Transit : 154911 Geocaching® : 12811 CoPilot GPS – Car Navigation & Offline Maps : 3582 ImmobilienScout24: Real Estate Search in Germany : 187 Railway Route Search : 5 Bible : 985920 Dictionary.com Dictionary & Thesaurus : 200047 Dictionary.com Dictionary & Thesaurus for iPad : 54175 Google Translate : 26786 Muslim Pro: Ramadan 2017 Prayer Times, Azan, Quran : 18418 New Furniture Mods - Pocket Wiki & Game Tools for Minecraft PC Edition : 17588 Merriam-Webster Dictionary : 16849 Night Sky : 12122 City Maps for Minecraft PE - The Best Maps for Minecraft Pocket Edition (MCPE) : 8535 LUCKY BLOCK MOD ™ for Minecraft PC Edition - The Best Pocket Wiki & Mods Installer Tools : 4693 GUNS MODS for Minecraft PC Edition - Mods Tools : 1497 Guides for Pokémon GO - Pokemon GO News and Cheats : 826 WWDC : 762 Horror Maps for Minecraft PE - Download The Scariest Maps for Minecraft Pocket Edition (MCPE) Free : 718 VPN Express : 14 Real Bike Traffic Rider Virtual Reality Glasses : 8 教えて!goo : 0 Jishokun-Japanese English Dictionary & Translator : 0
We have data about the number of installs for the Google Play market, so we should be able to get a clearer picture about genre popularity. However, the install numbers don't seem precise enough — we can see that most values are open-ended (100+, 1,000+, 5,000+, etc.):
display_table(android_final, 5)
1,000,000+ : 15.726534296028879 100,000+ : 11.552346570397113 10,000,000+ : 10.548285198555957 10,000+ : 10.198555956678701 1,000+ : 8.393501805054152 100+ : 6.915613718411552 5,000,000+ : 6.825361010830325 500,000+ : 5.561823104693141 50,000+ : 4.7721119133574 5,000+ : 4.512635379061372 10+ : 3.5424187725631766 500+ : 3.2490974729241873 50,000,000+ : 2.3014440433213 100,000,000+ : 2.1322202166064983 50+ : 1.917870036101083 5+ : 0.78971119133574 1+ : 0.5076714801444043 500,000,000+ : 0.2707581227436823 1,000,000,000+ : 0.22563176895306858 0+ : 0.04512635379061372 0 : 0.01128158844765343
We're going to leave the numbers as they are, which means that we'll consider that an app with 100,000+ installs has 100,000 installs, and an app with 1,000,000+ installs has 1,000,000 installs, and so on. To perform computations, however, we'll need to convert each install number from string to float. This means we need to remove the commas and the plus characters, otherwise the conversion will fail and raise an error.
Now, we will calculate the average number of installs per app genre for the Google Play data set
categories_android = freq_table(android_final, 1)
for category in categories_android:
total = 0 # store the sum of installs specific to each genre
len_category = 0 # store the number of apps specific to each genre
for app in android_final:
category_app = app[1]
if category_app == category:
n_installs = app[5]
n_installs = n_installs.replace('+', '')
n_installs = n_installs.replace(',', '')
total += float(n_installs)
len_category += 1
avg_n_installs = total / len_category
print(category, ':', "{:.2f}".format(avg_n_installs))
ART_AND_DESIGN : 1986335.09 AUTO_AND_VEHICLES : 647317.82 BEAUTY : 513151.89 BOOKS_AND_REFERENCE : 8767811.89 BUSINESS : 1712290.15 COMICS : 817657.27 COMMUNICATION : 38456119.17 DATING : 854028.83 EDUCATION : 1833495.15 ENTERTAINMENT : 11640705.88 EVENTS : 253542.22 FINANCE : 1387692.48 FOOD_AND_DRINK : 1924897.74 HEALTH_AND_FITNESS : 4188821.99 HOUSE_AND_HOME : 1331540.56 LIBRARIES_AND_DEMO : 638503.73 LIFESTYLE : 1437816.27 GAME : 15588015.60 FAMILY : 3695641.82 MEDICAL : 120550.62 SOCIAL : 23253652.13 SHOPPING : 7036877.31 PHOTOGRAPHY : 17840110.40 SPORTS : 3638640.14 TRAVEL_AND_LOCAL : 13984077.71 TOOLS : 10801391.30 PERSONALIZATION : 5201482.61 PRODUCTIVITY : 16787331.34 PARENTING : 542603.62 WEATHER : 5074486.20 VIDEO_PLAYERS : 24727872.45 NEWS_AND_MAGAZINES : 9549178.47 MAPS_AND_NAVIGATION : 4056941.77
Communication apps have the highest installs: 38,456,119. This number is high that is over one billion installs due to (WhatsApp, Facebook Messenger, Skype, Google Chrome, Gmail, and Hangouts), and a few others with over 100 and 500 million installs.
Below is the code showing this.
for app in android_final:
if app[1] == 'COMMUNICATION' and (app[5] == '1,000,000,000+'
or app[5] == '500,000,000+'
or app[5] == '100,000,000+'):
print(app[0], ':', app[5])
WhatsApp Messenger : 1,000,000,000+ imo beta free calls and text : 100,000,000+ Android Messages : 100,000,000+ Google Duo - High Quality Video Calls : 500,000,000+ Messenger – Text and Video Chat for Free : 1,000,000,000+ imo free video calls and chat : 500,000,000+ Skype - free IM & video calls : 1,000,000,000+ Who : 100,000,000+ GO SMS Pro - Messenger, Free Themes, Emoji : 100,000,000+ LINE: Free Calls & Messages : 500,000,000+ Google Chrome: Fast & Secure : 1,000,000,000+ Firefox Browser fast & private : 100,000,000+ UC Browser - Fast Download Private & Secure : 500,000,000+ Gmail : 1,000,000,000+ Hangouts : 1,000,000,000+ Messenger Lite: Free Calls & Messages : 100,000,000+ Kik : 100,000,000+ KakaoTalk: Free Calls & Text : 100,000,000+ Opera Mini - fast web browser : 100,000,000+ Opera Browser: Fast and Secure : 100,000,000+ Telegram : 100,000,000+ Truecaller: Caller ID, SMS spam blocking & Dialer : 100,000,000+ UC Browser Mini -Tiny Fast Private & Secure : 100,000,000+ Viber Messenger : 500,000,000+ WeChat : 100,000,000+ Yahoo Mail – Stay Organized : 100,000,000+ BBM - Free Calls & Messages : 100,000,000+
Let's take a look at some of the apps from this genre and their number of installs:
The books and reference genre looks fairly popular as well, with an average number of installs of 8,767,811
for app in android_final:
if app[1] == 'BOOKS_AND_REFERENCE':
print(app[0], ':', app[5])
E-Book Read - Read Book for free : 50,000+ Download free book with green book : 100,000+ Wikipedia : 10,000,000+ Cool Reader : 10,000,000+ Free Panda Radio Music : 100,000+ Book store : 1,000,000+ FBReader: Favorite Book Reader : 10,000,000+ English Grammar Complete Handbook : 500,000+ Free Books - Spirit Fanfiction and Stories : 1,000,000+ Google Play Books : 1,000,000,000+ AlReader -any text book reader : 5,000,000+ Offline English Dictionary : 100,000+ Offline: English to Tagalog Dictionary : 500,000+ FamilySearch Tree : 1,000,000+ Cloud of Books : 1,000,000+ Recipes of Prophetic Medicine for free : 500,000+ ReadEra – free ebook reader : 1,000,000+ Anonymous caller detection : 10,000+ Ebook Reader : 5,000,000+ Litnet - E-books : 100,000+ Read books online : 5,000,000+ English to Urdu Dictionary : 500,000+ eBoox: book reader fb2 epub zip : 1,000,000+ English Persian Dictionary : 500,000+ Flybook : 500,000+ All Maths Formulas : 1,000,000+ Ancestry : 5,000,000+ HTC Help : 10,000,000+ English translation from Bengali : 100,000+ Pdf Book Download - Read Pdf Book : 100,000+ Free Book Reader : 100,000+ eBoox new: Reader for fb2 epub zip books : 50,000+ Only 30 days in English, the guideline is guaranteed : 500,000+ Moon+ Reader : 10,000,000+ SH-02J Owner's Manual (Android 8.0) : 50,000+ English-Myanmar Dictionary : 1,000,000+ Golden Dictionary (EN-AR) : 1,000,000+ All Language Translator Free : 1,000,000+ Azpen eReader : 500,000+ URBANO V 02 instruction manual : 100,000+ Bible : 100,000,000+ C Programs and Reference : 50,000+ C Offline Tutorial : 1,000+ C Programs Handbook : 50,000+ Amazon Kindle : 100,000,000+ Aab e Hayat Full Novel : 100,000+ Aldiko Book Reader : 10,000,000+ Google I/O 2018 : 500,000+ R Language Reference Guide : 10,000+ Learn R Programming Full : 5,000+ R Programing Offline Tutorial : 1,000+ Guide for R Programming : 5+ Learn R Programming : 10+ R Quick Reference Big Data : 1,000+ V Made : 100,000+ Wattpad 📖 Free Books : 100,000,000+ Dictionary - WordWeb : 5,000,000+ Guide (for X-MEN) : 100,000+ AC Air condition Troubleshoot,Repair,Maintenance : 5,000+ AE Bulletins : 1,000+ Ae Allah na Dai (Rasa) : 10,000+ 50000 Free eBooks & Free AudioBooks : 5,000,000+ Ag PhD Field Guide : 10,000+ Ag PhD Deficiencies : 10,000+ Ag PhD Planting Population Calculator : 1,000+ Ag PhD Soybean Diseases : 1,000+ Fertilizer Removal By Crop : 50,000+ A-J Media Vault : 50+ Al-Quran (Free) : 10,000,000+ Al Quran (Tafsir & by Word) : 500,000+ Al Quran Indonesia : 10,000,000+ Al'Quran Bahasa Indonesia : 10,000,000+ Al Quran Al karim : 1,000,000+ Al-Muhaffiz : 50,000+ Al Quran : EAlim - Translations & MP3 Offline : 5,000,000+ Al-Quran 30 Juz free copies : 500,000+ Koran Read &MP3 30 Juz Offline : 1,000,000+ Hafizi Quran 15 lines per page : 1,000,000+ Quran for Android : 10,000,000+ Surah Al-Waqiah : 100,000+ Hisnul Al Muslim - Hisn Invocations & Adhkaar : 100,000+ Satellite AR : 1,000,000+ Audiobooks from Audible : 100,000,000+ Kinot & Eichah for Tisha B'Av : 10,000+ AW Tozer Devotionals - Daily : 5,000+ Tozer Devotional -Series 1 : 1,000+ The Pursuit of God : 1,000+ AY Sing : 5,000+ Ay Hasnain k Nana Milad Naat : 10,000+ Ay Mohabbat Teri Khatir Novel : 10,000+ Arizona Statutes, ARS (AZ Law) : 1,000+ Oxford A-Z of English Usage : 1,000,000+ BD Fishpedia : 1,000+ BD All Sim Offer : 10,000+ Youboox - Livres, BD et magazines : 500,000+ B&H Kids AR : 10,000+ B y H Niños ES : 5,000+ Dictionary.com: Find Definitions for English Words : 10,000,000+ English Dictionary - Offline : 10,000,000+ Bible KJV : 5,000,000+ Borneo Bible, BM Bible : 10,000+ MOD Black for BM : 100+ BM Box : 1,000+ Anime Mod for BM : 100+ NOOK: Read eBooks & Magazines : 10,000,000+ NOOK Audiobooks : 500,000+ NOOK App for NOOK Devices : 500,000+ Browsery by Barnes & Noble : 5,000+ bp e-store : 1,000+ Brilliant Quotes: Life, Love, Family & Motivation : 1,000,000+ BR Ambedkar Biography & Quotes : 10,000+ BU Alsace : 100+ Catholic La Bu Zo Kam : 500+ Khrifa Hla Bu (Solfa) : 10+ Kristian Hla Bu : 10,000+ SA HLA BU : 1,000+ Learn SAP BW : 500+ Learn SAP BW on HANA : 500+ CA Laws 2018 (California Laws and Codes) : 5,000+ Bootable Methods(USB-CD-DVD) : 10,000+ cloudLibrary : 100,000+ SDA Collegiate Quarterly : 500+ Sabbath School : 100,000+ Cypress College Library : 100+ Stats Royale for Clash Royale : 1,000,000+ GATE 21 years CS Papers(2011-2018 Solved) : 50+ Learn CT Scan Of Head : 5,000+ Easy Cv maker 2018 : 10,000+ How to Write CV : 100,000+ CW Nuclear : 1,000+ CY Spray nozzle : 10+ BibleRead En Cy Zh Yue : 5+ CZ-Help : 5+ Modlitební knížka CZ : 500+ Guide for DB Xenoverse : 10,000+ Guide for DB Xenoverse 2 : 10,000+ Guide for IMS DB : 10+ DC HSEMA : 5,000+ DC Public Library : 1,000+ Painting Lulu DC Super Friends : 1,000+ Dictionary : 10,000,000+ Fix Error Google Playstore : 1,000+ D. H. Lawrence Poems FREE : 1,000+ Bilingual Dictionary Audio App : 5,000+ DM Screen : 10,000+ wikiHow: how to do anything : 1,000,000+ Dr. Doug's Tips : 1,000+ Bible du Semeur-BDS (French) : 50,000+ La citadelle du musulman : 50,000+ DV 2019 Entry Guide : 10,000+ DV 2019 - EDV Photo & Form : 50,000+ DV 2018 Winners Guide : 1,000+ EB Annual Meetings : 1,000+ EC - AP & Telangana : 5,000+ TN Patta Citta & EC : 10,000+ AP Stamps and Registration : 10,000+ CompactiMa EC pH Calibration : 100+ EGW Writings 2 : 100,000+ EGW Writings : 1,000,000+ Bible with EGW Comments : 100,000+ My Little Pony AR Guide : 1,000,000+ SDA Sabbath School Quarterly : 500,000+ Duaa Ek Ibaadat : 5,000+ Spanish English Translator : 10,000,000+ Dictionary - Merriam-Webster : 10,000,000+ JW Library : 10,000,000+ Oxford Dictionary of English : Free : 10,000,000+ English Hindi Dictionary : 10,000,000+ English to Hindi Dictionary : 5,000,000+ EP Research Service : 1,000+ Hymnes et Louanges : 100,000+ EU Charter : 1,000+ EU Data Protection : 1,000+ EU IP Codes : 100+ EW PDF : 5+ BakaReader EX : 100,000+ EZ Quran : 50,000+ FA Part 1 & 2 Past Papers Solved Free – Offline : 5,000+ La Fe de Jesus : 1,000+ La Fe de Jesús : 500+ Le Fe de Jesus : 500+ Florida - Pocket Brainbook : 1,000+ Florida Statutes (FL Code) : 1,000+ English To Shona Dictionary : 10,000+ Greek Bible FP (Audio) : 1,000+ Golden Dictionary (FR-AR) : 500,000+ Fanfic-FR : 5,000+ Bulgarian French Dictionary Fr : 10,000+ Chemin (fr) : 1,000+ The SCP Foundation DB fr nn5n : 1,000+
Similar to the IOS App Store market, Books and Reference also shows up as the second highest rated genre on the Google Play market. This means that the app market is willing to advocate for apps within the Book genre. We should proceed with our idea to develop an app within the Book genre because there is good potential for growth and monetization.
In this project, we analyzed IOS App Store and Google Play data to determine what type of app should our company build that can be profitable for both markets.
Our conclusion is to build a book app that functions as a reference or guide for photographers. We found that both markets are already oversaturated by entertainment apps. We concluded that taking a popular book (perhaps a more recent book) and turning it into an app could be profitable for both the Google Play and the App Store markets. The markets are already full of libraries, so we need to add some special features besides the raw version of the book. This might include daily quotes from the book, an audio version of the book, quizzes on the book, a forum where people can discuss the book, etc. Based on the ratings data, we also affirmed that both markets are eager to support book apps, which can help us with our future growth and monetization plans.