This project analyzes data for a company that build Android and iOS mobile apps .Its main objective is to help developers undersatand the type of apps that can attaract more users on Google Play and App Store Markets
#Read in Google play data set
open_file = open('googleplaystore.csv',encoding = 'utf8')
from csv import reader
read_file = reader(open_file)
android = list(read_file)
android_header = android[0]
android = android[1:]
# Read in AppleStore data set
open_file = open('AppleStore.csv',encoding = 'utf8')
from csv import reader
read_file = reader(open_file)
ios = list(read_file)
ios_header = ios[0]
ios = ios[1:]
After reading both datasets above, explore_data() function that take data_set, start,end,row_and_column as input parameters was created.
def explore_data(data_set,start,end,rows_and_columns = False):
dataset_slice = data_set[start:end]
for row in dataset_slice:
print(row)
print('\n')
if rows_and_columns:
print('Number of rows:',len(data_set))
print('Number of columns:', len(data_set[0]))
print(ios_header)
explore_data(ios, 0, 3, True)
print('\n')
['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic'] ['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'] ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] Number of rows: 7197 Number of columns: 16
To check for correct row, two techniques are used: one way is by creating function, second way is to print directly row if its known.
def row_check(data_set_list, store):
selected_store = ''
length = 0
if store == 'google':
selected_store, length = 'google', 13
elif store == 'apple':
selected_store, length = 'apple', 16
else:
return print('Wrong App Store name')
for row in data_set_list:
if len(row) != length:
print(row)
row_check(android, 'google')
row_check(ios, ' apple')
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'] Wrong App Store name
print(android[10472]) # incorrect row
print('\n')
print(android_header) # header
print('\n')
print(android[0]) # correct row
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'] ['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] ['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']
print(len(android))
del android[10472] # don't run this more than once,it will delete some rows in the data
print(len(android))
10841 10840
for app in android:# This is to see that Instagram is duplicate apps.
name = app[0]
if name == 'Instagram':
print(app)
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
Not all column names are clearly explained, For more details, see column names at documentation.
duplicate_apps = []
unique_apps = []
for app in android:
name = app[0]
if name in unique_apps:
duplicate_apps.append(name)
else:
unique_apps.append(name)
if name == 'Instagram':
print(app)
if name == 'ZOOM Cloud Meetings':
print(app)
print('Number of duplicate apps:', len(duplicate_apps)) # This is to find lenght of duplicate apps
print('\n')
print('Expected length:', len(android)-1181)# This is to get the expected length of android data set
print('\n')
print('Exapmles of duplicate apps:', duplicate_apps[:15])# Example of duplicate apps
['ZOOM Cloud Meetings', 'BUSINESS', '4.4', '31614', '37M', '10,000,000+', 'Free', '0', 'Everyone', 'Business', 'July 20, 2018', '4.1.28165.0716', '4.0 and up'] ['ZOOM Cloud Meetings', 'BUSINESS', '4.4', '31614', '37M', '10,000,000+', 'Free', '0', 'Everyone', 'Business', 'July 20, 2018', '4.1.28165.0716', '4.0 and up'] ['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] Number of duplicate apps: 1181 Expected length: 9659 Exapmles of duplicate apps: ['Quick PDF Scanner + OCR FREE', 'Box', 'Google My Business', 'ZOOM Cloud Meetings', 'join.me - Simple Meetings', 'Box', 'Zenefits', 'Google Ads', 'Google My Business', 'Slack', 'FreshBooks Classic', 'Insightly CRM', 'QuickBooks Accounting: Invoicing & Expenses', 'HipChat - Chat Built for Teams', 'Xero Accounting Software']
To understand more about Google play data set duplicate entries, two lists were created above, one for storing the duplicate apps and one for unique apps. Then loop through the android data set. A variable name "name" is assigned to the app. if the name of an apps exist in unique apps, append the name of the apps to duplicate apps. otherwise, if name of apps doesnt exist in unique apps, append the name of apps to unique apps. To remove the duplicate apps for example 'Instagram', number of reviews in each row is used. The higher the review number, the more recent the review. Therefore, my strategy is to remove smaller number of reviews first.
reviews_max = {} # To check the maximum length of reviews in android data set
for app in android:
name = app[0]
n_reviews = float(app[3])
if name in reviews_max and reviews_max[name] < n_reviews:
reviews_max[name] = n_reviews
elif name not in reviews_max:
reviews_max[name] = n_reviews
print('Expected length:', len(android) - 1181)
print('Actual length:', len(reviews_max))
Expected length: 9659 Actual length: 9659
In order to clean duplicate data, android_clean and already_added lists were created. Loop through the app and append, explore_data() function is used to display the clean android data set
android_clean = []
already_added = []
for app in android:
name = app[0]
n_reviews = float(app[3])
if (reviews_max[name] == n_reviews) and (name not in already_added):
android_clean.append(app)
already_added.append(name)
explore_data(android_clean, 0, 3, True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] ['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up'] Number of rows: 9659 Number of columns: 13
print(ios[813][1])
print(ios[6731][1])
#print(ios[5229][1])
print('\n')
print(android_clean[4412][0])# Exmaple of Non_English apps
print(android_clean[7940][0])
爱奇艺PPS -《欢乐颂2》电视剧热播 【脱出ゲーム】絶対に最後までプレイしないで 〜謎解き&ブロックパズル〜 中国語 AQリスニング لعبة تقدر تربح DZ
a_character_english function with string as input is created to check for english or non-english apps. loop through the string, check using ord() funtion for the corresponding number of each character. if number of character is more than 127, return False (non-english app), otherwise return True(english-apps)
def a_character_english(string):
outside_ascii = 0 # set character outside 0-127
for character in string:
if ord(character) > 127:
outside_ascii += 1 # increment if outside 0-127
if outside_ascii > 3: # if outside 0-127, and more than 3 character, return False(non-english apps), otherwise, return True(english apps)
return False
else:
return True
#print(a_character_english('Instagram'))
print(a_character_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))
print(a_character_english('Docs To Go™ Free Office Suite'))
print(a_character_english('Instachat 😜'))
False True True
The newly created function(a_character_english) is used to filter out non-english apps for both dataset. lists for both data set is created and loop through each. if an app name is english, append the whole row to a separate list. Finally, the previous function explore_data() is used to check the rows that remained in the datasets
android_english = []
for app in android:
android_english.append(app)
ios_english = []
for app in ios:
ios_english.append(app)
explore_data(android_english, 0, 3, True)
print('\n')
explore_data(ios_english, 0, 3, True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] Number of rows: 10840 Number of columns: 13 ['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'] ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] Number of rows: 7197 Number of columns: 16
To determine free apps for both datasets, free_app_android_english & free_app_ios_english lists were created, loop through each data set. I use price row to check: if price ==0.0, then an app is free, else it is not free
free_app_android_english = []
for app in android_english:
price = app[7]
if price == '0':
free_app_android_english.append(app)
free_app_ios_english = []
for app in ios_english:
price = app[4]
if price == '0.0':
free_app_ios_english.append(app)
print(len(free_app_android_english))
print(len(free_app_ios_english))
10040 4056
There are: 10,040(free_app_android_english) and 4,056(free_app_ios_english) remaining for our analysis
In this project, apps that attract more users is determined because our revenue is highly influenced by the number of people using our apps.
Our validation strategy is to: Build a minimal Android version of the app,and add it to Google play. If the app has large number of users, we develop it further. We asses if the app is profitable after six months, we then build an ios version and add it to the App store.
We started by identifying most common genres for each market. For instance, category and genres columns are used for buidling frequency table for andriod data set(googleplaystore). While, prime_genre column is used to build frequency table for ios data set(applestore)
def freq_table(dataset,index):
frequency_table = {}
total = 0
for row in dataset:
total +=1
item = row[index]
if item in frequency_table:
frequency_table[item] +=1
else:
frequency_table[item] = 1
frequency_table_percentage = {}
for key in frequency_table:
percentage = (frequency_table[key]/ total)*100
frequency_table_percentage[key] = percentage
return frequency_table_percentage
def display_table(dataset, index):
frequency_table = freq_table(dataset, index)
frequency_table_display = []
for key in frequency_table:
key_val_as_tuple = (frequency_table[key], key)
frequency_table_display.append(key_val_as_tuple)
frequency_table_sorted = sorted(frequency_table_display, reverse = True)
for entry in frequency_table_sorted:
print(entry[1], ':', entry[0])
display_table(free_app_android_english , 1) # android app by category
print('\n')
FAMILY : 17.739043824701195 GAME : 10.56772908366534 TOOLS : 7.6195219123505975 BUSINESS : 4.442231075697211 PRODUCTIVITY : 3.944223107569721 LIFESTYLE : 3.6155378486055776 SPORTS : 3.5856573705179287 COMMUNICATION : 3.5856573705179287 MEDICAL : 3.5258964143426295 FINANCE : 3.4760956175298805 HEALTH_AND_FITNESS : 3.237051792828685 PHOTOGRAPHY : 3.117529880478088 PERSONALIZATION : 3.0776892430278884 SOCIAL : 2.908366533864542 NEWS_AND_MAGAZINES : 2.7988047808764938 SHOPPING : 2.5697211155378485 TRAVEL_AND_LOCAL : 2.450199203187251 DATING : 2.2609561752988045 BOOKS_AND_REFERENCE : 2.0219123505976095 VIDEO_PLAYERS : 1.7031872509960162 EDUCATION : 1.5139442231075697 ENTERTAINMENT : 1.4641434262948207 MAPS_AND_NAVIGATION : 1.3147410358565739 FOOD_AND_DRINK : 1.245019920318725 HOUSE_AND_HOME : 0.8764940239043826 LIBRARIES_AND_DEMO : 0.8366533864541833 AUTO_AND_VEHICLES : 0.8167330677290837 WEATHER : 0.7370517928286853 EVENTS : 0.6274900398406374 ART_AND_DESIGN : 0.6175298804780877 COMICS : 0.5976095617529881 PARENTING : 0.5776892430278884 BEAUTY : 0.5278884462151394
display_table(free_app_android_english , 9) # android app by Genres
Tools : 7.609561752988048 Entertainment : 6.01593625498008 Education : 5.169322709163347 Business : 4.442231075697211 Productivity : 3.944223107569721 Sports : 3.7250996015936253 Lifestyle : 3.6055776892430282 Communication : 3.5856573705179287 Medical : 3.5258964143426295 Finance : 3.4760956175298805 Action : 3.396414342629482 Health & Fitness : 3.237051792828685 Photography : 3.117529880478088 Personalization : 3.0776892430278884 Social : 2.908366533864542 News & Magazines : 2.7988047808764938 Shopping : 2.5697211155378485 Travel & Local : 2.4402390438247012 Dating : 2.2609561752988045 Books & Reference : 2.0219123505976095 Arcade : 1.9920318725099602 Simulation : 1.902390438247012 Casual : 1.8326693227091633 Video Players & Editors : 1.6832669322709164 Maps & Navigation : 1.3147410358565739 Food & Drink : 1.245019920318725 Puzzle : 1.205179282868526 Racing : 0.9462151394422311 Strategy : 0.9362549800796812 House & Home : 0.8764940239043826 Role Playing : 0.8665338645418327 Libraries & Demo : 0.8366533864541833 Auto & Vehicles : 0.8167330677290837 Weather : 0.7370517928286853 Events : 0.6274900398406374 Adventure : 0.6274900398406374 Comics : 0.5876494023904383 Art & Design : 0.5478087649402391 Beauty : 0.5278884462151394 Parenting : 0.4382470119521913 Education;Education : 0.4382470119521913 Card : 0.40836653386454186 Trivia : 0.3784860557768924 Educational;Education : 0.3784860557768924 Casino : 0.3784860557768924 Board : 0.348605577689243 Educational : 0.32868525896414347 Word : 0.2888446215139442 Entertainment;Music & Video : 0.26892430278884466 Casual;Pretend Play : 0.24900398406374502 Music : 0.20916334661354583 Casual;Action & Adventure : 0.199203187250996 Racing;Action & Adventure : 0.1892430278884462 Puzzle;Brain Games : 0.1693227091633466 Educational;Pretend Play : 0.13944223107569723 Action;Action & Adventure : 0.13944223107569723 Casual;Brain Games : 0.1294820717131474 Arcade;Action & Adventure : 0.1195219123505976 Simulation;Action & Adventure : 0.10956175298804782 Adventure;Action & Adventure : 0.10956175298804782 Entertainment;Brain Games : 0.0796812749003984 Education;Pretend Play : 0.0796812749003984 Board;Brain Games : 0.0796812749003984 Parenting;Education : 0.06972111553784861 Casual;Creativity : 0.06972111553784861 Art & Design;Creativity : 0.06972111553784861 Role Playing;Action & Adventure : 0.0597609561752988 Parenting;Music & Video : 0.0597609561752988 Educational;Brain Games : 0.0597609561752988 Role Playing;Pretend Play : 0.049800796812749 Puzzle;Action & Adventure : 0.049800796812749 Education;Music & Video : 0.049800796812749 Education;Creativity : 0.049800796812749 Educational;Action & Adventure : 0.0398406374501992 Education;Brain Games : 0.0398406374501992 Education;Action & Adventure : 0.0398406374501992 Video Players & Editors;Music & Video : 0.0298804780876494 Simulation;Pretend Play : 0.0298804780876494 Entertainment;Creativity : 0.0298804780876494 Entertainment;Action & Adventure : 0.0298804780876494 Educational;Creativity : 0.0298804780876494 Video Players & Editors;Creativity : 0.0199203187250996 Sports;Action & Adventure : 0.0199203187250996 Puzzle;Creativity : 0.0199203187250996 Music;Music & Video : 0.0199203187250996 Entertainment;Pretend Play : 0.0199203187250996 Casual;Music & Video : 0.0199203187250996 Casual;Education : 0.0199203187250996 Board;Action & Adventure : 0.0199203187250996 Art & Design;Pretend Play : 0.0199203187250996 Art & Design;Action & Adventure : 0.0199203187250996 Adventure;Education : 0.0199203187250996 Trivia;Education : 0.0099601593625498 Travel & Local;Action & Adventure : 0.0099601593625498 Tools;Education : 0.0099601593625498 Strategy;Education : 0.0099601593625498 Strategy;Creativity : 0.0099601593625498 Strategy;Action & Adventure : 0.0099601593625498 Simulation;Education : 0.0099601593625498 Role Playing;Brain Games : 0.0099601593625498 Racing;Pretend Play : 0.0099601593625498 Puzzle;Education : 0.0099601593625498 Parenting;Brain Games : 0.0099601593625498 Music & Audio;Music & Video : 0.0099601593625498 Lifestyle;Pretend Play : 0.0099601593625498 Lifestyle;Education : 0.0099601593625498 Health & Fitness;Education : 0.0099601593625498 Health & Fitness;Action & Adventure : 0.0099601593625498 Entertainment;Education : 0.0099601593625498 Communication;Creativity : 0.0099601593625498 Comics;Creativity : 0.0099601593625498 Card;Brain Games : 0.0099601593625498 Card;Action & Adventure : 0.0099601593625498 Books & Reference;Education : 0.0099601593625498 Arcade;Pretend Play : 0.0099601593625498
display_table(free_app_ios_english, -5) # ios app by prime_genre
Games : 55.64595660749507 Entertainment : 8.234714003944774 Photo & Video : 4.117357001972387 Social Networking : 3.5256410256410255 Education : 3.2544378698224854 Shopping : 2.983234714003945 Utilities : 2.687376725838264 Lifestyle : 2.3175542406311638 Finance : 2.0710059171597637 Sports : 1.947731755424063 Health & Fitness : 1.8737672583826428 Music : 1.6518737672583828 Book : 1.6272189349112427 Productivity : 1.5285996055226825 News : 1.4299802761341223 Travel : 1.3806706114398422 Food & Drink : 1.0601577909270217 Weather : 0.7642998027613412 Reference : 0.4930966469428008 Navigation : 0.4930966469428008 Business : 0.4930966469428008 Catalogs : 0.22189349112426035 Medical : 0.19723865877712032
The Most common ios apps by prime_genre is Games : 55.6% and the runner-up is Entertainment : 8.2%
Other patterns are: ios apps for practical purpose such as education 3.25 %, Shopping : 2.98%, Utilities : 2.68% and Productivity : 1.5% have lower frequencies compared to fun ios apps such as: Games : 55.6%, Entertainment : 8.2%
The general impression is that, ios apps in the Applestore markets are mostly used for fun e.g.games and entertaintment. However, ios apps are less used for practical purposes
Note: We cant recommend app profile for App store market based on the frquency table alone, beacuse,though fun apps are the most numerous doesn't also imply that they also have the greatest number of users — the demand might not be the same as the offer.
Most common apps by category in google play store are: family 17.7%, games 10.56% and tools 7.62%. while tools 7.6%, entertainment 6.0% education 5.2% and business 4.4% are most common app by genres in google play store.
Nevertheless, in the google play store, most apps are designed for practical purposes (family, tools) and less apps designed for fun (entertainment, games) unlike in the ios app appstore in which more apps are designed for fun than practical usage.
Note:To sum up the difference between the two stores: Most apps in ios app store are designed for fun with highest number being games. while in android google app store, there is more balance between apps designed for practical usage and apps designed for fun based on cateorgy and Genres column.
Started by generating frequency table for prime_genre column to get the unique app genres. freq_table () function created previously is used. firstly loop over the unique genres of the App store data set. initiate both total and len_genre and secondly loop over the app store data set. For details, see code below
genre_ios = freq_table(free_app_ios_english,-5)
for genre in genre_ios:
total = 0 # store sum of user ratingsi.e.number of ratings:not actual rating
len_genre = 0 # store number of apps spectific to each genre
for app in free_app_ios_english:
genre_app = app[-5]
if genre_app == genre:
n_ratings = float(app[5])
total += n_ratings
len_genre +=1
avg_number_rating = total / len_genre
print(genre, ':',avg_number_rating )
Social Networking : 53078.195804195806 Photo & Video : 27249.892215568863 Games : 18924.68896765618 Music : 56482.02985074627 Reference : 67447.9 Health & Fitness : 19952.315789473683 Weather : 47220.93548387097 Utilities : 14010.100917431193 Travel : 20216.01785714286 Shopping : 18746.677685950413 News : 15892.724137931034 Navigation : 25972.05 Lifestyle : 8978.308510638299 Entertainment : 10822.961077844311 Food & Drink : 20179.093023255813 Sports : 20128.974683544304 Book : 8498.333333333334 Finance : 13522.261904761905 Education : 6266.333333333333 Productivity : 19053.887096774193 Business : 6367.8 Catalogs : 1779.5555555555557 Medical : 459.75
Based on ios app result above, the recommended profile for developers to develop are: Book & Reference adding up to 75,945.9 users. However, if we analyze book and reference as different app profiles, then social networking profile stands the chance to have more users.
unique_genre_android = freq_table(free_app_android_english, 1)
for category in unique_genre_android:
total = 0
len_category = 0
for app in free_app_android_english:
category_app = app[1]
if category_app == category:
n_installs = app[5]
n_installs = n_installs.replace(',', '')
n_installs = n_installs.replace('+', '')
total += float(n_installs)
len_category += 1
avg_number_installs = total / len_category
print(category, ':', avg_number_installs)
ART_AND_DESIGN : 2005195.1612903227 AUTO_AND_VEHICLES : 647317.8170731707 BEAUTY : 513151.88679245283 BOOKS_AND_REFERENCE : 9465252.512315271 BUSINESS : 2245520.3811659194 COMICS : 934769.1666666666 COMMUNICATION : 90683100.55833334 DATING : 1164270.7356828193 EDUCATION : 5729276.315789473 ENTERTAINMENT : 19516734.69387755 EVENTS : 253542.22222222222 FINANCE : 2511355.6790830945 FOOD_AND_DRINK : 2190710.008 HEALTH_AND_FITNESS : 4869225.852307692 HOUSE_AND_HOME : 1917187.0568181819 LIBRARIES_AND_DEMO : 749950.119047619 LIFESTYLE : 1477863.44077135 GAME : 33048939.16116871 FAMILY : 5742274.952835485 MEDICAL : 147563.28813559323 SOCIAL : 48184458.56849315 SHOPPING : 12588522.03488372 PHOTOGRAPHY : 32218111.54952077 SPORTS : 4860918.563888889 TRAVEL_AND_LOCAL : 27921561.32520325 TOOLS : 14968685.586928105 PERSONALIZATION : 7508854.330097088 PRODUCTIVITY : 35794644.73232323 PARENTING : 542603.6206896552 WEATHER : 5747142.162162162 VIDEO_PLAYERS : 36385565.614035085 NEWS_AND_MAGAZINES : 26677267.829181496 MAPS_AND_NAVIGATION : 5486066.590909091
In average, Communication genre has largest number of users (90,683,100.6) in google store market. Therefore, its recommended for developers to develop Books & Reference and communication apps for the google play store markets.
To get better understanding of communication apps genre in google store, we loop through android data set and grouped the data using if statement into (1,000,000,000+), (500,000,000+) and (100,000,000+). see for loop below
for app in free_app_android_english:
if app[1] == 'COMMUNICATION' and (app[5] == '1,000,000,000+'
or app[5] == '500,000,000+'
or app[5] == '100,000,000+'):
print(app[0], ':', app[5])
Messenger – Text and Video Chat for Free : 1,000,000,000+ WhatsApp Messenger : 1,000,000,000+ Google Chrome: Fast & Secure : 1,000,000,000+ Messenger Lite: Free Calls & Messages : 100,000,000+ Gmail : 1,000,000,000+ Hangouts : 1,000,000,000+ Viber Messenger : 500,000,000+ Firefox Browser fast & private : 100,000,000+ Yahoo Mail – Stay Organized : 100,000,000+ imo beta free calls and text : 100,000,000+ imo free video calls and chat : 500,000,000+ Opera Mini - fast web browser : 100,000,000+ Opera Browser: Fast and Secure : 100,000,000+ Who : 100,000,000+ WeChat : 100,000,000+ UC Browser Mini -Tiny Fast Private & Secure : 100,000,000+ Android Messages : 100,000,000+ Telegram : 100,000,000+ Google Duo - High Quality Video Calls : 500,000,000+ UC Browser - Fast Download Private & Secure : 500,000,000+ WhatsApp Messenger : 1,000,000,000+ Messenger – Text and Video Chat for Free : 1,000,000,000+ imo free video calls and chat : 500,000,000+ Viber Messenger : 500,000,000+ Hangouts : 1,000,000,000+ WeChat : 100,000,000+ Skype - free IM & video calls : 1,000,000,000+ Telegram : 100,000,000+ Who : 100,000,000+ GO SMS Pro - Messenger, Free Themes, Emoji : 100,000,000+ Android Messages : 100,000,000+ LINE: Free Calls & Messages : 500,000,000+ BBM - Free Calls & Messages : 100,000,000+ KakaoTalk: Free Calls & Text : 100,000,000+ Google Chrome: Fast & Secure : 1,000,000,000+ Firefox Browser fast & private : 100,000,000+ Opera Browser: Fast and Secure : 100,000,000+ Opera Mini - fast web browser : 100,000,000+ UC Browser Mini -Tiny Fast Private & Secure : 100,000,000+ UC Browser - Fast Download Private & Secure : 500,000,000+ Viber Messenger : 500,000,000+ Truecaller: Caller ID, SMS spam blocking & Dialer : 100,000,000+ Gmail : 1,000,000,000+ Yahoo Mail – Stay Organized : 100,000,000+ Hangouts : 1,000,000,000+ imo free video calls and chat : 500,000,000+ Viber Messenger : 500,000,000+ Skype - free IM & video calls : 1,000,000,000+ WeChat : 100,000,000+ LINE: Free Calls & Messages : 500,000,000+ KakaoTalk: Free Calls & Text : 100,000,000+ WhatsApp Messenger : 1,000,000,000+ UC Browser - Fast Download Private & Secure : 500,000,000+ Google Chrome: Fast & Secure : 1,000,000,000+ Google Duo - High Quality Video Calls : 500,000,000+ Firefox Browser fast & private : 100,000,000+ Gmail : 1,000,000,000+ Messenger – Text and Video Chat for Free : 1,000,000,000+ Messenger Lite: Free Calls & Messages : 100,000,000+ LINE: Free Calls & Messages : 500,000,000+ imo beta free calls and text : 100,000,000+ Hangouts : 1,000,000,000+ imo free video calls and chat : 500,000,000+ Skype - free IM & video calls : 1,000,000,000+ Kik : 100,000,000+ KakaoTalk: Free Calls & Text : 100,000,000+ Opera Mini - fast web browser : 100,000,000+ Opera Browser: Fast and Secure : 100,000,000+ Telegram : 100,000,000+ Truecaller: Caller ID, SMS spam blocking & Dialer : 100,000,000+ UC Browser Mini -Tiny Fast Private & Secure : 100,000,000+ Viber Messenger : 500,000,000+ WeChat : 100,000,000+ Yahoo Mail – Stay Organized : 100,000,000+ BBM - Free Calls & Messages : 100,000,000+
Since we classified communication apps genre in google store into (1,000,000,000+), (500,000,000+) and (100,000,000+) above, we still cant tell exactly which communication app is dominant. There fore to get better understanding on this, we loop again through android data, got the average on each as seen below.
less_100_million = []
for app in free_app_android_english:
n_installs = app[5]
n_installs = n_installs.replace(',', '')
n_installs = n_installs.replace('+', '')
if (app[1] == 'COMMUNICATION') and (float(n_installs) <= 100000000):
less_100_million.append(float(n_installs))
sum(less_100_million) / len(less_100_million)
15882457.410493826
between_1_billion_and_500_million = []
for app in free_app_android_english:
n_installs = app[5]
n_installs = n_installs.replace(',', '')
n_installs = n_installs.replace('+', '')
if (app[1] == 'COMMUNICATION') and (float(n_installs) <= 1000000000) or (float(n_installs)>500000000 ):
between_1_billion_and_500_million.append(float(n_installs))
sum(between_1_billion_and_500_million) / len(between_1_billion_and_500_million)
179563699.7518797
less_500_million_and_above_100_million = []
for app in free_app_android_english:
n_installs = n_installs.replace(',', '')
n_installs = n_installs.replace('+', '')
if (app[1] == 'COMMUNICATION') and (float(n_installs) <= 500000000) or (float(n_installs)>100000000 ):
less_500_million_and_above_100_million.append(float(n_installs))
sum(less_500_million_and_above_100_million) / len(less_500_million_and_above_100_million)
10000000.0
From computed averages above, the trend shows that, most communication apps are between (100,000,000+ and 500,000,000+) with average of (179563699.8), followed by apps less 100+ million with average of (15882457.4)and lastly with apps less 500 million(10000000.0). More precisely, vedio category is dominant among communication apps (messenger, imo, google Duo and skype vedio call), followed by google category(google chrome, gmail).
for app in free_app_android_english:
if app[1] == 'BOOKS_AND_REFERENCE' and (app[5] == '1,000,000,000+'
or app[5] == '500,000,000+'
or app[5] == '100,000,000+'):
print(app[0], ':', app[5])
Wattpad 📖 Free Books : 100,000,000+ Amazon Kindle : 100,000,000+ Google Play Books : 1,000,000,000+ Bible : 100,000,000+ Amazon Kindle : 100,000,000+ Wattpad 📖 Free Books : 100,000,000+ Audiobooks from Audible : 100,000,000+
under_100_million = []
for app in free_app_android_english:
n_installs = app[5]
n_installs = n_installs.replace(',', '')
n_installs = n_installs.replace('+', '')
if (app[1] == 'BOOKS_AND_REFERENCE') and (float(n_installs) <= 100000000):
under_100_million.append(float(n_installs))
sum(under_100_million) / len(under_100_million)
4561615.148514852
less_or_equal_1_billion = []
for app in free_app_android_english:
n_installs = app[5]
n_installs = n_installs.replace(',', '')
n_installs = n_installs.replace('+', '')
if (app[1] == 'BOOKS_AND_REFERENCE') and (float(n_installs) <= 1000000000):
less_or_equal_1_billion.append(float(n_installs))
sum(less_or_equal_1_billion) / len(less_or_equal_1_billion)
9465252.512315271
For Books and Reference genre, Google Play Books has highest number of installs (1,000,000,000+). However, giants like Amazon Kindle and Wattpad 📖 Free Books are the major competitors in the google markets.
Our project objective is to help developers undersatand the type of apps that can attaract more users on Google Play and App Store Markets. Based on the analysis, it can be clearly seen that, google markets has lots of libraries for books. My recommendation for developers is to add motivational codes for readers. In addition, cummunication app genres such messenger vedio calls, skype vedio are dominant. This genre(communication) has more competitors, thus might not be financially attractive for delelopers to invest on it.