#!/usr/bin/env python
# coding: utf-8

# # User Rating Data Analysis
# We only build apps that are free to download and install, and our main
# source of revenue consists of in-app ads. This means that the number of
# users of our apps determines our revenue for any given app: the more users
# who see and engage with the ads, the better.
#
# Our goal for this project is to analyze data to help our developers
# understand what type of apps are likely to attract more users.
#
# ## Opening and Exploring Dataset

# In[1]:


from csv import reader

# NOTE(review): both CSV files are assumed to be UTF-8 encoded -- confirm.
# `newline=''` is what the csv module expects so that quoted fields containing
# line breaks are parsed correctly.

### Google Play data set ###
with open('googleplaystore.csv', encoding='utf-8', newline='') as opened_file:
    read_file = reader(opened_file)
    android = list(read_file)
android_header = android[0]
android = android[1:]

### App Store data set ###
with open('AppleStore.csv', encoding='utf-8', newline='') as opened_file:
    read_file = reader(opened_file)
    ios = list(read_file)
ios_header = ios[0]
ios = ios[1:]


# In[2]:


def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of a data set and, optionally, its dimensions.

    Args:
        dataset: List of rows (each row a list), without the header row.
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the number of rows and the
            number of columns (taken from the first row; 0 if there are no
            rows).
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # blank separator after every row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty data set has no first row to measure.
        print('Number of columns:', len(dataset[0]) if dataset else 0)


print(android_header)
print('\n')
explore_data(android, 0, 4, True)

print(ios_header)
print('\n')
explore_data(ios, 0, 4, True)


# In[3]:


# NOTE(review): row 10472 is presumably a malformed record -- confirm against
# the printed output. The deletion is by position, so running this cell a
# second time would remove a different row.
print(android[10472])
del android[10472]
print(len(android))


# ## Removing Duplicates
# The Google Play data set has duplicate entries that may distort our
# analysis, so we need to remove them in order to keep only unique data.
# ### Part One

# In[4]:


for app in android:
    name = app[0]
    if name == 'Instagram':
        print(app)

duplicate_apps = []
unique_apps = []
seen_names = set()  # O(1) membership test; unique_apps keeps first-seen order

for app in android:
    name = app[0]
    if name in seen_names:
        duplicate_apps.append(name)
    else:
        seen_names.add(name)
        unique_apps.append(name)

print('Number of duplicate apps:', len(duplicate_apps))
print('\n')
print('Examples of duplicate apps:', duplicate_apps[:15])


# ### Part Two

# In[5]:


# For every app name keep the highest review count seen: the entry with the
# most reviews is treated as the one to keep.
reviews_max = {}

for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if name not in reviews_max or reviews_max[name] < n_reviews:
        reviews_max[name] = n_reviews

# Every duplicate entry found above should disappear, so derive the expected
# size from that count instead of hard-coding it.
print('Expected length:', len(android) - len(duplicate_apps))
print('Actual length:', len(reviews_max))


# In[6]:


android_clean = []
already_added = set()  # names already kept; guards against ties on the max

for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if reviews_max[name] == n_reviews and name not in already_added:
        android_clean.append(app)
        already_added.add(name)

explore_data(android_clean, 0, 3, True)


# ## Removing Non-English Apps
# ### Part One

# In[7]:


def is_english(string):
    """Return True only if every character of ``string`` is ASCII (<= 127)."""
    for character in string:
        if ord(character) > 127:
            return False
    return True


print(is_english('Instagram'))
print(is_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))
print(is_english('Docs To Go™ Free Office Suite'))
print(is_english('Instachat 😜'))


# ### Part Two

# In[8]:


def is_english(string, max_non_ascii=3):
    """Return True if ``string`` has at most ``max_non_ascii`` non-ASCII chars.

    The strict all-ASCII check rejects English names that contain an emoji or
    a symbol such as the trademark sign, so a few characters with a code point
    above 127 are tolerated.

    Args:
        string: The app name to test.
        max_non_ascii: Largest number of non-ASCII characters still accepted.

    Returns:
        False as soon as more than ``max_non_ascii`` characters have a code
        point above 127, True otherwise.
    """
    non_ascii = 0
    for character in string:
        if ord(character) > 127:
            non_ascii += 1  # was `=+ 1`, which reset the counter to 1 each time
            if non_ascii > max_non_ascii:
                return False
    return True


print(is_english('Docs To Go™ Free Office Suite'))
print(is_english('Instachat 😜'))


# In[9]:


# The app name is column 0 in the Google Play rows and column 1 in the
# App Store rows.
android_english = [app for app in android_clean if is_english(app[0])]
ios_english = [app for app in ios if is_english(app[1])]

explore_data(android_english, 0, 3, True)
print('\n')
explore_data(ios_english, 0, 3, True)


# ## Isolating the Free Apps

# In[10]:


# Filter the de-duplicated, English-only lists. Filtering the raw `android`
# and `ios` lists here would silently undo the cleaning steps above.
android_final = []
ios_final = []

for app in android_english:
    price = app[7]
    if price == '0':
        android_final.append(app)

for app in ios_english:
    price = app[4]
    if price == '0.0':
        ios_final.append(app)

print(len(android_final))
print(len(ios_final))


# ## Most Common Apps by Genre
# ### Part One
# As we mentioned earlier, our objective is to identify the types of apps that
# have a high response rate from users, since the number of users of our apps
# is directly proportional to our revenue. To mitigate risk and overhead, our
# strategy for a new app has three steps:
#
# * Build a minimal Android version of the app, and add it to Google Play.
# * If the app has a good response from users, we develop it further.
# * If the app is profitable after six months, we build an iOS version of the
#   app and add it to the App Store.
#
# Because our end goal is to add the app to both Google Play and the App
# Store, we need to find app profiles that are successful in both markets.

# ### Part Two

# In[11]:


def freq_table(dataset, index):
    """Return the percentage frequency of each value found in one column.

    Args:
        dataset: List of rows (each row a list).
        index: Position of the column to tabulate.

    Returns:
        Dict mapping each distinct value of ``row[index]`` to the percentage
        of rows holding it. Empty when ``dataset`` has no rows.
    """
    counts = {}
    total = 0
    for row in dataset:
        total += 1
        value = row[index]
        counts[value] = counts.get(value, 0) + 1

    return {value: (count / total) * 100 for value, count in counts.items()}


def display_table(dataset, index):
    """Print the frequency table of column ``index``, largest share first."""
    table = freq_table(dataset, index)
    # Sort (percentage, value) pairs so ordering is by percentage, with ties
    # broken by the value itself.
    table_sorted = sorted(
        ((percentage, key) for key, percentage in table.items()),
        reverse=True,
    )
    for percentage, key in table_sorted:
        print(key, ':', percentage)


print(freq_table(ios_final, 11))
print('\n')
print(freq_table(android_final, 9))
print('\n')
print(freq_table(android_final, 1))


# ### Part Three

# In[12]:


display_table(ios_final, 11)


# NOTE(review): the percentages quoted below were recorded from an earlier
# run, before the free-app filter used the English-only data; re-run and
# update them.
#
# Among the free English apps, 55.64% are games (more than half).
# Entertainment apps are close to 8%, followed by photo and video apps, which
# are 4%. 3.25% of the apps are designed for education, followed by social
# networking apps, which account for 3.52% of the apps in our data set.
#
# The general conclusion from our data set is that the App Store is dominated
# by apps that are designed for fun (games, entertainment, photo and video,
# social networking, sports, music, etc.), while apps with practical purposes
# (education, shopping, utilities, productivity, lifestyle, etc.) are rarer.
#
# The fact that fun apps are the most numerous doesn't imply that they also
# have the greatest number of users — the demand might not be the same as the
# offer.

# In[13]:


# Column 1 of the Google Play rows is tabulated here; the commentary below
# treats it as the category of each app.
display_table(android_final, 1)


# The most common category is family, with 17.74%, followed by game, which is
# close to 12%. Business is at 4.4%, followed closely by productivity, which
# is close to 4%. The trend seems significantly different on Google Play
# compared to the App Store.
#
# There are not that many apps designed for fun, and it seems that a good
# number of apps are designed for practical purposes (family, tools, business,
# lifestyle, productivity, etc.).

# In[14]:


# Column 9 of the Google Play rows is tabulated here; the commentary below
# refers to it as the Genres column.
display_table(android_final, 9)


# There is only a thin line separating the Genres and Category columns in our
# data set, but one observable difference is that the Genres column has more
# distinct values than the Category column.
#
# Up to this point, we found that the App Store is dominated by apps designed
# for fun, while Google Play shows a more balanced landscape of both practical
# and for-fun apps.
# ## Most Popular Apps by Genre on the App Store

# In[15]:


def _average_by_group(dataset, group_index, value_of):
    """Return ``{group: mean value}`` computed in a single pass.

    Args:
        dataset: List of rows (each row a list).
        group_index: Position of the column holding the group label.
        value_of: Callable mapping a row to the number to be averaged.

    Returns:
        Dict keyed by group label, in the order the labels first appear in
        ``dataset``, holding the arithmetic mean of ``value_of(row)`` over
        the rows of that group.
    """
    totals = {}
    counts = {}
    for row in dataset:
        group = row[group_index]
        totals[group] = totals.get(group, 0) + value_of(row)
        counts[group] = counts.get(group, 0) + 1
    return {group: totals[group] / counts[group] for group in totals}


def _parse_installs(app):
    """Return column 5 of ``app`` as a float after dropping ',' and '+'."""
    return float(app[5].replace(',', '').replace('+', ''))


# One pass over the data replaces a full rescan of `ios_final` per genre; the
# printed values and their order are unchanged.
genres_ios = freq_table(ios_final, 11)
avg_ratings_ios = _average_by_group(ios_final, 11, lambda app: float(app[5]))

for genre in genres_ios:
    print(genre, ':', avg_ratings_ios[genre])


# On average, reference apps have the highest number of user reviews. This is
# perhaps heavily influenced by a few big players like Bible and
# Dictionary.com.

# ## Most Popular Apps by Genre on Google Play

# In[18]:


category_android = freq_table(android_final, 1)
avg_installs_android = _average_by_group(android_final, 1, _parse_installs)

for category in category_android:
    print(category, ':', avg_installs_android[category])


# On average, communication apps in the Google Play data set have the highest
# number of user installations. This is perhaps heavily influenced by a few
# apps that have a huge number of installs (Facebook, WhatsApp, Gmail, Google
# Chrome, etc.).
#
# Social apps follow (though by a wide margin), with close to 50,000,000
# installs.
#
# The books and reference genre looks fairly popular as well, with an average
# number of installs of 9,465,252. This is interesting, since we found this
# genre has some potential to work well on the App Store, and our aim is to
# recommend an app genre that shows potential for being profitable on both
# the App Store and Google Play.

# ## Conclusion
# In this project, we analyzed data about the App Store and Google Play mobile
# apps with the goal of recommending an app profile that can be profitable for
# both markets.
#
# We concluded that taking a popular book and turning it into an app could be
# profitable for both the Google Play and the App Store markets. The markets
# are already full of libraries, so we need to add some special features
# besides the raw version of the book. This might include daily quotes from
# the book, an audio version of the book, quizzes on the book, a forum where
# people can discuss the book, etc.