#!/usr/bin/env python
# coding: utf-8

# # Profitable App Profiles for the App Store and Google Play Markets
#
# Our aim in this project is to find mobile app profiles that are profitable
# for the App Store and Google Play markets. We're working as data analysts
# for a company that builds Android and iOS mobile apps, and our job is to
# enable our team of developers to make data-driven decisions with respect to
# the kind of apps they build.
#
# At our company, we only build apps that are free to download and install,
# and our main source of revenue consists of in-app ads. This means that our
# revenue for any given app is mostly influenced by the number of users that
# use our app. Our goal for this project is to analyze data to help our
# developers understand what kinds of apps are likely to attract more users.

# In[19]:

from csv import reader

# Both files are read inside `with` blocks so the handles are closed as soon
# as the rows are in memory. newline='' is what the csv module asks for, so
# that line breaks embedded in quoted fields are parsed correctly.
# NOTE(review): encoding='utf8' assumes both CSV files are UTF-8 - confirm.

### The Google Play data set ###
with open('googleplaystore.csv', encoding='utf8', newline='') as opened_file:
    read_file = reader(opened_file)
    android = list(read_file)
android_header = android[0]  # first row holds the column names
android = android[1:]

### The App Store data set ###
with open('AppleStore.csv', encoding='utf8', newline='') as opened_file:
    read_file = reader(opened_file)
    ios = list(read_file)
ios_header = ios[0]  # first row holds the column names
ios = ios[1:]

# In[20]:


def explore_data(dataset, start, end, rows_and_columns=False):
    """Print the rows ``dataset[start:end]``, each followed by a blank line.

    Args:
        dataset: List of rows (each row a list of values), without header.
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the total number of rows in
            ``dataset`` and the number of columns in its first row (0 when
            ``dataset`` is empty).
    """
    dataset_slice = dataset[start:end]
    for row in dataset_slice:
        print(row)
        print('\n')  # adds a new (empty) line between rows

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError.
        print('Number of columns:', len(dataset[0]) if dataset else 0)


print(android_header)
print('\n')
explore_data(android, 0, 3, True)

# In[21]:

print(len(android))
print(len(ios))

# incorrect data
print(android[10472])

# In[22]:

# remove the row with wrong data
# Deleting by position is not idempotent: executing this statement a second
# time would remove a valid row.
del android[10472]  # run this once only

# In[23]:

# check total rows again
print(len(android))

# In[24]:

# check app data and how many rows associated with the app
for app in android:
    name = app[0]
    if name == 'Facebook':
        print(app)

# ## Check duplicates

# In[25]:

# duplicates in android dataset
duplicates = []
unique = []
# Split the app names into first occurrences (`unique`) and repeats
# (`duplicates`). `seen_names` mirrors `unique` as a set so each membership
# test is O(1) instead of a linear scan of the list.
seen_names = set(unique)
for app in android:
    name = app[0]
    if name not in seen_names:
        seen_names.add(name)
        unique.append(name)
    else:
        duplicates.append(name)

print(len(duplicates))
print(len(unique))
print('Number of duplicate apps:', len(duplicates))
print('\n')
print('Examples of duplicate apps:', duplicates[:15])

# In[26]:

# dictionary code to remove duplicate entries
# Map each app name to the highest review count found among its rows.
review_max = {}
for apps in android:
    name = apps[0]
    # Read the review count from the row being iterated (`apps`). Reading it
    # from the stale `app` variable left over from the previous loop would
    # assign the same value to every name.
    n_reviews = float(apps[3])
    if name not in review_max or review_max[name] < n_reviews:
        review_max[name] = n_reviews

# In[27]:

# check length
# One entry per distinct name is expected, i.e. the row count minus the
# number of repeated names counted above.
print('Expected length:', len(android) - len(duplicates))
print('Actual length:', len(review_max))

# In[28]:

# removing duplicate rows
# Keep, for every app name, the first row whose review count equals the
# maximum recorded in `review_max`.
android_clean = []
already_added = []
added_names = set()  # set mirror of `already_added` for O(1) lookups
for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if (review_max[name] == n_reviews) and (name not in added_names):
        android_clean.append(app)
        # Record the name only when the row is kept (inside the if block);
        # otherwise rows that tie on the maximum would be blocked or doubled.
        already_added.append(name)
        added_names.add(name)

print(len(android_clean))

# In[29]:

explore_data(android_clean, 0, 3, True)