Title: Profitable App Profiles for the App Store and Google Play Markets
Introduction: This project analyses how many users our apps attract and which kinds of apps attract the most users on Google Play and the App Store.
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of a data set, optionally followed by its dimensions.

    Args:
        dataset: A list of rows (each row itself a list), without a header.
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the total number of rows
            and the number of columns.

    The column count is taken from the first row; an empty data set is
    reported as having 0 columns instead of raising IndexError.
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # adds a new (empty) line after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # Guard the first-row lookup so an empty data set does not crash.
        print('Number of columns:', len(dataset[0]) if dataset else 0)
from csv import reader
# Load both CSV exports fully into memory, splitting each into a header row
# and a list of data rows.
# The files are opened in a `with` block so the handles are closed as soon as
# the rows have been read. `newline=''` is what the csv module expects, and an
# explicit encoding avoids depending on the platform default.
# NOTE(review): assumes both files are UTF-8 encoded -- confirm.
with open('AppleStore.csv', encoding='utf8', newline='') as opened_file:
    read_file = reader(opened_file)
    apps_data = list(read_file)
appsapple_header = apps_data[0]
appsapple_data = apps_data[1:]

with open('googleplaystore.csv', encoding='utf8', newline='') as opened_file2:
    read_file2 = reader(opened_file2)
    appsg_data = list(read_file2)
appsgoogle_header = appsg_data[0]
appsgoogle_data = appsg_data[1:]

# Show the Google Play header and the first two rows, plus the dimensions.
print(appsgoogle_header)
print('\n')
explore_data(appsgoogle_data, 0, 2, True)
['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver'] ['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up'] Number of rows: 10841 Number of columns: 13
# Optional check before deleting: inspect the row and the current row count.
#print(appsgoogle_data[10472])
#print(len(appsgoogle_data))
# Remove the record at index 10472 from the Google Play rows, in place.
# NOTE(review): presumably this is a malformed record -- confirm by printing
# it first. The deletion is position-based: every later row shifts up by one,
# so executing this line again would delete a different record.
del appsgoogle_data[10472] # don't run this more than once
#print(len(appsgoogle_data))
The Google Play data set has duplicate entries.
# Spot-check: uncomment to print every record stored under one app name.
#for app in appsgoogle_data:
#    name = app[0]
#    if name == 'Instagram':
#        print(app)

# Count how many Google Play records repeat an app name already seen.
# `unique_apps` keeps the first-seen order of names; `duplicate_apps` gets one
# entry per repeated record. Membership is tested against a set so the pass
# is linear instead of scanning the `unique_apps` list for every row.
duplicate_apps = []
unique_apps = []
seen_names = set()
for app in appsgoogle_data:
    name = app[0]
    if name in seen_names:
        duplicate_apps.append(name)
    else:
        seen_names.add(name)
        unique_apps.append(name)

print('Number of duplicate apps:', len(duplicate_apps))
Number of duplicate apps: 1181
We will remove the duplicates, keeping for each app only the record with the highest number of reviews, since that is probably the most recent record.
# Map each app name to the highest review count found among its records.
reviews_max = {}
for app in appsgoogle_data:
    name, n_reviews = app[0], float(app[3])
    # Store the count for a name seen for the first time, or when it beats
    # the best count recorded so far for that name.
    if name not in reviews_max or reviews_max[name] < n_reviews:
        reviews_max[name] = n_reviews
# Build the de-duplicated Google Play list: for every app name keep the first
# record whose review count equals the maximum stored in `reviews_max`.
# `already_added` records the kept names in order; the companion set makes the
# "already kept?" test constant-time instead of a scan of that list per row.
android_clean = []
already_added = []
added_names = set()
for app in appsgoogle_data:
    name = app[0]
    n_reviews = float(app[3])
    # The name check matters when several records tie on the maximum count.
    if reviews_max[name] == n_reviews and name not in added_names:
        android_clean.append(app)
        already_added.append(name)
        added_names.add(name)

print(len(android_clean))
9659
def check_string(appname, *, max_non_ascii=3):
    """Return True when *appname* has at most *max_non_ascii* non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127.
    Allowing a few such characters keeps names that contain an emoji or a
    trademark sign while rejecting names written mostly in other scripts.

    Args:
        appname: The app name to examine.
        max_non_ascii: Largest number of non-ASCII characters still accepted
            (defaults to 3).

    Returns:
        False if the name holds more than *max_non_ascii* non-ASCII
        characters, True otherwise (including for an empty string).

    Examples:
        >>> check_string('Instagram')
        True
        >>> check_string('爱奇艺PPS -《欢乐颂2》电视剧热播')
        False
        >>> check_string('Docs To Go™ Free Office Suite')
        True
        >>> check_string('Instachat 😜')
        True
    """
    non_ascii_count = sum(1 for char in appname if ord(char) > 127)
    return non_ascii_count <= max_non_ascii
# Keep only the records whose app name passes check_string().
# The Google Play name is column 0 ('App' in its header).
# NOTE(review): the Apple rows were previously tested on column 0 as well, and
# the reported result equalled 7197 rows, which suggests nothing was filtered.
# In the commonly distributed AppleStore.csv the name lives in a 'track_name'
# column rather than the first column -- confirm against appsapple_header.
# The column is looked up by header name, falling back to column 0 (the
# previous behaviour) when no 'track_name' header exists.
if 'track_name' in appsapple_header:
    apple_name_col = appsapple_header.index('track_name')
else:
    apple_name_col = 0

android_english = [row for row in android_clean if check_string(row[0])]
apple_english = [
    row for row in appsapple_data if check_string(row[apple_name_col])
]

print('Lenght of android english: ', len(android_english))
print('\n')
print('Length of apple_english: ', len(apple_english))
Lenght of android english: 9614 Length of apple_english: 7197