User Engaging Mobile Apps

In this project we look at what makes some mobile apps more engaging than others, and at where development effort should be focused.

The goal is to better understand what differentiates apps from one another and which categories are most interesting to users.

In [1]:
from csv import reader

# Load both datasets into lists of rows (each row is a list of strings).
# The files are opened with context managers so the handles are closed as soon
# as the rows have been read. The encoding is explicit because the app names
# contain non-ASCII characters and the platform default is not always UTF-8;
# newline='' is what the csv module asks for when it is handed a file object.
with open('AppleStore.csv', encoding='utf8', newline='') as opened_file:
    read_file_ios = reader(opened_file)
    ios_data = list(read_file_ios)
ios_header = ios_data[0]  # first row holds the column names
ios = ios_data[1:]        # remaining rows are the app records

with open('googleplaystore.csv', encoding='utf8', newline='') as opened_file_andr:
    read_file_android = reader(opened_file_andr)
    android_data = list(read_file_android)
android_header = android_data[0]  # first row holds the column names
android = android_data[1:]        # remaining rows are the app records

# Quick sanity check: show the first two records of each dataset.
print(ios[:2])
print('\n')
print(android[:2])
[['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'], ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']]


[['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'], ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']]
In [2]:
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of ``dataset`` and, optionally, its dimensions.

    Args:
        dataset: A list of rows, where each row is itself a list.
        start: Index of the first row to print (slice start).
        end: Index one past the last row to print (slice end).
        rows_and_columns: When true, also print the total number of rows and
            the number of columns, measured on the first row.

    Returns:
        None. Everything is written to standard output.
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # leaves two blank lines after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError.
        n_columns = len(dataset[0]) if len(dataset) > 0 else 0
        print('Number of columns:', n_columns)

# Preview the first three iOS rows and print the dataset's row/column counts.
explore_data(ios,0,3,True)
['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


Number of rows: 7197
Number of columns: 16
In [3]:
# Preview the first three Android rows and print the dataset's row/column counts.
explore_data(android,0,3,True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


Number of rows: 10841
Number of columns: 13
In [4]:
# Drop the row at the hard-coded index 10472 from the Google Play data.
# NOTE(review): presumably this row is malformed -- the reason is not shown
# here; confirm before relying on it. The delete is positional, so running
# this cell a second time would remove a different row.
del android[10472]
print(len(android))
10840

The next step is to check whether the dataset contains duplicate apps, using Instagram as an example. When an app appears more than once, we keep the entry with the most reviews, on the assumption that the more reviews an entry has, the more up to date its data is.

In [5]:
# Print every Google Play row whose name (column 0) is exactly 'Instagram'.
for app in android:
    if app[0] == 'Instagram':
        print(app)
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']

As predicted, there are duplicate entries in the list. Next, I will count the duplicates and compare that number with the number of unique apps.

In [6]:
# Split the app names into first occurrences (unique_apps) and repeats
# (duplicate_apps). A name that occurs k times contributes one entry to
# unique_apps and k - 1 entries to duplicate_apps.
duplicate_apps = []
unique_apps = []
# Membership is tested against a set so each lookup is O(1); unique_apps is
# still kept as a list so the first-seen order is preserved.
seen_names = set()

for app in android:
    name = app[0]
    if name in seen_names:
        duplicate_apps.append(name)
    else:
        seen_names.add(name)
        unique_apps.append(name)

print('Number of duplicates:', len(duplicate_apps))
print('\n')
print('Number of uniques:', len(unique_apps))
print('\n')
print('Examples of duplicate apps:', duplicate_apps[:10])
Number of duplicates: 1181


Number of uniques: 9659


Examples of duplicate apps: ['Quick PDF Scanner + OCR FREE', 'Box', 'Google My Business', 'ZOOM Cloud Meetings', 'join.me - Simple Meetings', 'Box', 'Zenefits', 'Google Ads', 'Google My Business', 'Slack']
In [7]:
# Remove duplicate apps, keeping for each name the row with the most reviews.
#
# This has to be done in two passes. Deciding inside the same loop that
# builds reviews_max would always keep the FIRST row of each name, because
# right after a name is first stored its count trivially equals the maximum
# seen so far.
reviews_max = {}    # app name -> highest review count among its rows
android_clean = []  # one row per app name
already_added = []  # names already copied into android_clean, in order

# Pass 1: find the highest review count (column 3) for every app name.
for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if name not in reviews_max or reviews_max[name] < n_reviews:
        reviews_max[name] = n_reviews

# Pass 2: keep the first row that carries that highest count. The set gives
# O(1) "already kept?" checks, which also covers the case where several rows
# of one app share the same maximum.
added_names = set()
for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if reviews_max[name] == n_reviews and name not in added_names:
        android_clean.append(app)
        added_names.add(name)
        already_added.append(name)

print(len(reviews_max))
print(len(android_clean))
9659
9659
In [8]:
def common_english(string):
    """Return True when every character of ``string`` has a code point <= 127.

    A single character above that range makes the result False; the empty
    string yields True.
    """
    return all(ord(symbol) <= 127 for symbol in string)


print(common_english('Instagram'))
print(common_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))
            
True
False
In [9]:
def common_english(string, max_non_ascii=3):
    """Tell whether ``string`` contains at most a few non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127.
    Allowing a small number of them keeps names that merely contain a symbol
    or an emoji (for example a trademark sign) from being rejected.

    Args:
        string: The text to inspect.
        max_non_ascii: Largest number of non-ASCII characters still accepted.
            Defaults to 3.

    Returns:
        True if the number of non-ASCII characters is <= ``max_non_ascii``,
        otherwise False.
    """
    non_ascii = sum(1 for character in string if ord(character) > 127)
    return non_ascii <= max_non_ascii


print(common_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))
print(common_english('Docs To Go™ Free Office Suite'))
print(common_english('Instachat 😜'))
False
True
True
In [10]:
# Keep only the apps whose name passes the common_english check. The name is
# column 0 in the Google Play rows and column 1 in the App Store rows.
android_english = [app for app in android_clean if common_english(app[0])]
ios_english = [app for app in ios if common_english(app[1])]

explore_data(android_english, 0, 3, True)
print('\n')
explore_data(ios_english, 0, 3, True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


Number of rows: 9614
Number of columns: 13


['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


Number of rows: 6183
Number of columns: 16
In [11]:
# Keep only the free apps. The price is compared as text: '0' in column 7 of
# the Google Play rows and '0.0' in column 4 of the App Store rows.
android_final = [app for app in android_english if app[7] == '0']
ios_final = [app for app in ios_english if app[4] == '0.0']

print(len(android_final))
print(len(ios_final))
8862
3222

Our goal is to launch the app on both the App Store and Google Play to maximize user reach. The focus is on free apps aimed at English speakers.

In [12]:
def freq_table(dataset, index):
    """Build a percentage frequency table for one column of ``dataset``.

    Args:
        dataset: An iterable of rows; each row must support ``row[index]``.
        index: Position of the column to tabulate.

    Returns:
        A dict mapping each distinct column value to the percentage of rows
        (0-100) that hold it, in first-seen order. An empty dataset yields
        an empty dict.
    """
    counts = {}
    n_rows = 0
    for record in dataset:
        n_rows += 1
        cell = record[index]
        counts[cell] = counts.get(cell, 0) + 1

    return {cell: (count / n_rows) * 100 for cell, count in counts.items()}

def display_table(dataset, index):
    """Print the frequency table of one column, largest percentage first.

    The percentages come from ``freq_table(dataset, index)``. Each line is
    printed as ``value : percentage``; entries with equal percentages are
    ordered by value, descending.
    """
    percentages = freq_table(dataset, index)
    # Put the percentage first in each pair so that sorting ranks by it.
    ranked = [(share, value) for value, share in percentages.items()]
    ranked.sort(reverse=True)
    for share, value in ranked:
        print(value, ':', share)
In [13]:
# Percentage of the free English iOS apps per value of column -5 (the genre
# in the sample rows, e.g. 'Social Networking'), largest share first.
display_table(ios_final, -5)
Games : 58.16263190564867
Entertainment : 7.883302296710118
Photo & Video : 4.9658597144630665
Education : 3.662321539416512
Social Networking : 3.2898820608317814
Shopping : 2.60707635009311
Utilities : 2.5139664804469275
Sports : 2.1415270018621975
Music : 2.0484171322160147
Health & Fitness : 2.0173805090006205
Productivity : 1.7380509000620732
Lifestyle : 1.5828677839851024
News : 1.3345747982619491
Travel : 1.2414649286157666
Finance : 1.1173184357541899
Weather : 0.8690254500310366
Food & Drink : 0.8069522036002483
Reference : 0.5586592178770949
Business : 0.5276225946617008
Book : 0.4345127250155183
Navigation : 0.186219739292365
Medical : 0.186219739292365
Catalogs : 0.12414649286157665
In [14]:
# Percentage of the free English Android apps per value of column -4 (the
# genre in the sample rows, e.g. 'Art & Design'), largest share first.
display_table(android_final, -4)
Tools : 8.429248476641842
Entertainment : 6.070864364703228
Education : 5.348679756262695
Business : 4.5926427443015125
Productivity : 3.8930264048747465
Lifestyle : 3.8930264048747465
Finance : 3.7011961182577298
Medical : 3.5206499661475967
Sports : 3.4642292936131795
Personalization : 3.3175355450236967
Communication : 3.238546603475513
Action : 3.1031369893929135
Health & Fitness : 3.080568720379147
Photography : 2.945159106296547
News & Magazines : 2.798465357707064
Social : 2.663055743624464
Travel & Local : 2.324531708417964
Shopping : 2.2455427668697814
Books & Reference : 2.143985556307831
Simulation : 2.0424283457458814
Dating : 1.8618821936357481
Arcade : 1.8505980591288649
Video Players & Editors : 1.7716091175806816
Casual : 1.7603249830737984
Maps & Navigation : 1.399232678853532
Food & Drink : 1.2412547957571656
Puzzle : 1.128413450688332
Racing : 0.9930038366057323
Role Playing : 0.9365831640713158
Libraries & Demo : 0.9365831640713158
Auto & Vehicles : 0.9252990295644324
Strategy : 0.9140148950575491
House & Home : 0.8350259535093659
Weather : 0.8011735499887158
Events : 0.7109004739336493
Adventure : 0.6770480704129994
Comics : 0.6093432633716994
Beauty : 0.598059128864816
Art & Design : 0.598059128864816
Parenting : 0.49650191830286616
Card : 0.45136538027533285
Casino : 0.4287971112615662
Trivia : 0.41751297675468296
Educational;Education : 0.3949447077409162
Educational : 0.3723764387271496
Board : 0.3723764387271496
Education;Education : 0.3385240352064997
Word : 0.2595350936583164
Casual;Pretend Play : 0.23696682464454977
Music : 0.2031144211238998
Racing;Action & Adventure : 0.16926201760324985
Puzzle;Brain Games : 0.16926201760324985
Entertainment;Music & Video : 0.16926201760324985
Casual;Brain Games : 0.13540961408259986
Casual;Action & Adventure : 0.13540961408259986
Arcade;Action & Adventure : 0.12412547957571654
Action;Action & Adventure : 0.1015572105619499
Educational;Pretend Play : 0.09027307605506657
Board;Brain Games : 0.09027307605506657
Simulation;Action & Adventure : 0.07898894154818326
Parenting;Education : 0.07898894154818326
Entertainment;Brain Games : 0.07898894154818326
Parenting;Music & Video : 0.06770480704129993
Educational;Brain Games : 0.06770480704129993
Casual;Creativity : 0.06770480704129993
Art & Design;Creativity : 0.06770480704129993
Education;Pretend Play : 0.056420672534416606
Role Playing;Pretend Play : 0.045136538027533285
Education;Creativity : 0.045136538027533285
Role Playing;Action & Adventure : 0.033852403520649964
Puzzle;Action & Adventure : 0.033852403520649964
Entertainment;Creativity : 0.033852403520649964
Entertainment;Action & Adventure : 0.033852403520649964
Educational;Creativity : 0.033852403520649964
Educational;Action & Adventure : 0.033852403520649964
Education;Music & Video : 0.033852403520649964
Education;Brain Games : 0.033852403520649964
Education;Action & Adventure : 0.033852403520649964
Adventure;Action & Adventure : 0.033852403520649964
Video Players & Editors;Music & Video : 0.022568269013766643
Sports;Action & Adventure : 0.022568269013766643
Simulation;Pretend Play : 0.022568269013766643
Puzzle;Creativity : 0.022568269013766643
Music;Music & Video : 0.022568269013766643
Entertainment;Pretend Play : 0.022568269013766643
Casual;Education : 0.022568269013766643
Board;Action & Adventure : 0.022568269013766643
Video Players & Editors;Creativity : 0.011284134506883321
Trivia;Education : 0.011284134506883321
Travel & Local;Action & Adventure : 0.011284134506883321
Tools;Education : 0.011284134506883321
Strategy;Education : 0.011284134506883321
Strategy;Creativity : 0.011284134506883321
Strategy;Action & Adventure : 0.011284134506883321
Simulation;Education : 0.011284134506883321
Role Playing;Brain Games : 0.011284134506883321
Racing;Pretend Play : 0.011284134506883321
Puzzle;Education : 0.011284134506883321
Parenting;Brain Games : 0.011284134506883321
Music & Audio;Music & Video : 0.011284134506883321
Lifestyle;Pretend Play : 0.011284134506883321
Lifestyle;Education : 0.011284134506883321
Health & Fitness;Education : 0.011284134506883321
Health & Fitness;Action & Adventure : 0.011284134506883321
Entertainment;Education : 0.011284134506883321
Communication;Creativity : 0.011284134506883321
Comics;Creativity : 0.011284134506883321
Casual;Music & Video : 0.011284134506883321
Card;Action & Adventure : 0.011284134506883321
Books & Reference;Education : 0.011284134506883321
Art & Design;Pretend Play : 0.011284134506883321
Art & Design;Action & Adventure : 0.011284134506883321
Arcade;Pretend Play : 0.011284134506883321
Adventure;Education : 0.011284134506883321
In [ ]: