#!/usr/bin/env python
# coding: utf-8

# # GUIDED PROJECT APP STORE
# 
# In this project I will analyze the data to help developers understand which apps are the most appealing.
# 

# In[1]:


from csv import reader

# Load the App Store CSV. The context manager closes the file as soon as
# every row has been read into memory (the original left it open).
with open('AppleStore.csv', encoding='utf8') as Apple_store:
    data_apple = reader(Apple_store)
    list_apple = list(data_apple)
apple_header = list_apple[0]  # first row of the file
Apple = list_apple[1:]        # every remaining row

# Load the Google Play CSV the same way.
with open('googleplaystore.csv', encoding='utf8') as Google_Store:
    data_google = reader(Google_Store)
    list_google = list(data_google)
google_header = list_google[0]  # first row of the file
Android = list_google[1:]       # every remaining row


# In[2]:


def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of ``dataset`` and, optionally, its dimensions.

    Args:
        dataset: A sequence of rows, each row itself a sized sequence.
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the number of rows in
            ``dataset`` and the number of columns, measured on the first row.
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # extra blank lines separate consecutive rows

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError.
        print('Number of columns:', len(dataset[0]) if dataset else 0)

# For each store (App Store first, then Google Play) show the header row,
# the first three data rows, and the dataset dimensions.
for header, rows in ((apple_header, Apple), (google_header, Android)):
    print(header)
    print('\n')
    explore_data(rows, 0, 3, True)


# In[6]:


# Rows whose length differs from the header are treated as malformed:
# print them, then drop them from the dataset.
length_header = len(google_header)  # invariant, so computed once
for row in Android:
    if len(row) != length_header:
        print(row)

print(len(Android))

# Remove the offending rows by the same length check instead of deleting a
# hard-coded index: running this step twice, or loading a file with a
# different row order, would otherwise delete a valid row.
# Slice assignment mutates the existing list rather than rebinding the name.
Android[:] = [row for row in Android if len(row) == length_header]

print(len(Android))


# The Google Play store data has duplicate entries that we are trying to find.

# In[7]:


# Count how many rows repeat an app name (column 0) already seen earlier.
duplicate_name = []  # one entry per repeated occurrence of a name
unique_name = []     # each distinct name, in first-seen order
seen_names = set()   # mirrors unique_name; gives O(1) membership tests
for row in Android:
    name = row[0]
    if name in seen_names:
        duplicate_name.append(name)
    else:
        seen_names.add(name)
        unique_name.append(name)
number_duplicate = len(duplicate_name)
print(number_duplicate)


# We will not remove duplicates randomly. Instead, we will remove all duplicates except the one with the highest number of reviews.

# In[8]:


# Map every app name (column 0) to the largest value of column 3, read as a
# float, found among the rows sharing that name.
reviews_max = {}
for entry in Android:
    app_name = entry[0]
    review_count = float(entry[3])
    # Store the value for a name seen for the first time, or when it beats
    # the maximum recorded so far.
    if app_name not in reviews_max or reviews_max[app_name] < review_count:
        reviews_max[app_name] = review_count

print(len(reviews_max))


# In[9]:


# Keep exactly one row per app name: the first row whose review count
# (column 3) equals the maximum recorded for that name in reviews_max.
android_clean = []
already_added = []    # names already kept, in insertion order
added_names = set()   # mirrors already_added; gives O(1) membership tests
for row in Android:
    name = row[0]
    n_reviews = float(row[3])
    # The membership test prevents keeping two rows that tie on the maximum.
    if n_reviews == reviews_max[name] and name not in added_names:
        android_clean.append(row)
        already_added.append(name)
        added_names.add(name)

print(len(android_clean))


# In[10]:


def is_english(string):
    """Return True when every character of ``string`` is ASCII.

    A character is accepted when its code point is at most 127; a single
    character above that makes the result False. An empty string yields True.
    """
    return all(ord(char) <= 127 for char in string)

# Try the checker on a plain name, a non-English name, and two English
# names that each contain a single non-ASCII symbol.
for sample in (
    'Instagram',
    '爱奇艺PPS -《欢乐颂2》电视剧热播',
    'Docs To Go™ Free Office Suite',
    'Instachat 😜',
):
    print(is_english(sample))

# Our function is not perfect yet because it doesn't recognize special characters like emoji. We will modify it to allow those special characters.
# In[11]:


def is_english(string, max_non_ascii=3):
    """Return True unless ``string`` has too many non-ASCII characters.

    A few non-ASCII characters (code point above 127) are tolerated so that
    names containing an emoji or a symbol such as the trademark sign still
    pass the check.

    Args:
        string: The text to check.
        max_non_ascii: Largest number of non-ASCII characters still
            accepted; expected to be non-negative. Defaults to 3.

    Returns:
        False once more than ``max_non_ascii`` characters are non-ASCII,
        True otherwise (including for an empty string).
    """
    non_ascii_count = 0
    for letter in string:
        if ord(letter) > 127:
            non_ascii_count += 1
            # Stop early: the rest of the string cannot change the answer.
            if non_ascii_count > max_non_ascii:
                return False
    return True
    
# Re-run the checker on two English names that contain a non-ASCII symbol
# and on one non-English name.
for sample in (
    'Docs To Go™ Free Office Suite',
    'Instachat 😜',
    '爱奇艺PPS -《欢乐颂2》电视剧热播',
):
    print(is_english(sample))


# In[12]:


# Keep only the apps whose name passes is_english. The name is read from
# column 0 of the Google Play rows and column 1 of the App Store rows.
android_english = [entry for entry in android_clean if is_english(entry[0])]
apple_english = [entry for entry in Apple if is_english(entry[1])]

explore_data(android_english, 0, 3, True)
print('\n')
explore_data(apple_english, 0, 3, True)


# In[13]:


# Keep only the free apps. The price is read from column 7 of the Google
# Play rows (free is '0') and column 4 of the App Store rows (free is '0.0').
free_android = [entry for entry in android_english if entry[7] == '0']
free_apple = [entry for entry in apple_english if entry[4] == '0.0']

print(len(free_android))
print(len(free_apple))


# We are trying to find an app idea. For that, we build a minimal version on Google Play, and if the response from users is good we develop it further. If it is profitable, we build an iOS version of it after 6 months.

# In order to generate frequency tables and find out the most common genres , we will use the columns genre for both store.

# In[15]:


def freq_table(dataset, index):
    """Return the percentage of rows holding each value found at ``index``.

    Args:
        dataset: An iterable of rows, each indexable by ``index``.
        index: Position of the column to tabulate.

    Returns:
        A dict mapping each distinct value to its share of the rows as a
        percentage (0-100), with keys in first-seen order. An empty
        dataset yields an empty dict.
    """
    counts = {}
    for record in dataset:
        cell = record[index]
        counts[cell] = counts.get(cell, 0) + 1

    # Every row contributes exactly one count, so the counts sum to the
    # number of rows.
    row_total = sum(counts.values())
    return {cell: (count / row_total) * 100 for cell, count in counts.items()}
    
def display_table(dataset, index):
    """Print the percentage table of column ``index``, largest share first.

    The table comes from ``freq_table``. Each printed line has the form
    ``value : percentage``; entries with equal percentages are ordered by
    value, descending.
    """
    shares = freq_table(dataset, index)
    # Sorting (percentage, value) pairs ranks by percentage first.
    ranked = sorted(
        ((share, value) for value, share in shares.items()),
        reverse=True,
    )
    for share, value in ranked:
        print(value, ':', share)


# we will display the prime genre frequency table  

# In[16]:


# Frequency tables displayed, in order:
#   1. App Store    - prime genre (column 11)
#   2. Google Play  - Genres      (column 9)
#   3. Google Play  - Category    (column 1)
for rows, column in ((free_apple, 11), (free_android, 9), (free_android, 1)):
    display_table(rows, column)


# 1 - The most common genre is Games. The runner-up is Entertainment.
#  - There are a lot of education apps, and Photo & Video is the 3rd most common genre on the Apple store.
#  - Most apps are designed for entertainment.
#  - No, I cannot recommend an app profile based on this data because it represents just a sample of the App Store. Moreover, a genre being more common doesn't mean a lot of people are using it.
#
# 2 - The most common genres are Tools, Entertainment, Education, Business and Productivity.
#  - There are more genres than on the App Store. Also, apps that are supposed to be popular are not the most common (social, gaming genres...). No, I cannot recommend an app profile based on this sample because it represents a small part of the store.

# In[19]:


# Average of column 5 per genre (column -5) over the free App Store apps.
# NOTE(review): column 5 is presumably the total number of user ratings
# per app - confirm against apple_header.
prime_genre = freq_table(free_apple, -5)

# Accumulate the per-genre sum and app count in a single pass instead of
# rescanning the whole dataset once per genre.
rating_totals = {}
genre_counts = {}
for app in free_apple:
    genre = app[-5]
    rating_totals[genre] = rating_totals.get(genre, 0) + float(app[5])
    genre_counts[genre] = genre_counts.get(genre, 0) + 1

# Print in the key order of the frequency table, as before.
for genre in prime_genre:
    average_user_rating = rating_totals[genre] / genre_counts[genre]
    print(genre, ':', average_user_rating)


# Based on the data, I would recommend building a social networking or a navigation app, because these are the most downloaded (they have the most user ratings).

# In[26]:


# Average of the install figure (column 5) per category (column 1) over
# the free Google Play apps.
Category = freq_table(free_android, 1)

# Accumulate the per-category sum and app count in a single pass instead
# of rescanning the whole dataset once per category.
install_totals = {}
category_counts = {}
for app in free_android:
    category = app[1]
    # Strip the '+' and ',' characters so the value parses as a number.
    number_install = app[5].replace('+', '').replace(',', '')
    install_totals[category] = (
        install_totals.get(category, 0) + float(number_install)
    )
    category_counts[category] = category_counts.get(category, 0) + 1

# Print in the key order of the frequency table, as before.
for category in Category:
    average_install = install_totals[category] / category_counts[category]
    print(category, ':', average_install)


# I would recommend building a social networking, travel, or entertainment app.