#!/usr/bin/env python
# coding: utf-8

# ## Imports

# In[1]:

from itertools import islice

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd


# ## Loading the data

# In[2]:

people_df = pd.read_csv("./data/data_people_dump.zip", usecols=[1, 2, 3, 4])
people_df.drop_duplicates(subset='uuid', inplace=True)
people_df.set_index('uuid', inplace=True)

df = pd.read_csv('./data/data_survey_dump.zip')
# Keep only answered questions (selected != 0) that belong to a known user.
# The explicit copy makes the filtered frame independent of the raw one, so
# adding a column below does not raise a SettingWithCopyWarning.
df = df[(df.selected != 0) & (~df.uuid.isna())].copy()
# The rejected option is whichever of option_a / option_b was not selected.
df['not_selected'] = np.where(df.selected != df.option_a, df.option_a, df.option_b)


# ## Defining the functions to create and analyze the graphs

# In[3]:

def pref_graph_from_df(df_user: pd.DataFrame, default_edge_params=None) -> nx.DiGraph:
    """Build the directed preference graph of one user's answers.

    Every row of ``df_user`` contributes one edge pointing from the
    ``selected`` option to the ``not_selected`` option.

    Args:
        df_user: Answers to convert; must expose ``selected`` and
            ``not_selected`` columns.
        default_edge_params: Optional mapping of attributes attached to
            every edge. ``None`` means no attributes.

    Returns:
        The resulting ``networkx.DiGraph``.
    """
    edge_params = default_edge_params if default_edge_params is not None else {}
    g = nx.DiGraph()
    for record in df_user.itertuples():
        g.add_edge(record.selected, record.not_selected, **edge_params)
    return g


def find_n_cycles(graph: nx.DiGraph, max_n_cycles: int = 100) -> list:
    """Return at most ``max_n_cycles`` simple cycles of ``graph``.

    Cycles are pulled lazily from ``networkx.simple_cycles``, so the
    enumeration stops as soon as the limit is reached.

    Args:
        graph: Directed graph to search.
        max_n_cycles: Upper bound on the number of cycles returned. Zero or
            a negative value yields an empty list.

    Returns:
        A list of cycles, each cycle being a list of nodes.
    """
    # islice rejects negative stops, whereas the limit has always been
    # treated as "nothing to return" when it is not positive.
    limit = max(max_n_cycles, 0)
    return list(islice(nx.simple_cycles(graph), limit))


def check_possible_inconsistencies(df_user: pd.DataFrame) -> bool:
    """Tell whether a user's answers could contain an inconsistency at all.

    The preference graph is searched for a cycle while ignoring the edge
    orientation: without such a cycle no directed cycle can exist either.

    Args:
        df_user: Answers of a single user.

    Returns:
        ``True`` if a cycle exists when edge direction is ignored, ``False``
        otherwise.
    """
    g = pref_graph_from_df(df_user)
    try:
        nx.find_cycle(g, orientation='ignore')
    except nx.exception.NetworkXNoCycle:
        return False
    return True


def find_inconsistencies(df_user: pd.DataFrame, max_n_inconsistencies: int = 100) -> pd.Series:
    """Collect the directed preference cycles found in a user's answers.

    Args:
        df_user: Answers of a single user.
        max_n_inconsistencies: Upper bound on the number of cycles collected.

    Returns:
        A one-element ``pandas.Series`` whose single value is the list of
        cycles found.
    """
    g = pref_graph_from_df(df_user)
    inconsistencies = find_n_cycles(g, max_n_cycles=max_n_inconsistencies)
    return pd.Series([inconsistencies])


def draw_preferences_graph(df_user: pd.DataFrame, max_cycles: int = 100) -> None:
    """Plot a user's preference graph with the detected cycles highlighted.

    Regular edges are drawn thin and black; edges that belong to one of the
    detected cycles are drawn thick and red. The figure is displayed with
    ``plt.show()``.

    Args:
        df_user: Answers of a single user; must expose ``uuid``,
            ``selected`` and ``not_selected`` columns.
        max_cycles: Upper bound on the number of cycles highlighted.
    """
    default_edge_parameters = {'edge_line_color': 'black', 'edge_line_width': 0.3, 'weight': 1}
    # Same for every cycle edge, so it is built once rather than per edge.
    # NOTE(review): the small layout weight presumably keeps cycles from
    # being pulled tight by the spring layout -- confirm intent.
    cycle_edge_parameters = {'edge_line_color': 'red', 'edge_line_width': 1.5, 'weight': 0.001}

    g = pref_graph_from_df(df_user, default_edge_parameters)
    cycles = find_n_cycles(g, max_cycles)
    for cycle in cycles:
        # Pair every node with its successor, wrapping around to close the cycle.
        cycle_shifted = cycle[1:] + cycle[:1]
        for start_node, end_node in zip(cycle, cycle_shifted):
            # The edge already exists; add_edge only overrides its attributes.
            g.add_edge(start_node, end_node, **cycle_edge_parameters)

    edge_line_colors = list(nx.get_edge_attributes(g, 'edge_line_color').values())
    edge_line_widths = list(nx.get_edge_attributes(g, 'edge_line_width').values())
    node_labels = {node: node for node in g.nodes}

    plt.figure(figsize=(20, 10))
    pos = nx.spring_layout(g, weight='weight')
    nx.draw_networkx_edges(g, pos, edge_color=edge_line_colors, width=edge_line_widths, arrowsize=17)
    nx.draw_networkx_nodes(g, pos, node_size=600, alpha=0.5)
    nx.draw_networkx_labels(g, pos, node_labels, font_weight='bold', font_size=12)
    uuid = df_user.uuid.iloc[0]
    plt.title(f"Preferences of {uuid} (#Inconsistencies = {len(cycles)})")
    plt.show()


def count_unique_options(df_user: pd.DataFrame) -> int:
    """Count the distinct options appearing in a user's answers.

    Args:
        df_user: Answers of a single user; must expose ``selected`` and
            ``not_selected`` columns.

    Returns:
        Number of distinct values across both columns.
    """
    all_options = pd.concat([df_user.selected, df_user.not_selected])
    return len(all_options.unique())


# In[4]:

sample_uuids = df.uuid.drop_duplicates()#.sample()
sample_df = df[df.uuid.isin(sample_uuids)].copy()

# Group once and reuse the grouping for every per-user statistic.
answers_by_user = sample_df.groupby('uuid')

gr_data = pd.DataFrame(index=sample_df.uuid.drop_duplicates())
gr_data['n_questions'] = answers_by_user.size()
gr_data['n_unique_options'] = answers_by_user.apply(count_unique_options)
gr_data['inconsistencies'] = answers_by_user.apply(find_inconsistencies)
gr_data['has_possible_inconsistencies'] = answers_by_user.apply(check_possible_inconsistencies)
# Every question shows two options, so this counts options with repetition.
gr_data['n_nonunique_options'] = gr_data.n_questions*2
gr_data['n_inconsistencies'] = gr_data.inconsistencies.apply(len)
gr_data['has_inconsistencies'] = gr_data.n_inconsistencies > 0
gr_data['options_density'] = 1 - (gr_data.n_unique_options/gr_data.n_nonunique_options)


# ## Random example

# In[5]:

sample_user = df.uuid.drop_duplicates().sample().iloc[0]
user_df = df[df.uuid == sample_user]
draw_preferences_graph(user_df)


# ## Percentage of users that show intransitive preferences

# In[7]:

# Share of users with at least one directed cycle among the users whose
# answers could contain one at all.
percentage_intransitive = 100*(gr_data.has_inconsistencies.sum() / gr_data.has_possible_inconsistencies.sum())
print(
    "Percentage of users who can show intransitive preferences and actualy show one or more "
    f"{percentage_intransitive:.4}"
)