import psycopg2
from sqlalchemy import create_engine
import ast
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline
dbname="kick"
tblname="info"
engine = create_engine(
'postgresql://localhost:5432/{dbname}'.format(dbname=dbname))
# Connect to database
conn = psycopg2.connect(dbname=dbname)
cur = conn.cursor()
Remind myself of the columns in the table:
cur.execute("SELECT column_name,data_type FROM information_schema.columns WHERE table_name = '{table}';".format(table=tblname))
rows = cur.fetchall()
pd.DataFrame(rows, columns=["column_name", "data_type"])
column_name | data_type | |
---|---|---|
0 | index | integer |
1 | id | integer |
2 | photo | character varying |
3 | name | character varying |
4 | blurb | character varying |
5 | goal | double precision |
6 | pledged | double precision |
7 | state | character varying |
8 | slug | character varying |
9 | disable_communication | boolean |
10 | country | character varying |
11 | currency | character varying |
12 | currency_symbol | character varying |
13 | currency_trailing_code | boolean |
14 | deadline | integer |
15 | state_changed_at | integer |
16 | created_at | integer |
17 | launched_at | integer |
18 | staff_pick | boolean |
19 | backers_count | integer |
20 | static_usd_rate | double precision |
21 | usd_pledged | double precision |
22 | creator | character varying |
23 | location | character varying |
24 | category | character varying |
25 | profile | character varying |
26 | spotlight | boolean |
27 | urls | character varying |
28 | source_url | character varying |
29 | friends | character varying |
30 | is_starred | character varying |
31 | is_backing | character varying |
32 | permissions | character varying |
33 | topic | character varying |
Number of records in table:
cur.execute("SELECT COUNT(*) from {table}".format(table=tblname))
cur.fetchone()
(177140,)
cur.execute("SELECT topic, COUNT(*) from {table} GROUP BY topic ORDER BY count DESC;".format(table=tblname))
rows = cur.fetchall()
df = pd.DataFrame(rows, columns=["topic", "count"])
# Plot findings
plt.rcParams["figure.figsize"] = [17,5]
df.plot(kind="bar", x="topic", y="count", legend=False)
plt.ylabel("Kickstarter projects")
plt.xlabel("Topic")
plt.title("Kickstarter projects by topic")
plt.tick_params(axis='x', labelsize=7)
"There are {num_topics} different types of Kickstarter projects".format(num_topics=df.shape[0])
'There are 143 different types of Kickstarter projects'
# Most popular project topic is
df[df["count"] == df["count"].max()]
topic | count | |
---|---|---|
0 | Web | 4476 |
# Most rare project topic is
df[df["count"] == df["count"].min()]
topic | count | |
---|---|---|
142 | Taxidermy | 12 |
What are the rare projects?
cur.execute("SELECT id, blurb, goal*static_usd_rate as goal_usd FROM {table} WHERE topic = '{topic}'".format(table=tblname, topic="Taxidermy"))
rows = cur.fetchall()
for row in rows:
row_id, blurb, goal = row
print(">>> $%d | id: %s" % (goal, row_id),
blurb, sep="\n")
>>> $8319 | id: 2009417734 Master prop maker and creator of the dead fairy hoax that captured the world’s imagination invites you into his studio to reveal all! >>> $4300 | id: 874566085 Gallery | Public Dissections | Events | A space in downtown San Francisco to reflect upon the less considered means of living & dying. >>> $5000 | id: 304809544 TaxiClear transforms biology into brilliant works of art that illustrate the beauty and complexity of life >>> $500 | id: 1111529080 Insects: the most abundant and intriguing organisms on Earth. May be seen as pests but are works of art that I will add to wreaths. >>> $3000 | id: 48529158 I take my passion for leather working and create hand carved notebook covers to honor a person's legacy and career accomplishments. >>> $2000 | id: 588593322 Casting skulls from rare and unique species of animal. To include miniature skulls in the future. >>> $705 | id: 1539840572 The "Meteor Grip" ergonomic craft, taxidermy knife fro precise accurate cutting. This hand specific design is a leader in grip design. >>> $456 | id: 1450117214 Creating beautiful and unique statement pieces with the use of taxidermy, skulls and ornate frames. >>> $15450 | id: 1370425060 Bringing art and science together through hand made ethically sourced bone jewellery and art sculptures >>> $5464 | id: 1052896826 A new and creative way to display the beauty of ocean,swamp and land creatures of the world by electroless silver plating. >>> $7208 | id: 1208011665 Hollow Earth is slated to be Perth's 1st true Oddity/Collectables store. Selling anything from hand-made trinkets to occult items. >>> $250000 | id: 1984141754 I want to go into the wilds to find and subsequently kill sasquatch. Donors will be sent a small piece of his hide if I succeed.
sql = "SELECT id, topic, goal*static_usd_rate as goal_usd FROM {table}".format(table=tblname)
cur.execute(sql)
rows = cur.fetchall()
df = pd.DataFrame(rows, columns=["id", "topic", "goal_usd"])
# Asking average
np.log10(df.goal_usd).plot.kde()
plt.xlabel("log(funding goal in USD)")
<matplotlib.text.Text at 0x11db23320>
"Most projects are asking for: $%d - $%d" % (10**2.5, 10**5)
'Most projects are asking for: $316 - $100000'
sns.barplot(x="topic", y="goal_usd",
data=df.groupby("topic").mean().reset_index().sort_values(by="goal_usd", ascending=False))
_ = plt.xticks(rotation='vertical')
plt.ylabel("Average goal (USD)")
plt.xlabel("Kickstarter project topic")
plt.title("Funding goals on Kickstarter by topic")
plt.tick_params(axis='x', labelsize=7)
"Movie Theaters" and "Space exploration" have the average higest funding goals
What tends to get funded?
sql = "SELECT id, topic, goal, pledged, pledged/goal as progress FROM info ORDER BY progress DESC;"
cur.execute(sql)
rows = cur.fetchall()
df = pd.DataFrame(rows, columns=["id", "topic", "goal", "pledged", "progress"])
df["well_funded"] = df.progress >= 1
plt.rcParams["figure.figsize"] = [17,5]
sns.boxplot(x="topic", y="progress", data=df[df.well_funded].sort_values(by="topic"))
_ = plt.xticks(rotation='vertical')
plt.yscale('log')
plt.ylabel("Percent of funding goal")
plt.xlabel("Topic")
plt.title("Projects that were successfully funded by Topic")
plt.tick_params(axis='x', labelsize=7)
sns.barplot(x="topic", y="progress",
data=df[df.well_funded].groupby("topic").count().reset_index().sort_values(by="progress", ascending=False))
_ = plt.xticks(rotation='vertical')
plt.ylabel("Project that were successfully funded")
plt.xlabel("Topic")
plt.title("Projects that were successfully funded by Topic")
plt.tick_params(axis='x', labelsize=7)
plt.rcParams["figure.figsize"] = [17,5]
sns.boxplot(x="topic", y="progress",
data=df[np.invert(df.well_funded)].sort_values(by="topic"))
_ = plt.xticks(rotation='vertical')
plt.ylabel("Percent of funding goal met")
plt.xlabel("Topic")
plt.title("Pojects that have yet to meet their funding goals")
plt.tick_params(axis='x', labelsize=7)
sns.barplot(x="topic", y="progress",
data=df[np.invert(df.well_funded)].groupby("topic").count().reset_index().sort_values(by="progress", ascending=False))
_ = plt.xticks(rotation='vertical')
plt.ylabel("Project that were not yet successfully funded")
plt.xlabel("Topic")
plt.title("Pojects that have yet to meet their funding goals")
plt.tick_params(axis='x', labelsize=7)
# close communication with the PostgreSQL database server
cur.close()
# commit the changes
conn.commit()
# close connection
conn.close()