In this tutorial you will learn about the following:
# Render a link to the administration interface inside the notebook output.
from IPython.display import HTML
# Static markup: an anchor with a placeholder href plus the login details
# shown to the reader.
input_form = """
<a id="admin_link" target="_blank" href="#">Ajenti Administration Interface</a>
<p>User: root<br> Password: admin</p>
"""
# Script that rewrites the anchor's href in the browser to
# https://<hostname of the current page>:8000.
javascript = """
<script type="text/Javascript">
document.getElementById('admin_link').href = "https://" + window.location.hostname + ":8000"
</script>
"""
# Last expression of the cell: the combined markup and script are displayed.
HTML(input_form + javascript)
User: root
Password: admin
Unlike classification and regression, clustering is unsupervised. That means the algorithm does not classify your data based on predefined categories. Instead, it looks for similar data points in the dataset and clusters them together. Different algorithms can solve specific problems, but k-means is a general-purpose algorithm, and that is what we will cover in this tutorial.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick, quotes_historical_yahoo, date2num
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from datetime import datetime, timedelta
pd.options.display.max_columns=50
def download_data(symbol, days_delta=30):
    """Fetch recent daily quotes for one ticker as a DataFrame.

    symbol     -- ticker symbol handed to ``quotes_historical_yahoo``.
    days_delta -- length, in days, of the window that ends today.

    Returns a DataFrame whose columns are labelled
    n_date, open, close, high, low, volume.
    """
    # The window runs from `days_delta` days ago up to the current moment.
    window_end = datetime.today()
    window_start = window_end - timedelta(days=days_delta)
    column_names = ["n_date", "open", "close", "high", "low", "volume"]
    # Read the raw quote rows from Yahoo! Finance and label the columns.
    raw_quotes = quotes_historical_yahoo(symbol, window_start, window_end)
    return pd.DataFrame(raw_quotes, columns=column_names)
# Download the default 30-day window of quotes for GOOG and display the table.
stocks_df = download_data("GOOG")
stocks_df
n_date | open | close | high | low | volume | |
---|---|---|---|---|---|---|
0 | 735456 | 569.99 | 567.88 | 570.49 | 566.00 | 1211400 |
1 | 735457 | 564.52 | 562.73 | 565.90 | 560.88 | 1537800 |
2 | 735458 | 567.31 | 574.78 | 575.00 | 565.75 | 1435300 |
3 | 735459 | 576.18 | 574.65 | 577.90 | 570.88 | 982800 |
4 | 735460 | 577.86 | 573.48 | 579.38 | 570.52 | 1515000 |
5 | 735463 | 576.11 | 582.16 | 584.51 | 576.00 | 1280600 |
6 | 735464 | 585.00 | 586.86 | 587.34 | 584.00 | 976000 |
7 | 735465 | 585.88 | 584.49 | 586.70 | 582.57 | 1033900 |
8 | 735466 | 583.82 | 583.37 | 584.50 | 581.14 | 912300 |
9 | 735467 | 583.59 | 582.56 | 585.24 | 580.64 | 786900 |
10 | 735470 | 584.72 | 580.20 | 585.00 | 579.00 | 1357700 |
11 | 735471 | 581.26 | 577.86 | 581.80 | 576.58 | 1635200 |
12 | 735472 | 577.27 | 571.00 | 578.49 | 570.10 | 1698700 |
13 | 735473 | 569.56 | 569.20 | 573.25 | 567.10 | 1289400 |
14 | 735474 | 571.33 | 571.60 | 572.04 | 567.07 | 1080800 |
15 | 735478 | 571.85 | 577.33 | 577.83 | 571.19 | 1574100 |
16 | 735479 | 580.00 | 577.94 | 582.99 | 575.00 | 1211800 |
17 | 735480 | 580.00 | 581.98 | 586.00 | 579.22 | 1454200 |
18 | 735481 | 583.98 | 586.08 | 586.55 | 581.95 | 1627900 |
19 rows × 6 columns
def process_date(stocks_df):
    """Convert the ordinal day column and add a matching ``date`` column.

    ``stocks_df`` is modified in place: ``n_date`` is cast to 32-bit
    integers and ``date`` is filled with ``datetime.fromordinal`` of each
    of those integers. The same frame is returned.
    """
    ordinals = stocks_df["n_date"].astype(np.int32)
    stocks_df["n_date"] = ordinals
    # Each integer is a proleptic Gregorian ordinal, as fromordinal expects.
    stocks_df["date"] = ordinals.apply(datetime.fromordinal)
    return stocks_df
# Updates stocks_df in place (integer n_date plus a date column); the
# returned frame is what the cell displays.
process_date(stocks_df)
n_date | open | close | high | low | volume | date | |
---|---|---|---|---|---|---|---|
0 | 735456 | 569.99 | 567.88 | 570.49 | 566.00 | 1211400 | 2014-08-11 |
1 | 735457 | 564.52 | 562.73 | 565.90 | 560.88 | 1537800 | 2014-08-12 |
2 | 735458 | 567.31 | 574.78 | 575.00 | 565.75 | 1435300 | 2014-08-13 |
3 | 735459 | 576.18 | 574.65 | 577.90 | 570.88 | 982800 | 2014-08-14 |
4 | 735460 | 577.86 | 573.48 | 579.38 | 570.52 | 1515000 | 2014-08-15 |
5 | 735463 | 576.11 | 582.16 | 584.51 | 576.00 | 1280600 | 2014-08-18 |
6 | 735464 | 585.00 | 586.86 | 587.34 | 584.00 | 976000 | 2014-08-19 |
7 | 735465 | 585.88 | 584.49 | 586.70 | 582.57 | 1033900 | 2014-08-20 |
8 | 735466 | 583.82 | 583.37 | 584.50 | 581.14 | 912300 | 2014-08-21 |
9 | 735467 | 583.59 | 582.56 | 585.24 | 580.64 | 786900 | 2014-08-22 |
10 | 735470 | 584.72 | 580.20 | 585.00 | 579.00 | 1357700 | 2014-08-25 |
11 | 735471 | 581.26 | 577.86 | 581.80 | 576.58 | 1635200 | 2014-08-26 |
12 | 735472 | 577.27 | 571.00 | 578.49 | 570.10 | 1698700 | 2014-08-27 |
13 | 735473 | 569.56 | 569.20 | 573.25 | 567.10 | 1289400 | 2014-08-28 |
14 | 735474 | 571.33 | 571.60 | 572.04 | 567.07 | 1080800 | 2014-08-29 |
15 | 735478 | 571.85 | 577.33 | 577.83 | 571.19 | 1574100 | 2014-09-02 |
16 | 735479 | 580.00 | 577.94 | 582.99 | 575.00 | 1211800 | 2014-09-03 |
17 | 735480 | 580.00 | 581.98 | 586.00 | 579.22 | 1454200 | 2014-09-04 |
18 | 735481 | 583.98 | 586.08 | 586.55 | 581.95 | 1627900 | 2014-09-05 |
19 rows × 7 columns
We'll be calculating the daily average using this equation to compile the daily price into one number:
$Average = \frac{(Close + High + Low)}{3}$
Then we will calculate the daily change amount, the change percentage, and the daily range using these equations:
$Change Amount = Close - Open$
$Change Percentage = \frac{Change Amount}{Average}$
$Range = \frac{(High - Low)}{Average}$
def calculate_stats(stocks_df):
    """Add derived price statistics to ``stocks_df`` in place and return it.

    Expects the columns open, close, high and low. Columns added:

    average         -- (close + high + low) / 3
    change_amount   -- close - open
    change_per      -- change_amount / average
    range           -- (high - low) / average
    change_1_amount -- average minus the previous row's average
                       (0.0 for the first row, which has no predecessor)
    change_1_per    -- change_1_amount / average
    """
    average = (stocks_df["close"] + stocks_df["high"] + stocks_df["low"]) / 3.0
    stocks_df["average"] = average
    stocks_df["change_amount"] = stocks_df["close"] - stocks_df["open"]
    stocks_df["change_per"] = stocks_df["change_amount"] / average
    stocks_df["range"] = (stocks_df["high"] - stocks_df["low"]) / average
    # Build the row-over-row change positionally and assign the whole column
    # in one step. Writing through stocks_df[col][1:] is chained assignment
    # (it may modify a temporary copy), and seeding the column from
    # pd.Series(0.0) only lines up when the frame's index contains label 0.
    day_change = np.zeros(len(stocks_df))
    day_change[1:] = np.diff(average.values)
    stocks_df["change_1_amount"] = day_change
    stocks_df["change_1_per"] = stocks_df["change_1_amount"] / average
    return stocks_df
# Adds the derived statistic columns to stocks_df in place; the returned
# frame is what the cell displays.
calculate_stats(stocks_df)
n_date | open | close | high | low | volume | date | average | change_amount | change_per | range | change_1_amount | change_1_per | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 735456 | 569.99 | 567.88 | 570.49 | 566.00 | 1211400 | 2014-08-11 | 568.123333 | -2.11 | -0.003714 | 0.007903 | 0.000000 | 0.000000 |
1 | 735457 | 564.52 | 562.73 | 565.90 | 560.88 | 1537800 | 2014-08-12 | 563.170000 | -1.79 | -0.003178 | 0.008914 | -4.953333 | -0.008795 |
2 | 735458 | 567.31 | 574.78 | 575.00 | 565.75 | 1435300 | 2014-08-13 | 571.843333 | 7.47 | 0.013063 | 0.016176 | 8.673333 | 0.015167 |
3 | 735459 | 576.18 | 574.65 | 577.90 | 570.88 | 982800 | 2014-08-14 | 574.476667 | -1.53 | -0.002663 | 0.012220 | 2.633333 | 0.004584 |
4 | 735460 | 577.86 | 573.48 | 579.38 | 570.52 | 1515000 | 2014-08-15 | 574.460000 | -4.38 | -0.007625 | 0.015423 | -0.016667 | -0.000029 |
5 | 735463 | 576.11 | 582.16 | 584.51 | 576.00 | 1280600 | 2014-08-18 | 580.890000 | 6.05 | 0.010415 | 0.014650 | 6.430000 | 0.011069 |
6 | 735464 | 585.00 | 586.86 | 587.34 | 584.00 | 976000 | 2014-08-19 | 586.066667 | 1.86 | 0.003174 | 0.005699 | 5.176667 | 0.008833 |
7 | 735465 | 585.88 | 584.49 | 586.70 | 582.57 | 1033900 | 2014-08-20 | 584.586667 | -1.39 | -0.002378 | 0.007065 | -1.480000 | -0.002532 |
8 | 735466 | 583.82 | 583.37 | 584.50 | 581.14 | 912300 | 2014-08-21 | 583.003333 | -0.45 | -0.000772 | 0.005763 | -1.583333 | -0.002716 |
9 | 735467 | 583.59 | 582.56 | 585.24 | 580.64 | 786900 | 2014-08-22 | 582.813333 | -1.03 | -0.001767 | 0.007893 | -0.190000 | -0.000326 |
10 | 735470 | 584.72 | 580.20 | 585.00 | 579.00 | 1357700 | 2014-08-25 | 581.400000 | -4.52 | -0.007774 | 0.010320 | -1.413333 | -0.002431 |
11 | 735471 | 581.26 | 577.86 | 581.80 | 576.58 | 1635200 | 2014-08-26 | 578.746667 | -3.40 | -0.005875 | 0.009019 | -2.653333 | -0.004585 |
12 | 735472 | 577.27 | 571.00 | 578.49 | 570.10 | 1698700 | 2014-08-27 | 573.196667 | -6.27 | -0.010939 | 0.014637 | -5.550000 | -0.009683 |
13 | 735473 | 569.56 | 569.20 | 573.25 | 567.10 | 1289400 | 2014-08-28 | 569.850000 | -0.36 | -0.000632 | 0.010792 | -3.346667 | -0.005873 |
14 | 735474 | 571.33 | 571.60 | 572.04 | 567.07 | 1080800 | 2014-08-29 | 570.236667 | 0.27 | 0.000473 | 0.008716 | 0.386667 | 0.000678 |
15 | 735478 | 571.85 | 577.33 | 577.83 | 571.19 | 1574100 | 2014-09-02 | 575.450000 | 5.48 | 0.009523 | 0.011539 | 5.213333 | 0.009060 |
16 | 735479 | 580.00 | 577.94 | 582.99 | 575.00 | 1211800 | 2014-09-03 | 578.643333 | -2.06 | -0.003560 | 0.013808 | 3.193333 | 0.005519 |
17 | 735480 | 580.00 | 581.98 | 586.00 | 579.22 | 1454200 | 2014-09-04 | 582.400000 | 1.98 | 0.003400 | 0.011641 | 3.756667 | 0.006450 |
18 | 735481 | 583.98 | 586.08 | 586.55 | 581.95 | 1627900 | 2014-09-05 | 584.860000 | 2.10 | 0.003591 | 0.007865 | 2.460000 | 0.004206 |
19 rows × 13 columns
# Line chart of the daily average price against the ordinal dates.
plt.figure(figsize=(10,6))
plt.plot_date(stocks_df["n_date"], stocks_df["average"], fmt="-b", linewidth=3, alpha=.4, label="Average")
# Let matplotlib rotate/align the date tick labels.
plt.gcf().autofmt_xdate()
plt.title("Google Average Stock Price - Daily")
plt.grid()
plt.legend(loc="best")
plt.show();
# Average price and day-over-day percentage change on one figure, using two
# y-axes that share the same x-axis.
fig = plt.figure(figsize=(10,6))
axes_1 = fig.add_subplot(111)
#axes_2 = fig.add_subplot(111)
# twinx() creates the second y-axis on top of axes_1.
axes_2 = axes_1.twinx()
line1 = axes_1.plot_date(stocks_df["n_date"], stocks_df["average"], "o-b",
linewidth=3, alpha=.4, label="Average")
line2 = axes_2.plot_date(stocks_df["n_date"], stocks_df["change_1_per"], ".-r",
linewidth=2, alpha=.4, label="Change from 1 day")
# Thin black line at zero on the change axis, as a visual reference.
axes_2.plot_date(stocks_df["n_date"], np.zeros(len(stocks_df)), fmt="-k", linewidth=1, alpha=.4)
plt.gcf().autofmt_xdate()
axes_1.set_title("Google Average Stock Price - Daily")
axes_1.grid()
# Each axis draws its own legend, placed in different corners.
axes_1.legend(loc="upper right")
axes_2.legend(loc="lower right")
plt.show();
candlestick(ax, quotes, width=0.2, colorup='k', colordown='r', alpha=1.0)
quotes is a sequence of (time, open, close, high, low, ...) sequences.
ax : an Axes instance to plot to
width : fraction of a day for the rectangle width
colorup : the color of the rectangle where close >= open
colordown : the color of the rectangle where close < open
alpha : the rectangle alpha level
# Candlestick chart of the downloaded quotes.
fig = plt.figure(figsize=(10,6))
axes = fig.add_subplot(111)
# The quotes are passed as rows of (n_date, open, close, high, low);
# green is used for colorup and red for colordown.
candlestick(axes, stocks_df[["n_date", "open", "close", "high", "low"]].values,
width=0.6, colorup='g', colordown='r')
# Treat the numeric x values as dates when labelling the axis.
axes.xaxis_date()
plt.gcf().autofmt_xdate()
plt.grid()
plt.show();
# Candlestick chart with the day-over-day percentage change overlaid on a
# second y-axis.
fig = plt.figure(figsize=(10,6))
axes_1 = fig.add_subplot(111)
axes_2 = axes_1.twinx()
candlestick(axes_1, stocks_df[["n_date", "open", "close", "high", "low"]].values,
width=0.6, colorup='g', colordown='r')
# Dashed blue line with markers for the change series on the twin axis.
axes_2.plot_date(stocks_df["n_date"], stocks_df["change_1_per"], "o--b", linewidth=3, alpha=0.4)
axes_1.xaxis_date()
plt.gcf().autofmt_xdate()
axes_1.grid()
plt.show();
Now we will download 50 stocks and process their stats. We will be working with these stocks:
# Ticker symbol -> company name for the 50 stocks used in the clustering.
stock_dict={"GOOG": "Google",
"FB": "Facebook, Inc.",
"AAPL": "Apple Inc.",
"MSFT": "Microsoft Corporation",
"HPQ": "Hewlett-Packard Company",
"INTC": "Intel Corporation",
"NVDA": "NVIDIA Corporation",
"TXN": "Texas Instruments Incorporated",
"IBM": "International Business Machines Corp. (IBM)",
"SAP": "SAP SE (ADR)",
"ADBE": "Adobe Systems Incorporated",
"ADSK": "Autodesk, Inc.",
"CRM": "salesforce.com, inc.",
"N": "NetSuite Inc",
"VMW": "VMware, Inc.",
"CTXS": "Citrix Systems, Inc.",
"RHT": "Red Hat Inc",
"RAX": "Rackspace Hosting, Inc.",
"AMZN": "Amazon.com, Inc.",
"NWSA": "News Corp",
"EBAY": "eBay Inc",
"CBS": "CBS Corporation",
"CMCSA": "Comcast Corporation",
"VIAB": "Viacom, Inc.",
"NFLX": "Netflix, Inc.",
"TWX": "Time Warner Inc",
"FOXA": "Twenty-First Century Fox Inc",
"NYT": "The New York Times Company",
"TRI": "Thomson Reuters Corporation (USA)",
"DIS": "The Walt Disney Company",
"SNE": "Sony Corp (ADR)",
"PCRFY": "Panasonic Corporation (ADR)",
"CAJ": "Canon Inc (ADR)",
"TOSYY": "Toshiba Corp (USA)",
"BBRY": "BlackBerry Ltd",
"CSC": "Computer Sciences Corporation",
"GE": "General Electric Company",
"HTHIY": "Hitachi, Ltd. (ADR)",
"SIEGY": "Siemens AG (ADR)",
"CVX": "Chevron Corporation",
"XOM": "Exxon Mobil Corporation",
"BP": "BP plc (ADR)",
"CAT": "Caterpillar Inc.",
"LXK": "Lexmark International Inc",
"BKS": "Barnes & Noble, Inc.",
"FJTSY": "Fujitsu Ltd (ADR)",
"EMC": "EMC Corporation",
"ORCL": "Oracle Corporation",
"CSCO": "Cisco Systems, Inc.",
"XRX": "Xerox Corp",
}
# keys() and values() of an unmodified dict iterate in the same order, so
# each symbol lines up with its own company name.
symbols = stock_dict.keys()
names = stock_dict.values()
# Two-column lookup table (symbol, name); displayed as the cell's output.
stocks_data = pd.DataFrame(symbols, columns=["symbol"])
stocks_data["name"] = names
stocks_data
symbol | name | |
---|---|---|
0 | NFLX | Netflix, Inc. |
1 | AAPL | Apple Inc. |
2 | NYT | The New York Times Company |
3 | FB | Facebook, Inc. |
4 | BP | BP plc (ADR) |
5 | ADSK | Autodesk, Inc. |
6 | MSFT | Microsoft Corporation |
7 | EMC | EMC Corporation |
8 | BKS | Barnes & Noble, Inc. |
9 | SAP | SAP SE (ADR) |
10 | RAX | Rackspace Hosting, Inc. |
11 | N | NetSuite Inc |
12 | GE | General Electric Company |
13 | AMZN | Amazon.com, Inc. |
14 | SNE | Sony Corp (ADR) |
15 | VIAB | Viacom, Inc. |
16 | FOXA | Twenty-First Century Fox Inc |
17 | PCRFY | Panasonic Corporation (ADR) |
18 | RHT | Red Hat Inc |
19 | EBAY | eBay Inc |
20 | VMW | VMware, Inc. |
21 | LXK | Lexmark International Inc |
22 | CMCSA | Comcast Corporation |
23 | TRI | Thomson Reuters Corporation (USA) |
24 | ADBE | Adobe Systems Incorporated |
25 | IBM | International Business Machines Corp. (IBM) |
26 | CVX | Chevron Corporation |
27 | FJTSY | Fujitsu Ltd (ADR) |
28 | CRM | salesforce.com, inc. |
29 | CAT | Caterpillar Inc. |
30 | CAJ | Canon Inc (ADR) |
31 | NVDA | NVIDIA Corporation |
32 | TWX | Time Warner Inc |
33 | DIS | The Walt Disney Company |
34 | TXN | Texas Instruments Incorporated |
35 | NWSA | News Corp |
36 | ORCL | Oracle Corporation |
37 | CTXS | Citrix Systems, Inc. |
38 | HTHIY | Hitachi, Ltd. (ADR) |
39 | GOOG | |
40 | INTC | Intel Corporation |
41 | BBRY | BlackBerry Ltd |
42 | TOSYY | Toshiba Corp (USA) |
43 | HPQ | Hewlett-Packard Company |
44 | CSC | Computer Sciences Corporation |
45 | XRX | Xerox Corp |
46 | XOM | Exxon Mobil Corporation |
47 | CSCO | Cisco Systems, Inc. |
48 | CBS | CBS Corporation |
49 | SIEGY | Siemens AG (ADR) |
50 rows × 2 columns
# Download and enrich the quotes of every symbol, then stack the per-symbol
# frames into one long table.
temp_list = []
for symbol in stocks_data["symbol"]:
    # process_date and calculate_stats each return the frame they modify,
    # so the three steps can be chained.
    temp_data = calculate_stats(process_date(download_data(symbol)))
    # Tag every row with its ticker so the rows stay identifiable once
    # the frames are concatenated.
    temp_data["symbol"] = symbol
    temp_list.append(temp_data)
stocks_df = pd.concat(temp_list)
stocks_df
n_date | open | close | high | low | volume | date | average | change_amount | change_per | range | change_1_amount | change_1_per | symbol | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 735456 | 449.22 | 451.54 | 457.65 | 448.71 | 1892400 | 2014-08-11 | 452.633333 | 2.32 | 0.005126 | 0.019751 | 0.000000 | 0.000000 | NFLX |
1 | 735457 | 451.35 | 446.41 | 453.00 | 443.41 | 1549300 | 2014-08-12 | 447.606667 | -4.94 | -0.011036 | 0.021425 | -5.026667 | -0.011230 | NFLX |
2 | 735458 | 448.60 | 451.53 | 454.22 | 446.82 | 1294000 | 2014-08-13 | 450.856667 | 2.93 | 0.006499 | 0.016413 | 3.250000 | 0.007208 | NFLX |
3 | 735459 | 452.21 | 450.87 | 455.00 | 448.22 | 951500 | 2014-08-14 | 451.363333 | -1.34 | -0.002969 | 0.015021 | 0.506667 | 0.001123 | NFLX |
4 | 735460 | 451.49 | 459.09 | 462.00 | 448.60 | 3148100 | 2014-08-15 | 456.563333 | 7.60 | 0.016646 | 0.029350 | 5.200000 | 0.011389 | NFLX |
5 | 735463 | 462.06 | 466.00 | 469.50 | 461.25 | 1928000 | 2014-08-18 | 465.583333 | 3.94 | 0.008463 | 0.017720 | 9.020000 | 0.019374 | NFLX |
6 | 735464 | 467.09 | 468.15 | 470.50 | 462.43 | 1556800 | 2014-08-19 | 467.026667 | 1.06 | 0.002270 | 0.017280 | 1.443333 | 0.003090 | NFLX |
7 | 735465 | 467.00 | 472.19 | 473.75 | 464.54 | 1737700 | 2014-08-20 | 470.160000 | 5.19 | 0.011039 | 0.019589 | 3.133333 | 0.006664 | NFLX |
8 | 735466 | 471.28 | 472.05 | 476.15 | 467.67 | 1678100 | 2014-08-21 | 471.956667 | 0.77 | 0.001632 | 0.017968 | 1.796667 | 0.003807 | NFLX |
9 | 735467 | 470.94 | 479.19 | 479.55 | 470.05 | 1962200 | 2014-08-22 | 476.263333 | 8.25 | 0.017322 | 0.019947 | 4.306667 | 0.009043 | NFLX |
10 | 735470 | 481.55 | 480.93 | 485.30 | 477.53 | 1926400 | 2014-08-25 | 481.253333 | -0.62 | -0.001288 | 0.016145 | 4.990000 | 0.010369 | NFLX |
11 | 735471 | 478.50 | 479.36 | 481.85 | 474.55 | 1448200 | 2014-08-26 | 478.586667 | 0.86 | 0.001797 | 0.015253 | -2.666667 | -0.005572 | NFLX |
12 | 735472 | 479.40 | 474.70 | 480.25 | 473.63 | 1484900 | 2014-08-27 | 476.193333 | -4.70 | -0.009870 | 0.013902 | -2.393333 | -0.005026 | NFLX |
13 | 735473 | 472.65 | 475.21 | 477.48 | 470.81 | 1083500 | 2014-08-28 | 474.500000 | 2.56 | 0.005395 | 0.014057 | -1.693333 | -0.003569 | NFLX |
14 | 735474 | 476.85 | 477.64 | 480.50 | 474.14 | 1405500 | 2014-08-29 | 477.426667 | 0.79 | 0.001655 | 0.013321 | 2.926667 | 0.006130 | NFLX |
15 | 735478 | 478.50 | 476.60 | 478.79 | 474.59 | 1258100 | 2014-09-02 | 476.660000 | -1.90 | -0.003986 | 0.008811 | -0.766667 | -0.001608 | NFLX |
16 | 735479 | 480.52 | 477.39 | 487.60 | 476.53 | 2317000 | 2014-09-03 | 480.506667 | -3.13 | -0.006514 | 0.023038 | 3.846667 | 0.008005 | NFLX |
17 | 735480 | 479.11 | 472.67 | 481.69 | 472.13 | 1708400 | 2014-09-04 | 475.496667 | -6.44 | -0.013544 | 0.020105 | -5.010000 | -0.010536 | NFLX |
18 | 735481 | 473.23 | 475.68 | 477.35 | 470.07 | 1707600 | 2014-09-05 | 474.366667 | 2.45 | 0.005165 | 0.015347 | -1.130000 | -0.002382 | NFLX |
0 | 735456 | 95.27 | 95.99 | 96.08 | 94.84 | 36585000 | 2014-08-11 | 95.636667 | 0.72 | 0.007528 | 0.012966 | 0.000000 | 0.000000 | AAPL |
1 | 735457 | 96.04 | 95.97 | 96.88 | 95.61 | 33795000 | 2014-08-12 | 96.153333 | -0.07 | -0.000728 | 0.013208 | 0.516667 | 0.005373 | AAPL |
2 | 735458 | 96.15 | 97.24 | 97.24 | 96.04 | 31916000 | 2014-08-13 | 96.840000 | 1.09 | 0.011256 | 0.012392 | 0.686667 | 0.007091 | AAPL |
3 | 735459 | 97.33 | 97.50 | 97.57 | 96.80 | 28116000 | 2014-08-14 | 97.290000 | 0.17 | 0.001747 | 0.007914 | 0.450000 | 0.004625 | AAPL |
4 | 735460 | 97.90 | 97.98 | 98.19 | 96.86 | 48951000 | 2014-08-15 | 97.676667 | 0.08 | 0.000819 | 0.013616 | 0.386667 | 0.003959 | AAPL |
5 | 735463 | 98.49 | 99.16 | 99.37 | 97.98 | 47572000 | 2014-08-18 | 98.836667 | 0.67 | 0.006779 | 0.014064 | 1.160000 | 0.011737 | AAPL |
6 | 735464 | 99.41 | 100.53 | 100.68 | 99.32 | 69274700 | 2014-08-19 | 100.176667 | 1.12 | 0.011180 | 0.013576 | 1.340000 | 0.013376 | AAPL |
7 | 735465 | 100.44 | 100.57 | 101.09 | 99.95 | 52612800 | 2014-08-20 | 100.536667 | 0.13 | 0.001293 | 0.011339 | 0.360000 | 0.003581 | AAPL |
8 | 735466 | 100.57 | 100.58 | 100.94 | 100.11 | 33421900 | 2014-08-21 | 100.543333 | 0.01 | 0.000099 | 0.008255 | 0.006667 | 0.000066 | AAPL |
9 | 735467 | 100.29 | 101.32 | 101.47 | 100.19 | 44102400 | 2014-08-22 | 100.993333 | 1.03 | 0.010199 | 0.012674 | 0.450000 | 0.004456 | AAPL |
10 | 735470 | 101.79 | 101.54 | 102.17 | 101.28 | 40144700 | 2014-08-25 | 101.663333 | -0.25 | -0.002459 | 0.008754 | 0.670000 | 0.006590 | AAPL |
11 | 735471 | 101.42 | 100.89 | 101.50 | 100.86 | 33119800 | 2014-08-26 | 101.083333 | -0.53 | -0.005243 | 0.006331 | -0.580000 | -0.005738 | AAPL |
12 | 735472 | 101.02 | 102.13 | 102.57 | 100.70 | 46827400 | 2014-08-27 | 101.800000 | 1.11 | 0.010904 | 0.018369 | 0.716667 | 0.007040 | AAPL |
13 | 735473 | 101.59 | 102.25 | 102.78 | 101.56 | 68389800 | 2014-08-28 | 102.196667 | 0.66 | 0.006458 | 0.011938 | 0.396667 | 0.003881 | AAPL |
14 | 735474 | 102.86 | 102.50 | 102.90 | 102.20 | 44567000 | 2014-08-29 | 102.533333 | -0.36 | -0.003511 | 0.006827 | 0.336667 | 0.003283 | AAPL |
15 | 735478 | 103.06 | 103.30 | 103.74 | 102.72 | 53491400 | 2014-09-02 | 103.253333 | 0.24 | 0.002324 | 0.009879 | 0.720000 | 0.006973 | AAPL |
16 | 735479 | 103.10 | 98.94 | 103.20 | 98.58 | 125233100 | 2014-09-03 | 100.240000 | -4.16 | -0.041500 | 0.046089 | -3.013333 | -0.030061 | AAPL |
17 | 735480 | 98.85 | 98.12 | 100.09 | 97.79 | 85594800 | 2014-09-04 | 98.666667 | -0.73 | -0.007399 | 0.023311 | -1.573333 | -0.015946 | AAPL |
18 | 735481 | 98.80 | 98.97 | 99.39 | 98.31 | 58353200 | 2014-09-05 | 98.890000 | 0.17 | 0.001719 | 0.010921 | 0.223333 | 0.002258 | AAPL |
0 | 735456 | 12.58 | 12.48 | 12.68 | 12.46 | 624200 | 2014-08-11 | 12.540000 | -0.10 | -0.007974 | 0.017544 | 0.000000 | 0.000000 | NYT |
1 | 735457 | 12.39 | 12.26 | 12.50 | 12.18 | 704400 | 2014-08-12 | 12.313333 | -0.13 | -0.010558 | 0.025988 | -0.226667 | -0.018408 | NYT |
2 | 735458 | 12.27 | 12.26 | 12.33 | 12.23 | 593000 | 2014-08-13 | 12.273333 | -0.01 | -0.000815 | 0.008148 | -0.040000 | -0.003259 | NYT |
3 | 735459 | 12.25 | 12.27 | 12.35 | 12.16 | 514800 | 2014-08-14 | 12.260000 | 0.02 | 0.001631 | 0.015498 | -0.013333 | -0.001088 | NYT |
4 | 735460 | 12.41 | 12.27 | 12.41 | 12.06 | 688600 | 2014-08-15 | 12.246667 | -0.14 | -0.011432 | 0.028579 | -0.013333 | -0.001089 | NYT |
5 | 735463 | 12.42 | 12.38 | 12.47 | 12.34 | 548200 | 2014-08-18 | 12.396667 | -0.04 | -0.003227 | 0.010487 | 0.150000 | 0.012100 | NYT |
6 | 735464 | 12.38 | 12.29 | 12.44 | 12.27 | 412100 | 2014-08-19 | 12.333333 | -0.09 | -0.007297 | 0.013784 | -0.063333 | -0.005135 | NYT |
7 | 735465 | 12.25 | 12.29 | 12.33 | 12.20 | 426200 | 2014-08-20 | 12.273333 | 0.04 | 0.003259 | 0.010592 | -0.060000 | -0.004889 | NYT |
8 | 735466 | 12.27 | 12.41 | 12.45 | 12.24 | 664100 | 2014-08-21 | 12.366667 | 0.14 | 0.011321 | 0.016981 | 0.093333 | 0.007547 | NYT |
9 | 735467 | 12.41 | 12.38 | 12.46 | 12.32 | 556300 | 2014-08-22 | 12.386667 | -0.03 | -0.002422 | 0.011302 | 0.020000 | 0.001615 | NYT |
10 | 735470 | 12.44 | 12.32 | 12.46 | 12.24 | 350900 | 2014-08-25 | 12.340000 | -0.12 | -0.009724 | 0.017828 | -0.046667 | -0.003782 | NYT |
11 | 735471 | 12.34 | 12.39 | 12.42 | 12.27 | 532000 | 2014-08-26 | 12.360000 | 0.05 | 0.004045 | 0.012136 | 0.020000 | 0.001618 | NYT |
12 | 735472 | 12.40 | 12.40 | 12.44 | 12.35 | 586200 | 2014-08-27 | 12.396667 | 0.00 | 0.000000 | 0.007260 | 0.036667 | 0.002958 | NYT |
13 | 735473 | 12.33 | 12.35 | 12.41 | 12.25 | 469100 | 2014-08-28 | 12.336667 | 0.02 | 0.001621 | 0.012969 | -0.060000 | -0.004864 | NYT |
14 | 735474 | 12.36 | 12.38 | 12.41 | 12.17 | 342000 | 2014-08-29 | 12.320000 | 0.02 | 0.001623 | 0.019481 | -0.016667 | -0.001353 | NYT |
15 | 735478 | 12.37 | 12.33 | 12.45 | 12.19 | 810500 | 2014-09-02 | 12.323333 | -0.04 | -0.003246 | 0.021098 | 0.003333 | 0.000270 | NYT |
16 | 735479 | 12.38 | 12.33 | 12.43 | 12.29 | 968100 | 2014-09-03 | 12.350000 | -0.05 | -0.004049 | 0.011336 | 0.026667 | 0.002159 | NYT |
17 | 735480 | 12.38 | 12.35 | 12.46 | 12.26 | 750900 | 2014-09-04 | 12.356667 | -0.03 | -0.002428 | 0.016186 | 0.006667 | 0.000540 | NYT |
18 | 735481 | 12.31 | 12.37 | 12.45 | 12.27 | 559800 | 2014-09-05 | 12.363333 | 0.06 | 0.004853 | 0.014559 | 0.006667 | 0.000539 | NYT |
0 | 735456 | 73.46 | 73.44 | 73.91 | 73.06 | 24591000 | 2014-08-11 | 73.470000 | -0.02 | -0.000272 | 0.011569 | 0.000000 | 0.000000 | FB |
1 | 735457 | 73.09 | 72.83 | 73.33 | 72.22 | 27419000 | 2014-08-12 | 72.793333 | -0.26 | -0.003572 | 0.015249 | -0.676667 | -0.009296 | FB |
2 | 735458 | 73.12 | 73.77 | 74.25 | 73.05 | 29198500 | 2014-08-13 | 73.690000 | 0.65 | 0.008821 | 0.016284 | 0.896667 | 0.012168 | FB |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
950 rows × 14 columns
# Overlay the day-over-day percentage change of every symbol on one chart.
fig = plt.figure(figsize=(10,6))
axes = fig.add_subplot(111)
for symbol in stocks_df["symbol"].unique():
    # Select this symbol's rows once and read both series from them.
    symbol_rows = stocks_df[stocks_df["symbol"] == symbol]
    x = symbol_rows["n_date"]
    y = symbol_rows["change_1_per"]
    # Low alpha keeps the overlapping lines readable.
    axes.plot_date(x=x, y=y, fmt="-", alpha=0.2, linewidth=2)
axes.xaxis_date()
plt.gcf().autofmt_xdate()
plt.grid()
plt.show();
def pivot_data(stocks_df, values="change_1_per"):
    """Reshape the long quote table into a symbol-by-date matrix.

    The result has one row per ``symbol`` and one column per ``n_date``;
    each cell holds that row's entry from the column named by ``values``.
    """
    return stocks_df.pivot(index="symbol", columns="n_date", values=values)
# One row per symbol, one column per trading day, filled with the
# day-over-day percentage change; displayed as the cell's output.
clustering_data = pivot_data(stocks_df, values="change_1_per")
clustering_data
n_date | 735456 | 735457 | 735458 | 735459 | 735460 | 735463 | 735464 | 735465 | 735466 | 735467 | 735470 | 735471 | 735472 | 735473 | 735474 | 735478 | 735479 | 735480 | 735481 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
symbol | |||||||||||||||||||
AAPL | 0 | 0.005373 | 0.007091 | 0.004625 | 0.003959 | 0.011737 | 0.013376 | 0.003581 | 0.000066 | 0.004456 | 0.006590 | -0.005738 | 0.007040 | 0.003881 | 0.003283 | 0.006973 | -0.030061 | -0.015946 | 0.002258 |
ADBE | 0 | -0.002185 | 0.010283 | 0.007582 | 0.004037 | 0.009363 | 0.012268 | -0.006643 | 0.003217 | 0.006578 | 0.000648 | -0.003437 | -0.001256 | -0.004954 | 0.006732 | 0.005219 | 0.003177 | 0.003212 | 0.003430 |
ADSK | 0 | 0.004898 | 0.017504 | 0.013646 | -0.033605 | -0.015570 | 0.004711 | -0.004108 | 0.003350 | -0.002924 | -0.001059 | 0.010964 | -0.002099 | -0.002661 | -0.000619 | -0.007361 | 0.011897 | 0.007039 | -0.006158 |
AMZN | 0 | 0.002078 | 0.026988 | 0.009929 | 0.002949 | 0.009095 | -0.002142 | 0.002989 | -0.005231 | -0.005269 | 0.007005 | 0.016890 | 0.010134 | -0.010366 | -0.002032 | 0.003425 | -0.002177 | 0.018375 | -0.003090 |
BBRY | 0 | -0.006726 | -0.002840 | 0.009494 | 0.010783 | 0.014736 | 0.009504 | 0.027402 | -0.013043 | -0.004030 | 0.007333 | 0.003322 | 0.018265 | 0.001628 | 0.000651 | 0.012849 | 0.032027 | -0.009733 | -0.009189 |
BKS | 0 | -0.004016 | 0.001040 | 0.009566 | -0.002360 | 0.014967 | 0.008071 | -0.018048 | -0.007093 | 0.002947 | -0.008769 | 0.013200 | 0.027528 | 0.008626 | 0.001553 | -0.010558 | 0.016419 | 0.002240 | -0.004501 |
BP | 0 | -0.001125 | -0.005802 | 0.005278 | -0.000775 | 0.009557 | 0.007821 | -0.000138 | 0.001797 | -0.001661 | 0.006054 | -0.004214 | 0.002206 | -0.007850 | -0.002996 | -0.011132 | 0.010251 | -0.041773 | -0.001601 |
CAJ | 0 | -0.002323 | 0.003221 | 0.004908 | 0.000501 | -0.000501 | 0.001900 | -0.008164 | -0.000403 | -0.007006 | -0.000102 | 0.003441 | -0.002638 | 0.000608 | -0.004584 | 0.003351 | 0.003541 | 0.006134 | -0.002217 |
CAT | 0 | 0.001084 | 0.004349 | 0.005242 | 0.002017 | 0.009583 | 0.004691 | 0.003653 | 0.003178 | -0.007492 | 0.007283 | 0.002002 | 0.002672 | -0.000861 | 0.003523 | 0.003237 | -0.004016 | 0.000919 | -0.001718 |
CBS | 0 | -0.010617 | -0.002817 | 0.003984 | 0.007462 | 0.008284 | 0.004180 | -0.002702 | -0.001215 | 0.002478 | 0.004387 | -0.009521 | 0.002485 | -0.009082 | -0.007353 | -0.005758 | -0.006992 | 0.011908 | -0.002816 |
CMCSA | 0 | 0.000310 | 0.005419 | 0.002886 | 0.004888 | 0.003895 | -0.003114 | -0.000366 | -0.000733 | -0.005716 | 0.005441 | 0.000367 | 0.001099 | -0.002202 | 0.003779 | 0.000122 | 0.002674 | 0.003392 | 0.007574 |
CRM | 0 | -0.029861 | -0.004338 | 0.006434 | 0.003858 | 0.013566 | 0.010267 | 0.005979 | 0.006361 | 0.066805 | -0.006311 | 0.002361 | 0.009025 | -0.025011 | 0.010118 | 0.012062 | -0.004713 | -0.002418 | 0.003642 |
CSC | 0 | -0.003303 | 0.002547 | 0.006370 | 0.002154 | 0.010804 | 0.004196 | 0.002791 | 0.006711 | -0.004814 | -0.001434 | 0.000394 | 0.004502 | -0.004918 | -0.000559 | 0.006930 | 0.002213 | -0.002162 | 0.000166 |
CSCO | 0 | -0.002519 | -0.000265 | -0.021680 | -0.005998 | 0.004883 | 0.002841 | 0.000676 | 0.007246 | -0.003501 | -0.000404 | 0.002419 | -0.000538 | 0.000269 | 0.004683 | -0.001474 | 0.006126 | -0.002804 | 0.000667 |
CTXS | 0 | -0.004566 | 0.005074 | 0.001158 | -0.001741 | 0.009672 | -0.001151 | -0.000288 | 0.004345 | 0.006168 | -0.003571 | -0.000286 | -0.002100 | -0.004555 | 0.009922 | 0.005148 | 0.001368 | 0.000801 | 0.001553 |
CVX | 0 | -0.008605 | 0.005390 | -0.003951 | 0.001565 | -0.000132 | 0.008799 | -0.000026 | 0.003705 | -0.005799 | 0.005766 | 0.004183 | 0.001945 | 0.000648 | 0.004078 | -0.009115 | 0.001119 | -0.007733 | -0.001207 |
DIS | 0 | -0.003517 | 0.005172 | 0.008895 | 0.007890 | 0.009188 | 0.000370 | -0.001706 | 0.004910 | 0.002284 | 0.001140 | -0.004397 | 0.000480 | -0.000296 | -0.004043 | 0.008423 | 0.003263 | -0.005233 | 0.002316 |
EBAY | 0 | -0.009406 | -0.012296 | 0.001951 | -0.001892 | 0.003080 | 0.009711 | -0.001871 | 0.036590 | 0.001560 | 0.001737 | 0.006958 | 0.001484 | -0.012566 | -0.000722 | -0.006297 | -0.005785 | -0.002014 | -0.014046 |
EMC | 0 | 0.000455 | 0.005656 | 0.006630 | 0.001683 | 0.007350 | -0.001897 | -0.007079 | 0.002018 | -0.004166 | 0.002919 | -0.003832 | -0.003393 | -0.003176 | 0.003054 | -0.004202 | 0.006880 | -0.010486 | -0.009782 |
FB | 0 | -0.009296 | 0.012168 | 0.005846 | -0.004926 | 0.008913 | 0.009362 | -0.003428 | -0.001963 | -0.005833 | 0.009863 | 0.007014 | -0.009351 | -0.011530 | 0.007288 | 0.019939 | 0.003102 | -0.002189 | 0.007948 |
FJTSY | 0 | -0.015135 | 0.022376 | -0.003667 | -0.000367 | 0.000000 | -0.007576 | -0.008573 | 0.002510 | 0.000000 | 0.000000 | 0.000000 | -0.022526 | -0.021060 | 0.000679 | 0.016783 | -0.032388 | 0.002945 | 0.000883 |
FOXA | 0 | 0.010004 | 0.008595 | 0.009264 | 0.007339 | 0.003148 | -0.003998 | -0.002143 | -0.002522 | 0.000560 | -0.001777 | -0.001123 | 0.003359 | -0.001776 | -0.004979 | 0.008014 | 0.010968 | 0.002666 | -0.004432 |
GE | 0 | -0.005840 | 0.005036 | 0.001418 | -0.005705 | 0.011155 | 0.003450 | 0.006475 | 0.005178 | -0.006995 | -0.001528 | -0.002682 | 0.000638 | -0.003715 | -0.001154 | -0.004121 | 0.001543 | 0.002693 | 0.000256 |
GOOG | 0 | -0.008795 | 0.015167 | 0.004584 | -0.000029 | 0.011069 | 0.008833 | -0.002532 | -0.002716 | -0.000326 | -0.002431 | -0.004585 | -0.009683 | -0.005873 | 0.000678 | 0.009060 | 0.005519 | 0.006450 | 0.004206 |
HPQ | 0 | -0.005128 | 0.004443 | 0.003673 | -0.004162 | 0.003487 | 0.004130 | -0.007852 | 0.036108 | 0.008050 | 0.006559 | 0.015742 | 0.007287 | -0.001935 | 0.002807 | -0.001934 | 0.005072 | -0.008555 | -0.008718 |
HTHIY | 0 | 0.001694 | 0.018982 | -0.004305 | -0.004058 | 0.007181 | 0.003708 | -0.015190 | -0.000753 | -0.007772 | 0.012744 | -0.001325 | 0.000618 | -0.003943 | 0.007300 | 0.016224 | 0.001598 | -0.011005 | -0.003198 |
IBM | 0 | 0.001297 | 0.001703 | -0.001457 | 0.000124 | 0.006844 | 0.004827 | 0.000105 | 0.007076 | -0.002131 | 0.001917 | 0.006684 | 0.000692 | -0.005183 | 0.002135 | -0.001269 | 0.002254 | -0.005807 | -0.000681 |
INTC | 0 | -0.001112 | 0.024832 | 0.004219 | 0.001763 | 0.007486 | 0.002231 | 0.000969 | 0.015269 | 0.002475 | -0.004206 | -0.000670 | -0.001725 | -0.002787 | 0.004401 | -0.005967 | -0.000096 | 0.005932 | 0.004097 |
LXK | 0 | 0.007975 | 0.008664 | 0.010324 | -0.007590 | 0.000110 | 0.003788 | -0.005231 | -0.002781 | -0.008994 | -0.001651 | 0.001430 | -0.000267 | 0.001469 | 0.009064 | -0.008811 | -0.008414 | -0.004055 | -0.000609 |
MSFT | 0 | 0.003428 | 0.012751 | 0.006959 | 0.010214 | 0.006049 | 0.010608 | -0.001848 | 0.000370 | 0.002874 | -0.000295 | -0.002216 | -0.005348 | -0.001190 | 0.009283 | -0.002437 | -0.005944 | 0.004806 | 0.012413 |
N | 0 | -0.003774 | -0.002940 | -0.001937 | -0.004743 | 0.010588 | 0.011145 | -0.001350 | -0.000040 | 0.012627 | -0.002477 | 0.012155 | 0.013787 | -0.014610 | 0.013834 | 0.015171 | 0.001545 | -0.006065 | -0.003195 |
NFLX | 0 | -0.011230 | 0.007208 | 0.001123 | 0.011389 | 0.019374 | 0.003090 | 0.006664 | 0.003807 | 0.009043 | 0.010369 | -0.005572 | -0.005026 | -0.003569 | 0.006130 | -0.001608 | 0.008005 | -0.010536 | -0.002382 |
NVDA | 0 | -0.004594 | 0.007043 | -0.006938 | 0.004801 | 0.011358 | 0.012215 | -0.002593 | -0.007490 | -0.002795 | 0.003828 | 0.009650 | -0.002072 | 0.000690 | 0.004124 | 0.002229 | 0.009847 | 0.015379 | 0.001502 |
NWSA | 0 | 0.000192 | -0.021825 | 0.017388 | 0.000386 | -0.000386 | -0.005830 | 0.010955 | 0.010272 | -0.001715 | 0.005872 | 0.002457 | -0.000378 | 0.000567 | -0.000756 | -0.005705 | 0.003222 | 0.003965 | 0.001320 |
NYT | 0 | -0.018408 | -0.003259 | -0.001088 | -0.001089 | 0.012100 | -0.005135 | -0.004889 | 0.007547 | 0.001615 | -0.003782 | 0.001618 | 0.002958 | -0.004864 | -0.001353 | 0.000270 | 0.002159 | 0.000540 | 0.000539 |
ORCL | 0 | -0.001671 | 0.007382 | 0.000332 | 0.002069 | 0.007881 | 0.014721 | 0.002340 | 0.005058 | 0.001203 | 0.005741 | 0.001592 | -0.006087 | -0.005719 | 0.002811 | 0.002404 | 0.005340 | -0.003680 | -0.005793 |
PCRFY | 0 | -0.001397 | 0.016763 | 0.006823 | -0.001093 | 0.005975 | 0.000814 | -0.015431 | 0.001101 | -0.015372 | 0.005006 | 0.003602 | 0.005237 | 0.001651 | 0.010887 | 0.007295 | 0.014643 | -0.002134 | -0.010515 |
RAX | 0 | -0.036414 | -0.004699 | 0.033312 | 0.009003 | 0.017585 | 0.035446 | -0.000203 | 0.012440 | 0.021210 | -0.007319 | 0.020159 | -0.008504 | -0.005109 | 0.012419 | 0.020992 | 0.031463 | 0.019219 | 0.009385 |
RHT | 0 | 0.000507 | 0.007825 | 0.002064 | 0.001281 | 0.014115 | 0.021549 | -0.002640 | -0.002376 | 0.004356 | -0.005406 | 0.003287 | -0.007984 | -0.015275 | 0.007824 | 0.004141 | 0.006711 | -0.004622 | -0.003985 |
SAP | 0 | -0.006488 | -0.001993 | 0.002852 | -0.007576 | 0.008291 | 0.009113 | -0.008675 | 0.006645 | -0.005908 | 0.014241 | 0.005454 | 0.003329 | -0.015968 | -0.000986 | 0.008287 | -0.008875 | -0.002407 | 0.001502 |
SIEGY | 0 | -0.012527 | 0.014653 | 0.014548 | -0.004600 | 0.004551 | 0.005068 | -0.001160 | 0.014180 | -0.008505 | 0.016414 | 0.007543 | -0.000786 | -0.015761 | -0.002001 | 0.008176 | 0.011301 | 0.000680 | -0.000131 |
SNE | 0 | 0.000000 | 0.029412 | 0.005369 | 0.019070 | 0.007033 | 0.016146 | 0.006522 | -0.002651 | -0.000707 | 0.007548 | -0.005826 | -0.001414 | 0.002293 | 0.010991 | 0.013425 | 0.020566 | -0.006789 | -0.022029 |
TOSYY | 0 | 0.000000 | 0.012976 | -0.004764 | -0.001632 | 0.010191 | 0.012761 | -0.008040 | 0.007245 | -0.008420 | 0.001731 | -0.002230 | -0.015474 | 0.001005 | -0.005942 | 0.013593 | -0.010841 | -0.004177 | 0.005038 |
TRI | 0 | 0.004010 | -0.004197 | 0.009253 | 0.001130 | 0.005721 | -0.004335 | -0.000177 | 0.001681 | -0.000531 | -0.001064 | 0.001328 | 0.006857 | 0.000352 | -0.002025 | 0.000968 | 0.003943 | 0.003232 | -0.003242 |
TWX | 0 | -0.008865 | 0.006782 | 0.026300 | 0.007408 | 0.005309 | 0.001090 | 0.000651 | -0.002313 | 0.002966 | 0.002401 | -0.002507 | 0.002647 | -0.000390 | 0.000866 | 0.003151 | 0.005495 | -0.004095 | -0.006769 |
TXN | 0 | 0.001002 | 0.010276 | 0.003460 | 0.004779 | 0.002664 | 0.002796 | 0.003344 | 0.004577 | 0.000901 | -0.008384 | -0.004774 | 0.002800 | 0.005708 | 0.005124 | -0.002917 | 0.004425 | 0.002345 | 0.002820 |
VIAB | 0 | -0.005008 | -0.000787 | 0.002314 | 0.007547 | 0.009547 | -0.006172 | -0.002993 | -0.004944 | -0.001899 | -0.000950 | -0.002112 | 0.001860 | 0.001980 | 0.000948 | 0.007079 | -0.002872 | -0.000739 | -0.000041 |
VMW | 0 | -0.007897 | 0.004582 | 0.009869 | 0.005238 | 0.015535 | 0.000745 | 0.003810 | -0.003434 | -0.003283 | -0.014676 | -0.028492 | -0.005011 | -0.001810 | 0.008902 | 0.002936 | -0.002571 | -0.017769 | -0.010790 |
XOM | 0 | -0.005729 | 0.006333 | 0.000303 | -0.001247 | 0.005199 | 0.002409 | -0.000770 | -0.000100 | -0.010319 | 0.001992 | 0.009432 | -0.001306 | -0.000872 | -0.001578 | -0.006624 | 0.005612 | -0.006018 | 0.003000 |
XRX | 0 | 0.005475 | 0.010589 | -0.002964 | -0.007214 | 0.005935 | -0.002976 | 0.006162 | 0.003684 | 0.001471 | 0.002202 | 0.003657 | 0.002189 | 0.000000 | 0.007005 | 0.003850 | 0.009061 | -0.013778 | -0.013971 |
50 rows × 19 columns
Assume $X_{i,j}$ is a matrix representing our dataset, where $i$ is the number of rows and $j$ is the number of columns. The following function returns $X_{scaled}$, the normalized version of the dataset:
$X_{std} = \frac{X - min(X_j)}{max(X_j) - min(X_j)}$
$X_{scaled} = X_{std} * (max - min) + min$
The most common Max and Min are:
$max = 1$
$min = -1$
$X_{scaled} = X_{std} * (1 - (-1)) + (-1) = X_{std} * 2 - 1$
And:
$max = 1$
$min = 0$
$X_{scaled} = X_{std} * (1 - 0) + 0 = X_{std}$
# Raw series: one line per row (symbol) of the pivoted table.
for item in clustering_data.values:
    plt.plot(item)
plt.show();
# Same rows after sklearn's normalize with axis=1 (each row scaled on its
# own), wrapped back into a DataFrame and plotted for comparison.
norm_data = pd.DataFrame(normalize(clustering_data.values, axis=1))
for item in norm_data.values:
    plt.plot(item)
plt.show();
def cluster_data(data, n_clusters=8, normalize_data=False, random_state=None):
    """Fit a K-Means model on `data` and return the cluster label of each row.

    Parameters:
        data: table with one sample per row (a DataFrame or any array-like).
        n_clusters: number of clusters requested from KMeans.
        normalize_data: when True, every row is rescaled with sklearn's
            `normalize` (norm='l2', axis=1) before fitting.
        random_state: forwarded to KMeans. Pass an integer to get the same
            clustering on every run; the default (None) leaves the
            initialisation unseeded.

    Returns:
        (prediction, cluster_model, data) where `prediction` holds the
        cluster label of each row, `cluster_model` is the fitted KMeans
        instance and `data` is what the model was fitted on: the
        normalized ndarray when `normalize_data` is True, otherwise the
        input object unchanged.
    """
    if normalize_data:
        # np.asarray accepts both DataFrames and plain arrays, so callers are
        # not forced to pass a DataFrame just to enable normalization.
        data = normalize(np.asarray(data), norm='l2', axis=1, copy=True)
    cluster_model = KMeans(n_clusters=n_clusters, random_state=random_state)
    prediction = cluster_model.fit_predict(data)
    return prediction, cluster_model, data
# Cluster the normalized rows into 8 groups.
prediction, model, data = cluster_data(
    clustering_data,
    n_clusters=8,
    normalize_data=True,
)
# Report how many distinct labels were actually assigned.
print("Cluster Count: %s" % len(np.unique(prediction)))

# Attach each row's label to the table and display it.
clustering_data["Cluster"] = prediction
clustering_data
Cluster Count: 8
n_date | 735456 | 735457 | 735458 | 735459 | 735460 | 735463 | 735464 | 735465 | 735466 | 735467 | 735470 | 735471 | 735472 | 735473 | 735474 | 735478 | 735479 | 735480 | 735481 | Cluster |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
symbol | ||||||||||||||||||||
AAPL | 0 | 0.005373 | 0.007091 | 0.004625 | 0.003959 | 0.011737 | 0.013376 | 0.003581 | 0.000066 | 0.004456 | 0.006590 | -0.005738 | 0.007040 | 0.003881 | 0.003283 | 0.006973 | -0.030061 | -0.015946 | 0.002258 | 3 |
ADBE | 0 | -0.002185 | 0.010283 | 0.007582 | 0.004037 | 0.009363 | 0.012268 | -0.006643 | 0.003217 | 0.006578 | 0.000648 | -0.003437 | -0.001256 | -0.004954 | 0.006732 | 0.005219 | 0.003177 | 0.003212 | 0.003430 | 3 |
ADSK | 0 | 0.004898 | 0.017504 | 0.013646 | -0.033605 | -0.015570 | 0.004711 | -0.004108 | 0.003350 | -0.002924 | -0.001059 | 0.010964 | -0.002099 | -0.002661 | -0.000619 | -0.007361 | 0.011897 | 0.007039 | -0.006158 | 4 |
AMZN | 0 | 0.002078 | 0.026988 | 0.009929 | 0.002949 | 0.009095 | -0.002142 | 0.002989 | -0.005231 | -0.005269 | 0.007005 | 0.016890 | 0.010134 | -0.010366 | -0.002032 | 0.003425 | -0.002177 | 0.018375 | -0.003090 | 1 |
BBRY | 0 | -0.006726 | -0.002840 | 0.009494 | 0.010783 | 0.014736 | 0.009504 | 0.027402 | -0.013043 | -0.004030 | 0.007333 | 0.003322 | 0.018265 | 0.001628 | 0.000651 | 0.012849 | 0.032027 | -0.009733 | -0.009189 | 6 |
BKS | 0 | -0.004016 | 0.001040 | 0.009566 | -0.002360 | 0.014967 | 0.008071 | -0.018048 | -0.007093 | 0.002947 | -0.008769 | 0.013200 | 0.027528 | 0.008626 | 0.001553 | -0.010558 | 0.016419 | 0.002240 | -0.004501 | 2 |
BP | 0 | -0.001125 | -0.005802 | 0.005278 | -0.000775 | 0.009557 | 0.007821 | -0.000138 | 0.001797 | -0.001661 | 0.006054 | -0.004214 | 0.002206 | -0.007850 | -0.002996 | -0.011132 | 0.010251 | -0.041773 | -0.001601 | 0 |
CAJ | 0 | -0.002323 | 0.003221 | 0.004908 | 0.000501 | -0.000501 | 0.001900 | -0.008164 | -0.000403 | -0.007006 | -0.000102 | 0.003441 | -0.002638 | 0.000608 | -0.004584 | 0.003351 | 0.003541 | 0.006134 | -0.002217 | 1 |
CAT | 0 | 0.001084 | 0.004349 | 0.005242 | 0.002017 | 0.009583 | 0.004691 | 0.003653 | 0.003178 | -0.007492 | 0.007283 | 0.002002 | 0.002672 | -0.000861 | 0.003523 | 0.003237 | -0.004016 | 0.000919 | -0.001718 | 6 |
CBS | 0 | -0.010617 | -0.002817 | 0.003984 | 0.007462 | 0.008284 | 0.004180 | -0.002702 | -0.001215 | 0.002478 | 0.004387 | -0.009521 | 0.002485 | -0.009082 | -0.007353 | -0.005758 | -0.006992 | 0.011908 | -0.002816 | 5 |
CMCSA | 0 | 0.000310 | 0.005419 | 0.002886 | 0.004888 | 0.003895 | -0.003114 | -0.000366 | -0.000733 | -0.005716 | 0.005441 | 0.000367 | 0.001099 | -0.002202 | 0.003779 | 0.000122 | 0.002674 | 0.003392 | 0.007574 | 3 |
CRM | 0 | -0.029861 | -0.004338 | 0.006434 | 0.003858 | 0.013566 | 0.010267 | 0.005979 | 0.006361 | 0.066805 | -0.006311 | 0.002361 | 0.009025 | -0.025011 | 0.010118 | 0.012062 | -0.004713 | -0.002418 | 0.003642 | 5 |
CSC | 0 | -0.003303 | 0.002547 | 0.006370 | 0.002154 | 0.010804 | 0.004196 | 0.002791 | 0.006711 | -0.004814 | -0.001434 | 0.000394 | 0.004502 | -0.004918 | -0.000559 | 0.006930 | 0.002213 | -0.002162 | 0.000166 | 6 |
CSCO | 0 | -0.002519 | -0.000265 | -0.021680 | -0.005998 | 0.004883 | 0.002841 | 0.000676 | 0.007246 | -0.003501 | -0.000404 | 0.002419 | -0.000538 | 0.000269 | 0.004683 | -0.001474 | 0.006126 | -0.002804 | 0.000667 | 0 |
CTXS | 0 | -0.004566 | 0.005074 | 0.001158 | -0.001741 | 0.009672 | -0.001151 | -0.000288 | 0.004345 | 0.006168 | -0.003571 | -0.000286 | -0.002100 | -0.004555 | 0.009922 | 0.005148 | 0.001368 | 0.000801 | 0.001553 | 5 |
CVX | 0 | -0.008605 | 0.005390 | -0.003951 | 0.001565 | -0.000132 | 0.008799 | -0.000026 | 0.003705 | -0.005799 | 0.005766 | 0.004183 | 0.001945 | 0.000648 | 0.004078 | -0.009115 | 0.001119 | -0.007733 | -0.001207 | 0 |
DIS | 0 | -0.003517 | 0.005172 | 0.008895 | 0.007890 | 0.009188 | 0.000370 | -0.001706 | 0.004910 | 0.002284 | 0.001140 | -0.004397 | 0.000480 | -0.000296 | -0.004043 | 0.008423 | 0.003263 | -0.005233 | 0.002316 | 6 |
EBAY | 0 | -0.009406 | -0.012296 | 0.001951 | -0.001892 | 0.003080 | 0.009711 | -0.001871 | 0.036590 | 0.001560 | 0.001737 | 0.006958 | 0.001484 | -0.012566 | -0.000722 | -0.006297 | -0.005785 | -0.002014 | -0.014046 | 0 |
EMC | 0 | 0.000455 | 0.005656 | 0.006630 | 0.001683 | 0.007350 | -0.001897 | -0.007079 | 0.002018 | -0.004166 | 0.002919 | -0.003832 | -0.003393 | -0.003176 | 0.003054 | -0.004202 | 0.006880 | -0.010486 | -0.009782 | 0 |
FB | 0 | -0.009296 | 0.012168 | 0.005846 | -0.004926 | 0.008913 | 0.009362 | -0.003428 | -0.001963 | -0.005833 | 0.009863 | 0.007014 | -0.009351 | -0.011530 | 0.007288 | 0.019939 | 0.003102 | -0.002189 | 0.007948 | 4 |
FJTSY | 0 | -0.015135 | 0.022376 | -0.003667 | -0.000367 | 0.000000 | -0.007576 | -0.008573 | 0.002510 | 0.000000 | 0.000000 | 0.000000 | -0.022526 | -0.021060 | 0.000679 | 0.016783 | -0.032388 | 0.002945 | 0.000883 | 4 |
FOXA | 0 | 0.010004 | 0.008595 | 0.009264 | 0.007339 | 0.003148 | -0.003998 | -0.002143 | -0.002522 | 0.000560 | -0.001777 | -0.001123 | 0.003359 | -0.001776 | -0.004979 | 0.008014 | 0.010968 | 0.002666 | -0.004432 | 2 |
GE | 0 | -0.005840 | 0.005036 | 0.001418 | -0.005705 | 0.011155 | 0.003450 | 0.006475 | 0.005178 | -0.006995 | -0.001528 | -0.002682 | 0.000638 | -0.003715 | -0.001154 | -0.004121 | 0.001543 | 0.002693 | 0.000256 | 6 |
GOOG | 0 | -0.008795 | 0.015167 | 0.004584 | -0.000029 | 0.011069 | 0.008833 | -0.002532 | -0.002716 | -0.000326 | -0.002431 | -0.004585 | -0.009683 | -0.005873 | 0.000678 | 0.009060 | 0.005519 | 0.006450 | 0.004206 | 4 |
HPQ | 0 | -0.005128 | 0.004443 | 0.003673 | -0.004162 | 0.003487 | 0.004130 | -0.007852 | 0.036108 | 0.008050 | 0.006559 | 0.015742 | 0.007287 | -0.001935 | 0.002807 | -0.001934 | 0.005072 | -0.008555 | -0.008718 | 0 |
HTHIY | 0 | 0.001694 | 0.018982 | -0.004305 | -0.004058 | 0.007181 | 0.003708 | -0.015190 | -0.000753 | -0.007772 | 0.012744 | -0.001325 | 0.000618 | -0.003943 | 0.007300 | 0.016224 | 0.001598 | -0.011005 | -0.003198 | 4 |
IBM | 0 | 0.001297 | 0.001703 | -0.001457 | 0.000124 | 0.006844 | 0.004827 | 0.000105 | 0.007076 | -0.002131 | 0.001917 | 0.006684 | 0.000692 | -0.005183 | 0.002135 | -0.001269 | 0.002254 | -0.005807 | -0.000681 | 0 |
INTC | 0 | -0.001112 | 0.024832 | 0.004219 | 0.001763 | 0.007486 | 0.002231 | 0.000969 | 0.015269 | 0.002475 | -0.004206 | -0.000670 | -0.001725 | -0.002787 | 0.004401 | -0.005967 | -0.000096 | 0.005932 | 0.004097 | 3 |
LXK | 0 | 0.007975 | 0.008664 | 0.010324 | -0.007590 | 0.000110 | 0.003788 | -0.005231 | -0.002781 | -0.008994 | -0.001651 | 0.001430 | -0.000267 | 0.001469 | 0.009064 | -0.008811 | -0.008414 | -0.004055 | -0.000609 | 3 |
MSFT | 0 | 0.003428 | 0.012751 | 0.006959 | 0.010214 | 0.006049 | 0.010608 | -0.001848 | 0.000370 | 0.002874 | -0.000295 | -0.002216 | -0.005348 | -0.001190 | 0.009283 | -0.002437 | -0.005944 | 0.004806 | 0.012413 | 3 |
N | 0 | -0.003774 | -0.002940 | -0.001937 | -0.004743 | 0.010588 | 0.011145 | -0.001350 | -0.000040 | 0.012627 | -0.002477 | 0.012155 | 0.013787 | -0.014610 | 0.013834 | 0.015171 | 0.001545 | -0.006065 | -0.003195 | 5 |
NFLX | 0 | -0.011230 | 0.007208 | 0.001123 | 0.011389 | 0.019374 | 0.003090 | 0.006664 | 0.003807 | 0.009043 | 0.010369 | -0.005572 | -0.005026 | -0.003569 | 0.006130 | -0.001608 | 0.008005 | -0.010536 | -0.002382 | 6 |
NVDA | 0 | -0.004594 | 0.007043 | -0.006938 | 0.004801 | 0.011358 | 0.012215 | -0.002593 | -0.007490 | -0.002795 | 0.003828 | 0.009650 | -0.002072 | 0.000690 | 0.004124 | 0.002229 | 0.009847 | 0.015379 | 0.001502 | 1 |
NWSA | 0 | 0.000192 | -0.021825 | 0.017388 | 0.000386 | -0.000386 | -0.005830 | 0.010955 | 0.010272 | -0.001715 | 0.005872 | 0.002457 | -0.000378 | 0.000567 | -0.000756 | -0.005705 | 0.003222 | 0.003965 | 0.001320 | 2 |
NYT | 0 | -0.018408 | -0.003259 | -0.001088 | -0.001089 | 0.012100 | -0.005135 | -0.004889 | 0.007547 | 0.001615 | -0.003782 | 0.001618 | 0.002958 | -0.004864 | -0.001353 | 0.000270 | 0.002159 | 0.000540 | 0.000539 | 5 |
ORCL | 0 | -0.001671 | 0.007382 | 0.000332 | 0.002069 | 0.007881 | 0.014721 | 0.002340 | 0.005058 | 0.001203 | 0.005741 | 0.001592 | -0.006087 | -0.005719 | 0.002811 | 0.002404 | 0.005340 | -0.003680 | -0.005793 | 0 |
PCRFY | 0 | -0.001397 | 0.016763 | 0.006823 | -0.001093 | 0.005975 | 0.000814 | -0.015431 | 0.001101 | -0.015372 | 0.005006 | 0.003602 | 0.005237 | 0.001651 | 0.010887 | 0.007295 | 0.014643 | -0.002134 | -0.010515 | 1 |
RAX | 0 | -0.036414 | -0.004699 | 0.033312 | 0.009003 | 0.017585 | 0.035446 | -0.000203 | 0.012440 | 0.021210 | -0.007319 | 0.020159 | -0.008504 | -0.005109 | 0.012419 | 0.020992 | 0.031463 | 0.019219 | 0.009385 | 5 |
RHT | 0 | 0.000507 | 0.007825 | 0.002064 | 0.001281 | 0.014115 | 0.021549 | -0.002640 | -0.002376 | 0.004356 | -0.005406 | 0.003287 | -0.007984 | -0.015275 | 0.007824 | 0.004141 | 0.006711 | -0.004622 | -0.003985 | 4 |
SAP | 0 | -0.006488 | -0.001993 | 0.002852 | -0.007576 | 0.008291 | 0.009113 | -0.008675 | 0.006645 | -0.005908 | 0.014241 | 0.005454 | 0.003329 | -0.015968 | -0.000986 | 0.008287 | -0.008875 | -0.002407 | 0.001502 | 4 |
SIEGY | 0 | -0.012527 | 0.014653 | 0.014548 | -0.004600 | 0.004551 | 0.005068 | -0.001160 | 0.014180 | -0.008505 | 0.016414 | 0.007543 | -0.000786 | -0.015761 | -0.002001 | 0.008176 | 0.011301 | 0.000680 | -0.000131 | 4 |
SNE | 0 | 0.000000 | 0.029412 | 0.005369 | 0.019070 | 0.007033 | 0.016146 | 0.006522 | -0.002651 | -0.000707 | 0.007548 | -0.005826 | -0.001414 | 0.002293 | 0.010991 | 0.013425 | 0.020566 | -0.006789 | -0.022029 | 1 |
TOSYY | 0 | 0.000000 | 0.012976 | -0.004764 | -0.001632 | 0.010191 | 0.012761 | -0.008040 | 0.007245 | -0.008420 | 0.001731 | -0.002230 | -0.015474 | 0.001005 | -0.005942 | 0.013593 | -0.010841 | -0.004177 | 0.005038 | 4 |
TRI | 0 | 0.004010 | -0.004197 | 0.009253 | 0.001130 | 0.005721 | -0.004335 | -0.000177 | 0.001681 | -0.000531 | -0.001064 | 0.001328 | 0.006857 | 0.000352 | -0.002025 | 0.000968 | 0.003943 | 0.003232 | -0.003242 | 2 |
TWX | 0 | -0.008865 | 0.006782 | 0.026300 | 0.007408 | 0.005309 | 0.001090 | 0.000651 | -0.002313 | 0.002966 | 0.002401 | -0.002507 | 0.002647 | -0.000390 | 0.000866 | 0.003151 | 0.005495 | -0.004095 | -0.006769 | 6 |
TXN | 0 | 0.001002 | 0.010276 | 0.003460 | 0.004779 | 0.002664 | 0.002796 | 0.003344 | 0.004577 | 0.000901 | -0.008384 | -0.004774 | 0.002800 | 0.005708 | 0.005124 | -0.002917 | 0.004425 | 0.002345 | 0.002820 | 3 |
VIAB | 0 | -0.005008 | -0.000787 | 0.002314 | 0.007547 | 0.009547 | -0.006172 | -0.002993 | -0.004944 | -0.001899 | -0.000950 | -0.002112 | 0.001860 | 0.001980 | 0.000948 | 0.007079 | -0.002872 | -0.000739 | -0.000041 | 7 |
VMW | 0 | -0.007897 | 0.004582 | 0.009869 | 0.005238 | 0.015535 | 0.000745 | 0.003810 | -0.003434 | -0.003283 | -0.014676 | -0.028492 | -0.005011 | -0.001810 | 0.008902 | 0.002936 | -0.002571 | -0.017769 | -0.010790 | 7 |
XOM | 0 | -0.005729 | 0.006333 | 0.000303 | -0.001247 | 0.005199 | 0.002409 | -0.000770 | -0.000100 | -0.010319 | 0.001992 | 0.009432 | -0.001306 | -0.000872 | -0.001578 | -0.006624 | 0.005612 | -0.006018 | 0.003000 | 0 |
XRX | 0 | 0.005475 | 0.010589 | -0.002964 | -0.007214 | 0.005935 | -0.002976 | 0.006162 | 0.003684 | 0.001471 | 0.002202 | 0.003657 | 0.002189 | 0.000000 | 0.007005 | 0.003850 | 0.009061 | -0.013778 | -0.013971 | 0 |
50 rows × 20 columns
def visualize_clusters(data_df, values="change_1_per", n_clusters=8, normalize_data=False):
    """Cluster the pivoted stock data and plot every cluster with its members.

    First draws all cluster centers on a single figure, then, for each
    cluster, prints its id and members (symbol plus the name looked up in
    `stock_dict`) and draws the center together with every member series.

    Parameters:
        data_df: frame handed to `pivot_data` together with `values`.
        values: name of the feature to pivot and cluster on.
        n_clusters: number of clusters requested from `cluster_data`.
        normalize_data: forwarded to `cluster_data`.

    Returns:
        (prediction, model, c_data): the label of each row, the fitted
        model, and a DataFrame of the clustered values with an added
        "Cluster" column.
    """
    data = pivot_data(data_df, values)
    prediction, model, c_data = cluster_data(data, n_clusters=n_clusters,
                                             normalize_data=normalize_data)
    # cluster_data may hand back a bare ndarray; restore the labels of the
    # pivoted table.
    c_data = pd.DataFrame(c_data, index=data.index, columns=data.columns)
    data["Cluster"] = prediction
    c_data["Cluster"] = prediction

    cluster_ids = np.unique(prediction)

    # Overview chart with all centers. The original wrote `plt.figure`
    # without parentheses, which never opened a new figure.
    plt.figure()
    for cluster in cluster_ids:
        plt.plot(model.cluster_centers_[cluster], "o-", alpha=0.5, linewidth=2)
    plt.show()

    # One chart per cluster: the center plus every member series.
    for cluster in cluster_ids:
        temp_cluster_data = c_data[c_data["Cluster"] == cluster]
        print("Cluster: %s" % cluster)
        print("Members: %s" % ["%s: %s" % (symbol, stock_dict[symbol])
                               for symbol in temp_cluster_data.index])
        plt.figure()
        plt.title("Cluster#: %s" % cluster)
        plt.plot(model.cluster_centers_[cluster], "o--", alpha=0.5, linewidth=2)
        # Drop the label column once per cluster (keyword `axis`, since the
        # positional form is not accepted by recent pandas) and plot each row.
        for member_values in temp_cluster_data.drop("Cluster", axis=1).values:
            plt.plot(member_values, alpha=0.2, linewidth=2)
        plt.grid()
        plt.show()
    return prediction, model, c_data
# Cluster the "average" feature into 3 groups with row normalization enabled,
# keeping the labels, the fitted model and the clustered table for scoring.
prediction, model, c_data = visualize_clusters(stocks_df, values="average", n_clusters=3, normalize_data=True);
Cluster: 0 Members: ['ADSK: Autodesk, Inc.', 'BP: BP plc (ADR)', 'CAJ: Canon Inc (ADR)', 'CBS: CBS Corporation', 'CSCO: Cisco Systems, Inc.', 'EMC: EMC Corporation', 'FJTSY: Fujitsu Ltd (ADR)', 'GE: General Electric Company', 'GOOG: Google', 'LXK: Lexmark International Inc', 'NYT: The New York Times Company', 'TOSYY: Toshiba Corp (USA)', 'VIAB: Viacom, Inc.', 'VMW: VMware, Inc.', 'XOM: Exxon Mobil Corporation']
Cluster: 1 Members: ['BBRY: BlackBerry Ltd', 'CRM: salesforce.com, inc.', 'HPQ: Hewlett-Packard Company', 'RAX: Rackspace Hosting, Inc.', 'SNE: Sony Corp (ADR)']
Cluster: 2 Members: ['AAPL: Apple Inc.', 'ADBE: Adobe Systems Incorporated', 'AMZN: Amazon.com, Inc.', 'BKS: Barnes & Noble, Inc.', 'CAT: Caterpillar Inc.', 'CMCSA: Comcast Corporation', 'CSC: Computer Sciences Corporation', 'CTXS: Citrix Systems, Inc.', 'CVX: Chevron Corporation', 'DIS: The Walt Disney Company', 'EBAY: eBay Inc', 'FB: Facebook, Inc.', 'FOXA: Twenty-First Century Fox Inc', 'HTHIY: Hitachi, Ltd. (ADR)', 'IBM: International Business Machines Corp. (IBM)', 'INTC: Intel Corporation', 'MSFT: Microsoft Corporation', 'N: NetSuite Inc', 'NFLX: Netflix, Inc.', 'NVDA: NVIDIA Corporation', 'NWSA: News Corp', 'ORCL: Oracle Corporation', 'PCRFY: Panasonic Corporation (ADR)', 'RHT: Red Hat Inc', 'SAP: SAP SE (ADR)', 'SIEGY: Siemens AG (ADR)', 'TRI: Thomson Reuters Corporation (USA)', 'TWX: Time Warner Inc', 'TXN: Texas Instruments Incorporated', 'XRX: Xerox Corp']
def measure_error(prediction, model, c_data):
    """Score a clustering by how far the rows lie from their cluster centers.

    For every row the mean absolute deviation from its assigned center is
    divided by the absolute mean of that center; the per-row scores are
    then averaged.

    Parameters:
        prediction: unused; kept so existing callers keep working. The
            labels are read from the "Cluster" column of `c_data`.
        model: fitted model exposing `cluster_centers_`.
        c_data: DataFrame of the clustered values plus an integer
            "Cluster" column holding each row's label.

    Returns:
        (average_error, single_member_clusters): the mean relative error
        over all rows and the number of clusters containing exactly one row.
    """
    # Drop the label column once instead of once per row, and read labels
    # positionally so a non-integer index (e.g. ticker symbols) is fine.
    features = c_data.drop("Cluster", axis=1).values
    labels = np.asarray(c_data["Cluster"], dtype=int)
    centers = np.asarray(model.cluster_centers_)[labels]

    deviation = np.abs(features - centers).mean(axis=1)
    # Use the magnitude of the center mean: with a signed denominator a
    # center with a negative mean produced negative "errors" that cancelled
    # the positive ones in the average. A center whose mean is exactly zero
    # still yields a non-finite score.
    error_score = deviation / np.abs(centers.mean(axis=1))

    cluster_counts = c_data["Cluster"].value_counts()
    return np.average(error_score), int((cluster_counts == 1).sum())
# Score the 3-cluster result: (average error, number of single-member clusters).
measure_error(prediction, model, c_data)
(0.008035569568482899, 0)
from IPython.html import widgets
from IPython.html.widgets import interact
def visualize_clusters_widget(values="change_1_per", n_clusters=8, normalize_data=False):
    """Widget callback: cluster `stocks_df`, plot the result, print its score.

    The three arguments are passed straight to `visualize_clusters`; the
    tuple returned by `measure_error` for that clustering is then printed.
    """
    clustering_result = visualize_clusters(
        data_df=stocks_df,
        values=values,
        n_clusters=n_clusters,
        normalize_data=normalize_data,
    )
    # clustering_result is (prediction, model, c_data), which is exactly the
    # argument order measure_error expects.
    print(measure_error(*clustering_result))
# Interactive controls for the clustering demo. Each keyword must match a
# parameter name of visualize_clusters_widget; the original passed
# `normalize`, which is not one of its parameters.
interact(visualize_clusters_widget,
         values=["change_1_per", "close", "average", "change_per"],
         n_clusters=(2, 50),
         normalize_data=False
         );
Cluster: 0 Members: ['CRM: salesforce.com, inc.']
Cluster: 1 Members: ['AAPL: Apple Inc.', 'ADBE: Adobe Systems Incorporated', 'CTXS: Citrix Systems, Inc.', 'DIS: The Walt Disney Company', 'FB: Facebook, Inc.', 'FJTSY: Fujitsu Ltd (ADR)', 'GOOG: Google', 'HTHIY: Hitachi, Ltd. (ADR)', 'INTC: Intel Corporation', 'MSFT: Microsoft Corporation', 'N: NetSuite Inc', 'NFLX: Netflix, Inc.', 'NVDA: NVIDIA Corporation', 'ORCL: Oracle Corporation', 'RHT: Red Hat Inc', 'TOSYY: Toshiba Corp (USA)', 'TXN: Texas Instruments Incorporated', 'VIAB: Viacom, Inc.', 'VMW: VMware, Inc.']
Cluster: 2 Members: ['RAX: Rackspace Hosting, Inc.']
Cluster: 3 Members: ['AMZN: Amazon.com, Inc.', 'FOXA: Twenty-First Century Fox Inc', 'PCRFY: Panasonic Corporation (ADR)', 'SNE: Sony Corp (ADR)', 'TWX: Time Warner Inc']
Cluster: 4 Members: ['ADSK: Autodesk, Inc.', 'BP: BP plc (ADR)', 'CAJ: Canon Inc (ADR)', 'CAT: Caterpillar Inc.', 'CBS: CBS Corporation', 'CMCSA: Comcast Corporation', 'CSC: Computer Sciences Corporation', 'CSCO: Cisco Systems, Inc.', 'CVX: Chevron Corporation', 'EMC: EMC Corporation', 'GE: General Electric Company', 'IBM: International Business Machines Corp. (IBM)', 'LXK: Lexmark International Inc', 'NWSA: News Corp', 'NYT: The New York Times Company', 'SAP: SAP SE (ADR)', 'SIEGY: Siemens AG (ADR)', 'TRI: Thomson Reuters Corporation (USA)', 'XOM: Exxon Mobil Corporation', 'XRX: Xerox Corp']
Cluster: 5 Members: ['BKS: Barnes & Noble, Inc.']
Cluster: 6 Members: ['BBRY: BlackBerry Ltd']
Cluster: 7 Members: ['EBAY: eBay Inc', 'HPQ: Hewlett-Packard Company']
(5.4891499235401122, 4)
# Sweep the number of clusters, with and without normalization, and chart the
# average error next to the number of single-member ("failed") clusters.
max_clusters = 30
feature = "average"
clustering_data = pivot_data(stocks_df, values=feature)
# Placeholder label column; an explicit dtype avoids the dtype-less
# empty-Series construction.
clustering_data["Cluster"] = pd.Series(dtype=float)
cluster_range = list(range(2, max_clusters))

for normalize_data in [True, False]:
    fig = plt.figure(figsize=(10, 6))
    axes_1 = fig.add_subplot(111)
    # Title the axes that are actually drawn on, rather than going through
    # plt.title before any axes have been added to the figure.
    axes_1.set_title("K-Means - Feature: %s Normalized: %s" % (feature, normalize_data))
    axes_2 = axes_1.twinx()
    score_error_list = []
    failed_clusters_list = []
    for n_clusters in cluster_range:
        # Fresh copy without the label column, built once per iteration and
        # with the keyword form of `axis`.
        features = clustering_data.drop("Cluster", axis=1)
        prediction, model, data = cluster_data(features, n_clusters=n_clusters,
                                               normalize_data=normalize_data)
        data = pd.DataFrame(data, index=clustering_data.index, columns=features.columns)
        data["Cluster"] = prediction
        score_error, failed_clusters = measure_error(prediction, model, data)
        score_error_list.append(score_error)
        failed_clusters_list.append(failed_clusters)
    axes_1.plot(cluster_range, score_error_list, "ro-", label="Average Error")
    axes_2.plot(cluster_range, failed_clusters_list, "bo-", label="Failed Cluster")
    axes_1.grid()
    axes_1.legend(loc="lower center")
    axes_2.legend(loc="upper center")
    axes_1.set_ylabel("Average Error")
    axes_2.set_ylabel("Failed Cluster")
    axes_1.set_xlabel("Clusters")
    plt.show()
For questions please leave them on:
In the next lesson: