by XU Manning 17405785, YU Lei 17417929, SHEN Yue 17405815, Wong Pui Lam 16443578
source: https://www.censtatd.gov.hk/hkstat/sub/sp130_tc.jsp?productCode=D5600554; https://partnernet.hktb.com/
import requests
import bs4
import csv
import pandas as pd
from datetime import datetime
from dateutil import parser
import numpy
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
from collections import namedtuple
# Load the monthly visitor-arrival table (one row per month, one column per
# region of residence) and display it as the cell output.
df = pd.read_csv('visitors.csv')
df
Date | Africa | TheAmericas | Australia,NewZealand&SouthPacific | Europe | MiddleEast | NorthAsia | South&SoutheastAsia | Taiwan | ThemainlandofChina | Macao | Total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2011-01-01 | 14470 | 141504 | 71946 | 143205 | 14927 | 215919 | 243083 | 182703 | 2503151 | 67122 | 3598030 |
1 | 2011-02-01 | 8914 | 107691 | 42858 | 121715 | 11177 | 190027 | 208270 | 165779 | 2008173 | 58466 | 2923070 |
2 | 2011-03-01 | 18778 | 170788 | 65442 | 176988 | 17926 | 180496 | 324999 | 158922 | 2009130 | 63402 | 3186871 |
3 | 2011-04-01 | 21341 | 171679 | 72112 | 179957 | 21580 | 159448 | 327629 | 197775 | 2136896 | 69543 | 3357960 |
4 | 2011-05-01 | 15733 | 150615 | 59579 | 135314 | 14217 | 177978 | 335874 | 164192 | 2115413 | 61113 | 3230028 |
5 | 2011-06-01 | 14074 | 144106 | 58797 | 109883 | 14029 | 172703 | 347762 | 184330 | 1924603 | 64935 | 3035222 |
6 | 2011-07-01 | 16175 | 151001 | 62401 | 132946 | 21706 | 189009 | 295771 | 216518 | 2677326 | 76535 | 3839388 |
7 | 2011-08-01 | 14438 | 131941 | 51384 | 145843 | 12581 | 223352 | 286781 | 195320 | 2914914 | 89207 | 4065761 |
8 | 2011-09-01 | 17007 | 134624 | 67953 | 133812 | 16549 | 185881 | 304734 | 174058 | 2094360 | 58113 | 3187091 |
9 | 2011-10-01 | 20541 | 189463 | 76795 | 199860 | 22080 | 181354 | 310435 | 180869 | 2542465 | 70214 | 3794076 |
10 | 2011-11-01 | 15655 | 176851 | 60740 | 178692 | 18161 | 200829 | 335613 | 155883 | 2343939 | 66370 | 3552733 |
11 | 2011-12-01 | 16505 | 150833 | 67864 | 143128 | 14412 | 227687 | 430198 | 172384 | 2829759 | 98310 | 4151080 |
12 | 2012-01-01 | 10593 | 130596 | 69745 | 139746 | 11320 | 204309 | 227883 | 178187 | 3101578 | 68037 | 4142042 |
13 | 2012-02-01 | 10759 | 128239 | 50286 | 156909 | 14026 | 233089 | 259111 | 162578 | 2296814 | 59350 | 3371202 |
14 | 2012-03-01 | 17173 | 167827 | 66145 | 186708 | 15896 | 210918 | 329028 | 154942 | 2497061 | 63750 | 3709484 |
15 | 2012-04-01 | 19467 | 175533 | 68476 | 183605 | 19784 | 167079 | 325477 | 167046 | 2647026 | 66649 | 3840180 |
16 | 2012-05-01 | 13896 | 154497 | 54992 | 140757 | 13760 | 181478 | 337150 | 151920 | 2525600 | 64841 | 3638934 |
17 | 2012-06-01 | 13349 | 142254 | 60110 | 118705 | 16145 | 183880 | 322301 | 178865 | 2514688 | 70592 | 3620912 |
18 | 2012-07-01 | 13942 | 143604 | 57773 | 132425 | 15687 | 199823 | 261583 | 203164 | 3264385 | 77343 | 4369786 |
19 | 2012-08-01 | 11969 | 127099 | 50510 | 138751 | 14343 | 241040 | 295529 | 195587 | 3727949 | 95982 | 4898843 |
20 | 2012-09-01 | 15301 | 130329 | 70437 | 135478 | 14668 | 179919 | 247091 | 171251 | 2751427 | 69079 | 3785018 |
21 | 2012-10-01 | 17018 | 175520 | 69127 | 196909 | 20982 | 163088 | 291921 | 171675 | 3064966 | 72523 | 4243787 |
22 | 2012-11-01 | 14379 | 164120 | 56178 | 186261 | 15066 | 171006 | 336516 | 163147 | 3040360 | 69872 | 4216937 |
23 | 2012-12-01 | 16081 | 138224 | 67016 | 151461 | 14675 | 197431 | 418213 | 190383 | 3479541 | 104924 | 4777988 |
24 | 2013-01-01 | 12909 | 135513 | 69903 | 157866 | 14669 | 203158 | 275588 | 165849 | 3526182 | 71262 | 4632926 |
25 | 2013-02-01 | 6690 | 103787 | 38400 | 125165 | 9558 | 162945 | 191353 | 183751 | 3132415 | 68031 | 4022120 |
26 | 2013-03-01 | 16860 | 165247 | 68459 | 200082 | 18764 | 183919 | 351684 | 165455 | 2842002 | 74772 | 4087286 |
27 | 2013-04-01 | 18858 | 163992 | 66608 | 188459 | 19857 | 152926 | 321524 | 174210 | 3110141 | 63290 | 4279889 |
28 | 2013-05-01 | 13758 | 137908 | 54000 | 150652 | 13225 | 162488 | 335432 | 145684 | 3053892 | 75238 | 4142301 |
29 | 2013-06-01 | 13326 | 128934 | 56478 | 121876 | 15097 | 153876 | 315277 | 168843 | 3150563 | 77036 | 4201331 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
54 | 2015-07-01 | 12282 | 132320 | 53368 | 129084 | 15756 | 162350 | 293147 | 187517 | 3845273 | 92316 | 4923431 |
55 | 2015-08-01 | 12349 | 120718 | 44346 | 133439 | 17414 | 191618 | 240408 | 189843 | 4550148 | 114561 | 5614852 |
56 | 2015-09-01 | 12436 | 128582 | 62717 | 134131 | 16239 | 190445 | 261798 | 165810 | 3509432 | 77201 | 4558800 |
57 | 2015-10-01 | 14908 | 170672 | 66077 | 200278 | 18001 | 187816 | 302603 | 175069 | 3857524 | 80534 | 5073494 |
58 | 2015-11-01 | 12884 | 169511 | 56990 | 189786 | 14194 | 206198 | 349406 | 160707 | 3512410 | 76619 | 4748715 |
59 | 2015-12-01 | 15601 | 148196 | 63431 | 153597 | 15927 | 220021 | 434383 | 179937 | 3721049 | 108906 | 5061064 |
60 | 2016-01-01 | 12085 | 141967 | 61800 | 159926 | 15664 | 238302 | 301606 | 174325 | 4043000 | 76895 | 5225578 |
61 | 2016-02-01 | 6536 | 110571 | 33785 | 130330 | 9678 | 199101 | 202333 | 160592 | 3367736 | 75049 | 4295731 |
62 | 2016-03-01 | 13712 | 165609 | 60864 | 191114 | 16847 | 199024 | 324452 | 149242 | 3017173 | 75748 | 4213801 |
63 | 2016-04-01 | 15929 | 170091 | 69936 | 194096 | 17856 | 181550 | 329623 | 176018 | 3459000 | 72200 | 4686316 |
64 | 2016-05-01 | 11390 | 145025 | 57558 | 142828 | 13352 | 189312 | 343260 | 150573 | 3322758 | 77051 | 4453118 |
65 | 2016-06-01 | 10508 | 137244 | 55873 | 114642 | 8850 | 179449 | 319470 | 176169 | 3206043 | 77465 | 4285730 |
66 | 2016-07-01 | 11944 | 132586 | 54475 | 134968 | 16605 | 189687 | 296427 | 185051 | 3930526 | 96736 | 5049022 |
67 | 2016-08-01 | 10827 | 120428 | 42976 | 140769 | 16376 | 216626 | 226061 | 174341 | 4037005 | 101070 | 5086496 |
68 | 2016-09-01 | 11754 | 134703 | 61128 | 135374 | 17575 | 214627 | 270860 | 168311 | 3333627 | 74470 | 4422441 |
69 | 2016-10-01 | 14130 | 176312 | 67826 | 210357 | 16346 | 197635 | 307059 | 161813 | 3723837 | 78384 | 4953705 |
70 | 2016-11-01 | 11701 | 177724 | 55317 | 192652 | 13610 | 235342 | 343238 | 153844 | 3388958 | 74536 | 4646938 |
71 | 2016-12-01 | 14802 | 161078 | 62508 | 157820 | 13502 | 244041 | 437407 | 181149 | 3948482 | 115218 | 5336027 |
72 | 2017-01-01 | 9408 | 143372 | 58895 | 152130 | 12714 | 232426 | 265508 | 164020 | 4353505 | 83192 | 5475176 |
73 | 2017-02-01 | 6705 | 119320 | 37537 | 140447 | 10777 | 252816 | 236713 | 165183 | 3138728 | 73187 | 4181417 |
74 | 2017-03-01 | 11770 | 169079 | 52630 | 187820 | 16017 | 252483 | 332297 | 160589 | 3330769 | 72726 | 4586186 |
75 | 2017-04-01 | 15476 | 173200 | 74981 | 203417 | 17811 | 189824 | 329358 | 166892 | 3521932 | 82938 | 4775834 |
76 | 2017-05-01 | 10812 | 142787 | 50139 | 140861 | 11433 | 213033 | 325114 | 166827 | 3450028 | 75968 | 4587014 |
77 | 2017-06-01 | 8523 | 141279 | 57284 | 117918 | 10847 | 197742 | 335055 | 163588 | 3097716 | 73297 | 4203256 |
78 | 2017-07-01 | 10806 | 139161 | 55789 | 134927 | 14206 | 196487 | 264025 | 180352 | 4077183 | 94758 | 5167700 |
79 | 2017-08-01 | 9460 | 120618 | 43986 | 135456 | 15684 | 242059 | 229376 | 177920 | 3955989 | 92881 | 5023433 |
80 | 2017-09-01 | 10841 | 125603 | 64829 | 134911 | 14312 | 220311 | 258269 | 157363 | 3572769 | 75948 | 4635161 |
81 | 2017-10-01 | 12845 | 167229 | 65127 | 198611 | 16879 | 231859 | 298480 | 174188 | 4033473 | 79972 | 5278677 |
82 | 2017-11-01 | 10272 | 178481 | 59569 | 196297 | 14338 | 247170 | 343769 | 159009 | 3680755 | 81448 | 4971119 |
83 | 2017-12-01 | 13825 | 161690 | 66361 | 158656 | 15090 | 241470 | 408194 | 174824 | 4232412 | 114652 | 5587184 |
84 rows × 12 columns
def parse_datetime(x):
    """Parse a date string with dateutil, returning NaN when it cannot be parsed.

    Only the errors that signal bad input are caught, so unrelated failures
    (for example KeyboardInterrupt) are no longer silently swallowed.
    """
    try:
        return parser.parse(x)
    except (ValueError, TypeError, OverflowError):
        # ValueError covers dateutil's ParserError; TypeError covers
        # non-string cells such as NaN.
        return numpy.nan


# Add a parsed timestamp column so the frame can be resampled by time.
df['datetime'] = df['Date'].apply(parse_datetime)
1) Visitors to Hong Kong are dominated by people from mainland China;
2) Apart from mainland China, visitor arrivals from other countries and regions are highly seasonal;
3) Visitor arrivals from mainland China are not seasonal but are influenced by social events and exchange rates:
For instance, during the "Umbrella Revolution" in September 2014, there was a sharp drop in the number of visitors coming from the mainland;
In September 2017, visitor arrivals dropped further when Typhoon Signal No. 10 was issued in Hong Kong for Typhoon Hato, with delayed flights and heavy flooding in Hong Kong lasting for more than a month. The number of visitors from the mainland was also influenced by the flu season in August of the same year.
# Index by the parsed timestamp and total every column per calendar month.
monthly_groups = df.set_index('datetime').resample('1m')
df_all = monthly_groups.aggregate('sum')

# One line per column on a single chart, then label it.
arrivals_ax = df_all.plot(linewidth=3, figsize=(10, 5))
arrivals_ax.set_title('Visitor Arrivals By Country/Region of Residence')
arrivals_ax.set_xlabel('Year')
arrivals_ax.set_ylabel('Visitors')
Text(0,0.5,'Visitors')
# Mainland China on its own chart, since its scale dwarfs every other region.
# Single-letter colour codes must be lowercase ('g'); the uppercase 'G' is
# rejected by current matplotlib releases.
df_all[['ThemainlandofChina']].plot(linewidth=2, figsize=(10, 5), color='g')
plt.title('Visitor Arrivals By Country/Region of residence')
plt.xlabel('Year')
plt.ylabel('Visitors')
Text(0,0.5,'Visitors')
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline
# Build the long-form (year, month, values) table behind the mainland-China
# heatmap from the real arrival figures. The earlier version overwrote
# `values` with np.random.randint output, so the heatmap showed random
# numbers under a "Visitor Arrivals from the Mainland China" title.
arrival_dates = pd.to_datetime(df['Date'])
values = df['ThemainlandofChina']

df2 = pd.DataFrame({
    'year': arrival_dates.dt.strftime('%Y'),
    # '%m %B' yields labels such as '01 January', which sort chronologically.
    'month': arrival_dates.dt.strftime('%m %B'),
    'values': values,
})
df2.head()
month | values | year | |
---|---|---|---|
0 | 03 March | 281 | 2011 |
1 | 08 August | 759 | 2011 |
2 | 11 November | 879 | 2014 |
3 | 02 February | 673 | 2018 |
4 | 01 January | 987 | 2018 |
# Reshape to a month-by-year grid, summing the values that fall in each cell.
pt = pd.pivot_table(
    df2,
    values='values',
    index='month',
    columns='year',
    aggfunc=np.sum,
)
pt.head()
year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 |
---|---|---|---|---|---|---|---|---|
month | ||||||||
01 January | 56696 | 52233 | 53909 | 55380 | 58730 | 58108 | 56758 | 57747 |
02 February | 50123 | 57850 | 45952 | 46448 | 51079 | 53916 | 66838 | 53645 |
03 March | 58432 | 47760 | 52198 | 52204 | 57505 | 57063 | 63134 | 61883 |
04 April | 46641 | 56184 | 53430 | 50481 | 55863 | 43418 | 62530 | 56216 |
05 May | 50772 | 65368 | 50983 | 51034 | 47671 | 56488 | 52055 | 57957 |
# Heatmap of the month-by-year pivot table `pt`. The unused cubehelix
# colormap that used to be built here was dropped; the plot uses 'Blues'.
f, ax = plt.subplots(figsize=(10, 4))
sns.heatmap(pt, cmap='Blues', linewidths=0.05, ax=ax)
ax.set_title('Visitor Arrivals from the Mainland China')
ax.set_xlabel('Year')
ax.set_ylabel('Month')
Text(69,0.5,'Month')
# One panel per region, stacked vertically. Only rows 1, 3, 5 and 7 of the
# 7-row grid are used (presumably so the blank rows leave space for the
# titles and x-axis labels).
region_panels = [
    # (grid row, df_all column, line colour, y-axis label)
    (1, 'South&SoutheastAsia', 'Orange', 'Number of Visitors'),
    (3, 'Europe', 'Brown', 'Visitors'),
    (5, 'Macao', 'blue', 'Visitors'),
    (7, 'Australia,NewZealand&SouthPacific', 'Pink', 'Visitors'),
]

for grid_row, region, line_color, y_label in region_panels:
    myax = plt.subplot(7, 1, grid_row)
    df_all[[region]].plot(linewidth=2, figsize=(20, 5), color=line_color, ax=myax)
    # Title made consistent across panels (the first one read 'Residents').
    plt.title('Visitor Arrivals By Country/Region of Residence')
    plt.xlabel('Year')
    plt.ylabel(y_label)
Text(0,0.5,'Visitors')
1) Most visitors arrive in Hong Kong by air, except that most visitors from mainland China arrive by land;
2) For long-haul markets, most visitors arrive in Hong Kong by air, and travelling by sea is their last choice. In short-haul markets, however, the number of visitors arriving by sea exceeded the number arriving by land after 2011 and has continued to rise. Still, coming to Hong Kong by air has always been the first choice.
import matplotlib.pyplot as plt
import numpy as np
# Arrivals by air: one row per region, one column per year; displayed as the
# cell output.
df3 = pd.read_csv('transport_air.csv')
df3
Year | 2004 | 2005 | 2006 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | America | 860 | 946 | 963 | 884 | 986 | 1028 | 1014 | 961 | 988 | 1034 | 1081 |
1 | Europe Africa and the Middle East | 940 | 1129 | 1248 | 1276 | 1389 | 1400 | 1441 | 1472 | 1445 | 1419 | 1488 |
2 | Australia New Zealand and South Pacific | 338 | 426 | 457 | 467 | 510 | 497 | 487 | 482 | 490 | 475 | 475 |
3 | North Asia | 992 | 1086 | 1192 | 1014 | 1197 | 1259 | 1285 | 1191 | 1315 | 1334 | 1491 |
4 | South and Southeast Asia | 1226 | 1441 | 1602 | 1691 | 2106 | 2290 | 2263 | 2362 | 2327 | 2335 | 2504 |
5 | China | 1500 | 1563 | 1901 | 2150 | 2744 | 3284 | 3814 | 4558 | 5006 | 5298 | 5033 |
6 | Taiwan | 1128 | 1184 | 1232 | 1120 | 1224 | 1235 | 1213 | 1250 | 1214 | 1201 | 1238 |
# Grouped bar chart of arrivals by air: one cluster per region (row of df3)
# and one bar per year column. The loop replaces eleven copy-pasted plt.bar
# calls, one of which reused the name rects3.
year_colors = [
    ('2004', 'lightseagreen'),
    ('2005', 'darkcyan'),
    ('2006', 'deepskyblue'),
    ('2009', 'slategray'),
    ('2010', 'royalblue'),
    ('2011', 'navy'),
    ('2012', 'mediumpurple'),
    ('2013', 'darkorchid'),
    ('2014', 'plum'),
    ('2015', 'm'),
    ('2016', 'mediumvioletred'),
]

# Take the cluster count from the data rather than hard-coding it.
n_groups = len(df3)
index = np.arange(n_groups)
bar_width = 0.06
opacity = 0.8

fig, ax = plt.subplots(figsize=(15, 5))
for position, (year_label, bar_color) in enumerate(year_colors):
    # Shift each year right by one bar width so the bars sit side by side.
    plt.bar(index + position * bar_width, df3[year_label], bar_width,
            alpha=opacity,
            color=bar_color,
            label=year_label)

plt.xlabel('Regions/Countries')
plt.ylabel('No. of visitors (in thousands)')
plt.title('No. of Hong Kong Inbound Visitors By Air between 2004 and 2016')
plt.xticks(index, ('America', 'Europe, Africa and the Middle East', 'Australia, New Zealand and South Pacific', 'North Asia', 'South and Southeast Asia', 'Mainland China', 'Taiwan'), rotation=20)
plt.legend()
plt.minorticks_on()
plt.autoscale(enable=True, axis='both', tight=False)
plt.show()
# Arrivals by land: one row per region (including Macao), one column per
# year; displayed as the cell output.
df4 = pd.read_csv('transport_land.csv')
df4
Year | 2004 | 2005 | 2006 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | America | 351 | 402 | 440 | 477 | 537 | 554 | 528 | 490 | 480 | 488 | 478 |
1 | Europe Africa and the Middle East | 287 | 390 | 440 | 446 | 515 | 523 | 501 | 492 | 489 | 478 | 458 |
2 | Australia New Zealand and South Pacific | 96 | 132 | 143 | 144 | 157 | 155 | 145 | 134 | 124 | 116 | 109 |
3 | North Asia | 432 | 492 | 515 | 435 | 537 | 548 | 517 | 480 | 479 | 437 | 403 |
4 | South and Southeast Asia | 673 | 754 | 767 | 598 | 702 | 736 | 680 | 630 | 592 | 557 | 509 |
5 | China | 9273 | 9740 | 10374 | 14679 | 18537 | 23074 | 29193 | 33920 | 39919 | 38566 | 35906 |
6 | Taiwan | 828 | 823 | 831 | 797 | 824 | 781 | 741 | 707 | 667 | 680 | 647 |
7 | Macao | 30 | 35 | 39 | 47 | 53 | 56 | 56 | 57 | 58 | 58 | 58 |
# Grouped bar chart of arrivals by land: one cluster per region (row of df4)
# and one bar per year column. The loop replaces eleven copy-pasted plt.bar
# calls, one of which reused the name rects3.
year_colors = [
    ('2004', 'lightseagreen'),
    ('2005', 'darkcyan'),
    ('2006', 'deepskyblue'),
    ('2009', 'slategray'),
    ('2010', 'royalblue'),
    ('2011', 'navy'),
    ('2012', 'mediumpurple'),
    ('2013', 'darkorchid'),
    ('2014', 'plum'),
    ('2015', 'm'),
    ('2016', 'mediumvioletred'),
]

# Take the cluster count from the data rather than hard-coding it.
n_groups = len(df4)
index = np.arange(n_groups)
bar_width = 0.06
opacity = 0.8

fig, ax = plt.subplots(figsize=(15, 5))
for position, (year_label, bar_color) in enumerate(year_colors):
    # Shift each year right by one bar width so the bars sit side by side.
    plt.bar(index + position * bar_width, df4[year_label], bar_width,
            alpha=opacity,
            color=bar_color,
            label=year_label)

plt.xlabel('Region/Countries')
plt.ylabel('No. of visitors (in Thousands)')
plt.title('No. of Hong Kong Inbound Visitors By Land between 2004 and 2016')
plt.xticks(index, ('America', 'Europe, Africa and the Middle East', 'Australia, New Zealand and South Pacific', 'North Asia', 'South and Southeast Asia', 'Mainland China', 'Taiwan', 'Macao'), rotation=20)
plt.legend()
plt.minorticks_on()
plt.autoscale(enable=True, axis='both', tight=None)
plt.show()
# Arrivals by sea: one row per region (including Macao), one column per
# year; displayed as the cell output.
df5 = pd.read_csv('transport_sea.csv')
df5
Year | 2004 | 2005 | 2006 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | America | 189 | 217 | 227 | 207 | 226 | 239 | 236 | 214 | 211 | 206 | 215 |
1 | Europe Africa and the Middle East | 153 | 206 | 229 | 247 | 270 | 272 | 287 | 289 | 284 | 270 | 281 |
2 | Australia New Zealand and South Pacific | 49 | 62 | 68 | 97 | 101 | 106 | 109 | 102 | 101 | 91 | 101 |
3 | North Asia | 241 | 275 | 322 | 374 | 474 | 498 | 531 | 470 | 537 | 521 | 591 |
4 | South and Southeast Asia | 179 | 219 | 291 | 596 | 693 | 725 | 709 | 726 | 696 | 667 | 688 |
5 | China | 1473 | 1238 | 1317 | 1128 | 1404 | 1742 | 1904 | 2267 | 2322 | 1979 | 1839 |
6 | Taiwan | 119 | 124 | 115 | 93 | 117 | 133 | 134 | 143 | 151 | 135 | 126 |
7 | Macao | 425 | 446 | 508 | 601 | 697 | 754 | 784 | 846 | 880 | 887 | 848 |
# Grouped bar chart of arrivals by sea: one cluster per region (row of df5)
# and one bar per year column. The loop replaces eleven copy-pasted plt.bar
# calls, one of which reused the name rects3.
year_colors = [
    ('2004', 'lightseagreen'),
    ('2005', 'darkcyan'),
    ('2006', 'deepskyblue'),
    ('2009', 'slategray'),
    ('2010', 'royalblue'),
    ('2011', 'navy'),
    ('2012', 'mediumpurple'),
    ('2013', 'darkorchid'),
    ('2014', 'plum'),
    ('2015', 'm'),
    ('2016', 'mediumvioletred'),
]

# Take the cluster count from the data rather than hard-coding it.
n_groups = len(df5)
index = np.arange(n_groups)
bar_width = 0.06
opacity = 0.8

fig, ax = plt.subplots(figsize=(15, 5))
for position, (year_label, bar_color) in enumerate(year_colors):
    # Shift each year right by one bar width so the bars sit side by side.
    plt.bar(index + position * bar_width, df5[year_label], bar_width,
            alpha=opacity,
            color=bar_color,
            label=year_label)

plt.xlabel('Region')
plt.ylabel('No. of visitors (in thousands)')
plt.title('No. of Hong Kong Inbound Visitors By Sea between 2004 and 2016')
plt.xticks(index, ('America', 'Europe, Africa & the Middle East', 'Australia, New Zealand & South Pacific', 'North Asia', 'South & Southeast Asia', 'Mainland China', 'Taiwan', 'Macau'), rotation=20)
plt.legend()
plt.minorticks_on()
plt.autoscale(enable=True, axis='both', tight=False)
plt.show()
# Long-haul markets: yearly arrivals split by air, land and sea; displayed as
# the cell output.
df_long = pd.read_csv('long_haul_market.csv')
df_long
Year | By air | By land | By sea | |
---|---|---|---|---|
0 | 2004 | 2138 | 734 | 238 |
1 | 2005 | 2501 | 924 | 279 |
2 | 2006 | 2668 | 1023 | 295 |
3 | 2009 | 2627 | 1067 | 304 |
4 | 2010 | 2885 | 1209 | 327 |
5 | 2011 | 2925 | 1232 | 345 |
6 | 2012 | 2942 | 1174 | 345 |
7 | 2013 | 2915 | 1116 | 316 |
8 | 2014 | 2923 | 1093 | 312 |
9 | 2015 | 2928 | 1082 | 567 |
10 | 2016 | 3044 | 1045 | 597 |
11 | 2017 | 3105 | 1073 | 573 |
# Use the year as the x-axis and overlay (not stack) one area per transport mode.
df_long = df_long.set_index('Year')
long_haul_ax = df_long.plot.area(stacked=False, figsize=(10, 5))
long_haul_ax.set_ylabel('No. of visitors in thousands')
long_haul_ax.set_xlabel('Year')
# One tick for every year from 2004 through 2017.
long_haul_ax.set_xticks(np.arange(2004, 2018, step=1))
long_haul_ax.set_title('Mode of Transport of Hong Kong Inbound Visitors (Long Haul Markets)')
Text(0.5,1,'Mode of Transport of Hong Kong Inbound Visitors (Long Haul Markets)')
# Short-haul markets: yearly arrivals split by air, land and sea; displayed as
# the cell output.
df_short = pd.read_csv('short_haul_market.csv')
df_short
Year | By air | By land | By sea | |
---|---|---|---|---|
0 | 2004 | 3375 | 1963 | 964 |
1 | 2005 | 3740 | 2104 | 1064 |
2 | 2006 | 4057 | 2152 | 1236 |
3 | 2009 | 3848 | 1877 | 1664 |
4 | 2010 | 4557 | 2116 | 1981 |
5 | 2011 | 4818 | 2121 | 2110 |
6 | 2012 | 4804 | 1994 | 2158 |
7 | 2013 | 4858 | 1874 | 2185 |
8 | 2014 | 4920 | 1796 | 2264 |
9 | 2015 | 4947 | 1732 | 2210 |
10 | 2016 | 5322 | 1617 | 2253 |
11 | 2017 | 5496 | 1678 | 2352 |
# Use the year as the x-axis and overlay (not stack) one area per transport mode.
df_short = df_short.set_index('Year')
short_haul_ax = df_short.plot.area(stacked=False, figsize=(10, 5))
short_haul_ax.set_ylabel('No. of visitors in thousands')
short_haul_ax.set_xlabel('Year')
# One tick for every year from 2004 through 2017.
short_haul_ax.set_xticks(np.arange(2004, 2018, step=1))
short_haul_ax.set_title('Mode of Transport of Hong Kong Inbound Visitors (Short Haul Markets excluding Mainland China)')
Text(0.5,1,'Mode of Transport of Hong Kong Inbound Visitors (Short Haul Markets excluding Mainland China)')
# Load the mainland-China transport table with the year as its index, then
# overlay (not stack) one area per column.
df_mainland = pd.read_csv('mainland.csv').set_index('Year')
mainland_transport_ax = df_mainland.plot.area(stacked=False, figsize=(10, 5))
mainland_transport_ax.set_ylabel('No. of visitors in thousands')
mainland_transport_ax.set_xlabel('Year')
# One tick for every year from 2004 through 2017.
mainland_transport_ax.set_xticks(np.arange(2004, 2018, step=1))
mainland_transport_ax.set_title('Mode of Transport of Hong Kong Inbound Visitors (Mainland China)')
Text(0.5,1,'Mode of Transport of Hong Kong Inbound Visitors (Mainland China)')
1) Mainland China makes up the largest part of same-day consumption, with a big gap between it and every other region;
2) Apart from mainland China, Macao, Taiwan, and South and Southeast Asia are the top three regions with the highest consumption;
3) In per capita overnight consumption, visitors from the mainland still rank highest, but they are not as dominant as in same-day consumption;
4) Macao is the second highest in per capita same-day consumption, while it is the lowest in per capita overnight consumption;
5) Apart from mainland China, the total consumption of overnight visitors from South and Southeast Asia has been higher than that of visitors from Europe and the Americas.
import numpy
# Rebinds `df` (previously the visitor-arrival table) to the yearly same-day
# consumption figures, one column per region, and displays it.
df=pd.read_csv('consumption 1.csv')
df
Year | The Americas | Europe Africa and the Middle East | Australia New Zealand South Pacific | North Asia | South and SoutheastAsia | Taiwan | The mainland of China | Macao | Total | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2011-12-31 | 245000000 | 377000000 | 119000000 | 428000000 | 686000000 | 859000000 | 35360000000 | 1285000000 | 39358000000 |
1 | 2012-12-31 | 233000000 | 261000000 | 98000000 | 418000000 | 567000000 | 695000000 | 49278000000 | 1056000000 | 52606000000 |
2 | 2013-12-31 | 188000000 | 301000000 | 85000000 | 407000000 | 613000000 | 742000000 | 64356000000 | 1341000000 | 68034000000 |
3 | 2014-12-31 | 225000000 | 279000000 | 77000000 | 352000000 | 622000000 | 728000000 | 76076000000 | 1385000000 | 79744000000 |
4 | 2015-12-31 | 233000000 | 319000000 | 78000000 | 320000000 | 538000000 | 694000000 | 75058000000 | 1241000000 | 78482000000 |
5 | 2016-12-31 | 210000000 | 325000000 | 86000000 | 306000000 | 558000000 | 695000000 | 60278000000 | 1195000000 | 63653000000 |
def parse_datetime(x):
    """Parse a date string with dateutil, returning NaN when it cannot be parsed.

    Only the errors that signal bad input are caught, so unrelated failures
    (for example KeyboardInterrupt) are no longer silently swallowed.
    """
    try:
        return parser.parse(x)
    except (ValueError, TypeError, OverflowError):
        # ValueError covers dateutil's ParserError; TypeError covers
        # non-string cells such as NaN.
        return numpy.nan


# Parse the 'Year' column, total every column per year, and plot all of them.
df['datetime'] = df['Year'].apply(parse_datetime)
df_all = df.set_index('datetime').resample('1y').aggregate('sum')
# The trailing argument-less .plot() on the returned Axes drew nothing and
# was removed.
df_all.plot(linewidth=1, figsize=(20, 10))
plt.title('Destination consumption expenditure of same-day in-town visitors ($)')
plt.legend(loc=2, prop={'size': 13})
<matplotlib.legend.Legend at 0x11695c240>
# Drop the two series that dwarf the rest so the smaller regions are readable.
# A list comprehension keeps the original column order; the previous set
# difference gave a different order (and so different line colours and legend
# order) from run to run.
excluded_columns = ('Total', 'The mainland of China')
selected_columns = [name for name in df_all.columns if name not in excluded_columns]
df_all[selected_columns].plot(linewidth=2, figsize=(20, 10))
plt.legend(loc=1, prop={'size': 13})
plt.title('Destination consumption expenditure of same-day in-town visitors except for Total and mainland China ($)')
Text(0.5,1,'Destination consumption expenditure of same-day in-town visitors except for Total and mainland China ($)')
# Rebind `df` to the overnight-visitor consumption table.
df = pd.read_csv('consumption 2.csv')


def parse_datetime(x):
    """Parse a date string with dateutil, returning NaN when it cannot be parsed.

    Only the errors that signal bad input are caught, so unrelated failures
    (for example KeyboardInterrupt) are no longer silently swallowed.
    """
    try:
        return parser.parse(x)
    except (ValueError, TypeError, OverflowError):
        # ValueError covers dateutil's ParserError; TypeError covers
        # non-string cells such as NaN.
        return numpy.nan


# Parse the 'Year' column, total every column per year, and plot all of them.
df['datetime'] = df['Year'].apply(parse_datetime)
df_all = df.set_index('datetime').resample('1y').aggregate('sum')
# The trailing argument-less .plot() on the returned Axes drew nothing and
# was removed.
df_all.plot(linewidth=2, figsize=(20, 10))
plt.title('Destination consumption expenditure of overnight visitors ($)')
plt.legend(loc=2, prop={'size': 10})
<matplotlib.legend.Legend at 0x1157def98>
# Drop the two series that dwarf the rest so the smaller regions are readable.
# A list comprehension keeps the original column order; the previous set
# difference gave a different order (and so different line colours and legend
# order) from run to run.
excluded_columns = ('Total', 'The mainland of China')
selected_columns = [name for name in df_all.columns if name not in excluded_columns]
df_all[selected_columns].plot(linewidth=2, figsize=(20, 10))
plt.title('Destination consumption expenditure of overnight visitors except for Total and mainland China($)')
plt.legend(loc=2, prop={'size': 12})
<matplotlib.legend.Legend at 0x1140eb2b0>
# Rebind `df` to the same-day per-capita spending table.
df = pd.read_csv('per capita 1.csv')


def parse_datetime(x):
    """Parse a date string with dateutil, returning NaN when it cannot be parsed.

    Only the errors that signal bad input are caught, so unrelated failures
    (for example KeyboardInterrupt) are no longer silently swallowed.
    """
    try:
        return parser.parse(x)
    except (ValueError, TypeError, OverflowError):
        # ValueError covers dateutil's ParserError; TypeError covers
        # non-string cells such as NaN.
        return numpy.nan


# Parse the 'Year' column, total every column per year, and plot all of them.
df['datetime'] = df['Year'].apply(parse_datetime)
df_all = df.set_index('datetime').resample('1y').aggregate('sum')
# The trailing argument-less .plot() on the returned Axes drew nothing and
# was removed.
df_all.plot(linewidth=2, figsize=(20, 10))
plt.title('Per capita spending of same-day in-town visitors ($)')
plt.legend(loc=4, prop={'size': 11})
<matplotlib.legend.Legend at 0x11528edd8>
# Drop the aggregate and mainland series so the remaining regions are readable.
# A list comprehension keeps the original column order; the previous set
# difference gave a different order (and so different line colours and legend
# order) from run to run.
excluded_columns = ('Overall', 'The mainland of China')
selected_columns = [name for name in df_all.columns if name not in excluded_columns]
df_all[selected_columns].plot(linewidth=2, figsize=(20, 10))
plt.title('Per capita spending of same-day in-town visitors except for Total and mainland China($)')
plt.legend(prop={'size': 12})
<matplotlib.legend.Legend at 0x1152cf9e8>
# Rebind `df` to the overnight per-capita spending table.
# NOTE(review): 'per captia' looks like a typo of 'per capita'; left unchanged
# because it must match the file name on disk - confirm.
df = pd.read_csv('per captia 2.csv')


def parse_datetime(x):
    """Parse a date string with dateutil, returning NaN when it cannot be parsed.

    Only the errors that signal bad input are caught, so unrelated failures
    (for example KeyboardInterrupt) are no longer silently swallowed.
    """
    try:
        return parser.parse(x)
    except (ValueError, TypeError, OverflowError):
        # ValueError covers dateutil's ParserError; TypeError covers
        # non-string cells such as NaN.
        return numpy.nan


# Parse the 'Year' column, total every column per year, and plot all of them.
df['datetime'] = df['Year'].apply(parse_datetime)
df_all = df.set_index('datetime').resample('1y').aggregate('sum')
# The trailing argument-less .plot() on the returned Axes drew nothing and
# was removed.
df_all.plot(linewidth=2, figsize=(20, 10))
plt.title('Per capita spending of overnight visitors ($)')
plt.legend(loc=1, prop={'size': 11})
<matplotlib.legend.Legend at 0x116a47278>
# Drop the aggregate and mainland series so the remaining regions are readable.
# A list comprehension keeps the original column order; the previous set
# difference gave a different order (and so different line colours and legend
# order) from run to run.
excluded_columns = ('Overall', 'The mainland of China')
selected_columns = [name for name in df_all.columns if name not in excluded_columns]
df_all[selected_columns].plot(linewidth=2, figsize=(20, 10))
plt.title('Per capita spending of overnight visitors except for Total and mainland China($)')
plt.legend(loc=1, prop={'size': 11})
<matplotlib.legend.Legend at 0x116a40cc0>