# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Walk the Kaggle input directory and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
/kaggle/input/moto-gp-world-championship19492022/grand-prix-events-held.csv /kaggle/input/moto-gp-world-championship19492022/same-nation-podium-lockouts.csv /kaggle/input/moto-gp-world-championship19492022/grand-prix-race-winners.csv /kaggle/input/moto-gp-world-championship19492022/constructure-world-championship.csv /kaggle/input/moto-gp-world-championship19492022/riders-info.csv /kaggle/input/moto-gp-world-championship19492022/riders-finishing-positions.csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
pip install pygal
Collecting pygal Downloading pygal-3.0.0-py2.py3-none-any.whl (129 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.4/129.4 KB 967.8 kB/s eta 0:00:00 Installing collected packages: pygal Successfully installed pygal-3.0.0 WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv Note: you may need to restart the kernel to use updated packages.
pip install squarify
Requirement already satisfied: squarify in /opt/conda/lib/python3.7/site-packages (0.4.3) WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv Note: you may need to restart the kernel to use updated packages.
import plotly as plot
import pygal as py
import squarify as sq
import matplotlib
# Global matplotlib defaults for the figures drawn below: a large canvas,
# small tick labels and a small bold font.
plt.rcParams["figure.figsize"] = (20, 15)
matplotlib.rc('xtick', labelsize=7)
matplotlib.rc('ytick', labelsize=7)
# 'normal' is a font style/weight value, not a font family name, so the font
# lookup cannot resolve it; use the generic 'sans-serif' family instead.
font = {
    'family': 'sans-serif',
    'weight': 'bold',
    'size': 5,
}
matplotlib.rc('font', **font)
# Load the riders table from the Kaggle dataset and preview its first rows.
df = pd.read_csv("../input/moto-gp-world-championship19492022/riders-info.csv")
df.head()
Riders All Time in All Classes | Victories | 2nd places | 3rd places | Pole positions from '74 to 2022 | Race fastest lap to 2022 | World Championships | |
---|---|---|---|---|---|---|---|
0 | AGOSTINI Giacomo | 122 | 35.0 | 2.0 | 9.0 | 117.0 | 15.0 |
1 | ROSSI Valentino | 115 | 67.0 | 53.0 | 65.0 | 96.0 | 9.0 |
2 | NIETO Angel | 90 | 35.0 | 14.0 | 34.0 | 81.0 | 13.0 |
3 | MARQUEZ Marc | 85 | 36.0 | 17.0 | 90.0 | 75.0 | 8.0 |
4 | HAILWOOD Mike | 76 | 25.0 | 11.0 | NaN | 79.0 | 9.0 |
# Summarise column dtypes and non-null counts.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 368 entries, 0 to 367 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Riders All Time in All Classes 368 non-null object 1 Victories 368 non-null int64 2 2nd places 339 non-null float64 3 3rd places 320 non-null float64 4 Pole positions from '74 to 2022 226 non-null float64 5 Race fastest lap to 2022 295 non-null float64 6 World Championships 114 non-null float64 dtypes: float64(5), int64(1), object(1) memory usage: 20.2+ KB
# Count the missing values in each column.
df.isnull().sum()
Riders All Time in All Classes 0 Victories 0 2nd places 29 3rd places 48 Pole positions from '74 to 2022 142 Race fastest lap to 2022 73 World Championships 254 dtype: int64
# Give the rider-name column a short label, then build the analysis frame in
# which every missing value is replaced by 0.
df = df.rename(columns={"Riders All Time in All Classes": "Name"})
df1 = df.fillna(value=0)
from wordcloud import WordCloud, STOPWORDS

# Build one lowercase, space-separated text from all rider names. A single
# join replaces the index loop and the repeated string concatenation.
comment_words = "".join(
    " ".join(token.lower() for token in str(val).split()) + " "
    for val in df1.Name
)
stopwords = set(STOPWORDS)

# Render the word cloud from the combined name text.
wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='white',
    stopwords=stopwords,
    min_font_size=10,
).generate(comment_words)

# Show the rendered image on a square figure with no axes and no padding.
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
# (rows, columns) of the filled frame.
df1.shape
(368, 7)
# Summary statistics, transposed so that each column of df1 becomes a row.
df1.describe().transpose()
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
Victories | 368.0 | 8.679348 | 14.460711 | 1.0 | 1.0 | 4.0 | 9.25 | 122.0 |
2nd places | 368.0 | 7.826087 | 9.022054 | 0.0 | 2.0 | 5.0 | 10.00 | 67.0 |
3rd places | 368.0 | 7.016304 | 7.071820 | 0.0 | 2.0 | 5.0 | 10.00 | 53.0 |
Pole positions from '74 to 2022 | 368.0 | 6.000000 | 11.095572 | 0.0 | 0.0 | 1.0 | 7.00 | 90.0 |
Race fastest lap to 2022 | 368.0 | 7.649457 | 13.038800 | 0.0 | 1.0 | 3.0 | 9.00 | 117.0 |
World Championships | 368.0 | 0.739130 | 1.741430 | 0.0 | 0.0 | 0.0 | 1.00 | 15.0 |
# Box plot of df1's columns on a single axes.
df1.plot.box()
<AxesSubplot:>
# Horizontal bar chart of victories for the first 25 rows of df1.
first_25 = df1.head(25)
first_25.plot.barh(x="Name", y="Victories", color="red", alpha=0.80)
plt.xlabel("victories")
plt.ylabel("Name of Players")
plt.title("Victories by Players")
plt.show()
# Bar chart of title counts, restricted to riders with at least one title.
champions = df1[df1["World Championships"] > 0]
champions.plot(kind="bar", x="Name", y="World Championships", color="green")
plt.title("No. of world title won by players", fontsize=10, pad=5)
plt.xlabel("Player name")
plt.ylabel("No . of world campionships")
plt.show()
# Treemap of world champions: tile area follows victories, tile shade follows
# the number of titles.
w_c = df1[df1["World Championships"] > 0]
title_counts = w_c["World Championships"]
norm = matplotlib.colors.Normalize(
    vmin=title_counts.min(),
    vmax=title_counts.max(),
)
colors = [matplotlib.cm.Blues(norm(value)) for value in title_counts]

# Add an axes to the current figure; no axes is passed to the treemap call
# below, so it is left to pick its own target axes.
fig = plt.gcf()
ax = fig.add_subplot()
sq.plot(sizes=w_c.Victories, label=w_c.Name, color=colors, alpha=.6, pad=True)
plt.title(
    "Pemenang kejuaraan Motogp dengan ukuran mereka menunjukkan kemenangan",
    fontsize=23,
    fontweight="bold",
)
plt.axis('off')
plt.show()
# Stacked bars of victories, 2nd and 3rd places for the first ten rows of df1.
non_podium_columns = [
    "Pole positions from '74 to 2022",
    "Race fastest lap to 2022",
    "World Championships",
]
df_mt = df1.drop(columns=non_podium_columns)
first_ten = df_mt.head(10)
first_ten.plot(
    kind='bar',
    x='Name',
    stacked=True,
    title='Stacked Bar Graph by dataframe',
)
plt.xlabel("Name of moto gp players")
plt.ylabel("WINNERS in 1 st,2nd,3rd position")
plt.show()
# Donut chart: share of all victories held by the first ten rows of df1
# versus every other rider.
# Each sum is computed once; previously `total` was unused and the same sums
# were recomputed with the builtin sum().
total = df1["Victories"].sum()
top10_total = df1["Victories"].head(10).sum()
data = [top10_total, total - top10_total]
sizes = data
labels = ['top 10 player victories', 'Other players victories']
colors = ['blue', 'red']
explode = (0.05, 0.05)

# Pie chart with percentage labels.
plt.pie(
    sizes,
    colors=colors,
    labels=labels,
    autopct='%1.1f%%',
    pctdistance=0.85,
    explode=explode,
)

# A white circle over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.60, fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

plt.title('Victories of top 10 players vs other players')
plt.legend(labels, loc="upper left", title="Comparision of top players vs others")
plt.show()
# Pairwise correlation of the numeric columns. The text column "Name" is
# excluded explicitly because DataFrame.corr() on pandas >= 2.0 raises on
# non-numeric columns instead of dropping them silently.
corr = df1.select_dtypes(include="number").corr()
# Highlight the largest value in each row of the table.
corr.style.highlight_max(color="red", axis=1)
Victories | 2nd places | 3rd places | Pole positions from '74 to 2022 | Race fastest lap to 2022 | World Championships | |
---|---|---|---|---|---|---|
Victories | 1.000000 | 0.800686 | 0.580136 | 0.692807 | 0.958650 | 0.907953 |
2nd places | 0.800686 | 1.000000 | 0.823005 | 0.714619 | 0.777982 | 0.658043 |
3rd places | 0.580136 | 0.823005 | 1.000000 | 0.607842 | 0.573283 | 0.399272 |
Pole positions from '74 to 2022 | 0.692807 | 0.714619 | 0.607842 | 1.000000 | 0.636049 | 0.508092 |
Race fastest lap to 2022 | 0.958650 | 0.777982 | 0.573283 | 0.636049 | 1.000000 | 0.867180 |
World Championships | 0.907953 | 0.658043 | 0.399272 | 0.508092 | 0.867180 | 1.000000 |
# Heatmap of the correlation table computed above.
sns.heatmap(corr)
<AxesSubplot:>
pip install klib
Collecting klib Downloading klib-1.0.1-py3-none-any.whl (20 kB) Requirement already satisfied: numpy<2.0.0,>=1.16.3 in /opt/conda/lib/python3.7/site-packages (from klib) (1.21.6) Requirement already satisfied: Jinja2<4.0.0,>=3.0.3 in /opt/conda/lib/python3.7/site-packages (from klib) (3.1.2) Requirement already satisfied: scipy<2.0.0,>=1.1.0 in /opt/conda/lib/python3.7/site-packages (from klib) (1.7.3) Requirement already satisfied: pandas<2.0.0,>=1.1.2 in /opt/conda/lib/python3.7/site-packages (from klib) (1.3.5) Requirement already satisfied: seaborn<0.12.0,>=0.11.1 in /opt/conda/lib/python3.7/site-packages (from klib) (0.11.2) Requirement already satisfied: matplotlib<4.0.0,>=3.0.3 in /opt/conda/lib/python3.7/site-packages (from klib) (3.5.2) Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.7/site-packages (from Jinja2<4.0.0,>=3.0.3->klib) (2.0.1) Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.7/site-packages (from matplotlib<4.0.0,>=3.0.3->klib) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.7/site-packages (from matplotlib<4.0.0,>=3.0.3->klib) (1.4.0) Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib<4.0.0,>=3.0.3->klib) (9.0.1) Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib<4.0.0,>=3.0.3->klib) (4.30.0) Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib<4.0.0,>=3.0.3->klib) (21.3) Requirement already satisfied: pyparsing>=2.2.1 in /opt/conda/lib/python3.7/site-packages (from matplotlib<4.0.0,>=3.0.3->klib) (3.0.7) Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.7/site-packages (from matplotlib<4.0.0,>=3.0.3->klib) (0.11.0) Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.7/site-packages (from pandas<2.0.0,>=1.1.2->klib) (2021.3) Requirement already satisfied: 
typing-extensions in /opt/conda/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib<4.0.0,>=3.0.3->klib) (4.2.0) Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.7/site-packages (from python-dateutil>=2.7->matplotlib<4.0.0,>=3.0.3->klib) (1.16.0) Installing collected packages: klib Successfully installed klib-1.0.1 WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv Note: you may need to restart the kernel to use updated packages.
import klib
# klib correlation plot of df1, restricted to positive correlations.
klib.corr_plot(df1, split='pos')
Displaying positive correlations. Specify a positive "threshold" to limit the results further.
<AxesSubplot:title={'center':'Feature-correlation (pearson)'}>
# klib distribution plot of df1.
klib.dist_plot(df1)
plt.show()
# klib categorical plot of df1 with top=4 and bottom=4.
klib.cat_plot(df1, top=4, bottom=4)
plt.show()
# Four stacked bar charts: the ten riders with the most victories, 2nd places,
# 3rd places and world championships.
sns.set_palette('inferno_r')
sns.set_style("darkgrid")

df2 = df1.sort_values(by="Victories", ascending=False).head(10)
df3 = df1.sort_values(by="2nd places", ascending=False).head(10)
df4 = df1.sort_values(by="3rd places", ascending=False).head(10)
df5 = df1.sort_values(by="World Championships", ascending=False).head(10)

# The figure colours are passed straight to plt.subplots(). A separate
# plt.figure() call beforehand only creates an extra, empty figure, and its
# colours never reach the figure that holds the charts.
fig, axes = plt.subplots(4, 1, facecolor="olive", edgecolor="green")
fig.suptitle('Players top in their positions')

# (top-10 frame, plotted column, panel title) for each row of the grid.
panels = [
    (df2, "Victories", "players with highest victories"),
    (df3, "2nd places", "Players with most no. of 2 nd positions"),
    (df4, "3rd places", "Players with most no. of 3rd positions"),
    (df5, "World Championships", "Players with most no. of World Campionships"),
]
for panel_ax, (top10, column, panel_title) in zip(axes, panels):
    sns.barplot(ax=panel_ax, x=top10.Name, y=top10[column])
    panel_ax.set_title(panel_title)

# plt.xticks acts on the current axes only, as in the original code.
plt.xticks(rotation=90)
plt.show()
<Figure size 1440x1080 with 0 Axes>
# Pairwise scatter-plot grid of df1 in corner layout, with "+" markers and
# unfilled diagonal plots.
sns.pairplot(
    df1,
    palette="rainbow",
    corner=True,
    plot_kws=dict(marker="+", linewidth=1),
    diag_kws=dict(fill=False),
)
plt.show()
import plotly.express as px
# Notched box plot of fastest-lap counts with every data point shown.
fig = px.box(df1, y="Race fastest lap to 2022", points="all",notched=True)
fig.show()
import plotly.graph_objects as go

# One horizontal box per result column, all on a shared figure.
# Each entry is (df1 column, trace name shown in the legend).
box_specs = [
    ("Victories", '1st places'),
    ("2nd places", "2nd places"),
    ("3rd places", "3rd place"),
    ("World Championships", "World Championships"),
]
fig = go.Figure()
for column, trace_name in box_specs:
    fig.add_trace(go.Box(x=df1[column], name=trace_name))
fig.update_traces(orientation='h')
fig.update_layout(title_text="Box ploting posisi pembalap dalam data yang diberikan")
fig.show()
# Scatter of pole positions against world titles; marker size encodes
# victories and marker colour encodes fastest laps.
fig = px.scatter(df1, x="Pole positions from '74 to 2022", y="World Championships", size='Victories',color="Race fastest lap to 2022")
fig.show()
# Seaborn lmplot of world titles against pole positions.
# NOTE(review): no `hue` is given, so `palette` presumably has no effect here; confirm.
sns.lmplot(data=df1, x="Pole positions from '74 to 2022", y="World Championships",markers=["*"],palette="Set1")
<seaborn.axisgrid.FacetGrid at 0x7fc5496b9750>
# Plotly treemap with the path Name > Victories > 2nd places > 3rd places.
# Tile values come from victories and tile colour from world titles; the
# colour midpoint is the average title count weighted by victories.
fig2= px.treemap(data_frame=df1, path=["Name","Victories","2nd places","3rd places"],
values='Victories',color='World Championships', hover_data=["Race fastest lap to 2022"],color_continuous_scale='RdBu',
color_continuous_midpoint=np.average(df1["World Championships"], weights=df1['Victories']))
fig2.update_traces(root_color="cyan")
fig2.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig2.show()
# Modelling frame: victories, pole positions and world titles, plus a string
# label ('True'/'False') telling whether the rider won at least one title.
unused_columns = ["2nd places", "3rd places", "Race fastest lap to 2022", "Name"]
w_v_m = df1.drop(columns=unused_columns)

label_column = 'Won Championship or not?'
title_counts = w_v_m['World Championships']
w_v_m.loc[title_counts <= 0, label_column] = 'False'
w_v_m.loc[title_counts > 0, label_column] = 'True'
w_v_m
Victories | Pole positions from '74 to 2022 | World Championships | Won Championship or not? | |
---|---|---|---|---|
0 | 122 | 9.0 | 15.0 | True |
1 | 115 | 65.0 | 9.0 | True |
2 | 90 | 34.0 | 13.0 | True |
3 | 85 | 90.0 | 8.0 | True |
4 | 76 | 0.0 | 9.0 | True |
... | ... | ... | ... | ... |
363 | 1 | 0.0 | 0.0 | False |
364 | 1 | 0.0 | 0.0 | False |
365 | 1 | 0.0 | 0.0 | False |
366 | 1 | 0.0 | 0.0 | False |
367 | 1 | 0.0 | 0.0 | False |
368 rows × 4 columns
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Features are the first two columns of w_v_m; the target is the string label.
feature_columns = list(w_v_m.columns[:2])
X = w_v_m[feature_columns]
y = w_v_m["Won Championship or not?"]

# Hold out 20% of the rows for evaluation, with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=1
)

# Fit Gaussian Naive Bayes and report its accuracy on the held-out rows.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
accuracy_pct = metrics.accuracy_score(y_test, y_pred) * 100
print("Gaussian Naive Bayes model accuracy(in %):", accuracy_pct)
Gaussian Naive Bayes model accuracy(in %): 82.43243243243244
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score
# Accuracy on the held-out rows, as a fraction rather than a percentage.
accuracy_score(y_test, y_pred)
0.8243243243243243
from sklearn.metrics import confusion_matrix

# Confusion matrix of the held-out labels against the model's predictions.
expected, predicted = y_test, y_pred
results = confusion_matrix(expected, predicted)
print(results)
[[50 1] [12 11]]
# Example prediction for a single rider with feature values 5 and 10, given in
# the column order of X. The input is wrapped in a DataFrame that carries the
# training column names: the model was fitted on a DataFrame, and a bare list
# triggers scikit-learn's "X does not have valid feature names" warning.
# NOTE: this rebinds y_pred, replacing the test-set predictions made earlier.
y_pred = gnb.predict(pd.DataFrame([[5, 10]], columns=X.columns))
y_pred
/opt/conda/lib/python3.7/site-packages/sklearn/base.py:451: UserWarning: X does not have valid feature names, but GaussianNB was fitted with feature names
array(['False'], dtype='<U5')
Giacomo Agostini is the most successful rider in MotoGP history by number of victories, and Valentino Rossi is the second most successful.
However, Valentino Rossi has the highest number of podium finishes (wins, 2nd places and 3rd places combined).
Marc Marquez is the most successful MotoGP racer who is still active.
About 40% of the championships were won by these top-ranked riders.
More than 50% of the wins went to the top 10 players.
Winning the championship is much more difficult than getting a win or a 2nd or 3rd place.
As the number of pole positions increases, the chances of getting a win and of winning the world championship also increase.
The number of world championships is correlated with the number of pole positions and wins.