#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd


# In[2]:


# Wikipedia page that the rest of this script downloads and parses.
url = 'https://en.wikipedia.org/wiki/List_of_International_Space_Station_expeditions'


# In[3]:


#!pip install beautifulsoup4


# In[2]:


import bs4
import requests
# Download the page and collect every <table> element found in it.
# The timeout stops the script from hanging forever on a stalled connection,
# and raise_for_status() fails loudly on an HTTP error instead of silently
# parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results could differ across machines.
soup = bs4.BeautifulSoup(r.content, "html.parser")
# find_all is the current spelling of the legacy findAll alias.
tables = soup.find_all("table")


# In[ ]:


# Lookup tables filled from the links inside the first two tables of the page.
exps = {}  # link text found in column 0 -> href
ppls = {}  # link text found in column 1 (crew member name) -> href
ppcs = {}  # crew member name -> alt text of the most recently seen flag image
msns = {}  # link text found in any later column -> href
cnts = {}  # flag image alt text -> flag image src

# Alt text of the last flag image seen; stays None until a flag appears so
# that a name preceding any flag cannot trigger a NameError.
cnt = None

for table in (tables[0], tables[1]):
    # The first <tr> of each table is skipped (treated as the header row).
    for tr in table.find_all("tr")[1:]:
        for j, td in enumerate(tr.find_all("td")):
            for a in td.find_all("a"):
                txt = a.text
                # .get() tolerates anchors without an href attribute.
                href = a.get('href')
                if j == 0:
                    # The original referenced an undefined name `exp` here and
                    # raised NameError; the anchor text is used as the key.
                    # Empty and footnote-style ("[1]") links are ignored, the
                    # same filter the other columns apply.
                    if txt and '[' not in txt and txt not in exps:
                        exps[txt] = href
                elif j == 1:
                    print(txt)
                    if not txt:
                        # A text-less link is expected to wrap a flag image;
                        # skip it when there is no usable <img alt=...>.
                        img = a.find('img')
                        if img is None or img.get('alt') is None:
                            continue
                        cnt = img['alt']
                        if 'ISS' not in cnt and cnt not in cnts:
                            cnts[cnt] = img.get('src')
                    elif '[' not in txt:
                        # Associate the name with the last flag seen, if any.
                        if cnt is not None and txt not in ppcs:
                            ppcs[txt] = cnt
                        if txt not in ppls:
                            ppls[txt] = href
                else:
                    if txt and '[' not in txt and txt not in msns:
                        msns[txt] = href


# In[76]:


# Let pandas parse the page's tables, stack the first two into one frame and
# renumber the rows (the previous row labels are kept as an "index" column).
df = pd.concat(pd.read_html(url)[:2]).reset_index()


# In[77]:


def find_names(s, ppls, z):
    """Greedily peel known names off the front of a space-separated string.

    The first two words of ``s`` are tried as a name, then the first three.
    The first candidate found in ``ppls`` is printed and appended to ``z``,
    and the search continues on the words that follow it.  Scanning stops as
    soon as neither candidate is a known name.

    Args:
        s: Text to scan; words are separated by single spaces.
        ppls: Container of known names (anything supporting ``in``).
        z: List that receives the matches; it is modified in place.

    Returns:
        The same list object that was passed in as ``z``.
    """
    words = s.split(' ')
    for width in (2, 3):
        candidate = ' '.join(words[:width])
        if candidate not in ppls:
            continue
        print(candidate)
        z.append(candidate)
        # Continue with the words after the match; a match ends this level.
        find_names(' '.join(words[width:]), ppls, z)
        break
    return z


# In[78]:


# Notebook cell: display the scraped crew-name -> link mapping for inspection.
ppls


# In[72]:


# For every table row, split the "Crew" cell into the known crew-member names
# and print the cell next to the names found in it.
# The loop variable must stay `i`: the expression after this loop reads it.
for i in df.index:
    crew = df.loc[i, 'Crew']
    if not isinstance(crew, str):
        # Missing cells come back from pandas as NaN (a float), which has no
        # .replace(); skip them instead of crashing.
        continue
    crew = crew.replace('\n', '')
    crews = find_names(crew, ppls, [])
    # Fall back to the whole cell when it is itself a known name, but only if
    # find_names did not already report it -- otherwise it is listed twice.
    if crew in ppls and crew not in crews:
        crews.append(crew)
    print(crew, crews)


# In[359]:


# Notebook cell: show three columns of the row whose label is still held in
# `i` (the last value assigned by the preceding loop over df.index).
df.loc[[i], ['Expedition', 'Launch date', 'Duration(days)']]