import requests
# Download page 2 of Flipkart's search results for "samsung".
# An explicit timeout is passed because requests otherwise waits without
# limit, which would leave the script hanging if the server never answers.
response = requests.get(
    'https://www.flipkart.com/search?q=samsung&otracker=search'
    '&otracker1=search&marketplace=FLIPKART&as-show=on&as=off'
    '&as-pos=0&as-type=HISTORY&page=2',
    timeout=30,
)
# Show the HTTP status so a failed request is visible before any parsing.
print(response.status_code)
200
from bs4 import BeautifulSoup
# Parse the downloaded page body using the 'html.parser' backend.
html_soup = BeautifulSoup(markup=response.text, features='html.parser')
# Bare expression: evaluates to the parsed object's type (shown as cell output
# when run interactively).
type(html_soup)
bs4.BeautifulSoup
# Gather every <div> whose class attribute is 'bhgxx2 col-12-12'.
mobile_data = html_soup.find_all(name='div', attrs={'class': 'bhgxx2 col-12-12'})
# Report the container type and the number of matches, one per line.
print(type(mobile_data), len(mobile_data), sep='\n')
<class 'bs4.element.ResultSet'> 30
# Per the original author's note, the first two matches belong to the left-hand
# part of the web page rather than to mobile listings, so they are skipped.
# The upper bound of 26 additionally discards any matches from index 26 onward.
mobile_data = mobile_data[2:26]
# Bare expression: number of containers kept (shown as cell output when run
# interactively).
len(mobile_data)
24
# Collect the product title of every listing container.
# Each title is the text of the '_3wU53n' div nested inside the container's
# 'col col-7-12' column; a container missing either element raises
# AttributeError, because find() returns None when nothing matches.
mobile_name = [
    container.find('div', 'col col-7-12').find('div', '_3wU53n').text
    for container in mobile_data
]
print(len(mobile_name))
print(mobile_name)
24 ['Samsung Galaxy A30s (Prism Crush Black, 64 GB)', 'Samsung Galaxy A50 (Black, 64 GB)', 'Samsung Galaxy J6 (Black, 32 GB)', 'Samsung Galaxy A30s (Prism Crush White, 64 GB)', 'Samsung Galaxy M10S (Metallic Blue, 32 GB)', 'Samsung Galaxy M30 (Gradation Blue, 128 GB)', 'Samsung Galaxy A30 (Black, 64 GB)', 'Samsung Galaxy A70 (Black, 128 GB)', 'Samsung Galaxy A50 (White, 64 GB)', 'Samsung Galaxy A30s (Prism Crush Violet, 128 GB)', 'Samsung Galaxy M30 (Black, 32 GB)', 'Samsung Galaxy S10 (Prism White, 128 GB)', 'Samsung Galaxy S10 (Prism Black, 128 GB)', 'Samsung Galaxy Note 9 (Midnight Black, 128 GB)', 'Samsung Galaxy A70 (Blue, 128 GB)', 'Samsung Guru Plus B110', 'Samsung Galaxy A30 (White, 64 GB)', 'Samsung Galaxy J6 (Blue, 32 GB)', 'Samsung Guru GT', 'Samsung Metro 313', 'Samsung Galaxy On Nxt (Black, 16 GB)', 'Samsung Galaxy A70 (White, 128 GB)', 'Samsung Galaxy A30 (Blue, 64 GB)', 'Samsung Galaxy J7 Nxt (Gold, 16 GB)']
# Preview the raw rating/review <span> elements of every listing so the
# positions of the wanted values can be read off before extracting them.
for each in mobile_data:
    print(
        each.find('div', 'niH0FQ')
        .find('span', '_38sUEc')
        .find('span')
        .find_all('span')
    )
[<span>2,219 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 170 Reviews</span>] [<span>9,398 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 868 Reviews</span>] [<span>15,398 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,604 Reviews</span>] [<span>2,219 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 170 Reviews</span>] [<span>537 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 38 Reviews</span>] [<span>228 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 19 Reviews</span>] [<span>16,612 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,382 Reviews</span>] [<span>4,343 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 531 Reviews</span>] [<span>9,398 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 868 Reviews</span>] [<span>2,219 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 170 Reviews</span>] [<span>296 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 16 Reviews</span>] [<span>1,486 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 159 Reviews</span>] [<span>1,486 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 159 Reviews</span>] [<span>2,043 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 274 Reviews</span>] [<span>4,343 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 531 Reviews</span>] [<span>326 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 39 Reviews</span>] [<span>16,612 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,382 Reviews</span>] [<span>15,398 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,604 Reviews</span>] [<span>1,727 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 173 Reviews</span>] [<span>1,391 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 139 Reviews</span>] [<span>3,59,531 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 55,496 Reviews</span>] [<span>4,343 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 531 Reviews</span>] [<span>16,612 Ratings </span>, 
<span class="_1VpSqZ">&</span>, <span> 1,382 Reviews</span>] [<span>9,671 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,317 Reviews</span>]
# Per-listing columns, filled in the same order as mobile_data.
mobile_price = []
mobile_star = []
total_ratings = []
total_reviews = []
for each in mobile_data:
    # Price is the text of the '_1vC4OE _2rQ-NK' div in the right-hand column.
    mobile_price.append(
        each.find('div', 'col col-5-12 _2o7WAb').find('div', '_1vC4OE _2rQ-NK').text
    )
    # The star score and the ratings/reviews counts live in the same
    # 'niH0FQ' div, so it is looked up once and reused.
    rating_box = each.find('div', 'niH0FQ')
    mobile_star.append(rating_box.find('div', 'hGSR34').text)
    # The inner spans come as [ratings, '&' separator, reviews]; indexes 0 and 2
    # are the two counts.
    count_spans = rating_box.find('span', '_38sUEc').find('span').find_all('span')
    total_ratings.append(count_spans[0].text)
    # NOTE: the text is stored exactly as scraped, so the review strings keep
    # their leading non-breaking space (e.g. '\xa0170 Reviews').
    total_reviews.append(count_spans[2].text)
print(len(total_reviews))
print(total_reviews)
24 ['\xa0170 Reviews', '\xa0868 Reviews', '\xa01,604 Reviews', '\xa0170 Reviews', '\xa038 Reviews', '\xa019 Reviews', '\xa01,382 Reviews', '\xa0531 Reviews', '\xa0868 Reviews', '\xa0170 Reviews', '\xa016 Reviews', '\xa0159 Reviews', '\xa0159 Reviews', '\xa0274 Reviews', '\xa0531 Reviews', '\xa039 Reviews', '\xa01,382 Reviews', '\xa01,604 Reviews', '\xa0173 Reviews', '\xa0139 Reviews', '\xa055,496 Reviews', '\xa0531 Reviews', '\xa01,382 Reviews', '\xa01,317 Reviews']
import pandas as pd
# Assemble the scraped lists into one table, one column per list.
Mobile_Dataset = pd.DataFrame(
    data={
        'Mobile_name': mobile_name,
        'Price': mobile_price,
        'Star': mobile_star,
        'Total Ratings': total_ratings,
        'Total Reviews': total_reviews,
    }
)
# info() prints its own summary and returns None, so the surrounding print()
# additionally emits "None" after the summary.
print(Mobile_Dataset.info())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 24 entries, 0 to 23 Data columns (total 5 columns): Mobile_name 24 non-null object Price 24 non-null object Star 24 non-null object Total Ratings 24 non-null object Total Reviews 24 non-null object dtypes: object(5) memory usage: 1.1+ KB None
# Bare expression: first five rows of the table (shown as cell output when run
# interactively).
Mobile_Dataset.head(n=5)
Mobile_name | Price | Star | Total Ratings | Total Reviews | |
---|---|---|---|---|---|
0 | Samsung Galaxy A30s (Prism Crush Black, 64 GB) | ₹14,999 | 4.3 | 2,219 Ratings | 170 Reviews |
1 | Samsung Galaxy A50 (Black, 64 GB) | ₹17,999 | 4.4 | 9,398 Ratings | 868 Reviews |
2 | Samsung Galaxy J6 (Black, 32 GB) | ₹10,490 | 4.4 | 15,398 Ratings | 1,604 Reviews |
3 | Samsung Galaxy A30s (Prism Crush White, 64 GB) | ₹14,999 | 4.3 | 2,219 Ratings | 170 Reviews |
4 | Samsung Galaxy M10S (Metallic Blue, 32 GB) | ₹9,353 | 4.4 | 537 Ratings | 38 Reviews |
# Write the DataFrame to 'mobile_data.csv' in the current working directory.
Mobile_Dataset.to_csv(path_or_buf='mobile_data.csv')