import requests
# Flipkart search-results page being scraped (query "samsung", page 2).
SEARCH_URL = (
    'https://www.flipkart.com/search?q=samsung&otracker=search'
    '&otracker1=search&marketplace=FLIPKART&as-show=on&as=off'
    '&as-pos=0&as-type=HISTORY&page=2'
)

# requests waits indefinitely unless a timeout is given, so a stalled
# connection would hang the script; bound the wait explicitly.
response = requests.get(SEARCH_URL, timeout=30)
print(response.status_code)
200
from bs4 import BeautifulSoup
# Parse the body of the HTTP response into a navigable tree using the
# 'html.parser' backend.
html_soup = BeautifulSoup(markup=response.text, features='html.parser')
# Notebook-style inspection of the parsed object's type (no effect in a script).
type(html_soup)
bs4.BeautifulSoup
# Collect every <div> carrying the class string 'bhgxx2 col-12-12'.
# NOTE(review): presumably each match is one row of the results page; verify
# against the live markup, since these class names are site-generated.
mobile_data = html_soup.find_all(name='div', class_='bhgxx2 col-12-12')
# Report the container type and the number of matches, one per line.
print(type(mobile_data), len(mobile_data), sep='\n')
<class 'bs4.element.ResultSet'> 30
# Keep only the matches at positions 2..25 (24 entries).
# NOTE(review): presumably the skipped leading/trailing matches are not
# product listings — confirm against the page, the offsets are hard-coded.
PRODUCT_ROWS = slice(2, 26)
mobile_data = mobile_data[PRODUCT_ROWS]
# Notebook-style check of how many entries remain.
len(mobile_data)
24
# Product title of every listing, in page order.
# The title lives in the '_3wU53n' div inside the 'col col-7-12' column of
# each listing. Built as a comprehension so the per-listing lookup is
# unambiguously the loop body (the flat, unindented loop did not parse).
mobile_name = [
    listing.find('div', 'col col-7-12').find('div', '_3wU53n').text
    for listing in mobile_data
]
print(len(mobile_name))
print(mobile_name)
24 ['Samsung Galaxy A70s (Prism Crush White, 128 GB)', 'Samsung Galaxy A70s (Prism Crush Red, 128 GB)', 'Samsung Galaxy A30s (Prism Crush White, 64 GB)', 'Samsung Galaxy A20s (Green, 32 GB)', 'Samsung Galaxy A30s (Prism Crush Violet, 128 GB)', 'Samsung Galaxy A30 (Black, 64 GB)', 'Samsung Galaxy A30s (Prism Crush Black, 64 GB)', 'Samsung Galaxy M30 (Gradation Blue, 128 GB)', 'Samsung Galaxy M10S (Metallic Blue, 32 GB)', 'Samsung Galaxy A50 (White, 64 GB)', 'Samsung Galaxy A70 (Black, 128 GB)', 'Samsung Galaxy M30 (Black, 32 GB)', 'Samsung Metro 313', 'Samsung Galaxy Note 9 (Midnight Black, 128 GB)', 'Samsung Galaxy S10 (Prism Black, 128 GB)', 'Samsung Galaxy A30 (White, 64 GB)', 'Samsung Guru Plus B110', 'Samsung Galaxy A70 (Blue, 128 GB)', 'Samsung Galaxy J6 (Blue, 32 GB)', 'Samsung Galaxy A70 (White, 128 GB)', 'Samsung Galaxy M30 (Gradation Black, 64 GB)', 'Samsung Galaxy On Nxt (Black, 16 GB)', 'Samsung Galaxy A20 (Gold, 32 GB)', 'Samsung Galaxy S10 (Prism White, 128 GB)']
# Exploratory dump: for every listing, show the <span> elements nested under
# the '_38sUEc' span of the 'niH0FQ' rating block, to see which position
# holds the ratings count and which holds the reviews count.
for each in mobile_data:
    # The print must be nested under the loop; unindented it is a syntax error.
    rating_summary = each.find('div', 'niH0FQ').find('span', '_38sUEc')
    print(rating_summary.find('span').find_all('span'))
[<span>470 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 73 Reviews</span>] [<span>485 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 61 Reviews</span>] [<span>2,219 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 170 Reviews</span>] [<span>553 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 42 Reviews</span>] [<span>2,219 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 170 Reviews</span>] [<span>16,612 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,382 Reviews</span>] [<span>2,219 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 170 Reviews</span>] [<span>228 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 19 Reviews</span>] [<span>537 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 38 Reviews</span>] [<span>9,398 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 868 Reviews</span>] [<span>4,343 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 531 Reviews</span>] [<span>296 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 16 Reviews</span>] [<span>1,391 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 139 Reviews</span>] [<span>2,042 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 274 Reviews</span>] [<span>1,486 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 159 Reviews</span>] [<span>16,612 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,382 Reviews</span>] [<span>326 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 39 Reviews</span>] [<span>4,343 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 531 Reviews</span>] [<span>15,398 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 1,604 Reviews</span>] [<span>4,343 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 531 Reviews</span>] [<span>3,051 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 232 Reviews</span>] [<span>3,59,531 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 55,496 Reviews</span>] [<span>14,740 Ratings </span>, <span 
class="_1VpSqZ">&</span>, <span> 1,305 Reviews</span>] [<span>1,486 Ratings </span>, <span class="_1VpSqZ">&</span>, <span> 159 Reviews</span>]
# Per-listing columns, filled in page order so index i of every list refers
# to the same listing.
mobile_price = []
mobile_star = []
total_ratings = []
total_reviews = []
for each in mobile_data:
    # Price sits in the right-hand column of the listing.
    price_column = each.find('div', 'col col-5-12 _2o7WAb')
    mobile_price.append(price_column.find('div', '_1vC4OE _2rQ-NK').text)

    # Look the rating block up once and reuse it for all three rating fields
    # instead of re-searching the listing for each one.
    rating_block = each.find('div', 'niH0FQ')
    mobile_star.append(rating_block.find('div', 'hGSR34').text)

    # The nested spans come back as [ratings, '&' separator, reviews], so the
    # counts are at positions 0 and 2. Text is stored verbatim (the reviews
    # text keeps its leading non-breaking space).
    count_spans = (
        rating_block.find('span', '_38sUEc').find('span').find_all('span')
    )
    total_ratings.append(count_spans[0].text)
    total_reviews.append(count_spans[2].text)
print(len(total_reviews))
print(total_reviews)
24 ['\xa073 Reviews', '\xa061 Reviews', '\xa0170 Reviews', '\xa042 Reviews', '\xa0170 Reviews', '\xa01,382 Reviews', '\xa0170 Reviews', '\xa019 Reviews', '\xa038 Reviews', '\xa0868 Reviews', '\xa0531 Reviews', '\xa016 Reviews', '\xa0139 Reviews', '\xa0274 Reviews', '\xa0159 Reviews', '\xa01,382 Reviews', '\xa039 Reviews', '\xa0531 Reviews', '\xa01,604 Reviews', '\xa0531 Reviews', '\xa0232 Reviews', '\xa055,496 Reviews', '\xa01,305 Reviews', '\xa0159 Reviews']
import pandas as pd
# Assemble the scraped lists into one table: one column per list, one row
# per listing.
scraped_columns = {
    'Mobile_name': mobile_name,
    'Price': mobile_price,
    'Star': mobile_star,
    'Total Ratings': total_ratings,
    'Total Reviews': total_reviews,
}
Mobile_Dataset = pd.DataFrame(data=scraped_columns)
# info() writes its summary directly and returns None, so wrapping it in
# print() also emits a trailing "None".
print(Mobile_Dataset.info())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 24 entries, 0 to 23 Data columns (total 5 columns): Mobile_name 24 non-null object Price 24 non-null object Star 24 non-null object Total Ratings 24 non-null object Total Reviews 24 non-null object dtypes: object(5) memory usage: 1.1+ KB None
# Notebook-style preview of the first five rows of the table.
Mobile_Dataset.head(n=5)
Mobile_name | Price | Star | Total Ratings | Total Reviews | |
---|---|---|---|---|---|
0 | Samsung Galaxy A70s (Prism Crush White, 128 GB) | ₹25,999 | 4.2 | 470 Ratings | 73 Reviews |
1 | Samsung Galaxy A70s (Prism Crush Red, 128 GB) | ₹27,999 | 4.4 | 485 Ratings | 61 Reviews |
2 | Samsung Galaxy A30s (Prism Crush White, 64 GB) | ₹14,999 | 4.3 | 2,219 Ratings | 170 Reviews |
3 | Samsung Galaxy A20s (Green, 32 GB) | ₹10,999 | 4.3 | 553 Ratings | 42 Reviews |
4 | Samsung Galaxy A30s (Prism Crush Violet, 128 GB) | ₹15,999 | 4.3 | 2,219 Ratings | 170 Reviews |
# Save the table in CSV format to a file named 'mobile_data' (no extension)
# in the current working directory.
Mobile_Dataset.to_csv(path_or_buf='mobile_data')