-
Notifications
You must be signed in to change notification settings - Fork 90
Expand file tree
/
Copy pathamazonReviewInCsv.py
More file actions
48 lines (48 loc) · 1.85 KB
/
amazonReviewInCsv.py
File metadata and controls
48 lines (48 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import requests
import csv
import time
from random import randint
from bs4 import BeautifulSoup
# Amazon.in review scraper.
#
# Prompts for a product name, looks up the first search hit on amazon.in,
# then walks that product's paginated review listing and appends one row per
# review (author, date, rating, title, text) to "<product name>.csv".
#
# The script is written to run unchanged on Python 2 and Python 3.

LAST_PAGE = 199        # highest review page number that will be requested
MAX_EMPTY_TRIES = 5    # consecutive empty responses tolerated before stopping
FIELDNAMES = ['Author', 'date', 'Rating', 'title', 'review']

# True on Python 2, where `str` is the byte-string type.
_PY2 = str is bytes

try:
    _prompt = raw_input  # Python 2
except NameError:
    _prompt = input      # Python 3


def _field(node, tag, css_class):
    """Return the text of the first `tag` with class `css_class` under `node`.

    A missing element yields an empty string, so one incomplete review does
    not abort the whole scrape. On Python 2 the text is UTF-8 encoded,
    because that version's csv module can only write byte strings.
    """
    found = node.find(tag, {"class": css_class})
    text = found.text if found is not None else ""
    if _PY2 and not isinstance(text, str):
        text = text.encode("utf-8")
    return text


product_name = _prompt("Enter the Product Name ")

# Search for the product and take the ASIN of the first result.
search_url = "http://www.amazon.in/s/keywords=" + product_name
search_page = BeautifulSoup(requests.get(search_url).content, "lxml")
first_hit = search_page.find("li", {"id": "result_0"})
asin = first_hit.get("data-asin") if first_hit is not None else None
if not asin:
    # Without this guard an empty result page surfaced as an opaque TypeError.
    raise SystemExit("No search result found for " + product_name)

# The csv module wants a binary file on Python 2 and a text file opened with
# newline='' on Python 3.
if _PY2:
    csvfile = open(product_name + '.csv', 'ab')
else:
    csvfile = open(product_name + '.csv', 'a', newline='', encoding='utf-8')

with csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)

    # The file is opened for appending, so only emit the header when it is
    # still empty; otherwise every re-run would add a header row mid-file.
    csvfile.seek(0, 2)  # whence=2: seek to the end of the file
    if csvfile.tell() == 0:
        writer.writeheader()

    page = 1
    empty_tries = 0
    while page <= LAST_PAGE:
        review_url = ("http://www.amazon.in/product-reviews/" + asin
                      + "/ref=cm_cr_arp_d_paging_btm_2?pageNumber=" + str(page))
        response = requests.get(review_url)
        listing = BeautifulSoup(response.content, "lxml")
        reviews = listing.find_all("div", {"class": "review"})
        print("scrapping page = " + str(page))

        if not reviews:
            # An empty page is either the end of the listing or a throttled
            # response. Retry the SAME page after a short random pause and
            # give up after MAX_EMPTY_TRIES consecutive empty responses.
            empty_tries += 1
            if empty_tries == MAX_EMPTY_TRIES:
                print("NO MORE REVIEWS")
                break
            delay = randint(0, 5)
            print("delay =" + str(delay))
            time.sleep(delay)
            continue  # `page` is deliberately not advanced

        empty_tries = 0
        for item in reviews:
            row = {
                'Author': _field(item, "a", "author"),
                'date': _field(item, "span", "review-date"),
                'Rating': _field(item, "span", "a-icon-alt"),
                'title': _field(item, "a", "review-title"),
                'review': _field(item, "span", "review-text"),
            }
            try:
                writer.writerow(row)
            except UnicodeError as err:
                # Stay best-effort for text that cannot be encoded, but say
                # so instead of silently dropping the review.
                print("skipping review, cannot encode: " + str(err))
        page += 1