Lecture-Series-Python/Awesome-Scripts/quora_scraper.py at c297fabd1059e8d3db18401ba58294b49689f622 · Github-Classroom-Cybros/Lecture-Series-Python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Scrapes the Quora question and its top 3 answers given a quora link

from urlparse import urlparse
from bs4 import BeautifulSoup
import requests
import io
import json

URL = 'https://www.quora.com/What-is-the-toughest-sketch-you-have-drawn'

def scrape_quora(url):
    source_code = requests.get(url).text
    soup = BeautifulSoup(source_code, "html.parser")
    output = dict()
    output['question'] = soup.find('div', {'class': 'question_text_edit'}).find('span', {'class': 'rendered_qtext'}).get_text()
    output['answers'] = list()
    for answer in soup.findAll('div', {'class': 'AnswerBase'}):
        ans = dict()
        try:
            ans['author'] = answer.find('a', {'class': 'user'}).get_text()
        except:
            ans['author'] = "Hidden"
        ans['content'] = answer.find('span', {'class': 'rendered_qtext'}).get_text()
        output['answers'].append(ans)
    json_object = json.dumps(output, ensure_ascii=False, indent=4)
    io.open('quora_output.json', 'w').write(json_object)

def main(url):`
    hostname = str(urlparse(str(url)).hostname.split('.')[1]).lower()
    if hostname == "quora":
        scrape_quora(url)
    else:
        print "Give a valid URL"

main(URL)