Lecture-Series-Python/Awesome-Scripts/get_xkcd_comic.py at 8de831c4feeba55aeadf7c77ffcf30e23bf763a5 · Github-Classroom-Cybros/Lecture-Series-Python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/python
"""Scrapes xkcd comics and saves their images."""

import io
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image

def crawler(max_pages, output_prefix=None, timeout=30):
    """Crawl xkcd.com from the front page backwards and save each comic image.

    Starts at ``https://xkcd.com`` and follows the page's "prev" link from
    one page to the next. For every visited page, the first ``<img>`` inside
    the ``<div id="comic">`` element is downloaded, decoded with PIL and
    saved to ``output_prefix + <last path component of the image URL>``.
    Pages without such an image are skipped but still count as visited.

    Args:
        max_pages: Maximum number of pages to visit. Values below 1 visit
            nothing.
        output_prefix: String prepended verbatim to each image file name;
            include a trailing path separator to target a directory.
            Defaults to ``sys.argv[2]`` so the command-line usage is
            unchanged.
        timeout: Seconds to wait for each HTTP response before giving up.

    Raises:
        requests.RequestException: If a request fails, times out, or
            returns an error HTTP status.
    """
    if max_pages < 1:
        return
    if output_prefix is None:
        # Resolved lazily so callers that pass a prefix never touch argv.
        output_prefix = sys.argv[2]

    base_url = 'https://xkcd.com'
    url = base_url
    page = 1
    while page <= max_pages:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")

        # Look the image up directly inside the comic container instead of
        # stringifying the container and parsing it a second time.
        comic = soup.find('div', {'id': 'comic'})
        img_tag = comic.find('img') if comic is not None else None
        src = img_tag.get('src') if img_tag is not None else None
        if src:
            # urljoin copes with protocol-relative ("//host/..."), absolute
            # and site-relative sources alike.
            image_url = urljoin(url, src)
            image_response = requests.get(image_url, timeout=timeout)
            image_response.raise_for_status()
            image = Image.open(io.BytesIO(image_response.content))
            # Use the last path component so the name does not depend on
            # how deep the image URL happens to be.
            image.save(output_prefix + image_url.rsplit('/', 1)[-1])

        prev_link = soup.find('a', {'rel': 'prev', 'accesskey': 'p'})
        prev_href = prev_link.get('href') if prev_link is not None else None
        if not prev_href or prev_href == '#':
            # No earlier page to visit; following a bare '#' would only
            # re-fetch the front page and start the crawl over.
            break
        url = urljoin(base_url, prev_href)
        page += 1

if __name__ == "__main__":
    # Run the crawl only when executed as a script, so importing this module
    # does not start network requests.
    # argv[1] is the number of pages to visit; argv[2] is read by crawler()
    # as the prefix prepended to every saved image file name.
    if len(sys.argv) < 3:
        sys.exit("usage: {} MAX_PAGES OUTPUT_PREFIX".format(sys.argv[0]))
    crawler(int(sys.argv[1]))