-
Notifications
You must be signed in to change notification settings - Fork 90
Expand file tree
/
Copy pathget_xkcd_comic.py
More file actions
30 lines (26 loc) · 931 Bytes
/
get_xkcd_comic.py
File metadata and controls
30 lines (26 loc) · 931 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/python
"""Scrapes xkcd comics and saves their images."""
import io
import sys
import requests
from bs4 import BeautifulSoup
from PIL import Image
def crawler(max_pages, output_prefix=None):
    """Crawl xkcd.com backwards from the newest comic and save each image.

    Starting at the front page, this fetches a comic page, downloads the
    image found inside the ``<div id="comic">`` element, saves it, and then
    follows the page's "previous" link. It stops after ``max_pages`` pages
    or when there is no earlier comic to follow.

    Args:
        max_pages: Maximum number of comic pages to visit. Values below 1
            make the function return without doing any network access.
        output_prefix: String prepended to each image's file name to form
            the save path (e.g. ``"comics/"``; include the trailing
            separator yourself). Defaults to ``sys.argv[2]`` so existing
            command-line usage keeps working.

    Raises:
        requests.HTTPError: If a page or image request returns an error
            status.
    """
    if max_pages < 1:
        return
    if output_prefix is None:
        output_prefix = sys.argv[2]

    page = 1
    next_url = ''
    while page <= max_pages:
        response = requests.get('https://xkcd.com' + next_url, timeout=30)
        # Fail loudly instead of parsing an error page as if it were a comic.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")

        # Some comics are interactive and carry no <img>; skip the download
        # for those rather than crashing on a missing element.
        comic_div = soup.find('div', {'id': 'comic'})
        img_tag = comic_div.find('img') if comic_div is not None else None
        src = img_tag.get('src') if img_tag is not None else None
        if src:
            if src.startswith('//'):
                # Protocol-relative URL, the form the original code assumed.
                image_url = 'https:' + src
            elif src.startswith('/'):
                image_url = 'https://xkcd.com' + src
            else:
                image_url = src
            img = requests.get(image_url, timeout=30)
            img.raise_for_status()
            image = Image.open(io.BytesIO(img.content))
            # The last path segment is the file name, whatever the URL depth.
            image.save(output_prefix + image_url.rsplit('/', 1)[-1])

        prev_link = soup.find('a', {'rel': 'prev', 'accesskey': 'p'})
        next_url = prev_link.get('href') if prev_link is not None else None
        # A missing link or a bare '#' href means there is no earlier comic;
        # following '#' would wrap around to the front page again.
        if not next_url or next_url == '#':
            break
        page += 1
if __name__ == "__main__":
    # Two arguments are required: the page count (argv[1]) and the output
    # path prefix that crawler() reads from argv[2].
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " MAX_PAGES OUTPUT_PREFIX")
    crawler(int(sys.argv[1]))