|
2 | 2 |
|
3 | 3 | # update/cfi.py - script to download CFI code list from the SIX group |
4 | 4 | # |
5 | | -# Copyright (C) 2022-2025 Arthur de Jong |
| 5 | +# Copyright (C) 2022-2026 Arthur de Jong |
6 | 6 | # |
7 | 7 | # This library is free software; you can redistribute it and/or |
8 | 8 | # modify it under the terms of the GNU Lesser General Public |
|
33 | 33 | download_url = 'https://www.six-group.com/en/products-services/financial-information/data-standards.html' |
34 | 34 |
|
35 | 35 |
|
| 36 | +# The user agent that will be passed in requests |
| 37 | +user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)' |
| 38 | + |
| 39 | + |
36 | 40 | def normalise(value): |
37 | 41 | """Clean and minimise attribute names and values.""" |
38 | 42 | return re.sub(r' *[(\[\n].*', '', value, flags=re.MULTILINE).strip() |
@@ -76,14 +80,14 @@ def print_attributes(attributes, index=0): |
76 | 80 |
|
77 | 81 | if __name__ == '__main__': |
78 | 82 | # Download the page that contains the link to the current XLS file |
79 | | - response = requests.get(download_url, timeout=30) |
| 83 | + response = requests.get(download_url, timeout=30, headers={'User-Agent': user_agent}) |
80 | 84 | response.raise_for_status() |
81 | 85 | # Find the download link |
82 | 86 | document = lxml.html.document_fromstring(response.content) |
83 | 87 | links = [a.get('href') for a in document.findall('.//a[@href]')] |
84 | 88 | link_url = next(a for a in links if re.match(r'.*/cfi/.*xlsx?$', a)) |
85 | 89 | # Download and parse the spreadsheet |
86 | | - response = requests.get(link_url, timeout=30) |
| 90 | + response = requests.get(link_url, timeout=30, headers={'User-Agent': user_agent}) |
87 | 91 | response.raise_for_status() |
88 | 92 | workbook = openpyxl.load_workbook(io.BytesIO(response.content), read_only=True) |
89 | 93 |
|
|
0 commit comments