|
2 | 2 |
|
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.sender.MediathekCrawler;
import mServer.crawler.sender.base.CrawlerUrlDTO;
import mServer.crawler.sender.base.JsoupConnection;
import mServer.crawler.sender.zdf.tasks.*;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.NotNull;

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.HashSet;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.RecursiveTask;
23 | 26 |
|
24 | | -public class ZdfCrawler extends AbstractZdfCrawler { |
| 27 | +public class ZdfCrawler extends MediathekCrawler { |
25 | 28 |
|
26 | | - private static final int MAXIMUM_DAYS_HTML_PAST = 7; |
| 29 | + private static final Logger LOG = LogManager.getLogger(ZdfCrawler.class); |
| 30 | + private static final int MAX_LETTER_PAGEGS = 27; |
| 31 | + |
| 32 | + private static final String AUTH_KEY = "aa3noh4ohz9eeboo8shiesheec9ciequ9Quah7el"; |
| 33 | + |
| 34 | + JsoupConnection jsoupConnection = new JsoupConnection(); |
27 | 35 |
|
28 | 36 | public ZdfCrawler(FilmeSuchen ssearch, int startPrio) { |
29 | | - super(Const.ZDF, ssearch, startPrio); |
| 37 | + super(ssearch, Const.ZDF, 0, 1, startPrio); |
30 | 38 | } |
31 | 39 |
|
32 | | - @Override |
33 | | - protected @NotNull String getUrlBase() { |
34 | | - return ZdfConstants.URL_BASE; |
35 | | - } |
36 | 40 |
|
37 | 41 | @Override |
38 | | - protected String getApiUrlBase() { |
39 | | - return ZdfConstants.URL_API_BASE; |
| 42 | + protected synchronized void meldungThreadUndFertig() { |
| 43 | + // der MediathekReader ist erst fertig wenn nur noch ein Thread läuft |
| 44 | + // dann zusätzliche Sender, die der Crawler bearbeitet, beenden |
| 45 | + if (getThreads() <= 1) { |
| 46 | + mlibFilmeSuchen.meldenFertig(Const.ZDF_TIVI); |
| 47 | + mlibFilmeSuchen.meldenFertig(Const.ZDF_INFO); |
| 48 | + mlibFilmeSuchen.meldenFertig(Const.ZDF_NEO); |
| 49 | + } |
| 50 | + |
| 51 | + super.meldungThreadUndFertig(); |
40 | 52 | } |
41 | 53 |
|
42 | 54 | @Override |
43 | | - protected @NotNull String getUrlDay() { |
44 | | - return ZdfConstants.URL_DAY; |
| 55 | + protected RecursiveTask<Set<DatenFilm>> createCrawlerTask() { |
| 56 | + |
| 57 | + try { |
| 58 | + if (CrawlerTool.loadLongMax()) { |
| 59 | + Set<ZdfFilmDto> shows = new HashSet<>(); |
| 60 | + shows.addAll(getTopicsEntries()); |
| 61 | + |
| 62 | + Log.sysLog(getSendername() + " Anzahl: " + shows.size()); |
| 63 | + meldungAddMax(shows.size()); |
| 64 | + |
| 65 | + return new ZdfFilmTask(this, new ConcurrentLinkedQueue<>(shows), AUTH_KEY); |
| 66 | + } else { |
| 67 | + final ZdfConfiguration configuration = loadConfiguration(); |
| 68 | + if (configuration.getSearchAuthKey().isPresent() && configuration.getVideoAuthKey().isPresent()) { |
| 69 | + Set<CrawlerUrlDTO> shows = new HashSet<>(getDaysEntries(configuration)); |
| 70 | + Log.sysLog(getSendername() + " Anzahl: " + shows.size()); |
| 71 | + meldungAddMax(shows.size()); |
| 72 | + return new ZdfFilmDetailTask(this, getApiUrlBase(), new ConcurrentLinkedQueue<>(shows), configuration.getVideoAuthKey()); |
| 73 | + } |
| 74 | + } |
| 75 | + } catch (final InterruptedException ex) { |
| 76 | + LOG.debug("{} crawler interrupted.", getSendername(), ex); |
| 77 | + Thread.currentThread().interrupt(); |
| 78 | + } catch (final ExecutionException ex) { |
| 79 | + LOG.fatal("Exception in {} crawler.", getSendername(), ex); |
| 80 | + } |
| 81 | + return null; |
45 | 82 | } |
46 | 83 |
|
47 | | - @Override |
48 | | - public Queue<CrawlerUrlDTO> getTopicsEntries() throws ExecutionException, InterruptedException { |
| 84 | + private Queue<ZdfFilmDto> getTopicsEntries() throws ExecutionException, InterruptedException { |
| 85 | + |
| 86 | + final ConcurrentLinkedQueue<ZdfFilmDto> shows = new ConcurrentLinkedQueue<>(); |
| 87 | + |
| 88 | + ZdfLetterPageTask letterPageTask = |
| 89 | + new ZdfLetterPageTask(this, createLetterPageUrls(), AUTH_KEY); |
| 90 | + final Set<ZdfTopicUrlDto> topicUrls = forkJoinPool.submit(letterPageTask).get(); |
49 | 91 |
|
50 | | - final ConcurrentLinkedQueue<CrawlerUrlDTO> shows = new ConcurrentLinkedQueue<>(); |
| 92 | + Log.sysLog("ZDF: letter topics: " + topicUrls.size()); |
51 | 93 |
|
52 | | - final ConcurrentLinkedQueue<CrawlerUrlDTO> letterListUrl = new ConcurrentLinkedQueue<>(); |
53 | | - letterListUrl.add(new CrawlerUrlDTO(ZdfConstants.URL_TOPICS)); |
| 94 | + if (Config.getStop()) { |
| 95 | + return shows; |
| 96 | + } |
54 | 97 |
|
55 | | - final ZdfLetterListHtmlTask letterTask = new ZdfLetterListHtmlTask(this, letterListUrl); |
56 | | - final Set<CrawlerUrlDTO> letterUrls = forkJoinPool.submit(letterTask).get(); |
| 98 | + final ZdfPubFormTask pubFormTask = new ZdfPubFormTask(this, createPubFormUrls(), AUTH_KEY); |
| 99 | + final Set<ZdfPubFormResult> pubFormUrls = forkJoinPool.submit(pubFormTask).get(); |
57 | 100 |
|
58 | | - Log.sysLog("ZDF: letters: " + letterUrls.size()); |
| 101 | + Log.sysLog("ZDF: Pubform urls: " + pubFormUrls.size()); |
59 | 102 |
|
60 | 103 | if (Config.getStop()) { |
61 | 104 | return shows; |
62 | 105 | } |
63 | 106 |
|
64 | | - final ZdfTopicsPageHtmlTask topicsTask = |
65 | | - new ZdfTopicsPageHtmlTask(this, new ConcurrentLinkedQueue<>(letterUrls)); |
66 | | - final Set<CrawlerUrlDTO> topicsUrls = forkJoinPool.submit(topicsTask).get(); |
| 107 | + pubFormUrls.forEach( |
| 108 | + pubFormResult -> { |
| 109 | + topicUrls.addAll(pubFormResult.getTopics().getElements()); |
| 110 | + shows.addAll(pubFormResult.getFilms()); |
| 111 | + }); |
67 | 112 |
|
68 | | - Log.sysLog("ZDF: topics: " + topicsUrls.size()); |
| 113 | + Log.sysLog("ZDF: Pubform topics: " + pubFormUrls.size()); |
69 | 114 |
|
70 | 115 | if (Config.getStop()) { |
71 | 116 | return shows; |
72 | 117 | } |
73 | 118 |
|
74 | | - final ZdfTopicPageHtmlTask topicTask = |
75 | | - new ZdfTopicPageHtmlTask(this, new ConcurrentLinkedQueue<>(topicsUrls)); |
76 | | - shows.addAll(forkJoinPool.submit(topicTask).get()); |
| 119 | + ZdfTopicSeasonTask topicSeasonTask = |
| 120 | + new ZdfTopicSeasonTask(this, new ConcurrentLinkedQueue<>(topicUrls), AUTH_KEY); |
| 121 | + final Set<ZdfFilmDto> zdfFilmDtos = forkJoinPool.submit(topicSeasonTask).get(); |
| 122 | + shows.addAll(zdfFilmDtos); |
77 | 123 |
|
78 | 124 | return shows; |
79 | 125 | } |
80 | 126 |
|
81 | | - @Override |
82 | | - protected Collection<CrawlerUrlDTO> getExtraDaysEntries() |
83 | | - throws ExecutionException, InterruptedException { |
| 127 | + private ConcurrentLinkedQueue<ZdfPubFormDto> createPubFormUrls() { |
| 128 | + ConcurrentLinkedQueue<ZdfPubFormDto> urls = new ConcurrentLinkedQueue<>(); |
| 129 | + ZdfConstants.SPECIAL_COLLECTION_IDS.forEach((collectionId, topic) -> { |
| 130 | + final String url = |
| 131 | + ZdfUrlBuilder.buildTopicNoSeasonUrl( |
| 132 | + ZdfConstants.EPISODES_PAGE_SIZE, collectionId, ZdfConstants.NO_CURSOR); |
| 133 | + urls.add(new ZdfPubFormDto(topic, collectionId, url)); |
| 134 | + }); |
| 135 | + return urls; |
| 136 | + } |
| 137 | + |
| 138 | + private ConcurrentLinkedQueue<ZdfLetterDto> createLetterPageUrls() { |
| 139 | + final ConcurrentLinkedQueue<ZdfLetterDto> urls = new ConcurrentLinkedQueue<>(); |
| 140 | + for (int i = 0; i < MAX_LETTER_PAGEGS; i++) { |
| 141 | + urls.add(new ZdfLetterDto(i, ZdfUrlBuilder.buildLetterPageUrl(ZdfConstants.NO_CURSOR, i))); |
| 142 | + } |
84 | 143 |
|
85 | | - final ZdfDayPageHtmlTask dayTask = |
86 | | - new ZdfDayPageHtmlTask(getApiUrlBase(), this, getExtraDayUrls(), new JsoupConnection()); |
87 | | - return forkJoinPool.submit(dayTask).get(); |
| 144 | + return urls; |
88 | 145 | } |
89 | 146 |
|
90 | | - private ConcurrentLinkedQueue<CrawlerUrlDTO> getExtraDayUrls() { |
91 | | - final ConcurrentLinkedQueue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>(); |
92 | | - for (int i = 0; i <= MAXIMUM_DAYS_HTML_PAST; i++) { |
| 147 | + private ZdfConfiguration loadConfiguration() throws ExecutionException, InterruptedException { |
| 148 | + final ZdfIndexPageTask task = new ZdfIndexPageTask(this, getUrlBase(), jsoupConnection); |
| 149 | + return forkJoinPool.submit(task).get(); |
| 150 | + } |
| 151 | + |
| 152 | + private Set<CrawlerUrlDTO> getDaysEntries(ZdfConfiguration configuration) |
| 153 | + throws InterruptedException, ExecutionException { |
| 154 | + final ZdfDayPageTask dayTask |
| 155 | + = new ZdfDayPageTask(this, getApiUrlBase(), getDayUrls(), configuration.getSearchAuthKey()); |
| 156 | + final Set<CrawlerUrlDTO> shows = forkJoinPool.submit(dayTask).get(); |
| 157 | + |
| 158 | + Log.sysLog(getSendername() + ": days entries: " + shows.size()); |
| 159 | + |
| 160 | + return shows; |
| 161 | + } |
| 162 | + |
| 163 | + private ConcurrentLinkedQueue<CrawlerUrlDTO> getDayUrls() { |
93 | 164 |
|
94 | | - final LocalDateTime local = LocalDateTime.now().minus(i, ChronoUnit.DAYS); |
| 165 | + int daysPast = 7; |
| 166 | + int daysFuture = 5; |
| 167 | + |
| 168 | + final ConcurrentLinkedQueue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>(); |
| 169 | + for (int i = 0; |
| 170 | + i |
| 171 | + <= daysPast |
| 172 | + + daysFuture; |
| 173 | + i++) { |
| 174 | + |
| 175 | + final LocalDateTime local |
| 176 | + = LocalDateTime.now() |
| 177 | + .plus(daysFuture, ChronoUnit.DAYS) |
| 178 | + .minus(i, ChronoUnit.DAYS); |
95 | 179 | final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")); |
96 | | - final String url = String.format(ZdfConstants.URL_HTML_DAY, date); |
| 180 | + final String url = String.format(getUrlDay(), date, date); |
97 | 181 | urls.add(new CrawlerUrlDTO(url)); |
98 | 182 | } |
99 | 183 |
|
100 | 184 | return urls; |
101 | 185 | } |
| 186 | + private @NotNull String getUrlDay() { |
| 187 | + return ZdfConstants.URL_DAY; |
| 188 | + } |
| 189 | + |
| 190 | + private String getApiUrlBase() { |
| 191 | + return ZdfConstants.URL_API_BASE; |
| 192 | + } |
| 193 | + |
| 194 | + private @NotNull String getUrlBase() { |
| 195 | + return ZdfConstants.URL_BASE; |
| 196 | + } |
102 | 197 | } |
0 commit comments