11package de .mediathekview .mserver .crawler .zdf ;
22
3+ import de .mediathekview .mlib .daten .Film ;
34import de .mediathekview .mlib .daten .Sender ;
45import de .mediathekview .mlib .messages .listener .MessageListener ;
56import de .mediathekview .mserver .base .config .MServerConfigManager ;
7+ import de .mediathekview .mserver .base .messages .ServerMessages ;
8+ import de .mediathekview .mserver .crawler .basic .AbstractCrawler ;
69import de .mediathekview .mserver .crawler .basic .CrawlerUrlDTO ;
7- import de .mediathekview .mserver .crawler .zdf .tasks .ZdfDayPageHtmlTask ;
8- import de .mediathekview .mserver .crawler .zdf .tasks .ZdfLetterListHtmlTask ;
9- import de .mediathekview .mserver .crawler .zdf .tasks .ZdfTopicPageHtmlTask ;
10- import de .mediathekview .mserver .crawler .zdf .tasks .ZdfTopicsPageHtmlTask ;
10+ import de .mediathekview .mserver .crawler .zdf .tasks .*;
1111import de .mediathekview .mserver .progress .listeners .SenderProgressListener ;
12- import org .jetbrains .annotations .NotNull ;
1312
1413import java .time .LocalDateTime ;
1514import java .time .format .DateTimeFormatter ;
1615import java .time .temporal .ChronoUnit ;
17- import java .util .Collection ;
18- import java .util .Queue ;
19- import java .util .Set ;
16+ import java .util .*;
2017import java .util .concurrent .ConcurrentLinkedQueue ;
2118import java .util .concurrent .ExecutionException ;
2219import java .util .concurrent .ForkJoinPool ;
20+ import java .util .concurrent .RecursiveTask ;
21+ import org .apache .logging .log4j .LogManager ;
22+ import org .apache .logging .log4j .Logger ;
23+ import org .jetbrains .annotations .NotNull ;
2324
24- public class ZdfCrawler extends AbstractZdfCrawler {
25+ public class ZdfCrawler extends AbstractCrawler {
2526
26- private static final int MAXIMUM_DAYS_HTML_PAST = 7 ;
27+ private static final Logger LOG = LogManager .getLogger (ZdfCrawler .class );
28+ private static final int MAX_LETTER_PAGEGS = 27 ;
2729
28- public ZdfCrawler (
29- final ForkJoinPool aForkJoinPool ,
30- final Collection <MessageListener > aMessageListeners ,
31- final Collection <SenderProgressListener > aProgressListeners ,
32- final MServerConfigManager rootConfig ) {
33- super (aForkJoinPool , aMessageListeners , aProgressListeners , rootConfig , ZdfConstants .PARTNER_TO_SENDER );
34- }
30+ private static final String AUTH_KEY = "aa3noh4ohz9eeboo8shiesheec9ciequ9Quah7el" ;
3531
36- @ Override
37- protected @ NotNull String getUrlBase () {
38- return ZdfConstants .URL_BASE ;
39- }
40-
41- @ Override
42- protected String getApiUrlBase () {
43- return ZdfConstants .URL_API_BASE ;
44- }
45-
46- @ Override
47- protected @ NotNull String getUrlDay () {
48- return ZdfConstants .URL_DAY ;
/**
 * Creates the ZDF crawler and forwards all collaborators unchanged to {@link AbstractCrawler}.
 *
 * @param aForkJoinPool the pool passed to the base crawler
 * @param aMessageListeners the message listeners passed to the base crawler
 * @param aProgressListeners the progress listeners passed to the base crawler
 * @param rootConfig the server configuration manager passed to the base crawler
 */
public ZdfCrawler(
    final ForkJoinPool aForkJoinPool,
    final Collection<MessageListener> aMessageListeners,
    final Collection<SenderProgressListener> aProgressListeners,
    final MServerConfigManager rootConfig) {
  super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig);
}
5039
5140 @ Override
@@ -54,52 +43,138 @@ public Sender getSender() {
5443 }
5544
5645 @ Override
57- public Queue <CrawlerUrlDTO > getTopicsEntries () throws ExecutionException , InterruptedException {
46+ protected RecursiveTask <Set <Film >> createCrawlerTask () {
47+
48+ try {
49+
50+ if (Boolean .TRUE .equals (crawlerConfig .getTopicsSearchEnabled ())) {
51+ final Set <ZdfFilmDto > shows = new HashSet <>();
52+
53+ ZdfLetterPageTask letterPageTask =
54+ new ZdfLetterPageTask (this , createLetterPageUrls (), AUTH_KEY );
55+ final Set <ZdfTopicUrlDto > topicUrls = forkJoinPool .submit (letterPageTask ).get ();
56+
57+ printMessage (
58+ ServerMessages .DEBUG_ALL_SENDUNG_FOLGEN_COUNT , getSender ().getName (), topicUrls .size ());
59+
60+ final ZdfPubFormTask pubFormTask = new ZdfPubFormTask (this , createPubFormUrls (), AUTH_KEY );
61+ final Set <ZdfPubFormResult > pubFormUrls = forkJoinPool .submit (pubFormTask ).get ();
62+
63+ printMessage (
64+ ServerMessages .DEBUG_ALL_SENDUNG_FOLGEN_COUNT ,
65+ getSender ().getName () + " - PubForm:" ,
66+ pubFormUrls .size ());
67+
68+ pubFormUrls .forEach (
69+ pubFormResult -> {
70+ topicUrls .addAll (pubFormResult .getTopics ().getElements ());
71+ shows .addAll (pubFormResult .getFilms ());
72+ });
73+ printMessage (
74+ ServerMessages .DEBUG_ALL_SENDUNG_FOLGEN_COUNT ,
75+ getSender ().getName () + " - PubForm-Topics integrated: " ,
76+ topicUrls .size ());
77+
78+ ZdfTopicSeasonTask topicSeasonTask =
79+ new ZdfTopicSeasonTask (this , new ConcurrentLinkedQueue <>(topicUrls ), AUTH_KEY );
80+ shows .addAll (forkJoinPool .submit (topicSeasonTask ).get ());
81+
82+ printMessage (
83+ ServerMessages .DEBUG_ALL_SENDUNG_FOLGEN_COUNT , getSender ().getName (), shows .size ());
84+
85+ return new ZdfFilmTask (this , new ConcurrentLinkedQueue <>(shows ), AUTH_KEY );
86+ } else {
87+ final ZdfConfiguration configuration = loadConfiguration ();
88+ if (configuration .getSearchAuthKey ().isPresent ()
89+ && configuration .getVideoAuthKey ().isPresent ()) {
90+ Set <CrawlerUrlDTO > shows = new HashSet <>(getDaysEntries (configuration ));
91+ printMessage (
92+ ServerMessages .DEBUG_ALL_SENDUNG_FOLGEN_COUNT , getSender ().getName (), shows .size ());
93+
94+ return new ZdfFilmDetailTask (
95+ this ,
96+ getApiUrlBase (),
97+ new ConcurrentLinkedQueue <>(shows ),
98+ configuration .getVideoAuthKey ().orElse ("" ), ZdfConstants .PARTNER_TO_SENDER );
99+ }
100+ }
101+ } catch (final InterruptedException ex ) {
102+ LOG .debug ("{} crawler interrupted." , getSender ().getName (), ex );
103+ Thread .currentThread ().interrupt ();
104+ } catch (final ExecutionException ex ) {
105+ LOG .fatal ("Exception in {} crawler." , getSender ().getName (), ex );
106+ }
107+ return null ;
108+ }
58109
59- final Queue <CrawlerUrlDTO > letterListUrl = new ConcurrentLinkedQueue <>();
60- letterListUrl .add (new CrawlerUrlDTO (ZdfConstants .URL_TOPICS ));
110+ private Queue <ZdfPubFormDto > createPubFormUrls () {
111+ Queue <ZdfPubFormDto > urls = new ConcurrentLinkedQueue <>();
112+ ZdfConstants .SPECIAL_COLLECTION_IDS .forEach (
113+ (collectionId , topic ) -> {
114+ final String url =
115+ ZdfUrlBuilder .buildTopicNoSeasonUrl (
116+ ZdfConstants .EPISODES_PAGE_SIZE , collectionId , ZdfConstants .NO_CURSOR );
117+ urls .add (new ZdfPubFormDto (topic , collectionId , url ));
118+ });
119+ return urls ;
120+ }
61121
62- final ZdfLetterListHtmlTask letterTask = new ZdfLetterListHtmlTask (this , letterListUrl );
63- final Set <CrawlerUrlDTO > letterUrls = forkJoinPool .submit (letterTask ).get ();
122+ private Queue <ZdfLetterDto > createLetterPageUrls () {
123+ final Queue <ZdfLetterDto > urls = new ConcurrentLinkedQueue <>();
124+ for (int i = 0 ; i < MAX_LETTER_PAGEGS ; i ++) {
125+ urls .add (new ZdfLetterDto (i , ZdfUrlBuilder .buildLetterPageUrl (ZdfConstants .NO_CURSOR , i )));
126+ }
64127
65- final ZdfTopicsPageHtmlTask topicsTask =
66- new ZdfTopicsPageHtmlTask (this , new ConcurrentLinkedQueue <>(letterUrls ));
67- final Set <CrawlerUrlDTO > topicsUrls = forkJoinPool .submit (topicsTask ).get ();
128+ return urls ;
129+ }
68130
69- final ZdfTopicPageHtmlTask topicTask =
70- new ZdfTopicPageHtmlTask (this , new ConcurrentLinkedQueue <>( topicsUrls ));
71- return new ConcurrentLinkedQueue <>( forkJoinPool .submit (topicTask ).get () );
131+ protected ZdfConfiguration loadConfiguration () throws ExecutionException , InterruptedException {
132+ final ZdfIndexPageTask task = new ZdfIndexPageTask (this , getUrlBase ( ));
133+ return forkJoinPool .submit (task ).get ();
72134 }
73135
74- @ Override
75- protected Collection <CrawlerUrlDTO > getExtraDaysEntries ()
76- throws ExecutionException , InterruptedException {
136+ private Set <CrawlerUrlDTO > getDaysEntries (final ZdfConfiguration configuration )
137+ throws InterruptedException , ExecutionException {
138+ final ZdfDayPageTask dayTask =
139+ new ZdfDayPageTask (
140+ this , getApiUrlBase (), getDayUrls (), configuration .getSearchAuthKey ().orElse (null ));
141+ final Set <CrawlerUrlDTO > shows = forkJoinPool .submit (dayTask ).get ();
77142
78- final ZdfDayPageHtmlTask dayTask =
79- new ZdfDayPageHtmlTask (getApiUrlBase (), this , getExtraDayUrls ());
80- return forkJoinPool .submit (dayTask ).get ();
143+ printMessage (
144+ ServerMessages .DEBUG_ALL_SENDUNG_FOLGEN_COUNT , getSender ().getName (), shows .size ());
145+
146+ return shows ;
81147 }
82148
83- private Queue <CrawlerUrlDTO > getExtraDayUrls () {
149+ private Queue <CrawlerUrlDTO > getDayUrls () {
84150 final Queue <CrawlerUrlDTO > urls = new ConcurrentLinkedQueue <>();
85- for (int i = 0 ; i <= getMaximumDaysPast (); i ++) {
86-
87- final LocalDateTime local = LocalDateTime .now ().minus (i , ChronoUnit .DAYS );
151+ for (int i = 0 ;
152+ i
153+ <= crawlerConfig .getMaximumDaysForSendungVerpasstSection ()
154+ + crawlerConfig .getMaximumDaysForSendungVerpasstSectionFuture ();
155+ i ++) {
156+
157+ final LocalDateTime local =
158+ LocalDateTime .now ()
159+ .plus (crawlerConfig .getMaximumDaysForSendungVerpasstSectionFuture (), ChronoUnit .DAYS )
160+ .minus (i , ChronoUnit .DAYS );
88161 final String date = local .format (DateTimeFormatter .ofPattern ("yyyy-MM-dd" ));
89- final String url = String .format (ZdfConstants . URL_HTML_DAY , date );
162+ final String url = String .format (getUrlDay (), date , date );
90163 urls .add (new CrawlerUrlDTO (url ));
91164 }
92165
93166 return urls ;
94167 }
95168
96- private int getMaximumDaysPast () {
97- final Integer maximumDaysForSendungVerpasstSection =
98- crawlerConfig .getMaximumDaysForSendungVerpasstSection ();
99- if (maximumDaysForSendungVerpasstSection == null
100- || maximumDaysForSendungVerpasstSection > MAXIMUM_DAYS_HTML_PAST ) {
101- return MAXIMUM_DAYS_HTML_PAST ;
102- }
103- return maximumDaysForSendungVerpasstSection ;
169+ private @ NotNull String getUrlBase () {
170+ return ZdfConstants .URL_BASE ;
171+ }
172+
173+ private String getApiUrlBase () {
174+ return ZdfConstants .URL_API_BASE ;
175+ }
176+
177+ private @ NotNull String getUrlDay () {
178+ return ZdfConstants .URL_DAY ;
104179 }
105180}
0 commit comments