Skip to content

Commit 02e381d

Browse files
authored
new ZDF Mediathek (#1049)
1 parent b501300 commit 02e381d

46 files changed

Lines changed: 11382 additions & 960 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfConfiguration.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@
55

66
public class ZdfConfiguration {
77

8+
public static final String AUTH_KEY_SEARCH = "5bb200097db507149612d7d983131d06c79706d5";
9+
public static final String AUTH_KEY_VIDEO = "20c238b5345eb428d01ae5c748c5076f033dfcc7";
10+
811
@Nullable private String searchAuthKey;
912
@Nullable private String videoAuthKey;
1013

1114
public ZdfConfiguration() {
12-
searchAuthKey = null;
13-
videoAuthKey = null;
15+
searchAuthKey = AUTH_KEY_SEARCH;
16+
videoAuthKey = AUTH_KEY_VIDEO;
1417
}
1518

1619
public Optional<String> getSearchAuthKey() {

src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfConstants.java

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,41 @@ public final class ZdfConstants {
1515
public static final String URL_TOPICS = URL_BASE + "/sendungen-a-z";
1616
/** Base url of the ZDF api. */
1717
public static final String URL_API_BASE = "https://api.zdf.de";
18+
19+
public static final String NO_CURSOR = "null";
20+
public static final int EPISODES_PAGE_SIZE = 24;
21+
22+
public static final String URL_LETTER_PAGE =
23+
URL_API_BASE
24+
+ "/graphql?operationName=specialPageByCanonical&" +
25+
"variables=%s&" +
26+
"extensions=%s";
27+
public static final String URL_LETTER_PAGE_VARIABLES =
28+
"{\"staticGridClusterPageSize\":6,\"staticGridClusterOffset\":0,\"canonical\":\"sendungen-100\",\"endCursor\":%s,\"tabIndex\":%d,\"itemsFilter\":{\"teaserUsageNotIn\":[\"TIVI_HBBTV_ONLY\"]}}";
29+
public static final String URL_LETTER_PAGE_EXTENSIONS =
30+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"7d33167e7700ba57779f48b28b5d485c8ada0a1d5dfbdc8a261b7bd62ca28ba7\"}}";
31+
32+
public static final String URL_TOPIC_PAGE = URL_API_BASE + "/graphql?operationName=seasonByCanonical&" +
33+
"variables=%s&" +
34+
"extensions=%s";
35+
public static final String URL_TOPIC_PAGE_VARIABLES = "{\"seasonIndex\":%d,\"episodesPageSize\":%d,\"canonical\":\"%s\",\"sortBy\":[{\"field\":\"EDITORIAL_DATE\",\"direction\":\"DESC\"}]}";
36+
public static final String URL_TOPIC_PAGE_VARIABLES_WITH_CURSOR = "{\"seasonIndex\":%d,\"episodesPageSize\":%d,\"canonical\":\"%s\",\"sortBy\":[{\"field\":\"EDITORIAL_DATE\",\"direction\":\"DESC\"}],\"episodesAfter\":\"%s\"}";
37+
public static final String URL_TOPIC_PAGE_EXTENSIONS =
38+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"9412a0f4ac55dc37d46975d461ec64bfd14380d815df843a1492348f77b5c99a\"}}";
39+
40+
public static final String URL_TOPIC_PAGE_NO_SEASON = URL_API_BASE + "/graphql?operationName=getMetaCollectionContent&" +
41+
"variables=%s&" +
42+
"extensions=%s";
43+
public static final String URL_TOPIC_PAGE_NO_SEASON_VARIABLES =
44+
"{\"collectionId\":\"%s\",\"input\":{\"appId\":\"ffw-mt-web-879d5c17\",\"filters\":{\"contentOwner\":[],\"fsk\":[],\"language\":[]},\"pagination\":{\"first\":%d,\"after\":%s},\"user\":{\"abGroup\":\"gruppe-d\",\"userSegment\":\"segment_0\"},\"tabId\":null}}";
45+
public static final String URL_TOPIC_PAGE_NO_SEASON_EXTENSIONS =
46+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"c85ca9c636258a65961a81124abd0dbef06ab97eaca9345cbdfde23b54117242\"}}";
47+
48+
public static final String URL_FILM_ENRY =
49+
URL_API_BASE + "/graphql?operationName=GetVideoMetaByCanonical&"
50+
+ "variables={\"canonical\"=\"%s\"}&"
51+
+ "extensions={\"persistedQuery\"={\"version\"=1,\"sha256Hash\"=\"737eb4421d274259baa3051929f4ecfef2d2afc59f12a9d82285c14dbdd1dd0d\"}}";
52+
1853
/** Url to search the films. */
1954
public static final String URL_DAY =
2055
URL_API_BASE
@@ -34,13 +69,19 @@ public final class ZdfConstants {
3469
public static final String LANGUAGE_GERMAN_DGS = LANGUAGE_GERMAN + LANGUAGE_SUFFIX_DGS;
3570

3671
public static final Map<String, Sender> PARTNER_TO_SENDER = new HashMap<>();
72+
public static final Map<String, String> SPECIAL_COLLECTION_IDS = new HashMap<>();
3773

3874
static {
39-
PARTNER_TO_SENDER.put("ZDFinfo", Sender.ZDF);
40-
PARTNER_TO_SENDER.put("ZDFneo", Sender.ZDF);
41-
PARTNER_TO_SENDER.put("ZDF", Sender.ZDF);
75+
PARTNER_TO_SENDER.put("ZDFinfo", Sender.ZDF_INFO);
76+
PARTNER_TO_SENDER.put("ZDFneo", Sender.ZDF_NEO);
77+
PARTNER_TO_SENDER.put("ZDF", Sender.ZDF);
4278
PARTNER_TO_SENDER.put("EMPTY", Sender.ZDF);
79+
PARTNER_TO_SENDER.put("ZDFtivi", Sender.ZDF_TIVI);
4380
// IGNORED Sender [KI.KA, WDR, PHOENIX, one, HR, 3sat, SWR, arte, BR, RBB, ARD, daserste, alpha, MDR, radiobremen, funk, ZDF, NDR, SR]
81+
82+
SPECIAL_COLLECTION_IDS.put("pub-form-10004", "Filme");
83+
SPECIAL_COLLECTION_IDS.put("pub-form-10003", "Dokus");
84+
SPECIAL_COLLECTION_IDS.put("pub-form-10010", "Serien");
4485
}
4586

4687
private ZdfConstants() {}
src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java

Lines changed: 135 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,40 @@
11
package de.mediathekview.mserver.crawler.zdf;
22

3+
import de.mediathekview.mlib.daten.Film;
34
import de.mediathekview.mlib.daten.Sender;
45
import de.mediathekview.mlib.messages.listener.MessageListener;
56
import de.mediathekview.mserver.base.config.MServerConfigManager;
7+
import de.mediathekview.mserver.base.messages.ServerMessages;
8+
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
69
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
7-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfDayPageHtmlTask;
8-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfLetterListHtmlTask;
9-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfTopicPageHtmlTask;
10-
import de.mediathekview.mserver.crawler.zdf.tasks.ZdfTopicsPageHtmlTask;
10+
import de.mediathekview.mserver.crawler.zdf.tasks.*;
1111
import de.mediathekview.mserver.progress.listeners.SenderProgressListener;
12-
import org.jetbrains.annotations.NotNull;
1312

1413
import java.time.LocalDateTime;
1514
import java.time.format.DateTimeFormatter;
1615
import java.time.temporal.ChronoUnit;
17-
import java.util.Collection;
18-
import java.util.Queue;
19-
import java.util.Set;
16+
import java.util.*;
2017
import java.util.concurrent.ConcurrentLinkedQueue;
2118
import java.util.concurrent.ExecutionException;
2219
import java.util.concurrent.ForkJoinPool;
20+
import java.util.concurrent.RecursiveTask;
21+
import org.apache.logging.log4j.LogManager;
22+
import org.apache.logging.log4j.Logger;
23+
import org.jetbrains.annotations.NotNull;
2324

24-
public class ZdfCrawler extends AbstractZdfCrawler {
25+
public class ZdfCrawler extends AbstractCrawler {
2526

26-
private static final int MAXIMUM_DAYS_HTML_PAST = 7;
27+
private static final Logger LOG = LogManager.getLogger(ZdfCrawler.class);
28+
private static final int MAX_LETTER_PAGEGS = 27;
2729

28-
public ZdfCrawler(
29-
final ForkJoinPool aForkJoinPool,
30-
final Collection<MessageListener> aMessageListeners,
31-
final Collection<SenderProgressListener> aProgressListeners,
32-
final MServerConfigManager rootConfig) {
33-
super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig, ZdfConstants.PARTNER_TO_SENDER);
34-
}
30+
private static final String AUTH_KEY = "aa3noh4ohz9eeboo8shiesheec9ciequ9Quah7el";
3531

36-
@Override
37-
protected @NotNull String getUrlBase() {
38-
return ZdfConstants.URL_BASE;
39-
}
40-
41-
@Override
42-
protected String getApiUrlBase() {
43-
return ZdfConstants.URL_API_BASE;
44-
}
45-
46-
@Override
47-
protected @NotNull String getUrlDay() {
48-
return ZdfConstants.URL_DAY;
32+
public ZdfCrawler(
33+
ForkJoinPool aForkJoinPool,
34+
Collection<MessageListener> aMessageListeners,
35+
Collection<SenderProgressListener> aProgressListeners,
36+
MServerConfigManager rootConfig) {
37+
super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig);
4938
}
5039

5140
@Override
@@ -54,52 +43,138 @@ public Sender getSender() {
5443
}
5544

5645
@Override
57-
public Queue<CrawlerUrlDTO> getTopicsEntries() throws ExecutionException, InterruptedException {
46+
protected RecursiveTask<Set<Film>> createCrawlerTask() {
47+
48+
try {
49+
50+
if (Boolean.TRUE.equals(crawlerConfig.getTopicsSearchEnabled())) {
51+
final Set<ZdfFilmDto> shows = new HashSet<>();
52+
53+
ZdfLetterPageTask letterPageTask =
54+
new ZdfLetterPageTask(this, createLetterPageUrls(), AUTH_KEY);
55+
final Set<ZdfTopicUrlDto> topicUrls = forkJoinPool.submit(letterPageTask).get();
56+
57+
printMessage(
58+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), topicUrls.size());
59+
60+
final ZdfPubFormTask pubFormTask = new ZdfPubFormTask(this, createPubFormUrls(), AUTH_KEY);
61+
final Set<ZdfPubFormResult> pubFormUrls = forkJoinPool.submit(pubFormTask).get();
62+
63+
printMessage(
64+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT,
65+
getSender().getName() + " - PubForm:",
66+
pubFormUrls.size());
67+
68+
pubFormUrls.forEach(
69+
pubFormResult -> {
70+
topicUrls.addAll(pubFormResult.getTopics().getElements());
71+
shows.addAll(pubFormResult.getFilms());
72+
});
73+
printMessage(
74+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT,
75+
getSender().getName() + " - PubForm-Topics integrated: ",
76+
topicUrls.size());
77+
78+
ZdfTopicSeasonTask topicSeasonTask =
79+
new ZdfTopicSeasonTask(this, new ConcurrentLinkedQueue<>(topicUrls), AUTH_KEY);
80+
shows.addAll(forkJoinPool.submit(topicSeasonTask).get());
81+
82+
printMessage(
83+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
84+
85+
return new ZdfFilmTask(this, new ConcurrentLinkedQueue<>(shows), AUTH_KEY);
86+
} else {
87+
final ZdfConfiguration configuration = loadConfiguration();
88+
if (configuration.getSearchAuthKey().isPresent()
89+
&& configuration.getVideoAuthKey().isPresent()) {
90+
Set<CrawlerUrlDTO> shows = new HashSet<>(getDaysEntries(configuration));
91+
printMessage(
92+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
93+
94+
return new ZdfFilmDetailTask(
95+
this,
96+
getApiUrlBase(),
97+
new ConcurrentLinkedQueue<>(shows),
98+
configuration.getVideoAuthKey().orElse(""), ZdfConstants.PARTNER_TO_SENDER);
99+
}
100+
}
101+
} catch (final InterruptedException ex) {
102+
LOG.debug("{} crawler interrupted.", getSender().getName(), ex);
103+
Thread.currentThread().interrupt();
104+
} catch (final ExecutionException ex) {
105+
LOG.fatal("Exception in {} crawler.", getSender().getName(), ex);
106+
}
107+
return null;
108+
}
58109

59-
final Queue<CrawlerUrlDTO> letterListUrl = new ConcurrentLinkedQueue<>();
60-
letterListUrl.add(new CrawlerUrlDTO(ZdfConstants.URL_TOPICS));
110+
private Queue<ZdfPubFormDto> createPubFormUrls() {
111+
Queue<ZdfPubFormDto> urls = new ConcurrentLinkedQueue<>();
112+
ZdfConstants.SPECIAL_COLLECTION_IDS.forEach(
113+
(collectionId, topic) -> {
114+
final String url =
115+
ZdfUrlBuilder.buildTopicNoSeasonUrl(
116+
ZdfConstants.EPISODES_PAGE_SIZE, collectionId, ZdfConstants.NO_CURSOR);
117+
urls.add(new ZdfPubFormDto(topic, collectionId, url));
118+
});
119+
return urls;
120+
}
61121

62-
final ZdfLetterListHtmlTask letterTask = new ZdfLetterListHtmlTask(this, letterListUrl);
63-
final Set<CrawlerUrlDTO> letterUrls = forkJoinPool.submit(letterTask).get();
122+
private Queue<ZdfLetterDto> createLetterPageUrls() {
123+
final Queue<ZdfLetterDto> urls = new ConcurrentLinkedQueue<>();
124+
for (int i = 0; i < MAX_LETTER_PAGEGS; i++) {
125+
urls.add(new ZdfLetterDto(i, ZdfUrlBuilder.buildLetterPageUrl(ZdfConstants.NO_CURSOR, i)));
126+
}
64127

65-
final ZdfTopicsPageHtmlTask topicsTask =
66-
new ZdfTopicsPageHtmlTask(this, new ConcurrentLinkedQueue<>(letterUrls));
67-
final Set<CrawlerUrlDTO> topicsUrls = forkJoinPool.submit(topicsTask).get();
128+
return urls;
129+
}
68130

69-
final ZdfTopicPageHtmlTask topicTask =
70-
new ZdfTopicPageHtmlTask(this, new ConcurrentLinkedQueue<>(topicsUrls));
71-
return new ConcurrentLinkedQueue<>(forkJoinPool.submit(topicTask).get());
131+
protected ZdfConfiguration loadConfiguration() throws ExecutionException, InterruptedException {
132+
final ZdfIndexPageTask task = new ZdfIndexPageTask(this, getUrlBase());
133+
return forkJoinPool.submit(task).get();
72134
}
73135

74-
@Override
75-
protected Collection<CrawlerUrlDTO> getExtraDaysEntries()
76-
throws ExecutionException, InterruptedException {
136+
private Set<CrawlerUrlDTO> getDaysEntries(final ZdfConfiguration configuration)
137+
throws InterruptedException, ExecutionException {
138+
final ZdfDayPageTask dayTask =
139+
new ZdfDayPageTask(
140+
this, getApiUrlBase(), getDayUrls(), configuration.getSearchAuthKey().orElse(null));
141+
final Set<CrawlerUrlDTO> shows = forkJoinPool.submit(dayTask).get();
77142

78-
final ZdfDayPageHtmlTask dayTask =
79-
new ZdfDayPageHtmlTask(getApiUrlBase(), this, getExtraDayUrls());
80-
return forkJoinPool.submit(dayTask).get();
143+
printMessage(
144+
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
145+
146+
return shows;
81147
}
82148

83-
private Queue<CrawlerUrlDTO> getExtraDayUrls() {
149+
private Queue<CrawlerUrlDTO> getDayUrls() {
84150
final Queue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>();
85-
for (int i = 0; i <= getMaximumDaysPast(); i++) {
86-
87-
final LocalDateTime local = LocalDateTime.now().minus(i, ChronoUnit.DAYS);
151+
for (int i = 0;
152+
i
153+
<= crawlerConfig.getMaximumDaysForSendungVerpasstSection()
154+
+ crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture();
155+
i++) {
156+
157+
final LocalDateTime local =
158+
LocalDateTime.now()
159+
.plus(crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture(), ChronoUnit.DAYS)
160+
.minus(i, ChronoUnit.DAYS);
88161
final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
89-
final String url = String.format(ZdfConstants.URL_HTML_DAY, date);
162+
final String url = String.format(getUrlDay(), date, date);
90163
urls.add(new CrawlerUrlDTO(url));
91164
}
92165

93166
return urls;
94167
}
95168

96-
private int getMaximumDaysPast() {
97-
final Integer maximumDaysForSendungVerpasstSection =
98-
crawlerConfig.getMaximumDaysForSendungVerpasstSection();
99-
if (maximumDaysForSendungVerpasstSection == null
100-
|| maximumDaysForSendungVerpasstSection > MAXIMUM_DAYS_HTML_PAST) {
101-
return MAXIMUM_DAYS_HTML_PAST;
102-
}
103-
return maximumDaysForSendungVerpasstSection;
169+
private @NotNull String getUrlBase() {
170+
return ZdfConstants.URL_BASE;
171+
}
172+
173+
private String getApiUrlBase() {
174+
return ZdfConstants.URL_API_BASE;
175+
}
176+
177+
private @NotNull String getUrlDay() {
178+
return ZdfConstants.URL_DAY;
104179
}
105180
}

0 commit comments

Comments
 (0)