Skip to content

Commit 6e4370f

Browse files
committed
ZDF: new mediathek
2 parents 58840d1 + 2ddc9ce commit 6e4370f

29 files changed

Lines changed: 1303 additions & 518 deletions

build.gradle

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,13 @@ sourceCompatibility = JavaVersion.VERSION_17
2727
targetCompatibility = JavaVersion.VERSION_17
2828
group = 'de.mediathekview'
2929
archivesBaseName = "MServer"
30-
version = '3.1.256'
30+
version = '3.1.257'
3131

3232
def jarName = 'MServer.jar'
3333
def mainClass = 'mServer.Main'
3434

3535
dependencies {
36-
implementation 'de.mediathekview:MLib:3.0.17'
36+
implementation 'de.mediathekview:MLib:3.0.18'
3737
implementation 'commons-net:commons-net:3.9.0'
3838
implementation 'org.apache.commons:commons-compress:1.22'
3939
implementation 'org.apache.commons:commons-text:1.10.0'

src/main/java/mServer/crawler/sender/zdf/ZdfConstants.java

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
package mServer.crawler.sender.zdf;
22

3+
import de.mediathekview.mlib.Const;
4+
5+
import java.util.HashMap;
6+
import java.util.Map;
7+
38
public final class ZdfConstants {
49

510
/** Name of the header required for authentication. */
@@ -8,13 +13,43 @@ public final class ZdfConstants {
813
/** Base url of the ZDF website. */
914
public static final String URL_BASE = "https://www.zdf.de";
1015

11-
public static final String URL_HTML_DAY = URL_BASE + "/sendung-verpasst?airtimeDate=%s";
12-
13-
public static final String URL_TOPICS = URL_BASE + "/sendungen-a-z";
14-
1516
/** Base url of the ZDF api. */
1617
public static final String URL_API_BASE = "https://api.zdf.de";
1718

19+
public static final String NO_CURSOR = "null";
20+
public static final int EPISODES_PAGE_SIZE = 24;
21+
22+
public static final String URL_LETTER_PAGE =
23+
URL_API_BASE
24+
+ "/graphql?operationName=specialPageByCanonical&" +
25+
"variables=%s&" +
26+
"extensions=%s";
27+
public static final String URL_LETTER_PAGE_VARIABLES =
28+
"{\"staticGridClusterPageSize\":6,\"staticGridClusterOffset\":0,\"canonical\":\"sendungen-100\",\"endCursor\":%s,\"tabIndex\":%d,\"itemsFilter\":{\"teaserUsageNotIn\":[\"TIVI_HBBTV_ONLY\"]}}";
29+
public static final String URL_LETTER_PAGE_EXTENSIONS =
30+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"7d33167e7700ba57779f48b28b5d485c8ada0a1d5dfbdc8a261b7bd62ca28ba7\"}}";
31+
32+
public static final String URL_TOPIC_PAGE = URL_API_BASE + "/graphql?operationName=seasonByCanonical&" +
33+
"variables=%s&" +
34+
"extensions=%s";
35+
public static final String URL_TOPIC_PAGE_VARIABLES = "{\"seasonIndex\":%d,\"episodesPageSize\":%d,\"canonical\":\"%s\",\"sortBy\":[{\"field\":\"EDITORIAL_DATE\",\"direction\":\"DESC\"}]}";
36+
public static final String URL_TOPIC_PAGE_VARIABLES_WITH_CURSOR = "{\"seasonIndex\":%d,\"episodesPageSize\":%d,\"canonical\":\"%s\",\"sortBy\":[{\"field\":\"EDITORIAL_DATE\",\"direction\":\"DESC\"}],\"episodesAfter\":\"%s\"}";
37+
public static final String URL_TOPIC_PAGE_EXTENSIONS =
38+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"9412a0f4ac55dc37d46975d461ec64bfd14380d815df843a1492348f77b5c99a\"}}";
39+
40+
public static final String URL_TOPIC_PAGE_NO_SEASON = URL_API_BASE + "/graphql?operationName=getMetaCollectionContent&" +
41+
"variables=%s&" +
42+
"extensions=%s";
43+
public static final String URL_TOPIC_PAGE_NO_SEASON_VARIABLES =
44+
"{\"collectionId\":\"%s\",\"input\":{\"appId\":\"ffw-mt-web-879d5c17\",\"filters\":{\"contentOwner\":[],\"fsk\":[],\"language\":[]},\"pagination\":{\"first\":%d,\"after\":%s},\"user\":{\"abGroup\":\"gruppe-d\",\"userSegment\":\"segment_0\"},\"tabId\":null}}";
45+
public static final String URL_TOPIC_PAGE_NO_SEASON_EXTENSIONS =
46+
"{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"c85ca9c636258a65961a81124abd0dbef06ab97eaca9345cbdfde23b54117242\"}}";
47+
48+
public static final String URL_FILM_ENRY =
49+
URL_API_BASE + "/graphql?operationName=GetVideoMetaByCanonical&"
50+
+ "variables={\"canonical\"=\"%s\"}&"
51+
+ "extensions={\"persistedQuery\"={\"version\"=1,\"sha256Hash\"=\"737eb4421d274259baa3051929f4ecfef2d2afc59f12a9d82285c14dbdd1dd0d\"}}";
52+
1853
/** Url to search the films. */
1954
public static final String URL_DAY =
2055
URL_API_BASE
@@ -36,5 +71,21 @@ public final class ZdfConstants {
3671
public static final String LANGUAGE_GERMAN_AD = LANGUAGE_GERMAN + LANGUAGE_SUFFIX_AD;
3772
public static final String LANGUAGE_GERMAN_DGS = LANGUAGE_GERMAN + LANGUAGE_SUFFIX_DGS;
3873

74+
public static final Map<String, String> PARTNER_TO_SENDER = new HashMap<>();
75+
public static final Map<String, String> SPECIAL_COLLECTION_IDS = new HashMap<>();
76+
77+
static {
78+
PARTNER_TO_SENDER.put("ZDFinfo", Const.ZDF_INFO);
79+
PARTNER_TO_SENDER.put("ZDFneo", Const.ZDF_NEO);
80+
PARTNER_TO_SENDER.put("ZDF", Const.ZDF);
81+
PARTNER_TO_SENDER.put("EMPTY", Const.ZDF);
82+
PARTNER_TO_SENDER.put("ZDFtivi", Const.ZDF_TIVI);
83+
// IGNORED Sender [KI.KA, WDR, PHOENIX, one, HR, 3sat, SWR, arte, BR, RBB, ARD, daserste, alpha, MDR, radiobremen, funk, ZDF, NDR, SR]
84+
85+
SPECIAL_COLLECTION_IDS.put("pub-form-10004", "Filme");
86+
SPECIAL_COLLECTION_IDS.put("pub-form-10003", "Dokus");
87+
SPECIAL_COLLECTION_IDS.put("pub-form-10010", "Serien");
88+
}
89+
3990
private ZdfConstants() {}
4091
}

src/main/java/mServer/crawler/sender/zdf/ZdfCrawler.java

Lines changed: 137 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2,101 +2,196 @@
22

33
import de.mediathekview.mlib.Config;
44
import de.mediathekview.mlib.Const;
5+
import de.mediathekview.mlib.daten.DatenFilm;
56
import de.mediathekview.mlib.tool.Log;
7+
import mServer.crawler.CrawlerTool;
68
import mServer.crawler.FilmeSuchen;
9+
import mServer.crawler.sender.MediathekCrawler;
710
import mServer.crawler.sender.base.CrawlerUrlDTO;
811
import mServer.crawler.sender.base.JsoupConnection;
9-
import mServer.crawler.sender.zdf.tasks.ZdfDayPageHtmlTask;
10-
import mServer.crawler.sender.zdf.tasks.ZdfLetterListHtmlTask;
11-
import mServer.crawler.sender.zdf.tasks.ZdfTopicPageHtmlTask;
12-
import mServer.crawler.sender.zdf.tasks.ZdfTopicsPageHtmlTask;
12+
import mServer.crawler.sender.zdf.tasks.*;
13+
import org.apache.logging.log4j.LogManager;
14+
import org.apache.logging.log4j.Logger;
1315
import org.jetbrains.annotations.NotNull;
1416

1517
import java.time.LocalDateTime;
1618
import java.time.format.DateTimeFormatter;
1719
import java.time.temporal.ChronoUnit;
18-
import java.util.Collection;
20+
import java.util.HashSet;
1921
import java.util.Queue;
2022
import java.util.Set;
2123
import java.util.concurrent.ConcurrentLinkedQueue;
2224
import java.util.concurrent.ExecutionException;
25+
import java.util.concurrent.RecursiveTask;
2326

24-
public class ZdfCrawler extends AbstractZdfCrawler {
27+
public class ZdfCrawler extends MediathekCrawler {
2528

26-
private static final int MAXIMUM_DAYS_HTML_PAST = 7;
29+
private static final Logger LOG = LogManager.getLogger(ZdfCrawler.class);
30+
private static final int MAX_LETTER_PAGEGS = 27;
31+
32+
private static final String AUTH_KEY = "aa3noh4ohz9eeboo8shiesheec9ciequ9Quah7el";
33+
34+
JsoupConnection jsoupConnection = new JsoupConnection();
2735

2836
public ZdfCrawler(FilmeSuchen ssearch, int startPrio) {
29-
super(Const.ZDF, ssearch, startPrio);
37+
super(ssearch, Const.ZDF, 0, 1, startPrio);
3038
}
3139

32-
@Override
33-
protected @NotNull String getUrlBase() {
34-
return ZdfConstants.URL_BASE;
35-
}
3640

3741
@Override
38-
protected String getApiUrlBase() {
39-
return ZdfConstants.URL_API_BASE;
42+
protected synchronized void meldungThreadUndFertig() {
43+
// der MediathekReader ist erst fertig wenn nur noch ein Thread läuft
44+
// dann zusätzliche Sender, die der Crawler bearbeitet, beenden
45+
if (getThreads() <= 1) {
46+
mlibFilmeSuchen.meldenFertig(Const.ZDF_TIVI);
47+
mlibFilmeSuchen.meldenFertig(Const.ZDF_INFO);
48+
mlibFilmeSuchen.meldenFertig(Const.ZDF_NEO);
49+
}
50+
51+
super.meldungThreadUndFertig();
4052
}
4153

4254
@Override
43-
protected @NotNull String getUrlDay() {
44-
return ZdfConstants.URL_DAY;
55+
protected RecursiveTask<Set<DatenFilm>> createCrawlerTask() {
56+
57+
try {
58+
if (CrawlerTool.loadLongMax()) {
59+
Set<ZdfFilmDto> shows = new HashSet<>();
60+
shows.addAll(getTopicsEntries());
61+
62+
Log.sysLog(getSendername() + " Anzahl: " + shows.size());
63+
meldungAddMax(shows.size());
64+
65+
return new ZdfFilmTask(this, new ConcurrentLinkedQueue<>(shows), AUTH_KEY);
66+
} else {
67+
final ZdfConfiguration configuration = loadConfiguration();
68+
if (configuration.getSearchAuthKey().isPresent() && configuration.getVideoAuthKey().isPresent()) {
69+
Set<CrawlerUrlDTO> shows = new HashSet<>(getDaysEntries(configuration));
70+
Log.sysLog(getSendername() + " Anzahl: " + shows.size());
71+
meldungAddMax(shows.size());
72+
return new ZdfFilmDetailTask(this, getApiUrlBase(), new ConcurrentLinkedQueue<>(shows), configuration.getVideoAuthKey());
73+
}
74+
}
75+
} catch (final InterruptedException ex) {
76+
LOG.debug("{} crawler interrupted.", getSendername(), ex);
77+
Thread.currentThread().interrupt();
78+
} catch (final ExecutionException ex) {
79+
LOG.fatal("Exception in {} crawler.", getSendername(), ex);
80+
}
81+
return null;
4582
}
4683

47-
@Override
48-
public Queue<CrawlerUrlDTO> getTopicsEntries() throws ExecutionException, InterruptedException {
84+
private Queue<ZdfFilmDto> getTopicsEntries() throws ExecutionException, InterruptedException {
85+
86+
final ConcurrentLinkedQueue<ZdfFilmDto> shows = new ConcurrentLinkedQueue<>();
87+
88+
ZdfLetterPageTask letterPageTask =
89+
new ZdfLetterPageTask(this, createLetterPageUrls(), AUTH_KEY);
90+
final Set<ZdfTopicUrlDto> topicUrls = forkJoinPool.submit(letterPageTask).get();
4991

50-
final ConcurrentLinkedQueue<CrawlerUrlDTO> shows = new ConcurrentLinkedQueue<>();
92+
Log.sysLog("ZDF: letter topics: " + topicUrls.size());
5193

52-
final ConcurrentLinkedQueue<CrawlerUrlDTO> letterListUrl = new ConcurrentLinkedQueue<>();
53-
letterListUrl.add(new CrawlerUrlDTO(ZdfConstants.URL_TOPICS));
94+
if (Config.getStop()) {
95+
return shows;
96+
}
5497

55-
final ZdfLetterListHtmlTask letterTask = new ZdfLetterListHtmlTask(this, letterListUrl);
56-
final Set<CrawlerUrlDTO> letterUrls = forkJoinPool.submit(letterTask).get();
98+
final ZdfPubFormTask pubFormTask = new ZdfPubFormTask(this, createPubFormUrls(), AUTH_KEY);
99+
final Set<ZdfPubFormResult> pubFormUrls = forkJoinPool.submit(pubFormTask).get();
57100

58-
Log.sysLog("ZDF: letters: " + letterUrls.size());
101+
Log.sysLog("ZDF: Pubform urls: " + pubFormUrls.size());
59102

60103
if (Config.getStop()) {
61104
return shows;
62105
}
63106

64-
final ZdfTopicsPageHtmlTask topicsTask =
65-
new ZdfTopicsPageHtmlTask(this, new ConcurrentLinkedQueue<>(letterUrls));
66-
final Set<CrawlerUrlDTO> topicsUrls = forkJoinPool.submit(topicsTask).get();
107+
pubFormUrls.forEach(
108+
pubFormResult -> {
109+
topicUrls.addAll(pubFormResult.getTopics().getElements());
110+
shows.addAll(pubFormResult.getFilms());
111+
});
67112

68-
Log.sysLog("ZDF: topics: " + topicsUrls.size());
113+
Log.sysLog("ZDF: Pubform topics: " + pubFormUrls.size());
69114

70115
if (Config.getStop()) {
71116
return shows;
72117
}
73118

74-
final ZdfTopicPageHtmlTask topicTask =
75-
new ZdfTopicPageHtmlTask(this, new ConcurrentLinkedQueue<>(topicsUrls));
76-
shows.addAll(forkJoinPool.submit(topicTask).get());
119+
ZdfTopicSeasonTask topicSeasonTask =
120+
new ZdfTopicSeasonTask(this, new ConcurrentLinkedQueue<>(topicUrls), AUTH_KEY);
121+
final Set<ZdfFilmDto> zdfFilmDtos = forkJoinPool.submit(topicSeasonTask).get();
122+
shows.addAll(zdfFilmDtos);
77123

78124
return shows;
79125
}
80126

81-
@Override
82-
protected Collection<CrawlerUrlDTO> getExtraDaysEntries()
83-
throws ExecutionException, InterruptedException {
127+
private ConcurrentLinkedQueue<ZdfPubFormDto> createPubFormUrls() {
128+
ConcurrentLinkedQueue<ZdfPubFormDto> urls = new ConcurrentLinkedQueue<>();
129+
ZdfConstants.SPECIAL_COLLECTION_IDS.forEach((collectionId, topic) -> {
130+
final String url =
131+
ZdfUrlBuilder.buildTopicNoSeasonUrl(
132+
ZdfConstants.EPISODES_PAGE_SIZE, collectionId, ZdfConstants.NO_CURSOR);
133+
urls.add(new ZdfPubFormDto(topic, collectionId, url));
134+
});
135+
return urls;
136+
}
137+
138+
private ConcurrentLinkedQueue<ZdfLetterDto> createLetterPageUrls() {
139+
final ConcurrentLinkedQueue<ZdfLetterDto> urls = new ConcurrentLinkedQueue<>();
140+
for (int i = 0; i < MAX_LETTER_PAGEGS; i++) {
141+
urls.add(new ZdfLetterDto(i, ZdfUrlBuilder.buildLetterPageUrl(ZdfConstants.NO_CURSOR, i)));
142+
}
84143

85-
final ZdfDayPageHtmlTask dayTask =
86-
new ZdfDayPageHtmlTask(getApiUrlBase(), this, getExtraDayUrls(), new JsoupConnection());
87-
return forkJoinPool.submit(dayTask).get();
144+
return urls;
88145
}
89146

90-
private ConcurrentLinkedQueue<CrawlerUrlDTO> getExtraDayUrls() {
91-
final ConcurrentLinkedQueue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>();
92-
for (int i = 0; i <= MAXIMUM_DAYS_HTML_PAST; i++) {
147+
private ZdfConfiguration loadConfiguration() throws ExecutionException, InterruptedException {
148+
final ZdfIndexPageTask task = new ZdfIndexPageTask(this, getUrlBase(), jsoupConnection);
149+
return forkJoinPool.submit(task).get();
150+
}
151+
152+
private Set<CrawlerUrlDTO> getDaysEntries(ZdfConfiguration configuration)
153+
throws InterruptedException, ExecutionException {
154+
final ZdfDayPageTask dayTask
155+
= new ZdfDayPageTask(this, getApiUrlBase(), getDayUrls(), configuration.getSearchAuthKey());
156+
final Set<CrawlerUrlDTO> shows = forkJoinPool.submit(dayTask).get();
157+
158+
Log.sysLog(getSendername() + ": days entries: " + shows.size());
159+
160+
return shows;
161+
}
162+
163+
private ConcurrentLinkedQueue<CrawlerUrlDTO> getDayUrls() {
93164

94-
final LocalDateTime local = LocalDateTime.now().minus(i, ChronoUnit.DAYS);
165+
int daysPast = 7;
166+
int daysFuture = 5;
167+
168+
final ConcurrentLinkedQueue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>();
169+
for (int i = 0;
170+
i
171+
<= daysPast
172+
+ daysFuture;
173+
i++) {
174+
175+
final LocalDateTime local
176+
= LocalDateTime.now()
177+
.plus(daysFuture, ChronoUnit.DAYS)
178+
.minus(i, ChronoUnit.DAYS);
95179
final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
96-
final String url = String.format(ZdfConstants.URL_HTML_DAY, date);
180+
final String url = String.format(getUrlDay(), date, date);
97181
urls.add(new CrawlerUrlDTO(url));
98182
}
99183

100184
return urls;
101185
}
186+
private @NotNull String getUrlDay() {
187+
return ZdfConstants.URL_DAY;
188+
}
189+
190+
private String getApiUrlBase() {
191+
return ZdfConstants.URL_API_BASE;
192+
}
193+
194+
private @NotNull String getUrlBase() {
195+
return ZdfConstants.URL_BASE;
196+
}
102197
}

0 commit comments

Comments
 (0)