Skip to content

Commit ff4ffb9

Browse files
authored
Merge pull request #1101 from mediathekview/feature/artehbb
Feature/artehbb
2 parents f525f77 + 1f6165b commit ff4ffb9

11 files changed

Lines changed: 166 additions & 53 deletions

File tree

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
package de.mediathekview.mserver.crawler.arte;
22

33
public class ArteConstants {
4-
public static final String VIDEOS_URL ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=-creationDate&language=%s";
5-
public static final String VIDEOS_URL_ALT ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=creationDate&language=%s";
6-
public static final String VIDEO_URL ="https://www.arte.tv/hbbtvv2/services/web/index.php/OPA/v3/streams/%s/%s/%s"; //PROGRAMID/KIND/LANG
7-
public static final String API_TOKEN = "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA";
4+
public final static String VIDEOS_URL ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=-creationDate&language=%s";
5+
public final static String VIDEOS_URL_ALT ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=creationDate&language=%s";
6+
public final static String VIDEO_URL ="https://www.arte.tv/hbbtvv2/services/web/index.php/OPA/v3/streams/%s/%s/%s"; //PROGRAMID/KIND/LANG
7+
public final static String API_TOKEN = "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA";
8+
public final static int MAX_POSSIBLE_SUBPAGES = 100;
89
private ArteConstants() {}
910

1011
}

src/main/java/de/mediathekview/mserver/crawler/arte/ArteCrawler.java

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,16 @@ protected ArteLanguage getLanguage() {
5252
protected RecursiveTask<Set<Film>> createCrawlerTask() {
5353

5454
try {
55+
final Queue<TopicUrlDTO> videoUrls = new ConcurrentLinkedQueue<>();
56+
/*
57+
for (String lang : new String[]{ "en", "es", "fr", "it", "pl", "de" }) {
58+
videoUrls.addAll(createVideosQueue(lang));
59+
}*/
60+
videoUrls.addAll(createVideosQueue(getLanguage().toString().toLowerCase()));
61+
5562
final ArteVideoInfoTask aArteRestVideoInfoTask;
5663
// DO NOT overload - maximumUrlsPerTask used to reduce threads to 4
57-
aArteRestVideoInfoTask = new ArteVideoInfoTask(this, createVideosQueue());
64+
aArteRestVideoInfoTask = new ArteVideoInfoTask(this, videoUrls, getMaxPagesForOverview(getLanguage().toString().toLowerCase()));
5865
final Queue<ArteVideoInfoDto> videos = new ConcurrentLinkedQueue<>();
5966
videos.addAll(aArteRestVideoInfoTask.fork().join());
6067
//
@@ -80,23 +87,29 @@ protected RecursiveTask<Set<Film>> createCrawlerTask() {
8087
return null;
8188
}
8289

83-
private Queue<TopicUrlDTO> createVideosQueue() {
84-
int maxPages = getMaxPagesForOverview();
90+
private Queue<TopicUrlDTO> createVideosQueue(String lang) {
8591
final Queue<TopicUrlDTO> root = new ConcurrentLinkedQueue<>();
86-
String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase());
87-
root.add(new TopicUrlDTO("all videos1",rootUrl));
88-
if (maxPages >= 100) {
89-
String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, getLanguage().toString().toLowerCase());
90-
root.add(new TopicUrlDTO("all videos2",rootUrl2));
91-
}
92+
String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, lang);
93+
root.add(new TopicUrlDTO("all videos sorted up",rootUrl));
94+
String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, lang);
95+
root.add(new TopicUrlDTO("all videos sorted down",rootUrl2));
9296
return root;
9397
}
9498

95-
private int getMaxPagesForOverview() {
96-
final int naturalLimit = Math.min(100, getCrawlerConfig().getMaximumSubpages());
97-
String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase());
98-
String[] path = {"meta", "videos", "pages"};
99+
private int getMaxPagesForOverview(String lang) {
100+
final int maxAvailablePages = getNumberOfAvailablePages(lang);
101+
final int configuredMaxPages = getCrawlerConfig().getMaximumSubpages();
102+
if (configuredMaxPages > maxAvailablePages) {
103+
return Math.min(ArteConstants.MAX_POSSIBLE_SUBPAGES, maxAvailablePages / 2);
104+
} else {
105+
return Math.min(ArteConstants.MAX_POSSIBLE_SUBPAGES, configuredMaxPages / 2);
106+
}
107+
}
108+
109+
private int getNumberOfAvailablePages(String lang) {
99110
try {
111+
String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, lang);
112+
String path[] = {"meta", "videos", "pages"};
100113
final Map<String, String> headers = Map.of(
101114
"Accept", "application/json",
102115
"Content-Type", "application/json",
@@ -105,12 +118,12 @@ private int getMaxPagesForOverview() {
105118
JsonElement element = getConnection().requestBodyAsJsonElement(rootUrl, headers);
106119
Optional<Integer> pages = JsonUtils.getElementValueAsInteger(element, path);
107120
if (pages.isPresent()) {
108-
return Math.min(pages.get(), naturalLimit);
121+
return pages.get();
109122
}
110123
} catch (IOException e) {
111124
LOG.error("getMaxPagesForOverview", e);
112125
}
113-
return naturalLimit;
126+
return ArteConstants.MAX_POSSIBLE_SUBPAGES;
114127
}
115128

116129
}

src/main/java/de/mediathekview/mserver/crawler/arte/ArteRestVideoTypeMapper.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
package de.mediathekview.mserver.crawler.arte;
22

3-
import de.mediathekview.mserver.daten.Resolution;
4-
import de.mediathekview.mserver.daten.Sender;
53
import org.apache.logging.log4j.LogManager;
64
import org.apache.logging.log4j.Logger;
75

6+
import de.mediathekview.mserver.daten.Resolution;
7+
import de.mediathekview.mserver.daten.Sender;
8+
89
import java.util.Optional;
910

1011
public class ArteRestVideoTypeMapper {

src/main/java/de/mediathekview/mserver/crawler/arte/json/ArteVideoInfoDto.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import de.mediathekview.mserver.crawler.arte.ArteConstants;
88
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
9+
import de.mediathekview.mserver.daten.Sender;
910

1011
public class ArteVideoInfoDto extends CrawlerUrlDTO {
1112
private Optional<String> firstBroadcastDate;
@@ -179,6 +180,18 @@ public List<ArteSubtitleLinkDto> getSubtitleLinks() {
179180
public void setSubtitleLinks(List<ArteSubtitleLinkDto> subtitleLinks) {
180181
this.subtitleLinks = subtitleLinks;
181182
}
183+
184+
public Sender getSender() {
185+
return switch (getLanguage().get()) {
186+
case "de" -> Sender.ARTE_DE;
187+
case "en" -> Sender.ARTE_EN;
188+
case "fr" -> Sender.ARTE_FR;
189+
case "es" -> Sender.ARTE_ES;
190+
case "it" -> Sender.ARTE_IT;
191+
case "pl" -> Sender.ARTE_PL;
192+
default -> Sender.ARTE_DE;
193+
};
194+
}
182195

183196
@Override
184197
public boolean equals(Object o) {

src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteDtoVideo2FilmTask.java

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
import java.time.ZoneId;
99
import java.time.ZonedDateTime;
1010
import java.time.format.DateTimeFormatter;
11-
import java.util.*;
11+
import java.util.HashMap;
12+
import java.util.Map;
13+
import java.util.Optional;
14+
import java.util.Queue;
15+
import java.util.UUID;
1216

13-
import de.mediathekview.mserver.daten.Film;
14-
import de.mediathekview.mserver.daten.FilmUrl;
15-
import de.mediathekview.mserver.daten.GeoLocations;
16-
import de.mediathekview.mserver.daten.Resolution;
1717
import org.apache.logging.log4j.LogManager;
1818
import org.apache.logging.log4j.Logger;
1919

@@ -23,6 +23,10 @@
2323
import de.mediathekview.mserver.crawler.arte.json.ArteVideoInfoDto;
2424
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
2525
import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask;
26+
import de.mediathekview.mserver.daten.Film;
27+
import de.mediathekview.mserver.daten.FilmUrl;
28+
import de.mediathekview.mserver.daten.GeoLocations;
29+
import de.mediathekview.mserver.daten.Resolution;
2630

2731

2832
public class ArteDtoVideo2FilmTask extends AbstractRecursiveConverterTask<Film, ArteVideoInfoDto> {
@@ -52,12 +56,14 @@ protected void processElement(ArteVideoInfoDto aElement) {
5256
parse(aElement);
5357
}
5458

59+
///////////////////////////////////////////////////////////////////////////
60+
5561
protected void parse(ArteVideoInfoDto aElement) {
5662
Map<Resolution, FilmUrl> videoUrls = buildVideoUrls(aElement, ArteVideoType.DEFAULT);
57-
Map<Resolution, FilmUrl> videoSubs = buildVideoUrls(aElement, ArteVideoType.AUDIO_DESCRIPTION);
63+
Map<Resolution, FilmUrl> videoAD = buildVideoUrls(aElement, ArteVideoType.AUDIO_DESCRIPTION);
5864
if (videoUrls.size() > 0) {
5965
Film film = buildFilmBody(aElement);
60-
addFilm(buildFilmBody(aElement), film.getTitel(), videoUrls, videoSubs);
66+
addFilm(buildFilmBody(aElement), film.getTitel(), videoUrls, videoAD);
6167
}
6268
//
6369
Map<Resolution, FilmUrl> originalVersion = buildVideoUrls(aElement, ArteVideoType.ORIGINAL);
@@ -69,6 +75,12 @@ protected void parse(ArteVideoInfoDto aElement) {
6975
Film film = buildFilmBody(aElement);
7076
addFilm(buildFilmBody(aElement), film.getTitel()+ " (Originalversion mit Untertitel)", originalVersionSubs, null);
7177
}
78+
// ARTE provides subs as a new video
79+
Map<Resolution, FilmUrl> videoSub = buildVideoUrls(aElement, ArteVideoType.SUBTITLE_INCLUDED);
80+
if (videoSub.size() > 0) {
81+
Film film = buildFilmBody(aElement);
82+
addFilm(buildFilmBody(aElement), film.getTitel()+ " (mit Untertitel)", videoSub, null);
83+
}
7284
}
7385

7486
protected void addFilm(Film film, String title, Map<Resolution, FilmUrl> video, Map<Resolution, FilmUrl> audioDesc) {
@@ -87,7 +99,7 @@ protected void addFilm(Film film, String title, Map<Resolution, FilmUrl> video,
8799
protected Film buildFilmBody(ArteVideoInfoDto aElement) {
88100
Film film = new Film(
89101
UUID.randomUUID(),
90-
crawler.getSender(),
102+
aElement.getSender(),
91103
buildTitle(aElement),
92104
buildTopic(aElement),
93105
buildAired(aElement),
@@ -172,7 +184,7 @@ protected URL buildWebsite(ArteVideoInfoDto aElement) {
172184
}
173185

174186
protected Map<Resolution, FilmUrl> buildVideoUrls(ArteVideoInfoDto aElement, ArteVideoType type) {
175-
Map<Resolution, FilmUrl> urls = new EnumMap<>(Resolution.class);
187+
Map<Resolution, FilmUrl> urls = new HashMap<>();
176188
Map<Resolution, String> rawUrls = builRawVideoUrls(aElement, type);
177189
rawUrls.forEach( (resolution, rawUrl) -> {
178190
try {
@@ -185,7 +197,7 @@ protected Map<Resolution, FilmUrl> buildVideoUrls(ArteVideoInfoDto aElement, Art
185197
}
186198

187199
protected Map<Resolution, String> builRawVideoUrls(ArteVideoInfoDto aElement, ArteVideoType type) {
188-
final Map<Resolution, String> urls = new EnumMap<>(Resolution.class);
200+
final Map<Resolution, String> urls = new HashMap<>();
189201
aElement.getVideoLinks().forEach( entry -> {
190202
Optional<ArteVideoType> audioTypeCode = ArteRestVideoTypeMapper.map(crawler.getSender(), entry.getAudioCode().get());
191203
if (audioTypeCode.isPresent() && audioTypeCode.get().equals(type)) {

src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTask.java

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ public class ArteVideoInfoTask
2727
extends AbstractJsonRestTask<ArteVideoInfoDto, PagedElementListDTO<ArteVideoInfoDto>, TopicUrlDTO> {
2828
private static final long serialVersionUID = 1L;
2929
protected final transient Logger log = LogManager.getLogger(this.getClass());
30-
protected transient Optional<AbstractRecursiveConverterTask<ArteVideoInfoDto, TopicUrlDTO>> nextPageTask = Optional.empty();
30+
protected Optional<AbstractRecursiveConverterTask<ArteVideoInfoDto, TopicUrlDTO>> nextPageTask = Optional.empty();
31+
protected int maxSubpages;
3132

3233

33-
public ArteVideoInfoTask(AbstractCrawler crawler, Queue<TopicUrlDTO> urlToCrawlDTOs) {
34+
public ArteVideoInfoTask(AbstractCrawler crawler, Queue<TopicUrlDTO> urlToCrawlDTOs, int maxSubpages) {
3435
super(crawler, urlToCrawlDTOs, ArteConstants.API_TOKEN);
36+
this.maxSubpages = maxSubpages;
3537
}
3638

3739
@Override
@@ -44,40 +46,40 @@ protected Type getType() {
4446
return new TypeToken<PagedElementListDTO<ArteVideoInfoDto>>() {}.getType();
4547
}
4648

47-
protected void postProcessingNextPage(PagedElementListDTO<ArteVideoInfoDto> aResponseObj) {
49+
protected void postProcessingNextPage(PagedElementListDTO<ArteVideoInfoDto> aResponseObj, TopicUrlDTO aDTO) {
4850
if (aResponseObj.getNextPage().isEmpty()) {
4951
return;
5052
}
51-
int maxPages = Math.min(100, crawler.getCrawlerConfig().getMaximumSubpages());
52-
if (aResponseObj.getNextPage().get().contains("age="+maxPages)) {
53-
log.debug("stop at page url {} due to limit {}", aResponseObj.getNextPage().get(), maxPages);
53+
if (aResponseObj.getNextPage().get().contains("page="+maxSubpages+"&")) {
54+
log.debug("stop at page url {} due to limit {}", aResponseObj.getNextPage().get(), maxSubpages);
5455
return;
5556
}
5657

5758
final Queue<TopicUrlDTO> nextPageLinks = new ConcurrentLinkedQueue<>();
5859
nextPageLinks.add(new TopicUrlDTO(aResponseObj.getNextPage().get(), aResponseObj.getNextPage().get()));
5960
nextPageTask = Optional.of(createNewOwnInstance(nextPageLinks));
6061
nextPageTask.get().fork();
62+
//log.debug("started paging to url {} for {}", aResponseObj.getNextPage().get(), aDTO.getUrl());
6163
}
6264

63-
protected void postProcessingElements(Set<ArteVideoInfoDto> elements) {
65+
protected void postProcessingElements(Set<ArteVideoInfoDto> elements, TopicUrlDTO aDTO) {
6466
for (ArteVideoInfoDto element : elements) {
6567
taskResults.add(element);
6668
}
6769
}
6870

6971
@Override
7072
protected void postProcessing(PagedElementListDTO<ArteVideoInfoDto> aResponseObj, TopicUrlDTO aDTO) {
71-
postProcessingNextPage(aResponseObj);
72-
postProcessingElements(aResponseObj.getElements());
73-
nextPageTask.ifPresent(paginationResults -> postProcessingElements(paginationResults.join()));
73+
postProcessingNextPage(aResponseObj, aDTO);
74+
postProcessingElements(aResponseObj.getElements(), aDTO);
75+
nextPageTask.ifPresent(paginationResults -> postProcessingElements(paginationResults.join(), aDTO));
7476

7577
}
7678

7779
@Override
7880
protected AbstractRecursiveConverterTask<ArteVideoInfoDto, TopicUrlDTO> createNewOwnInstance(
7981
Queue<TopicUrlDTO> aElementsToProcess) {
80-
return new ArteVideoInfoTask(crawler, aElementsToProcess);
82+
return new ArteVideoInfoTask(crawler, aElementsToProcess, maxSubpages);
8183
}
8284

8385
@Override

src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoLinkTask.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,19 @@
1818
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
1919
import de.mediathekview.mserver.crawler.basic.AbstractJsonRestTask;
2020
import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask;
21+
import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
2122
import jakarta.ws.rs.core.Response;
2223

24+
// T = class returned from this task, R = class the response is deserialized into, D = DTO providing the URL to query
25+
2326
public class ArteVideoLinkTask
2427
extends AbstractJsonRestTask<ArteVideoInfoDto, List<ArteVideoLinkDto>, ArteVideoInfoDto> {
2528
private static final long serialVersionUID = 1L;
2629
protected final transient Logger log = LogManager.getLogger(this.getClass());
2730

2831

2932
public ArteVideoLinkTask(AbstractCrawler crawler, Queue<ArteVideoInfoDto> videoInfo) {
30-
super(crawler, videoInfo, null);
33+
super(crawler, videoInfo, ArteConstants.API_TOKEN);
3134
}
3235

3336
@Override

src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ public Document requestBodyAsXmlDocument(String url) throws IOException {
193193
* @return size of the response in KB or -1 in case we could not determine the size.
194194
*/
195195
public long determineFileSizeInKB(String url) {
196+
getRateLimiter().acquire();
196197
return getConnection().determineFileSize(url) / 1024;
197198
}
198199

0 commit comments

Comments
 (0)