Skip to content

Commit 75f36a5

Browse files
committed
Merge branch 'feature/ArdGroupStructure' into develop3
1 parent b0d7936 commit 75f36a5

8 files changed

Lines changed: 25 additions & 30 deletions

File tree

src/main/docker/runDocker

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@
44
docker compose up -d postgresMV
55

66
# cron 01 01 long run
7-
docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R1.yaml" mserver-r1
7+
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R1.yaml" mserver-r1
88

99
# cron 06-22 1,31 short run
10-
docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml" mserver-r2
10+
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml" mserver-r2
1111

1212
# cron 22 55 url check
13-
docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml --flow checkAvailability" mserver-checkUrls
13+
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow checkAvailability" mserver-checkUrls
1414

1515
# on demand - do not run this unless you know what you are doing!
16-
docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r3
16+
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r3
1717

1818
## docker save -o mserver.tar mediathekview/mserver:4.0.1-SNAPSHOT
1919
## docker load -i mserver.tar

src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ public void updateLastUrlCheck(List<Film> checked) {
8888
ps.addBatch();
8989
}
9090
int [] rs = ps.executeBatch();
91+
con.commit();
9192
for (int rsCode : rs) {
9293
updateCounter.addAndGet(rsCode);
9394
}
@@ -124,6 +125,7 @@ public void deleteFilms(Collection<Film> abandonedFilmlist) {
124125
ps.addBatch();
125126
}
126127
ps.executeBatch();
128+
con.commit();
127129
} catch (SQLException e) {
128130
LOG.error(e);
129131
}
@@ -203,6 +205,7 @@ public <T> List<T> filterNewVideos(Sender sender, List<T> videos, Function<T, St
203205
}
204206
}
205207
int[] rs = ps.executeBatch();
208+
con.commit();
206209
for (int rsIndex = 0; rsIndex < rs.length; rsIndex++) {
207210
if (rs[rsIndex] == 0) {
208211
newVideos.add(batch.get(rsIndex));
@@ -220,6 +223,10 @@ public <T> List<T> filterNewVideos(Sender sender, List<T> videos, Function<T, St
220223
result.addAll(f.get());
221224
}
222225
LOG.debug("Filtered {} in {} (in {} vs out {})",(videos.size()-result.size()), sender.getName(), videos.size(), result.size());
226+
// Cap the number of films per night; the remaining films are then found the next day. TODO: for the next round.
227+
/* result = new ArrayList<>(result.subList(0, 200000));
228+
*
229+
*/
223230
return result;
224231
} catch (Exception e) {
225232
LOG.error("{}", e);
@@ -319,8 +326,8 @@ ON CONFLICT (id) DO UPDATE
319326
LOG.error("saveBatch - Missing ID for film {}", film);
320327
}
321328
}
322-
323329
ps.executeBatch();
330+
con.commit();
324331
}
325332
return successCounter;
326333
}

src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,19 @@ private void init() {
3939
cfg.setUsername(aMServerConfigManager.getConfig().getDatabaseConfig().getUsername());
4040
cfg.setPassword(aMServerConfigManager.getConfig().getDatabaseConfig().getPassword());
4141

42-
// === Pool Sizing (wichtig!) ===
43-
cfg.setMaximumPoolSize(16); // Sweet Spot für 10k+/min
42+
// === Pool Sizing ===
43+
cfg.setMaximumPoolSize(50);
4444
cfg.setMinimumIdle(4);
4545

4646
// === Performance ===
47-
cfg.setAutoCommit(true);
47+
cfg.setAutoCommit(false);
4848
cfg.setConnectionTimeout(3000);
4949
cfg.setIdleTimeout(600_000);
5050
cfg.setMaxLifetime(1_800_000);
5151

5252
// === PostgreSQL Optimierungen ===
53-
cfg.addDataSourceProperty("reWriteBatchedInserts", "true");
5453
cfg.addDataSourceProperty("stringtype", "unspecified");
54+
cfg.addDataSourceProperty("defaultRowFetchSize", "10000");
5555

5656
// === Debug (optional) ===
5757
cfg.setPoolName("CrawlerPool");

src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ private ForkJoinTask<Set<CrawlerUrlDTO>> getTopicEntriesBySender(final String se
123123
Set<CrawlerUrlDTO> senderSingleLetterUrls = forkJoinPool.submit(
124124
new ArdTopicsTask(this, sender, CreateLetterUrlQuery(sender))).get();
125125

126-
LOG.debug("topics task result {}", senderSingleLetterUrls.size());
126+
//LOG.debug("topics task result {}", senderSingleLetterUrls.size());
127127
return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderSingleLetterUrls)));
128128
}
129129

src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,7 @@ private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips)
6060
if(id.contains(":")) {
6161
refId = id.replace(":", "%3A");
6262
}
63-
6463
final String url = String.format(ArdConstants.ITEM_URL, refId);
65-
66-
67-
68-
if (id.contains("a04c5a47-0801-40e5-b530-b7f9a4312be9:6898178275329995836")
69-
|| id.contains("Y3JpZDovL25kci5kZS9wcm9wbGFuXzE5NjM4MTA5N19nYW56ZVNlbmR1bmc")
70-
|| id.contains("1TDLUvc8cVEtcSb9GGsOnt:6898178275329995836")
71-
|| id.contains("6b64fc2c-4bd7-47ae-af6c-680e65b53b89")
72-
) {
73-
System.out.println("stop");
74-
}
75-
7664
return new ArdFilmInfoDto(id, url, numberOfClips);
7765
}
7866

src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg
3939
&& topicInfo.getFilmInfos() != null
4040
&& !topicInfo.getFilmInfos().isEmpty()) {
4141
taskResults.addAll(topicInfo.getFilmInfos());
42-
LOG.debug("Found {} shows for a topic of ARD.", topicInfo.getFilmInfos().size());
42+
//LOG.debug("Found {} shows for a topic of ARD.", topicInfo.getFilmInfos().size());
4343

4444
final Queue<CrawlerUrlDTO> subpages = createSubPageUrls(aTarget, topicInfo);
4545
if (!subpages.isEmpty()) {
@@ -65,7 +65,7 @@ private Queue<CrawlerUrlDTO> createSubPageUrls(
6565
break;
6666
}
6767
}
68-
LOG.debug("Found {} subpage", subpages.size());
68+
//LOG.debug("Found {} subpage", subpages.size());
6969
return subpages;
7070
}
7171

src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicsLetterTask.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ protected AbstractRecursiveConverterTask<CrawlerUrlDTO, CrawlerUrlDTO> createNew
4242
@Override
4343
protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) {
4444
final PaginationUrlDto results = deserialize(aTarget, PAGINATION_URL_DTO_TYPE_TOKEN, aDTO);
45-
LOG.debug("Found {} shows for {}.", results.getUrls().size(), sender);
45+
//LOG.debug("Found {} shows for {}.", results.getUrls().size(), sender);
4646
taskResults.addAll(results.getUrls());
4747

4848
if (results.getActualPage() == 0 && results.getMaxPages() > 1) {

src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializerTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ public void testDeserialize() {
1919
final CrawlerUrlDTO[] expected =
2020
new CrawlerUrlDTO[] {
2121
new CrawlerUrlDTO(
22-
"https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW4?pageSize=50"),
22+
"https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL2J1dGVudW5iaW5uZW5nZWJhZXJkZW5zcHJhY2hl?embedded=true"),
2323
new CrawlerUrlDTO(
24-
"https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX3Nwb3J0YmxpdHo?pageSize=50"),
24+
"https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX3Nwb3J0YmxpdHo?embedded=true"),
2525
new CrawlerUrlDTO(
26-
"https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW53ZXR0ZXI?pageSize=50"),
26+
"https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW53ZXR0ZXI?embedded=true"),
2727
new CrawlerUrlDTO(
28-
"https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW51bTY?pageSize=50"),
28+
"https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW4?embedded=true"),
2929
new CrawlerUrlDTO(
30-
"https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL2J1dGVudW5iaW5uZW5nZWJhZXJkZW5zcHJhY2hl?pageSize=50"),
30+
"https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW51bTY?embedded=true"),
3131
};
3232

3333
final ArdTopicsLetterDeserializer instance = new ArdTopicsLetterDeserializer();

0 commit comments

Comments
 (0)