Skip to content

Commit 221c188

Browse files
authored
Update NetWebView2Lib_pdfjs_Tools.js
1 parent f4e283a commit 221c188

1 file changed

Lines changed: 55 additions & 24 deletions

File tree

examples/JS_Lib/NetWebView2Lib_pdfjs_Tools.js

Lines changed: 55 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,64 @@
11
/**
2-
* PDF_Tools.js - Final Combined Library
2+
* PDF_Tools.js - Final Combined Library (v1.4.2)
33
*/
44

5+
// NetWebView2Lib_pdfjs_Tools.js
56
/**
 * Extracts the text of every page of the document currently held by the
 * PDF.js viewer and posts it to the WebView2 host as a JSON string.
 *
 * Success message:
 *   { type: 'PDF_DATA_PACKAGE', metadata: <info object from getMetadata()>,
 *     pagesCount: <number>, pages: [{ pageIndex: <1-based>, text: <string> }] }
 * Failure message:
 *   { type: 'error', message: <string> }
 *
 * Page text is laid out visually: items are grouped into lines by their Y
 * coordinate, ordered left to right inside each line, and padded with spaces
 * in proportion to the horizontal gap between consecutive items.
 *
 * Returns silently (no message posted) when PDFViewerApplication is not
 * defined, i.e. when the script is not running inside the PDF.js viewer.
 *
 * @returns {Promise<void>}
 */
async function PDF_ExtractToJSON() {
    if (typeof PDFViewerApplication === 'undefined') return;

    // Items whose Y is within this distance of a line's first item share that line.
    const LINE_TOLERANCE = 5;
    // Horizontal distance that is rendered as one space character.
    const CHAR_WIDTH = 5;

    /**
     * Converts the text items of one page into visually laid-out text.
     *
     * @param {Array<object>} rawItems - items as returned by getTextContent().
     * @returns {string} page text, lines separated by "\n", trimmed.
     */
    function layoutPageText(rawItems) {
        const items = (rawItems || [])
            // Skip entries that carry no text or position; reading
            // transform[4] on them would throw and abort the whole export.
            .filter(item => item && typeof item.str === 'string' && item.transform)
            .map(item => ({
                str: item.str,
                x: item.transform[4],
                y: item.transform[5],
                width: item.width
            }))
            // Strict top-to-bottom order. A tolerance must not be mixed into
            // the comparator: that makes it non-transitive and the resulting
            // order implementation-defined.
            .sort((a, b) => b.y - a.y);

        // Group items into lines, anchored on the Y of each line's first item
        // so a long run of slightly descending items cannot merge many lines.
        const lines = [];
        let currentLine = null;
        for (const item of items) {
            if (currentLine === null || currentLine.y - item.y > LINE_TOLERANCE) {
                currentLine = { y: item.y, items: [] };
                lines.push(currentLine);
            }
            currentLine.items.push(item);
        }

        return lines
            .map(line => {
                let text = '';
                let lastX = 0;
                line.items.sort((a, b) => a.x - b.x);
                for (const item of line.items) {
                    // Pad according to the gap between the end of the previous
                    // item and the start of this one.
                    const spaces = Math.floor((item.x - lastX) / CHAR_WIDTH);
                    text += ' '.repeat(Math.max(0, spaces)) + item.str;
                    lastX = item.x + item.width;
                }
                return text.trimEnd();
            })
            // Whitespace-only lines are dropped, as before.
            .filter(text => text.length > 0)
            .join('\n')
            .trim();
    }

    try {
        const pdf = PDFViewerApplication.pdfDocument;
        if (!pdf) {
            throw new Error('No PDF document is loaded');
        }

        const pdfData = {
            type: 'PDF_DATA_PACKAGE',
            metadata: (await pdf.getMetadata()).info,
            pagesCount: pdf.numPages, // Explicitly send page count for AutoIt
            pages: []
        };

        for (let i = 1; i <= pdf.numPages; i++) {
            const page = await pdf.getPage(i);
            const textContent = await page.getTextContent();
            pdfData.pages.push({
                pageIndex: i,
                text: layoutPageText(textContent.items)
            });
        }

        // Send the final JSON package back to AutoIt
        window.chrome.webview.postMessage(JSON.stringify(pdfData));
    } catch (e) {
        // Always answer the host, otherwise it waits forever on a failed export.
        const message = e && e.message ? e.message : String(e);
        window.chrome.webview.postMessage(JSON.stringify({ type: 'error', message: message }));
    }
}
3263

3364
async function PDF_ExtractLegacy() {

0 commit comments

Comments
 (0)