Skip to content

Commit 0ccfb82

Browse files
docs: LLM translations
Adds a workflow that triggers every time the English docs are updated to translate to other languages. --------- Co-authored-by: henderkes <m@pyc.ac>
1 parent c6b2b02 commit 0ccfb82

File tree

2 files changed

+194
-0
lines changed

2 files changed

+194
-0
lines changed

.github/workflows/translate.yaml

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Translates the English docs into the other languages whenever a Markdown
# file directly under docs/ changes on main, then opens a pull request with
# the regenerated translations.
name: Translate Docs
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
on:
  push:
    branches:
      - main
    paths:
      # "*" does not match "/", so only files directly under docs/ trigger
      # the workflow (the translated copies in sub-directories do not).
      - "docs/*"
permissions:
  contents: write
  pull-requests: write
jobs:
  build:
    name: Translate Docs
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Code
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false
      - name: Find changed English docs
        id: md_files
        env:
          # Passed through the environment instead of being expanded inside
          # the script, so the values are never parsed as shell code.
          BEFORE_SHA: ${{ github.event.before }}
          HEAD_SHA: ${{ github.sha }}
        run: |
          # ":(glob)" makes "*" stop at "/", so translated files such as
          # docs/fr/foo.md are not reported as changed English docs.
          # "--diff-filter=d" drops deleted files: there is nothing to translate.
          FILES=$(git diff --name-only --diff-filter=d "$BEFORE_SHA" "$HEAD_SHA" -- ':(glob)docs/*.md')
          # "xargs -r" skips basename entirely when the list is empty; sed
          # removes the trailing separator left behind by tr.
          FILES=$(echo "$FILES" | xargs -r -n1 basename | tr '\n' ' ' | sed 's/ *$//')
          if [ -z "$FILES" ]; then
            echo "found=false" >> "$GITHUB_OUTPUT"
          else
            echo "found=true" >> "$GITHUB_OUTPUT"
          fi
          echo "files=$FILES" >> "$GITHUB_OUTPUT"
      - name: Set up PHP
        if: steps.md_files.outputs.found == 'true'
        uses: shivammathur/setup-php@v2
        with:
          php-version: "8.5"
      - name: run translation script
        if: steps.md_files.outputs.found == 'true'
        env:
          GEMINI_API_KEY: "${{ secrets.GEMINI_API_KEY }}"
          MD_FILES: "${{ steps.md_files.outputs.files }}"
        run: |
          # MD_FILES is a space-separated list; it is left unquoted on purpose
          # so that every file name becomes its own argument.
          # shellcheck disable=SC2086
          php ./docs/translate.php $MD_FILES
      - name: Run Linter
        if: steps.md_files.outputs.found == 'true'
        uses: super-linter/super-linter/slim@v8
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          LINTER_RULES_PATH: /
          MARKDOWN_CONFIG_FILE: .markdown-lint.yaml
          FIX_NATURAL_LANGUAGE: true
          FIX_MARKDOWN: true
      - name: Create Pull Request
        if: steps.md_files.outputs.found == 'true'
        uses: peter-evans/create-pull-request@v8
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          title: "docs: update translations"
          commit-message: "docs: update translations"
          committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
          author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com>
          branch: translations/${{ github.run_id }}
          delete-branch: true
          body: |
            Translation updates for: ${{ steps.md_files.outputs.files }}.
          labels: |
            translations
            bot
          draft: false

docs/translate.php

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
<?php
2+
3+
# update all translations to match the english docs
4+
# usage: php docs/translate.php [specific-file.md ...] (without arguments, every .md file next to this script is translated)
5+
# needs: php with openssl and gemini api key
6+
7+
# Gemini model name; handed to makeGeminiRequest(), which interpolates it into the endpoint URL.
const MODEL = 'gemini-2.5-flash';
8+
# Pause, in seconds, applied after every translation and before every retry of a failed request.
const SLEEP_SECONDS_BETWEEN_REQUESTS = 10;
9+
# Target languages. The key is the name of the sub-directory (next to this script) that
# holds the translated files; the value is the language name written into the prompt.
const LANGUAGES = [
10+
'cn' => 'Chinese',
11+
'fr' => 'French',
12+
'ja' => 'Japanese',
13+
'pt-br' => 'Portuguese (Brazilian)',
14+
'ru' => 'Russian',
15+
'tr' => 'Turkish',
16+
];
17+
18+
/**
 * Sends a prompt pair to the Gemini `generateContent` endpoint and returns the generated text.
 *
 * When the request fails at the transport level, the API answers with an error, or the
 * response carries no generated text, the details are printed and the request is retried
 * after SLEEP_SECONDS_BETWEEN_REQUESTS seconds. Once no retries are left the script
 * terminates with exit code 1.
 *
 * @param string $systemPrompt Instructions, sent as the first turn with role "model".
 * @param string $userPrompt   Request, sent as the second turn with role "user".
 * @param string $model        Model name, interpolated into the endpoint URL.
 * @param string $apiKey       Key sent in the X-Goog-Api-Key header.
 * @param int    $reties       Number of retries left (spelling kept so named arguments keep working).
 *
 * @return string Text of the first part of the first candidate.
 */
function makeGeminiRequest(string $systemPrompt, string $userPrompt, string $model, string $apiKey, int $reties = 2): string
{
    $url = "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent";
    $body = json_encode([
        'contents' => [
            // "parts" is a list of part objects in the API schema, hence the extra array level.
            ['role' => 'model', 'parts' => [['text' => $systemPrompt]]],
            ['role' => 'user', 'parts' => [['text' => $userPrompt]]],
        ],
    ]);
    if ($body === false) {
        // Typically invalid UTF-8 in one of the prompts; retrying cannot fix that.
        echo 'Could not encode the request body: ' . json_last_error_msg() . "\n";
        exit(1);
    }

    $context = stream_context_create([
        'http' => [
            'method' => 'POST',
            'header' => "Content-Type: application/json\r\nX-Goog-Api-Key: $apiKey\r\nContent-Length: " . strlen($body) . "\r\n",
            'content' => $body,
            'timeout' => 300,
            // Return the body of 4xx/5xx responses too, so the API's error message can be printed.
            'ignore_errors' => true,
        ],
    ]);

    while (true) {
        // Make sure error_get_last() below only reports an error raised by this attempt.
        error_clear_last();
        $response = @file_get_contents($url, false, $context);

        $generatedDocs = '';
        if (is_string($response)) {
            $text = json_decode($response, true)['candidates'][0]['content']['parts'][0]['text'] ?? '';
            $generatedDocs = is_string($text) ? $text : '';
        }
        if ($generatedDocs !== '') {
            return $generatedDocs;
        }

        print_r(error_get_last());
        print_r($response);
        echo "\n";
        if ($reties <= 0) {
            exit(1);
        }
        echo "Retrying... ($reties retries left)\n";
        sleep(SLEEP_SECONDS_BETWEEN_REQUESTS);
        $reties--;
    }
}
51+
52+
/**
 * Builds the two prompts used to translate one document.
 *
 * @param string $language           Key of the target language in LANGUAGES.
 * @param string $englishFile        Content of the English document.
 * @param string $currentTranslation Content of the existing translation (may be empty).
 *
 * @return array Two strings: the system prompt followed by the user prompt.
 */
function createPrompt(string $language, string $englishFile, string $currentTranslation): array
{
    $targetLanguage = LANGUAGES[$language];

    // The user prompt ends with an opening code fence so that the model continues
    // directly with the translated document.
    $request = <<<PROMPT
    Here is the english version of the document:

    ```markdown
    $englishFile
    ```

    Here is the current translation in $targetLanguage:

    ```markdown
    $currentTranslation
    ```

    Here is the corrected and completed translation in $targetLanguage:

    ```markdown
    PROMPT;

    $instructions = <<<PROMPT
    You are translating the docs of the FrankenPHP server from english to other languages.
    You will receive the english version (authoritative) and a translation (possibly incomplete or incorrect).
    Your task is to produce a corrected and complete translation in the target language.
    You must strictly follow these rules:
    - You must not change the structure of the document (headings, code blocks, etc.)
    - You must not translate code, only comments and strings inside the code.
    - You must not translate links to other documentation pages, only the link text.
    - You must not add or remove any content, only translate what is present.
    - You must ensure that the translation is accurate and faithful to the original meaning.
    - You must write in a natural and fluent style, appropriate for technical documentation.
    - You must use the correct terminology for technical terms in the target language, don't translate if unsure.
    - You must not include any explanations or notes, only the translated document.
    PROMPT;

    return [$instructions, $request];
}
90+
91+
/**
 * Removes the Markdown code fence the model may wrap around its answer.
 *
 * Surrounding whitespace is trimmed first, so a closing fence followed by a newline is
 * still recognised. Only a leading "```markdown" marker and one closing "```" line are
 * removed; other backticks (for example inline code at the very end of the document)
 * are left untouched.
 *
 * @param string $markdown Raw text returned by the model.
 *
 * @return string The document without the wrapping fence, ending with exactly one newline.
 */
function sanitizeMarkdown(string $markdown): string
{
    $fence = '```';
    $markdown = trim($markdown);

    if (str_starts_with($markdown, $fence . 'markdown')) {
        $markdown = ltrim(substr($markdown, strlen($fence . 'markdown')));
    }

    if ($markdown === $fence) {
        // Nothing but the closing fence is left.
        $markdown = '';
    } elseif (str_ends_with($markdown, "\n" . $fence)) {
        // Drop the closing fence only when it sits on a line of its own.
        $markdown = substr($markdown, 0, -strlen($fence));
    }

    return trim($markdown) . "\n";
}
99+
100+
# Names of the files to translate, taken from the command line. Every argument is split on
# whitespace so that separate arguments (`a.md b.md`) and one quoted list (`"a.md b.md "`)
# are both accepted. An empty list means: translate every file.
$fileToTranslate = [];
foreach (array_slice($argv, 1) as $argument) {
    foreach (preg_split('/\s+/', $argument, -1, PREG_SPLIT_NO_EMPTY) as $name) {
        // Files are matched by name only, so "docs/foo.md" is accepted as "foo.md".
        $fileToTranslate[] = basename($name);
    }
}

$apiKey = $_SERVER['GEMINI_API_KEY'] ?? $_ENV['GEMINI_API_KEY'] ?? '';
if (!$apiKey) {
    echo 'Enter gemini api key ($GEMINI_API_KEY): ';
    // fgets() returns false when STDIN is closed (for example in CI).
    $apiKey = trim((string) fgets(STDIN));
}
if (!$apiKey) {
    fwrite(STDERR, "No gemini api key provided\n");
    exit(1);
}

$files = array_filter(scandir(__DIR__), fn($filename) => str_ends_with($filename, '.md'));
foreach ($files as $file) {
    # Filter before reading, so files that were not requested are never loaded.
    if ($fileToTranslate && !in_array($file, $fileToTranslate, true)) {
        continue;
    }
    $englishFile = file_get_contents(__DIR__ . "/$file");
    if ($englishFile === false) {
        fwrite(STDERR, "Could not read $file, skipping\n");
        continue;
    }
    foreach (LANGUAGES as $language => $languageName) {
        echo "Translating $file to $languageName\n";
        $translationPath = __DIR__ . "/$language/$file";
        # A document that has not been translated yet starts from an empty translation.
        $currentTranslation = is_file($translationPath) ? (file_get_contents($translationPath) ?: '') : '';
        [$systemPrompt, $userPrompt] = createPrompt($language, $englishFile, $currentTranslation);
        $markdown = makeGeminiRequest($systemPrompt, $userPrompt, MODEL, $apiKey);

        echo "Writing translated file to $language/$file\n";
        if (!is_dir(dirname($translationPath))) {
            mkdir(dirname($translationPath), 0777, true);
        }
        file_put_contents($translationPath, sanitizeMarkdown($markdown));

        echo "sleeping to avoid rate limiting...\n";
        sleep(SLEEP_SECONDS_BETWEEN_REQUESTS);
    }
}

0 commit comments

Comments
 (0)