From 18ab602f0e6393d0048ce0f78aacefbbf4a9c1ac Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 20 Nov 2025 21:35:40 +0000 Subject: [PATCH 1/7] chore(librarian): Add header to files under .librarian/generator-input --- .generator/cli.py | 33 ++++++++++++++++++++++++- .generator/test_cli.py | 55 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/.generator/cli.py b/.generator/cli.py index 2dad6fb03349..ddce2c3eecf9 100644 --- a/.generator/cli.py +++ b/.generator/cli.py @@ -367,14 +367,45 @@ def _copy_files_needed_for_post_processing( path_to_library = f"packages/{library_id}" if is_mono_repo else "." source_dir = f"{input}/{path_to_library}" + destination_dir = f"{output}/{path_to_library}" + header_line = """# DO NOT EDIT THIS FILE OUTSIDE OF `.librarian/generator-input` +# The source of truth for this file is .librarian/generator-input\n""" if Path(source_dir).exists(): shutil.copytree( source_dir, - f"{output}/{path_to_library}", + destination_dir, dirs_exist_ok=True, ) + # Walk through the destination directory to add `header_line` + # to files in `.librarian/generator-input`. + for root, _, files in os.walk(source_dir): + for file_name in files: + # Skip JSON files which do not support comments. + if file_name.endswith(".json"): + continue + + rel_path = os.path.relpath(root, source_dir) + dest_file_path = Path(destination_dir) / rel_path / file_name + + with open(dest_file_path, "r", encoding="utf-8") as f: + lines = f.readlines() + + line_index = 0 + # Skip the license header which is a contiguous block of comments. + # We keep advancing as long as the line starts with '#'. + while line_index < len(lines) and lines[line_index].strip().startswith( + "#" + ): + line_index += 1 + + # Insert the new header at the calculated index. + lines.insert(line_index, "\n" + header_line + "\n") + + with open(dest_file_path, "w", encoding="utf-8") as f: + f.writelines(lines) + # We need to create these directories so that we can copy files necessary for post-processing. os.makedirs( f"{output}/{path_to_library}/scripts/client-post-processing", exist_ok=True diff --git a/.generator/test_cli.py b/.generator/test_cli.py index 0bfc3c66bf3b..adc27260e6b1 100644 --- a/.generator/test_cli.py +++ b/.generator/test_cli.py @@ -142,6 +142,16 @@ )""" +@pytest.fixture +def setup_dirs(tmp_path): + """Creates input and output directories.""" + input_dir = tmp_path / "input" + output_dir = tmp_path / "output" + input_dir.mkdir() + output_dir.mkdir() + return input_dir, output_dir + + @pytest.fixture(autouse=True) def _clear_lru_cache(): """Automatically clears the cache of all LRU-cached functions after each test.""" @@ -906,6 +916,51 @@ def test_copy_files_needed_for_post_processing_copies_files_from_generator_input mock_makedirs.assert_called() +def test_copy_files_needed_for_post_processing_copies_files_from_generator_input_skips_json_files( + setup_dirs, +): + """Test that .json files are copied but NOT modified.""" + input_dir, output_dir = setup_dirs + + json_content = '{"key": "value"}' + (input_dir / ".repo-metadata.json").write_text(json_content) + + _copy_files_needed_for_post_processing( + output=str(output_dir), + input=str(input_dir), + library_id="google-cloud-foo", + is_mono_repo=False, + ) + + dest_file = output_dir / ".repo-metadata.json" + assert dest_file.exists() + # Content should be exactly the same, no # comments added + assert dest_file.read_text() == json_content + + +def test_file_with_shebang_and_license(setup_dirs): + """Test insertion handles License correctly.""" + input_dir, output_dir = setup_dirs + + # Setup source + (input_dir / "run.sh").write_text("# Some Copyright" "# text" "code") + + _copy_files_needed_for_post_processing( + output=str(output_dir), + input=str(input_dir), + library_id="google-cloud-foo", + is_mono_repo=False, + ) + + content = (output_dir / "run.sh").read_text() + lines = content.splitlines() + + # Ensure header comes *after* copyright + copyright_index = next(i for i, line in enumerate(lines) if "Copyright" in line) + header_index = next(i for i, line in enumerate(lines) if "source of truth" in line) + assert header_index > copyright_index + + @pytest.mark.parametrize("is_mono_repo", [False, True]) def test_clean_up_files_after_post_processing_success(mocker, is_mono_repo): mock_shutil_rmtree = mocker.patch("shutil.rmtree") From 9df373949baa776fa1752b3c51246f9f8a48b8a1 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 20:31:26 +0000 Subject: [PATCH 2/7] address feedback --- .generator/cli.py | 73 ++++++++++++++++++++++++--------------- .generator/test_cli.py | 78 +++++++++++++++++++++++++++++++++--------- 2 files changed, 106 insertions(+), 45 deletions(-) diff --git a/.generator/cli.py b/.generator/cli.py index ddce2c3eecf9..77be27d70dfc 100644 --- a/.generator/cli.py +++ b/.generator/cli.py @@ -57,6 +57,11 @@ SOURCE_DIR = "source" _GITHUB_BASE = "https://github.com" +GENERATOR_INPUT_HEADER_TEXT = ( + "# DO NOT EDIT THIS FILE OUTSIDE OF `.librarian/generator-input`\n" + "# The source of truth for this file is `.librarian/generator-input`\n" +) + def _read_text_file(path: str) -> str: """Helper function that reads a text file path and returns the content. @@ -349,6 +354,44 @@ def _run_post_processor(output: str, library_id: str, is_mono_repo: bool): logger.info("Python post-processor ran successfully.") +def _add_header_to_files(directory: str) -> None: + """Adds a 'DO NOT EDIT' header to files in the specified directory. + + Skips JSON and YAML files. Attempts to insert the header after any existing + license headers (blocks of comments starting with '#'). + + Args: + directory (str): The directory containing files to update. + """ + header_line = ( + "# DO NOT EDIT THIS FILE OUTSIDE OF `.librarian/generator-input`\n" + "# The source of truth for this file is `.librarian/generator-input`\n" + ) + + # Files with these extensions should be ignored. + skipped_extensions = {".json", ".yaml"} + + for root, _, files in os.walk(directory): + for file_name in files: + file_path = Path(root) / file_name + + if file_path.suffix in skipped_extensions: + continue + + with open(file_path, "r", encoding="utf-8") as f: + lines = f.readlines() + + line_index = 0 + # Skip the license header (contiguous block of comments starting with '#'). + while line_index < len(lines) and lines[line_index].strip().startswith("#"): + line_index += 1 + + lines.insert(line_index, "\n" + header_line + "\n") + + with open(file_path, "w", encoding="utf-8") as f: + f.writelines(lines) + + def _copy_files_needed_for_post_processing( output: str, input: str, library_id: str, is_mono_repo: bool ): @@ -377,34 +420,8 @@ def _copy_files_needed_for_post_processing( destination_dir, dirs_exist_ok=True, ) - - # Walk through the destination directory to add `header_line` - # to files in `.librarian/generator-input`. - for root, _, files in os.walk(source_dir): - for file_name in files: - # Skip JSON files which do not support comments. - if file_name.endswith(".json"): - continue - - rel_path = os.path.relpath(root, source_dir) - dest_file_path = Path(destination_dir) / rel_path / file_name - - with open(dest_file_path, "r", encoding="utf-8") as f: - lines = f.readlines() - - line_index = 0 - # Skip the license header which is a contiguous block of comments. - # We keep advancing as long as the line starts with '#'. - while line_index < len(lines) and lines[line_index].strip().startswith( - "#" - ): - line_index += 1 - - # Insert the new header at the calculated index. - lines.insert(line_index, "\n" + header_line + "\n") - - with open(dest_file_path, "w", encoding="utf-8") as f: - f.writelines(lines) + # Apply headers only to the generator-input files copied above. + _add_header_to_files(destination_dir) # We need to create these directories so that we can copy files necessary for post-processing. os.makedirs( diff --git a/.generator/test_cli.py b/.generator/test_cli.py index adc27260e6b1..c68cf3bb9c02 100644 --- a/.generator/test_cli.py +++ b/.generator/test_cli.py @@ -27,6 +27,7 @@ import pytest from cli import ( GENERATE_REQUEST_FILE, + GENERATOR_INPUT_HEADER_TEXT, BUILD_REQUEST_FILE, CONFIGURE_REQUEST_FILE, RELEASE_STAGE_REQUEST_FILE, @@ -34,6 +35,7 @@ STATE_YAML_FILE, LIBRARIAN_DIR, REPO_DIR, + _add_header_to_files, _clean_up_files_after_post_processing, _copy_files_needed_for_post_processing, _create_main_version_header, @@ -938,27 +940,69 @@ def test_copy_files_needed_for_post_processing_copies_files_from_generator_input assert dest_file.read_text() == json_content -def test_file_with_shebang_and_license(setup_dirs): - """Test insertion handles License correctly.""" - input_dir, output_dir = setup_dirs +def test_add_header_with_existing_license(tmp_path): + """ + Test that the header is inserted AFTER the existing license block. + """ + # Setup: Create a file with a license header + file_path = tmp_path / "example.py" + original_content = ( + "# Copyright 2025 Google LLC\n" "# Licensed under Apache 2.0\n" "\n" "import os" + ) + file_path.write_text(original_content, encoding="utf-8") + + # Execute + _add_header_to_files(str(tmp_path)) + + # Verify + new_content = file_path.read_text(encoding="utf-8") + expected_content = ( + "# Copyright 2025 Google LLC\n" + "# Licensed under Apache 2.0\n" + "\n" + f"{GENERATOR_INPUT_HEADER_TEXT}\n" + "\n" + "import os" + ) + assert new_content == expected_content - # Setup source - (input_dir / "run.sh").write_text("# Some Copyright" "# text" "code") - _copy_files_needed_for_post_processing( - output=str(output_dir), - input=str(input_dir), - library_id="google-cloud-foo", - is_mono_repo=False, - ) +def test_add_header_to_files_add_header_no_license(tmp_path): + """ + Test that the header is inserted at the top if no license block exists. + """ + # Setup: Create a file starting directly with code + file_path = tmp_path / "script.sh" + original_content = "echo 'Hello World'" + file_path.write_text(original_content, encoding="utf-8") + + # Execute + _add_header_to_files(str(tmp_path)) + + # Verify + new_content = file_path.read_text(encoding="utf-8") + expected_content = "\n" f"{GENERATOR_INPUT_HEADER_TEXT}\n" "echo 'Hello World'" + assert new_content == expected_content + + +def test_add_header_to_files_skips_excluded_extensions(tmp_path): + """ + Test that .json and .yaml files are ignored. + """ + # Setup: Create files that should be ignored + json_file = tmp_path / "data.json" + yaml_file = tmp_path / "config.yaml" + + content = "key: value" + json_file.write_text('{"key": "value"}', encoding="utf-8") + yaml_file.write_text(content, encoding="utf-8") - content = (output_dir / "run.sh").read_text() - lines = content.splitlines() + # Execute + _add_header_to_files(str(tmp_path)) - # Ensure header comes *after* copyright - copyright_index = next(i for i, line in enumerate(lines) if "Copyright" in line) - header_index = next(i for i, line in enumerate(lines) if "source of truth" in line) - assert header_index > copyright_index + # Verify contents remain exactly the same + assert json_file.read_text(encoding="utf-8") == '{"key": "value"}' + assert yaml_file.read_text(encoding="utf-8") == content @pytest.mark.parametrize("is_mono_repo", [False, True]) From 5c43ffe9a412445f344b5e5fede5dd7070a3b31b Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 20:32:05 +0000 Subject: [PATCH 3/7] clean up --- .generator/cli.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.generator/cli.py b/.generator/cli.py index 77be27d70dfc..29b45c3da21f 100644 --- a/.generator/cli.py +++ b/.generator/cli.py @@ -363,10 +363,6 @@ def _add_header_to_files(directory: str) -> None: Args: directory (str): The directory containing files to update. """ - header_line = ( - "# DO NOT EDIT THIS FILE OUTSIDE OF `.librarian/generator-input`\n" - "# The source of truth for this file is `.librarian/generator-input`\n" - ) # Files with these extensions should be ignored. skipped_extensions = {".json", ".yaml"} @@ -386,7 +382,7 @@ def _add_header_to_files(directory: str) -> None: while line_index < len(lines) and lines[line_index].strip().startswith("#"): line_index += 1 - lines.insert(line_index, "\n" + header_line + "\n") + lines.insert(line_index, "\n" + GENERATOR_INPUT_HEADER_TEXT + "\n") with open(file_path, "w", encoding="utf-8") as f: f.writelines(lines) From f6c55c6a40ce3cc21738228888a83314ba4948ec Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 20:33:22 +0000 Subject: [PATCH 4/7] clean up --- .generator/cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/.generator/cli.py b/.generator/cli.py index 29b45c3da21f..3d7a45a5d24e 100644 --- a/.generator/cli.py +++ b/.generator/cli.py @@ -407,8 +407,6 @@ def _copy_files_needed_for_post_processing( path_to_library = f"packages/{library_id}" if is_mono_repo else "." source_dir = f"{input}/{path_to_library}" destination_dir = f"{output}/{path_to_library}" - header_line = """# DO NOT EDIT THIS FILE OUTSIDE OF `.librarian/generator-input` -# The source of truth for this file is .librarian/generator-input\n""" if Path(source_dir).exists(): shutil.copytree( From 58f73763b0529bcf6a674d4749c094c85f464a23 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 21:04:29 +0000 Subject: [PATCH 5/7] address feedback --- .generator/cli.py | 4 ++-- .generator/test_cli.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.generator/cli.py b/.generator/cli.py index 3d7a45a5d24e..441b2a803271 100644 --- a/.generator/cli.py +++ b/.generator/cli.py @@ -57,7 +57,7 @@ SOURCE_DIR = "source" _GITHUB_BASE = "https://github.com" -GENERATOR_INPUT_HEADER_TEXT = ( +_GENERATOR_INPUT_HEADER_TEXT = ( "# DO NOT EDIT THIS FILE OUTSIDE OF `.librarian/generator-input`\n" "# The source of truth for this file is `.librarian/generator-input`\n" ) @@ -382,7 +382,7 @@ def _add_header_to_files(directory: str) -> None: while line_index < len(lines) and lines[line_index].strip().startswith("#"): line_index += 1 - lines.insert(line_index, "\n" + GENERATOR_INPUT_HEADER_TEXT + "\n") + lines.insert(line_index, "\n" + _GENERATOR_INPUT_HEADER_TEXT + "\n") with open(file_path, "w", encoding="utf-8") as f: f.writelines(lines) diff --git a/.generator/test_cli.py b/.generator/test_cli.py index c68cf3bb9c02..878d1014baf2 100644 --- a/.generator/test_cli.py +++ b/.generator/test_cli.py @@ -26,8 +26,8 @@ import pytest from cli import ( + _GENERATOR_INPUT_HEADER_TEXT, GENERATE_REQUEST_FILE, - GENERATOR_INPUT_HEADER_TEXT, BUILD_REQUEST_FILE, CONFIGURE_REQUEST_FILE, RELEASE_STAGE_REQUEST_FILE, @@ -960,7 +960,7 @@ def test_add_header_with_existing_license(tmp_path): "# Copyright 2025 Google LLC\n" "# Licensed under Apache 2.0\n" "\n" - f"{GENERATOR_INPUT_HEADER_TEXT}\n" + f"{_GENERATOR_INPUT_HEADER_TEXT}\n" "\n" "import os" ) @@ -981,7 +981,7 @@ def test_add_header_to_files_add_header_no_license(tmp_path): # Verify new_content = file_path.read_text(encoding="utf-8") - expected_content = "\n" f"{GENERATOR_INPUT_HEADER_TEXT}\n" "echo 'Hello World'" + expected_content = "\n" f"{_GENERATOR_INPUT_HEADER_TEXT}\n" "echo 'Hello World'" assert new_content == expected_content From 8d05b519c174873a5a18409cc7daca0eb12e8f1a Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 21:13:30 +0000 Subject: [PATCH 6/7] omit new line if no comments --- .generator/cli.py | 5 ++++- .generator/test_cli.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.generator/cli.py b/.generator/cli.py index 441b2a803271..98c919949c26 100644 --- a/.generator/cli.py +++ b/.generator/cli.py @@ -382,7 +382,10 @@ def _add_header_to_files(directory: str) -> None: while line_index < len(lines) and lines[line_index].strip().startswith("#"): line_index += 1 - lines.insert(line_index, "\n" + _GENERATOR_INPUT_HEADER_TEXT + "\n") + header_prefix = "\n" if line_index > 0 else "" + lines.insert( + line_index, header_prefix + _GENERATOR_INPUT_HEADER_TEXT + "\n" + ) with open(file_path, "w", encoding="utf-8") as f: f.writelines(lines) diff --git a/.generator/test_cli.py b/.generator/test_cli.py index 878d1014baf2..4a168cbc2759 100644 --- a/.generator/test_cli.py +++ b/.generator/test_cli.py @@ -981,7 +981,7 @@ def test_add_header_to_files_add_header_no_license(tmp_path): # Verify new_content = file_path.read_text(encoding="utf-8") - expected_content = "\n" f"{_GENERATOR_INPUT_HEADER_TEXT}\n" "echo 'Hello World'" + expected_content = f"{_GENERATOR_INPUT_HEADER_TEXT}\n" "echo 'Hello World'" assert new_content == expected_content From d39feafd6ccb1dd39a8483c81dea1cfe63c67375 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 21:16:09 +0000 Subject: [PATCH 7/7] clean up --- .generator/cli.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.generator/cli.py b/.generator/cli.py index 98c919949c26..48d6776594f0 100644 --- a/.generator/cli.py +++ b/.generator/cli.py @@ -383,9 +383,7 @@ def _add_header_to_files(directory: str) -> None: line_index += 1 header_prefix = "\n" if line_index > 0 else "" - lines.insert( - line_index, header_prefix + _GENERATOR_INPUT_HEADER_TEXT + "\n" - ) + lines.insert(line_index, f"{header_prefix}{_GENERATOR_INPUT_HEADER_TEXT}\n") with open(file_path, "w", encoding="utf-8") as f: f.writelines(lines)