|
| 1 | +# Copyright (c) Microsoft Corporation. All rights reserved. |
| 2 | +# Licensed under the MIT License. |
| 3 | +"""Notebook-specific helpers for whole-notebook linting with cross-cell context.""" |
| 4 | + |
| 5 | +from __future__ import annotations |
| 6 | + |
| 7 | +import dataclasses |
| 8 | +import re |
| 9 | +from typing import Callable, Optional, Sequence |
| 10 | + |
| 11 | +from lsprotocol import types as lsp |
| 12 | + |
# A line counts as an IPython magic / shell escape when its first
# non-whitespace character is `%` or `!` (this covers `%`, `%%`, `!`, `!!`).
# Such lines are swapped for `pass` before the source is linted.
MAGIC_LINE_RE = re.compile(r"^\s*[%!]")

# Notebook document sync registration: one filter per notebook type, each
# restricted to Python cells, with save notifications enabled.
NOTEBOOK_SYNC_OPTIONS = lsp.NotebookDocumentSyncOptions(
    notebook_selector=[
        lsp.NotebookDocumentFilterWithNotebook(
            notebook=notebook_type,
            cells=[lsp.NotebookCellLanguage(language="python")],
        )
        for notebook_type in ("jupyter-notebook", "interactive")
    ],
    save=True,
)
| 33 | + |
| 34 | + |
@dataclasses.dataclass
class CellOffset:
    """Location of one notebook cell inside the combined notebook source."""

    # URI of the cell's text document.
    cell_uri: str
    # 0-based line of the combined source on which this cell's first line sits.
    start_line: int
    # Number of lines this cell contributes to the combined source.
    line_count: int


# One CellOffset per cell that was included in the combined source.
CellMap = list[CellOffset]
| 45 | + |
| 46 | + |
def build_notebook_source(
    cells: list,
    get_text_document: Callable[[str], Optional[object]],
) -> tuple[str, CellMap]:
    """Build a single Python source string from all code cells.

    Cells are concatenated in the order given. A cell is skipped (and gets no
    entry in the cell map) when it is not a code cell, has no document, its
    document cannot be resolved, its language is not ``python``, or its
    source is empty.

    IPython magic lines (``%``, ``%%``, ``!``, etc.) are replaced with
    ``pass`` statements so pylint does not raise syntax errors on them. The
    replacement keeps the original line's leading whitespace, so a magic
    inside an indented block (``if``/``for``/``def`` body) still yields a
    correctly indented statement. Every replacement is exactly one line, so
    line numbers are unaffected.

    Args:
        cells: The notebook's cell list (``nb.cells``).
        get_text_document: A callable that resolves a cell document URI to a
            text document object (with ``.source`` and ``.language_id``
            attributes), e.g. ``workspace.get_text_document``.

    Returns:
        (combined_source, cell_map) where *cell_map* is a list of
        :class:`CellOffset` instances describing where each cell's lines
        begin in the combined source.
    """
    source_parts: list[str] = []
    cell_map: CellMap = []
    current_line = 0

    for cell in cells:
        if cell.kind != lsp.NotebookCellKind.Code or cell.document is None:
            continue
        doc = get_text_document(cell.document)
        if doc is None or doc.language_id != "python":
            continue

        source = doc.source
        if not source:
            continue

        # NOTE(review): str.splitlines also breaks on \v, \f, \x1c-\x1e, \x85,
        # \u2028 and \u2029. If the linter only counts \n / \r\n / \r, such a
        # character inside a cell would skew the offsets of later cells --
        # confirm against the consumer of the combined source.
        lines = source.splitlines(keepends=True)
        # Terminate the final line so the next cell starts on a fresh line.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"

        sanitized_lines: list[str] = []
        for line in lines:
            if MAGIC_LINE_RE.match(line):
                # Keep the indentation: a column-0 `pass` inside an indented
                # block would turn the whole notebook into a syntax error.
                indent = line[: len(line) - len(line.lstrip())]
                line = f"{indent}pass\n"
            sanitized_lines.append(line)

        cell_map.append(CellOffset(cell.document, current_line, len(sanitized_lines)))
        source_parts.extend(sanitized_lines)
        current_line += len(sanitized_lines)

    return "".join(source_parts), cell_map
| 96 | + |
| 97 | + |
def get_cell_for_line(global_line: int, cell_map: CellMap) -> CellOffset | None:
    """Find the cell whose span in the combined source contains *global_line*.

    *global_line* is a 0-based line number in the combined notebook source.
    The first matching entry of *cell_map* is returned; ``None`` is returned
    when no entry covers the line.
    """
    return next(
        (
            candidate
            for candidate in cell_map
            # Same test as start <= line < start + count, expressed as an
            # offset relative to the cell's first line.
            if 0 <= global_line - candidate.start_line < candidate.line_count
        ),
        None,
    )
| 108 | + |
| 109 | + |
def remap_diagnostics_to_cells(
    diagnostics: Sequence[lsp.Diagnostic],
    cell_map: CellMap,
) -> dict[str, list[lsp.Diagnostic]]:
    """Map combined-source diagnostics back to individual cell URIs.

    Each diagnostic's line range is adjusted relative to the owning cell,
    i.e. the cell that contains the diagnostic's start line. Diagnostics
    whose start line doesn't fall in any cell are discarded.

    If a diagnostic's end line crosses the owning cell's boundary, the end is
    clamped to column 0 of the cell's last line. When the diagnostic also
    *starts* on that last line, the end is collapsed onto the start position
    instead, so the resulting range is never inverted (end before start).

    Args:
        diagnostics: Diagnostics whose ranges refer to the combined source.
        cell_map: Offsets of every cell included in the combined source.

    Returns:
        A dict keyed by cell URI. Every cell in *cell_map* has an entry, with
        an empty list when no diagnostic belongs to it.
    """
    per_cell: dict[str, list[lsp.Diagnostic]] = {
        entry.cell_uri: [] for entry in cell_map
    }

    for diag in diagnostics:
        entry = get_cell_for_line(diag.range.start.line, cell_map)
        if entry is None:
            continue

        local_start = lsp.Position(
            line=diag.range.start.line - entry.start_line,
            character=diag.range.start.character,
        )

        last_line = entry.line_count - 1
        raw_end_line = diag.range.end.line - entry.start_line
        if raw_end_line > last_line:
            # The end lies in a later cell, so its column is meaningless
            # here. Use column 0 of the last line, unless the diagnostic
            # starts on that same line: then (last_line, 0) could precede
            # the start, so fall back to a zero-width range at the start.
            end_character = (
                local_start.character if local_start.line == last_line else 0
            )
            local_end = lsp.Position(line=last_line, character=end_character)
        else:
            local_end = lsp.Position(
                line=raw_end_line,
                character=diag.range.end.character,
            )

        # Copy every other field through unchanged; only the range is remapped.
        remapped = lsp.Diagnostic(
            range=lsp.Range(start=local_start, end=local_end),
            message=diag.message,
            severity=diag.severity,
            code=diag.code,
            code_description=diag.code_description,
            source=diag.source,
            related_information=diag.related_information,
            tags=diag.tags,
            data=diag.data,
        )
        per_cell[entry.cell_uri].append(remapped)

    return per_cell
0 commit comments