Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Checkout sources
uses: actions/checkout@v4

- name: Install uv
- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Install dependencies
Expand Down
3 changes: 0 additions & 3 deletions codelimit/common/TokenRange.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ def __str__(self):
def __repr__(self):
return self.__str__()

def token_string(self, tokens: list[Token]):
return " ".join([t.value for t in tokens[self.start:self.end]])

def lt(self, other: TokenRange):
return self.start < other.start

Expand Down
15 changes: 11 additions & 4 deletions codelimit/common/gsm/Pattern.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from copy import deepcopy

from codelimit.common.TokenRange import TokenRange
from codelimit.common.gsm.automata.DFA import DFA
from codelimit.common.gsm.automata.State import State
from codelimit.common.gsm.predicate.Predicate import Predicate
from codelimit.common.token_matching.predicate.Balanced import Balanced


class Pattern:
def __init__(self, start: int, automata: DFA):
self.start = start
self.end = start
class Pattern(TokenRange):
def __init__(self, automata: DFA, start: int = 0):
super().__init__(start, start)
self.automata = automata
self.state = automata.start
self.tokens: list = []
Expand All @@ -30,7 +31,13 @@ def consume(self, item) -> State | None:
return self.state if found_transition else None

def is_accepting(self):
for p in self.predicate_map.values():
if isinstance(p, Balanced) and not p.depth == 0:
return False
return self.automata.is_accepting(self.state)

def token_string(self):
return " ".join([t.value for t in self.tokens])

def __str__(self):
return f'Pattern(start={self.start}, end={self.end}, tokens=[{self.token_string()}])'
30 changes: 14 additions & 16 deletions codelimit/common/gsm/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
)
from codelimit.common.gsm.Pattern import Pattern
from codelimit.common.gsm.operator.Operator import Operator
from codelimit.common.gsm.utils import render_automata
from codelimit.common.gsm.utils import render_automata, prune_nested

T = TypeVar("T")


def match(expression: Expression, sequence: list) -> Pattern | None:
nfa = expression_to_nfa(expression)
dfa = nfa_to_dfa(nfa)
pattern = Pattern(0, dfa)
pattern = Pattern(dfa)
for item in sequence:
next_state = pattern.consume(item)
if not next_state:
Expand All @@ -32,7 +32,7 @@ def match(expression: Expression, sequence: list) -> Pattern | None:
def starts_with(expression: Expression, sequence: list) -> Pattern | None:
nfa = expression_to_nfa(expression)
dfa = nfa_to_dfa(nfa)
pattern = Pattern(0, dfa)
pattern = Pattern(dfa)
for item in sequence:
next_state = pattern.consume(item)
if not next_state:
Expand All @@ -50,31 +50,29 @@ class FindState:
next_state_patterns: list[Pattern]


def find_all(expression: Expression, sequence: list) -> list[Pattern]:
def find_all(expression: Expression, sequence: list, nested: bool = False) -> list[Pattern]:
dfa = nfa_to_dfa(expression_to_nfa(expression))
fs = FindState([], [], [])
for idx, item in enumerate(sequence):
fs.active_patterns.append(Pattern(idx, dfa))
fs.active_patterns.append(Pattern(dfa, idx))
fs.next_state_patterns = []
for pattern in fs.active_patterns:
if fs.matches and pattern.start < fs.matches[-1].end:
continue
if len(pattern.state.transition) == 0 and pattern.is_accepting():
pattern.end = idx
fs.matches.append(pattern)
continue
if pattern.consume(item):
fs.next_state_patterns.append(pattern)
else:
if pattern.is_accepting():
pattern.end = idx
elif pattern.is_accepting():
pattern.end = idx
if not fs.matches or fs.matches[-1].end < pattern.end:
fs.matches.append(pattern)
fs.active_patterns = fs.next_state_patterns
for pattern in fs.active_patterns:
if pattern.is_accepting():
pattern.end = len(sequence)
fs.matches.append(pattern)
return fs.matches
if not fs.matches or fs.matches[-1].end < pattern.end:
fs.matches.append(pattern)
if nested:
return fs.matches
else:
return prune_nested(fs.matches)


def nfa_match(expression: Expression, sequence: list):
Expand Down
19 changes: 19 additions & 0 deletions codelimit/common/gsm/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import subprocess
import tempfile
from typing import TypeVar

from codelimit.common.TokenRange import TokenRange
from codelimit.common.gsm.automata.Automata import Automata
from codelimit.common.gsm.automata.State import State

Expand Down Expand Up @@ -58,3 +60,20 @@ def to_dot(automata: Automata):
result += state_transitions_to_dot(automata, automata.start)
result += "}"
return result


T = TypeVar("T", bound=TokenRange)


def prune_nested(ranges: list[T]) -> list[T]:
    """Return *ranges* with every range that is fully contained in another
    range removed.

    Ranges are ordered by start position, with the longest first on ties,
    so an enclosing range always precedes the ranges nested inside it.
    Comparing each candidate against only the most recently kept range is
    therefore sufficient: a candidate nested in any earlier kept range is
    also nested in (or equal to) the last one kept.
    """
    # (start ascending, length descending) — start - end == -(length)
    ordered = sorted(ranges, key=lambda rng: (rng.start, rng.start - rng.end))
    kept: list[T] = []
    for candidate in ordered:
        if kept:
            last = kept[-1]
            if last.start <= candidate.start and candidate.end <= last.end:
                continue  # fully nested in the last kept range — drop it
        kept.append(candidate)
    return kept
4 changes: 2 additions & 2 deletions codelimit/common/scope/scope_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,10 @@ def has_curly_suffix(tokens: list[Token], index):


def get_headers(
tokens: list[Token], expression: Expression, followed_by: Expression = None
tokens: list[Token], expression: Expression, followed_by: Expression = None, nested: bool = False
) -> list[Header]:
# expression = replace_string_literal_with_predicate(expression)
patterns = find_all(expression, tokens)
patterns = find_all(expression, tokens, nested=nested)
if followed_by:
patterns = [p for p in patterns if starts_with(followed_by, tokens[p.end:])]
result = []
Expand Down
4 changes: 4 additions & 0 deletions codelimit/common/token_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,7 @@ def sort_tokens(tokens: list[Token]) -> list[Token]:
result = sorted(tokens, key=lambda t: t.location.column)
result = sorted(result, key=lambda t: t.location.line)
return result


def token_string(tokens: list[Token], token_range: TokenRange) -> str:
    """Return the values of the tokens covered by *token_range*,
    joined by single spaces."""
    selected = tokens[token_range.start : token_range.end]
    return " ".join(token.value for token in selected)
1 change: 0 additions & 1 deletion codelimit/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ def _get_git_branch(path: Path) -> str | None:
return ref
try:
out = sh.git('-c', f'safe.directory={path.resolve()}', 'rev-parse', '--abbrev-ref', 'HEAD', _cwd=path)
print(out)
return out.strip()
except (sh.ErrorReturnCode, sh.CommandNotFound):
return None
Expand Down
2 changes: 1 addition & 1 deletion codelimit/languages/Java.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def extract_headers(self, tokens: list) -> list:
[Keyword('throws'), ZeroOrMore(And(Not(';'), Not('{'))), Symbol("{")]
)
]
)
, nested=True)
return filter_headers(headers, tokens)

def extract_blocks(self, tokens: list, headers: list) -> list:
Expand Down
6 changes: 3 additions & 3 deletions codelimit/languages/JavaScript.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def extract_headers(self, tokens: list[Token]) -> list[Header]:
functions = get_headers(
tokens,
[Optional(Keyword("function")), Name(), OneOrMore(Balanced("(", ")"))],
Symbol("{"),
Symbol("{"), nested=True
)
arrow_functions = get_headers(
tokens,
Expand All @@ -35,11 +35,11 @@ def extract_headers(self, tokens: list[Token]) -> list[Header]:
OneOrMore(Balanced("(", ")")),
Symbol("=>"),
],
Symbol("{"),
Symbol("{"), nested=True,
)
return functions + arrow_functions

def extract_blocks(
self, tokens: list[Token], headers: list[Header]
self, tokens: list[Token], headers: list[Header]
) -> list[TokenRange]:
return get_blocks(tokens, "{", "}")
Loading