Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Checkout sources
uses: actions/checkout@v4

- name: Install uv
- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Install dependencies
Expand Down
3 changes: 0 additions & 3 deletions codelimit/common/TokenRange.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ def __str__(self):
def __repr__(self):
return self.__str__()

def token_string(self, tokens: list[Token]):
return " ".join([t.value for t in tokens[self.start:self.end]])

def lt(self, other: TokenRange):
return self.start < other.start

Expand Down
15 changes: 11 additions & 4 deletions codelimit/common/gsm/Pattern.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from copy import deepcopy

from codelimit.common.TokenRange import TokenRange
from codelimit.common.gsm.automata.DFA import DFA
from codelimit.common.gsm.automata.State import State
from codelimit.common.gsm.predicate.Predicate import Predicate
from codelimit.common.token_matching.predicate.Balanced import Balanced


class Pattern:
def __init__(self, start: int, automata: DFA):
self.start = start
self.end = start
class Pattern(TokenRange):
def __init__(self, automata: DFA, start: int = 0):
super().__init__(start, start)
self.automata = automata
self.state = automata.start
self.tokens: list = []
Expand All @@ -30,7 +31,13 @@ def consume(self, item) -> State | None:
return self.state if found_transition else None

def is_accepting(self):
for p in self.predicate_map.values():
if isinstance(p, Balanced) and not p.depth == 0:
return False
return self.automata.is_accepting(self.state)

def token_string(self):
return " ".join([t.value for t in self.tokens])

def __str__(self):
return f'Pattern(start={self.start}, end={self.end}, tokens=[{self.token_string()}])'
30 changes: 14 additions & 16 deletions codelimit/common/gsm/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
)
from codelimit.common.gsm.Pattern import Pattern
from codelimit.common.gsm.operator.Operator import Operator
from codelimit.common.gsm.utils import render_automata
from codelimit.common.gsm.utils import render_automata, prune_nested

T = TypeVar("T")


def match(expression: Expression, sequence: list) -> Pattern | None:
nfa = expression_to_nfa(expression)
dfa = nfa_to_dfa(nfa)
pattern = Pattern(0, dfa)
pattern = Pattern(dfa)
for item in sequence:
next_state = pattern.consume(item)
if not next_state:
Expand All @@ -32,7 +32,7 @@ def match(expression: Expression, sequence: list) -> Pattern | None:
def starts_with(expression: Expression, sequence: list) -> Pattern | None:
nfa = expression_to_nfa(expression)
dfa = nfa_to_dfa(nfa)
pattern = Pattern(0, dfa)
pattern = Pattern(dfa)
for item in sequence:
next_state = pattern.consume(item)
if not next_state:
Expand All @@ -50,31 +50,29 @@ class FindState:
next_state_patterns: list[Pattern]


def find_all(expression: Expression, sequence: list) -> list[Pattern]:
def find_all(expression: Expression, sequence: list, nested: bool = False) -> list[Pattern]:
dfa = nfa_to_dfa(expression_to_nfa(expression))
fs = FindState([], [], [])
for idx, item in enumerate(sequence):
fs.active_patterns.append(Pattern(idx, dfa))
fs.active_patterns.append(Pattern(dfa, idx))
fs.next_state_patterns = []
for pattern in fs.active_patterns:
if fs.matches and pattern.start < fs.matches[-1].end:
continue
if len(pattern.state.transition) == 0 and pattern.is_accepting():
pattern.end = idx
fs.matches.append(pattern)
continue
if pattern.consume(item):
fs.next_state_patterns.append(pattern)
else:
if pattern.is_accepting():
pattern.end = idx
elif pattern.is_accepting():
pattern.end = idx
if not fs.matches or fs.matches[-1].end < pattern.end:
fs.matches.append(pattern)
fs.active_patterns = fs.next_state_patterns
for pattern in fs.active_patterns:
if pattern.is_accepting():
pattern.end = len(sequence)
fs.matches.append(pattern)
return fs.matches
if not fs.matches or fs.matches[-1].end < pattern.end:
fs.matches.append(pattern)
if nested:
return fs.matches
else:
return prune_nested(fs.matches)


def nfa_match(expression: Expression, sequence: list):
Expand Down
19 changes: 19 additions & 0 deletions codelimit/common/gsm/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import subprocess
import tempfile
from typing import TypeVar

from codelimit.common.TokenRange import TokenRange
from codelimit.common.gsm.automata.Automata import Automata
from codelimit.common.gsm.automata.State import State

Expand Down Expand Up @@ -58,3 +60,20 @@ def to_dot(automata: Automata):
result += state_transitions_to_dot(automata, automata.start)
result += "}"
return result


T = TypeVar("T", bound=TokenRange)


def prune_nested(ranges: list[T]) -> list[T]:
    """Return *ranges* with every range that is fully contained in another
    range removed.

    Ranges are ordered by start position, with the longest first on ties,
    so an enclosing range always precedes the ranges nested inside it.
    Comparing each candidate against only the most recently kept range is
    therefore sufficient: a candidate nested in any earlier kept range is
    also nested in (or equal to) the last one kept.
    """
    # (start ascending, length descending) — start - end == -(length)
    ordered = sorted(ranges, key=lambda rng: (rng.start, rng.start - rng.end))
    kept: list[T] = []
    for candidate in ordered:
        if kept:
            last = kept[-1]
            if last.start <= candidate.start and candidate.end <= last.end:
                continue  # fully nested in the last kept range — drop it
        kept.append(candidate)
    return kept
4 changes: 2 additions & 2 deletions codelimit/common/scope/scope_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,10 @@ def has_curly_suffix(tokens: list[Token], index):


def get_headers(
tokens: list[Token], expression: Expression, followed_by: Expression = None
tokens: list[Token], expression: Expression, followed_by: Expression = None, nested: bool = False
) -> list[Header]:
# expression = replace_string_literal_with_predicate(expression)
patterns = find_all(expression, tokens)
patterns = find_all(expression, tokens, nested=nested)
if followed_by:
patterns = [p for p in patterns if starts_with(followed_by, tokens[p.end:])]
result = []
Expand Down
4 changes: 4 additions & 0 deletions codelimit/common/token_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,7 @@ def sort_tokens(tokens: list[Token]) -> list[Token]:
result = sorted(tokens, key=lambda t: t.location.column)
result = sorted(result, key=lambda t: t.location.line)
return result


def token_string(tokens: list[Token], token_range: TokenRange) -> str:
    """Return the values of the tokens covered by *token_range*,
    joined by single spaces."""
    selected = tokens[token_range.start : token_range.end]
    return " ".join(token.value for token in selected)
1 change: 0 additions & 1 deletion codelimit/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ def _get_git_branch(path: Path) -> str | None:
return ref
try:
out = sh.git('-c', f'safe.directory={path.resolve()}', 'rev-parse', '--abbrev-ref', 'HEAD', _cwd=path)
print(out)
return out.strip()
except (sh.ErrorReturnCode, sh.CommandNotFound):
return None
Expand Down
2 changes: 1 addition & 1 deletion codelimit/languages/Java.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def extract_headers(self, tokens: list) -> list:
[Keyword('throws'), ZeroOrMore(And(Not(';'), Not('{'))), Symbol("{")]
)
]
)
, nested=True)
return filter_headers(headers, tokens)

def extract_blocks(self, tokens: list, headers: list) -> list:
Expand Down
6 changes: 3 additions & 3 deletions codelimit/languages/JavaScript.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def extract_headers(self, tokens: list[Token]) -> list[Header]:
functions = get_headers(
tokens,
[Optional(Keyword("function")), Name(), OneOrMore(Balanced("(", ")"))],
Symbol("{"),
Symbol("{"), nested=True
)
arrow_functions = get_headers(
tokens,
Expand All @@ -35,11 +35,11 @@ def extract_headers(self, tokens: list[Token]) -> list[Header]:
OneOrMore(Balanced("(", ")")),
Symbol("=>"),
],
Symbol("{"),
Symbol("{"), nested=True,
)
return functions + arrow_functions

def extract_blocks(
self, tokens: list[Token], headers: list[Header]
self, tokens: list[Token], headers: list[Header]
) -> list[TokenRange]:
return get_blocks(tokens, "{", "}")
Loading