Skip to content

Commit 4925d40

Browse files
Track CREATE TRIGGER body in top-level statement splitter
SQLite's parser treats semicolons inside a ``BEGIN..END`` trigger body as inner-statement terminators that do NOT close the outer DDL (parse.y::trigger_cmd_list). The splitter previously had no awareness of trigger-body block scope; a CREATE TRIGGER with embedded semicolons was split into multiple pieces, the bare ``END`` token at the tail then matched the COMMIT/END branch in ``_update_tx_flags_from_sql``, and the transaction tracker desynced from server state. Add a small state machine that recognises ``CREATE [TEMP|TEMPORARY] TRIGGER ... BEGIN`` and tracks ``BEGIN..END`` depth ONLY inside that scope. Outside trigger mode, a bare ``BEGIN`` is still transaction-control and ``;`` still splits — so multi-statement batches like ``BEGIN; INSERT; COMMIT;`` are unaffected. Inner ``BEGIN`` / ``END`` keywords inside string literals, comments, or square / double-quote / backtick identifiers do not flip the mode (the existing quote/comment scanner takes precedence). Limit the fix to triggers; CREATE VIEW with inner CTE chains uses parentheses, not ``BEGIN..END``, so over-generalising would risk false positives.
1 parent 6ce43c0 commit 4925d40

2 files changed

Lines changed: 291 additions & 1 deletion

File tree

src/dqliteclient/connection.py

Lines changed: 175 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,13 +127,29 @@ def _split_top_level_statements(sql: str) -> list[str]:
127127
* ``--`` to end-of-line — line comment
128128
* ``/* ... */`` — block comment
129129
130+
Trigger-body block scope (``parse.y::trigger_cmd_list``): SQLite
131+
treats ``;`` inside a ``CREATE [TEMP|TEMPORARY] TRIGGER ... BEGIN
132+
... END`` as inner-statement terminators that do NOT close the
133+
outer DDL. Track ``BEGIN..END`` depth ONLY after seeing the
134+
trigger preamble — a bare ``BEGIN`` is transaction-control and
135+
must still split.
136+
130137
Returns whitespace-stripped non-empty pieces. Keep this small and
131138
self-contained — adding a real tokeniser is out of scope.
132139
"""
133140
out: list[str] = []
134141
start = 0
135142
i = 0
136143
n = len(sql)
144+
# ``in_trigger_body``: inside a ``CREATE TRIGGER ... BEGIN`` body.
145+
# ``trigger_depth``: inner BEGIN..END nesting count (for compound
146+
# SQLite trigger bodies, e.g. ``BEGIN ... BEGIN ... END; END;``).
147+
in_trigger_body = False
148+
trigger_depth = 0
149+
# Position in the current statement piece where we last checked
150+
# for a CREATE TRIGGER preamble — only check ahead of, not behind,
151+
# to keep this O(n) per piece.
152+
trigger_scan_start = 0
137153
while i < n:
138154
c = sql[i]
139155
if c == "'":
@@ -184,18 +200,176 @@ def _split_top_level_statements(sql: str) -> list[str]:
184200
end = sql.find("*/", i + 2)
185201
i = n if end == -1 else end + 2
186202
continue
187-
if c == ";":
203+
# Track CREATE TRIGGER ... BEGIN entry and BEGIN..END nesting
204+
# so an inner ``;`` inside a trigger body does not close the
205+
# outer DDL. Only enter trigger-body mode after recognising
206+
# the full preamble; a bare ``BEGIN`` (transaction-control)
207+
# outside trigger mode must still permit ``;`` to split.
208+
if c.isalpha() and (i == 0 or not _is_word_char(sql[i - 1])):
209+
kw_end = i
210+
while kw_end < n and _is_word_char(sql[kw_end]):
211+
kw_end += 1
212+
kw = sql[i:kw_end].upper()
213+
if not in_trigger_body:
214+
# Look for ``CREATE TRIGGER ... BEGIN`` with optional
215+
# ``TEMP`` / ``TEMPORARY`` between CREATE and TRIGGER.
216+
if kw == "CREATE" and i >= trigger_scan_start:
217+
j = _scan_for_trigger_begin(sql, kw_end, n)
218+
if j > 0:
219+
in_trigger_body = True
220+
trigger_depth = 1
221+
i = j
222+
continue
223+
else:
224+
# Inside trigger body — track nested BEGIN..END so a
225+
# compound trigger (BEGIN ... BEGIN ... END; END;)
226+
# closes correctly.
227+
if kw == "BEGIN":
228+
trigger_depth += 1
229+
i = kw_end
230+
continue
231+
if kw == "END":
232+
trigger_depth -= 1
233+
if trigger_depth == 0:
234+
in_trigger_body = False
235+
i = kw_end
236+
continue
237+
i = kw_end
238+
continue
239+
if c == ";" and not in_trigger_body:
188240
piece = sql[start:i].strip()
189241
if piece:
190242
out.append(piece)
191243
start = i + 1
244+
trigger_scan_start = start
245+
# When inside a trigger body, ``;`` is an inner-statement
246+
# terminator and does not split the outer DDL.
192247
i += 1
193248
tail = sql[start:].strip()
194249
if tail:
195250
out.append(tail)
196251
return out
197252

198253

254+
def _is_word_char(c: str) -> bool:
255+
"""True if ``c`` is part of a SQL keyword/identifier word."""
256+
return c.isalnum() or c == "_"
257+
258+
259+
def _scan_for_trigger_begin(sql: str, after_create: int, n: int) -> int:
260+
"""Look ahead from just after ``CREATE`` for the trigger preamble
261+
``[TEMP|TEMPORARY] TRIGGER ... BEGIN`` and return the index just
262+
past the ``BEGIN`` keyword on success, or 0 (not a trigger).
263+
264+
Skips over quoted identifiers, comments, and parenthesised
265+
sub-expressions (the ``WHEN (...)`` clause). Stops at any ``;``
266+
or end-of-input. Symmetric with the splitter's quote/comment
267+
handling so a ``CREATE TRIGGER`` inside a string literal is not
268+
matched (the outer splitter has already advanced past those
269+
constructs by the time the alpha-token branch fires).
270+
"""
271+
i = after_create
272+
# Optional whitespace, then optional TEMP/TEMPORARY, then required
273+
# TRIGGER keyword.
274+
while i < n and sql[i].isspace():
275+
i += 1
276+
if i >= n:
277+
return 0
278+
j = i
279+
while j < n and _is_word_char(sql[j]):
280+
j += 1
281+
word = sql[i:j].upper()
282+
if word in ("TEMP", "TEMPORARY"):
283+
i = j
284+
while i < n and sql[i].isspace():
285+
i += 1
286+
j = i
287+
while j < n and _is_word_char(sql[j]):
288+
j += 1
289+
word = sql[i:j].upper()
290+
if word != "TRIGGER":
291+
return 0
292+
i = j
293+
# Now scan forward for the next standalone BEGIN at the same
294+
# nesting level, respecting quotes / comments / parens.
295+
paren_depth = 0
296+
while i < n:
297+
c = sql[i]
298+
if c == "'":
299+
i += 1
300+
while i < n:
301+
if sql[i] == "'":
302+
if i + 1 < n and sql[i + 1] == "'":
303+
i += 2
304+
continue
305+
i += 1
306+
break
307+
i += 1
308+
continue
309+
if c == '"':
310+
i += 1
311+
while i < n:
312+
if sql[i] == '"':
313+
if i + 1 < n and sql[i + 1] == '"':
314+
i += 2
315+
continue
316+
i += 1
317+
break
318+
i += 1
319+
continue
320+
if c == "[":
321+
i += 1
322+
while i < n and sql[i] != "]":
323+
i += 1
324+
if i < n:
325+
i += 1
326+
continue
327+
if c == "`":
328+
i += 1
329+
while i < n:
330+
if sql[i] == "`":
331+
if i + 1 < n and sql[i + 1] == "`":
332+
i += 2
333+
continue
334+
i += 1
335+
break
336+
i += 1
337+
continue
338+
if c == "-" and i + 1 < n and sql[i + 1] == "-":
339+
nl = sql.find("\n", i + 2)
340+
i = n if nl == -1 else nl + 1
341+
continue
342+
if c == "/" and i + 1 < n and sql[i + 1] == "*":
343+
end = sql.find("*/", i + 2)
344+
i = n if end == -1 else end + 2
345+
continue
346+
if c == "(":
347+
paren_depth += 1
348+
i += 1
349+
continue
350+
if c == ")":
351+
if paren_depth > 0:
352+
paren_depth -= 1
353+
i += 1
354+
continue
355+
if c == ";":
356+
# Reached the end of the would-be CREATE TRIGGER without a
357+
# BEGIN — not a trigger-body DDL after all (could be a
358+
# ``CREATE TRIGGER ... INSERT`` short-form trigger which
359+
# sqlite also supports without BEGIN..END). Bail out.
360+
return 0
361+
if paren_depth == 0 and c.isalpha() and (i == 0 or not _is_word_char(sql[i - 1])):
362+
j = i
363+
while j < n and _is_word_char(sql[j]):
364+
j += 1
365+
if sql[i:j].upper() == "BEGIN":
366+
return j
367+
i = j
368+
continue
369+
i += 1
370+
return 0
371+
372+
199373
# Fixed-order tuple instead of a frozenset: iteration order is
200374
# hash-seeded for frozensets, which would let a future verb that shares
201375
# a prefix with an existing one (e.g. ``BEGINEXCLUSIVE``) produce
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
"""Pin: ``_split_top_level_statements`` keeps a CREATE TRIGGER body
2+
together as one statement.
3+
4+
SQLite's parser treats ``;`` inside a ``BEGIN..END`` trigger body
5+
as inner-statement terminators that do NOT close the outer DDL
6+
(``parse.y::trigger_cmd_list``). The splitter previously had no
7+
awareness of trigger-body block scope, so a trigger DDL with
8+
embedded semicolons was split into multiple pieces — and the bare
9+
``END`` token at the end then matched the COMMIT/END branch in
10+
``_update_tx_flags_from_sql``, corrupting the transaction tracker.
11+
12+
Scope of the fix is restricted to triggers: a bare ``BEGIN`` is
13+
transaction-control (``BEGIN``, ``BEGIN TRANSACTION``,
14+
``BEGIN DEFERRED/IMMEDIATE/EXCLUSIVE``) and must still split.
15+
Trigger-body mode is entered only after the lexer recognises
16+
``CREATE [TEMP|TEMPORARY] TRIGGER ... BEGIN``.
17+
"""
18+
19+
from __future__ import annotations
20+
21+
from dqliteclient.connection import _split_top_level_statements
22+
23+
24+
class TestCreateTriggerBodyKeptTogether:
25+
def test_basic_trigger_body(self) -> None:
26+
sql = (
27+
"CREATE TRIGGER aud AFTER INSERT ON x BEGIN\n"
28+
" UPDATE y SET v=1 WHERE id=NEW.id;\n"
29+
" DELETE FROM z WHERE id=NEW.id;\n"
30+
"END;"
31+
)
32+
pieces = _split_top_level_statements(sql)
33+
assert len(pieces) == 1
34+
assert pieces[0].upper().startswith("CREATE TRIGGER")
35+
assert pieces[0].rstrip().upper().endswith("END")
36+
37+
def test_temp_trigger(self) -> None:
38+
sql = "CREATE TEMP TRIGGER t AFTER INSERT ON x BEGIN\n UPDATE y SET v=1;\nEND;"
39+
pieces = _split_top_level_statements(sql)
40+
assert len(pieces) == 1
41+
42+
def test_temporary_trigger(self) -> None:
43+
sql = "CREATE TEMPORARY TRIGGER t AFTER INSERT ON x BEGIN\n UPDATE y SET v=1;\nEND;"
44+
pieces = _split_top_level_statements(sql)
45+
assert len(pieces) == 1
46+
47+
def test_instead_of_trigger(self) -> None:
48+
sql = (
49+
"CREATE TRIGGER t INSTEAD OF INSERT ON v BEGIN\n"
50+
" INSERT INTO base VALUES (NEW.x);\n"
51+
" UPDATE other SET y=1;\n"
52+
"END;"
53+
)
54+
pieces = _split_top_level_statements(sql)
55+
assert len(pieces) == 1
56+
57+
def test_trigger_followed_by_other_statement(self) -> None:
58+
"""The next statement after the trigger body's terminating ``;``
59+
must split off correctly."""
60+
sql = (
61+
"CREATE TRIGGER aud AFTER INSERT ON x BEGIN\n"
62+
" UPDATE y SET v=1;\n"
63+
"END;\n"
64+
"INSERT INTO log VALUES (1)"
65+
)
66+
pieces = _split_top_level_statements(sql)
67+
assert len(pieces) == 2
68+
assert pieces[0].upper().startswith("CREATE TRIGGER")
69+
assert pieces[1].upper().startswith("INSERT")
70+
71+
72+
class TestRegularBeginEndStillSplits:
73+
def test_bare_begin_commit_still_split(self) -> None:
74+
"""Top-level ``BEGIN`` (transaction-control, NOT trigger body)
75+
must still split on ``;`` boundaries — otherwise multi-
76+
statement batches like ``BEGIN; INSERT; COMMIT;`` would be
77+
glued together."""
78+
sql = "BEGIN; INSERT INTO t VALUES (1); COMMIT;"
79+
pieces = _split_top_level_statements(sql)
80+
assert len(pieces) == 3
81+
assert pieces[0].upper() == "BEGIN"
82+
assert pieces[1].upper().startswith("INSERT")
83+
assert pieces[2].upper() == "COMMIT"
84+
85+
def test_begin_transaction_still_splits(self) -> None:
86+
sql = "BEGIN TRANSACTION; SELECT 1; COMMIT;"
87+
pieces = _split_top_level_statements(sql)
88+
assert len(pieces) == 3
89+
90+
def test_begin_immediate_still_splits(self) -> None:
91+
sql = "BEGIN IMMEDIATE; INSERT INTO t VALUES (1); COMMIT;"
92+
pieces = _split_top_level_statements(sql)
93+
assert len(pieces) == 3
94+
95+
96+
class TestTriggerKeywordsInsideQuotedContextDoNotFlipMode:
97+
def test_string_literal_with_create_trigger_text_does_not_enter_trigger_mode(self) -> None:
98+
sql = "INSERT INTO t VALUES ('CREATE TRIGGER a AFTER INSERT BEGIN'); SELECT 1"
99+
pieces = _split_top_level_statements(sql)
100+
# The string literal contents are NOT parsed; the splitter
101+
# sees an INSERT and a SELECT.
102+
assert len(pieces) == 2
103+
assert pieces[0].upper().startswith("INSERT")
104+
assert pieces[1].upper().startswith("SELECT")
105+
106+
def test_comment_with_create_trigger_text_does_not_enter_trigger_mode(self) -> None:
107+
sql = "/* CREATE TRIGGER a AFTER INSERT BEGIN */ SELECT 1; SELECT 2"
108+
pieces = _split_top_level_statements(sql)
109+
assert len(pieces) == 2
110+
111+
def test_keyword_substring_does_not_match(self) -> None:
112+
"""A column or table whose name STARTS with ``trigger`` must
113+
not flip mode."""
114+
sql = "INSERT INTO triggers VALUES (1); SELECT 1"
115+
pieces = _split_top_level_statements(sql)
116+
assert len(pieces) == 2

0 commit comments

Comments
 (0)