Skip to content

Commit 014c199

Browse files
authored
Fix nested comment parsing in literate scripts and add AddHtmlPrinter tests (#1138)
* Fix nested comment parsing in literate scripts and add AddHtmlPrinter tests
  - Fix ParseScript.fs: the comment block parser used `LastIndexOf("*)")` to detect comment endings, which incorrectly matched nested `(*** command ***)` markers inside markdown text (e.g. backtick-quoted examples of include-it or include-it-raw). This caused content after such references to be silently dropped from HTML output. Replaced with a nesting-aware scanner (`findOuterCommentEnd`) that properly tracks `(* ... *)` depth.
  - Add integration tests for:
    - Comment blocks containing nested `(*** ***)` markers
    - AddHtmlPrinter with include-it-raw
    - AddHtmlPrinter with CSS/JS resources
    - AddHtmlPrinter with base64 image pattern (as documented)
    - AddPrintTransformer chained with AddHtmlPrinter and include-it-raw
  - Fix missing `[<Test>]` attribute on the `Can include-output-and-it` test
* fix formatting
1 parent 4000a8a commit 014c199

3 files changed

Lines changed: 138 additions & 2 deletions

File tree

RELEASE_NOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
## [Unreleased]
44

55
### Fixed
6+
* Fix literate script comment parser prematurely closing `(**` blocks when the markdown text contained nested `(*** ... ***)` references (e.g. in backtick-quoted command examples), causing subsequent content to be silently dropped from HTML output.
7+
* Add missing `[<Test>]` attribute on `Can include-output-and-it` test so it is executed by the test runner.
68
* Add regression test confirming that types whose name matches their enclosing namespace are correctly included in generated API docs. [#944](https://github.com/fsprojects/FSharp.Formatting/issues/944)
79
* Fix crash (`failwith "tbd - IndirectImage"`) when `Markdown.ToMd` is called on a document containing reference-style images with bracket syntax. The indirect image is now serialised as `![alt](url)` when the reference is resolved, or in bracket notation when it is not. [#1094](https://github.com/fsprojects/FSharp.Formatting/pull/1094)
810
* Fix `Markdown.ToMd` serialising italic spans with asterisks incorrectly as bold spans. [#1102](https://github.com/fsprojects/FSharp.Formatting/pull/1102)

src/FSharp.Formatting.Literate/ParseScript.fs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,26 @@ module internal CodeBlockUtils =
5959
// * collectSnippet - we're in a normal F# code and we're waiting for a comment
6060
// (in both states, we also need to recognize (*** commands ***))
6161

62+
/// Find the index of the first `*)` that sits at comment-nesting depth 0,
/// i.e. the terminator of the comment whose text is being scanned.
///
/// Scanning starts at depth 0, so `comment` is expected NOT to include the
/// opening delimiter of the comment being closed (NOTE(review): confirm
/// against the callers in `collectComment`).
///
/// Nested `(* ... *)` pairs, including `(*** command ***)` markers quoted in
/// markdown text, raise and lower the depth and are skipped. The
/// three-character sequence `(*)` is skipped as a single unit: the F# lexer
/// reads it as the multiplication operator, not as a comment opener, so it
/// must not change the nesting depth.
///
/// Returns the zero-based index of the `*` of the closing `*)`, or -1 when no
/// depth-0 terminator exists in `comment`.
let private findOuterCommentEnd (comment: string) =
    let rec scan depth i =
        if i + 1 >= comment.Length then
            // Fewer than two characters remain, so no `*)` can start here.
            -1
        elif comment.[i] = '(' && comment.[i + 1] = '*' then
            if i + 2 < comment.Length && comment.[i + 2] = ')' then
                // `(*)` is an operator token: neither opens nor closes a comment.
                scan depth (i + 3)
            else
                // Nested comment opener: go one level deeper.
                scan (depth + 1) (i + 2)
        elif comment.[i] = '*' && comment.[i + 1] = ')' then
            // A closer either ends a nested comment or, at depth 0, is the answer.
            if depth > 0 then scan (depth - 1) (i + 2) else i
        else
            scan depth (i + 1)

    scan 0 0
76+
6277
/// Waiting for the end of a comment
6378
let rec private collectComment (comment: string) lines =
6479
seq {
6580
let findCommentEnd (comment: string) =
66-
let cend = comment.LastIndexOf("*)", StringComparison.OrdinalIgnoreCase)
81+
let cend = findOuterCommentEnd comment
6782

6883
if cend = -1 then
6984
failwith "A (* comment was not closed"
@@ -79,7 +94,7 @@ module internal CodeBlockUtils =
7994
yield! collectSnippet [] lines
8095

8196
| (ConcatenatedComments text) :: _ when
82-
comment.LastIndexOf("*)", StringComparison.Ordinal) <> -1
97+
findOuterCommentEnd comment >= 0
8398
&& text.Trim().StartsWith("//", StringComparison.Ordinal)
8499
->
85100
// Comment ended, but we found a code snippet starting with // comment

tests/FSharp.Literate.Tests/EvalTests.fs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,7 @@ let xxxx = 1+1
507507

508508
html1 |> shouldNotContainText "2000"
509509

510+
[<Test>]
510511
let ``Can include-output-and-it`` () =
511512
let content =
512513
"""
@@ -593,3 +594,121 @@ $$$
593594
pynb |> shouldContainText """\begin{equation}"""
594595
pynb |> shouldContainText """\end{equation}"""
595596
pynb |> shouldContainText """30001"""
597+
598+
/// Regression test for the literate comment parser: a `(**` markdown block
/// whose text quotes `(*** include-it ***)` / `(*** include-it-raw ***)` in
/// backticks, and which also contains a line starting with `//`, must be
/// rendered in full.
[<Test>]
599+
let ``Comment with nested (*** ***) and following // line is not truncated`` () =
600+
let content =
601+
"""
602+
(**
603+
Some text mentioning `(*** include-it ***)` or `(*** include-it-raw ***)`.
604+
605+
A paragraph after nested comment markers.
606+
607+
[lang=fsharp]
608+
// a comment inside an indented code block
609+
let x = 1
610+
*)
611+
let a = 1
612+
"""
613+
614+
// Parse only; no FSI evaluator is supplied for this script.
let doc = Literate.ParseScriptString(content, "." </> "A.fsx")
615+
616+
let html = Literate.ToHtml(doc)
617+
// The paragraph after the nested markers must not be truncated
618+
html |> shouldContainText "A paragraph after nested comment markers"
619+
// The quoted command name from the first paragraph must be present in the output.
html |> shouldContainText "include-it-raw"
620+
621+
/// Checks that a value formatted by a printer registered through
/// `fsi.AddHtmlPrinter` appears verbatim in the HTML output when the script
/// uses the `(*** include-it-raw ***)` command.
[<Test>]
622+
let ``Can include-it-raw with AddHtmlPrinter`` () =
623+
let content =
624+
"""
625+
type Html = Html of string
626+
fsi.AddHtmlPrinter(fun (Html h) -> seq [], h)
627+
Html "<div>RAW_HTML_OUTPUT</div>"
628+
(*** include-it-raw ***)
629+
"""
630+
631+
let fsie = getFsiEvaluator ()
632+
633+
// The script is parsed with the evaluator obtained above.
let doc1 = Literate.ParseScriptString(content, "." </> "A.fsx", fsiEvaluator = fsie)
634+
635+
let html1 = Literate.ToHtml(doc1)
636+
// The markup returned by the printer must appear with its tags intact.
html1 |> shouldContainText "<div>RAW_HTML_OUTPUT</div>"
637+
638+
/// Checks an `fsi.AddHtmlPrinter` callback that returns a non-empty sequence
/// of ("style", ...) and ("script", ...) pairs together with the HTML for the
/// value, rendered via `(*** include-it ***)`.
[<Test>]
639+
let ``AddHtmlPrinter with CSS and JS resources`` () =
640+
let content =
641+
"""
642+
type Widget = Widget of string
643+
fsi.AddHtmlPrinter(fun (Widget w) ->
644+
seq [
645+
"style", ".widget { color: red; }"
646+
"script", "console.log('widget loaded')"
647+
], sprintf "<span class='widget'>%s</span>" w)
648+
Widget "test-widget"
649+
(*** include-it ***)
650+
"""
651+
652+
let fsie = getFsiEvaluator ()
653+
654+
let doc1 = Literate.ParseScriptString(content, "." </> "A.fsx", fsiEvaluator = fsie)
655+
656+
let html1 = Literate.ToHtml(doc1)
657+
// Both the widget payload and the "style" text must appear in the output.
// NOTE(review): the "script" entry is not asserted by this test.
html1 |> shouldContainText "test-widget"
658+
html1 |> shouldContainText ".widget { color: red; }"
659+
660+
/// Checks an `fsi.AddHtmlPrinter` callback that emits an `<img>` tag whose
/// `src` is a base64 data URI built from a fixed three-byte array, rendered
/// via `(*** include-it ***)`.
[<Test>]
661+
let ``AddHtmlPrinter with base64 image pattern`` () =
662+
let content =
663+
"""
664+
type MyChart = { Data: int list }
665+
fsi.AddHtmlPrinter(fun (chart: MyChart) ->
666+
let b64 = System.Convert.ToBase64String([| 0uy; 1uy; 2uy |])
667+
seq [], sprintf "<img src=\"data:image/png;base64,%s\" />" b64)
668+
{ Data = [1;2;3] }
669+
(*** include-it ***)
670+
"""
671+
672+
let fsie = getFsiEvaluator ()
673+
674+
let doc1 = Literate.ParseScriptString(content, "." </> "A.fsx", fsiEvaluator = fsie)
675+
676+
let html1 = Literate.ToHtml(doc1)
677+
// Only the data-URI prefix and the tag opener are asserted, not the encoded payload.
html1 |> shouldContainText "data:image/png;base64,"
678+
html1 |> shouldContainText "<img"
679+
680+
/// Checks that HTML built by an `fsi.AddHtmlPrinter` callback (here a `<b>`
/// element wrapping the value) appears with its tags intact in the output
/// when the script uses `(*** include-it-raw ***)`.
[<Test>]
681+
let ``AddHtmlPrinter with include-it-raw emits unescaped HTML`` () =
682+
let content =
683+
"""
684+
type Tag = Tag of string
685+
fsi.AddHtmlPrinter(fun (Tag t) -> seq [], sprintf "<b>%s</b>" t)
686+
Tag "BOLDTEXT"
687+
(*** include-it-raw ***)
688+
"""
689+
690+
let fsie = getFsiEvaluator ()
691+
692+
let doc1 = Literate.ParseScriptString(content, "." </> "A.fsx", fsiEvaluator = fsie)
693+
694+
let html1 = Literate.ToHtml(doc1)
695+
// The angle brackets must appear literally, not as HTML entities.
html1 |> shouldContainText "<b>BOLDTEXT</b>"
696+
697+
/// Checks that a print transformer and an HTML printer compose: the script
/// registers `fsi.AddPrintTransformer` to turn a `Wrapped` value into a boxed
/// `Html` value, and `fsi.AddHtmlPrinter` to wrap `Html` content in `<em>`.
/// The `(*** include-it-raw ***)` output must contain the `<em>` markup.
[<Test>]
698+
let ``AddPrintTransformer chained with AddHtmlPrinter and include-it-raw`` () =
699+
let content =
700+
"""
701+
type Html = Html of string
702+
type Wrapped = Wrapped of string
703+
fsi.AddPrintTransformer(fun (Wrapped s) -> box (Html s))
704+
fsi.AddHtmlPrinter(fun (Html h) -> seq [], sprintf "<em>%s</em>" h)
705+
Wrapped "TRANSFORMED"
706+
(*** include-it-raw ***)
707+
"""
708+
709+
let fsie = getFsiEvaluator ()
710+
711+
let doc1 = Literate.ParseScriptString(content, "." </> "A.fsx", fsiEvaluator = fsie)
712+
713+
let html1 = Literate.ToHtml(doc1)
714+
// Passing requires the transformer's result to have gone through the HTML printer.
html1 |> shouldContainText "<em>TRANSFORMED</em>"

0 commit comments

Comments
 (0)