Skip to content

Commit b013f37

Browse files
github-actions[bot], Copilot, and dsyme
authored
[Repo Assist] perf: compile regex instances to module-level singletons (#1162)
* perf: compile regex instances to module-level singletons

  Five locations were creating a new uncompiled Regex on every call:

  - PageContentList.mkPageContentMenu: new Regex per page rendered
  - Formatting.fs (search index): Regex.Replace (static, uncompiled) per HTML page
  - HtmlFormatting.formatAnchor: Regex.Matches per heading processed
  - Menu.snakeCase: Regex.Replace (static, uncompiled) per menu item
  - LlmsTxt (collapseBlankLines, normaliseTitle): two Regex.Replace per page entry

  Each is now a module-level let binding with RegexOptions.Compiled, so the pattern is compiled once at startup and reused for the lifetime of the process.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* ci: trigger checks

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Don Syme <dsyme@users.noreply.github.com>
1 parent 4fb7fc0 commit b013f37

File tree

6 files changed

+27
-9
lines changed

6 files changed

+27
-9
lines changed

RELEASE_NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## [Unreleased]
44

55
### Changed
6+
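* Compile `Regex` instances to module-level singletons (with `RegexOptions.Compiled`) in `PageContentList`, `HtmlFormatting`, `Formatting`, `Menu`, and `LlmsTxt`. Previously each call either constructed a new, uncompiled `Regex` or passed the pattern string to a static `Regex` method (once per page heading, once per HTML page, once per menu item, once per llms.txt entry), incurring repeated pattern-construction or cache-lookup overhead on top of interpreted matching. The patterns are now compiled once at module load and reused across all calls. Note that the HTML tag-stripping pattern in `Formatting` now also sets `RegexOptions.Singleline`, so tags that span line breaks are stripped as well.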
67
* Replace deprecated `System.Net.WebClient` with `System.Net.Http.HttpClient` in the image downloader used by `--saveimages`. Removes the `#nowarn "44"` suppression.
78
* Bump `Newtonsoft.Json` transitive-dependency pin from 13.0.3 to 13.0.4.
89
* Bump `System.Memory` transitive-dependency pin from 4.5.5 to 4.6.3.0

src/FSharp.Formatting.Common/Menu.fs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ type MenuItem =
1212
IsActive: bool }
1313

1414
/// Converts a display string to a snake_case HTML id attribute value
15+
let private snakeCaseRegex =
16+
System.Text.RegularExpressions.Regex("[A-Z]", System.Text.RegularExpressions.RegexOptions.Compiled)
17+
1518
let private snakeCase (v: string) =
16-
System.Text.RegularExpressions.Regex.Replace(v, "[A-Z]", "$0").Replace(" ", "_").ToLower()
19+
snakeCaseRegex.Replace(v, "$0").Replace(" ", "_").ToLower()
1720

1821
/// Renders an HTML navigation menu for the given header and items using template files in `input`
1922
let createMenu (input: string) (isCategoryActive: bool) (header: string) (items: MenuItem list) : string =

src/FSharp.Formatting.Common/PageContentList.fs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@ let EmptyContent = "<div class=\"empty\"></div>"
1313
/// We process the html to collect the table of content.
1414
/// We can't use the doc.MarkdownDocument because we cannot easily get the generated id values.
1515
/// It is safer to parse the html.
16-
let mkPageContentMenu (html: string) =
17-
let headingLinkPattern = "<h(\\d)><a [^>]*href=\"([^\"]+)\">([^<]+)</a></h\\d>"
1816
19-
let regex = Regex(headingLinkPattern)
17+
// Compiled once at module load; reused across all pages.
18+
let private headingLinkRegex = Regex("<h(\\d)><a [^>]*href=\"([^\"]+)\">([^<]+)</a></h\\d>", RegexOptions.Compiled)
19+
20+
let mkPageContentMenu (html: string) =
2021

2122
let extractHeadingLinks (matchItem: Match) =
2223
let level = int matchItem.Groups.[1].Value
@@ -26,7 +27,7 @@ let mkPageContentMenu (html: string) =
2627
linkText, li [ Class $"level-%i{level}" ] [ a [ Href href ] [ !!linkText ] ]
2728

2829
let headingTexts, listItems =
29-
regex.Matches(html)
30+
headingLinkRegex.Matches(html)
3031
|> Seq.cast<Match>
3132
|> Seq.map extractHeadingLinks
3233
|> Seq.toList

src/FSharp.Formatting.Literate/Formatting.fs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ open FSharp.Formatting.Templating
1313
/// substitution key–value pairs used by the templating engine to populate page templates.
1414
module internal Formatting =
1515

16+
// Compiled once at module load; reused for every HTML page's search-index text extraction.
17+
let private htmlTagRegex = Regex("<.*?>", RegexOptions.Compiled ||| RegexOptions.Singleline)
18+
1619
/// Format document with the specified output kind
1720
let format (doc: MarkdownDocument) generateAnchors outputKind substitutions crefResolver mdlinkResolver =
1821
match outputKind with
@@ -290,7 +293,7 @@ module internal Formatting =
290293
(match ctx.OutputKind with
291294
| OutputKind.Html ->
292295
// Strip the html tags
293-
let fullText = Regex.Replace(formattedDocument, "<.*?>", "")
296+
let fullText = htmlTagRegex.Replace(formattedDocument, "")
294297
Some(IndexText(fullText, headingTexts))
295298
| _ -> None)
296299

src/FSharp.Formatting.Markdown/HtmlFormatting.fs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,12 @@ let rec internal formatSpan (ctx: FormattingContext) span =
129129
and internal formatSpans ctx = List.iter (formatSpan ctx)
130130

131131
/// generate anchor name from Markdown text
132+
// Compiled once at module load; reused for every heading anchor generated.
133+
let private wordRegex = Regex(@"\w+", RegexOptions.Compiled)
134+
132135
let internal formatAnchor (ctx: FormattingContext) (spans: MarkdownSpans) =
133136
let extractWords (text: string) =
134-
Regex.Matches(text, @"\w+") |> Seq.cast<Match> |> Seq.map (fun m -> m.Value)
137+
wordRegex.Matches(text) |> Seq.cast<Match> |> Seq.map (fun m -> m.Value)
135138

136139
let rec gather (span: MarkdownSpan) : string seq =
137140
seq {

src/fsdocs-tool/BuildCommand.fs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,6 +1317,13 @@ module Serve =
13171317
/// Helpers for generating llms.txt and llms-full.txt content.
13181318
module internal LlmsTxt =
13191319

1320+
// Compiled once at module load; reused across all llms.txt page entries.
1321+
let private multipleNewlinesRegex =
1322+
System.Text.RegularExpressions.Regex(@"\n{3,}", System.Text.RegularExpressions.RegexOptions.Compiled)
1323+
1324+
let private whitespaceRunRegex =
1325+
System.Text.RegularExpressions.Regex(@"\s+", System.Text.RegularExpressions.RegexOptions.Compiled)
1326+
13201327
/// Decode HTML entities (e.g. &quot; → ", &gt; → >) in a string.
13211328
let private decodeHtml (s: string) = System.Net.WebUtility.HtmlDecode(s)
13221329

@@ -1336,11 +1343,11 @@ module internal LlmsTxt =
13361343

13371344
/// Collapse three or more consecutive newlines into at most two.
13381345
let private collapseBlankLines (s: string) =
1339-
System.Text.RegularExpressions.Regex.Replace(s, @"\n{3,}", "\n\n")
1346+
multipleNewlinesRegex.Replace(s, "\n\n")
13401347

13411348
/// Normalise a title: trim and collapse internal whitespace/newlines to a single space.
13421349
let private normaliseTitle (s: string) =
1343-
System.Text.RegularExpressions.Regex.Replace(s.Trim(), @"\s+", " ")
1350+
whitespaceRunRegex.Replace(s.Trim(), " ")
13441351

13451352
/// Decode HTML entities and remove --eval noise from content.
13461353
let private cleanContent (s: string) =

0 commit comments

Comments
 (0)