-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathChunkingResultExtensions.cs
More file actions
50 lines (43 loc) · 1.72 KB
/
ChunkingResultExtensions.cs
File metadata and controls
50 lines (43 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
using System.Security.Cryptography;
using System.Text;
using EssentialCSharp.Chat.Common.Models;
namespace EssentialCSharp.Chat.Common.Services;
/// <summary>
/// Extension methods that convert a <see cref="FileChunkingResult"/> into
/// <see cref="BookContentChunk"/> instances.
/// </summary>
public static partial class ChunkingResultExtensions
{
    /// <summary>
    /// Projects every chunk of <paramref name="result"/> into a <see cref="BookContentChunk"/>.
    /// </summary>
    /// <param name="result">The chunking result to convert.</param>
    /// <returns>
    /// A list with one <see cref="BookContentChunk"/> per element of <c>result.Chunks</c>, in
    /// enumeration order. Each item carries the file name, heading, text, zero-based index,
    /// the chapter number derived from the file name (or <see langword="null"/>), and a
    /// SHA-256 hash of the chunk text.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is <see langword="null"/>.</exception>
    public static List<BookContentChunk> ToBookContentChunks(this FileChunkingResult result)
    {
        // Fail at the API boundary rather than with a NullReferenceException further down.
        ArgumentNullException.ThrowIfNull(result);

        // The chapter number depends only on the file name, so it is computed once for all chunks.
        int? chapterNumber = ExtractChapterNumber(result.FileName);

        return result.Chunks
            .Select((markdownChunk, index) => new BookContentChunk
            {
                // File name plus zero-based position distinguishes chunks of the same file.
                Id = $"{result.FileName}_{index}",
                FileName = result.FileName,
                Heading = markdownChunk.Heading,
                ChunkText = markdownChunk.ChunkText,
                ChapterNumber = chapterNumber,
                ChunkIndex = index,
                ContentHash = ComputeSha256Hash(markdownChunk.ChunkText)
            })
            .ToList();
    }

    /// <summary>
    /// Extracts the chapter number from the first "Chapter" + two-digit sequence in
    /// <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">The file name to inspect.</param>
    /// <returns>
    /// The parsed number (for example "Chapter01.md" yields 1), or <see langword="null"/>
    /// when the name contains no such sequence or the digits cannot be parsed.
    /// </returns>
    private static int? ExtractChapterNumber(string fileName)
    {
        var match = ChapterNumberRegex().Match(fileName);
        if (!match.Success)
        {
            return null;
        }

        // The capture is exactly two digit characters, so no sign or whitespace handling is
        // needed; the invariant culture keeps parsing independent of the current thread culture.
        if (int.TryParse(
                match.Groups["ChapterNumber"].Value,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out int chapterNumber))
        {
            return chapterNumber;
        }

        return null;
    }

    /// <summary>
    /// Computes the SHA-256 digest of the UTF-8 encoding of <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to hash.</param>
    /// <returns>The digest as a lowercase hexadecimal string.</returns>
    private static string ComputeSha256Hash(string text)
    {
        byte[] digest = SHA256.HashData(Encoding.UTF8.GetBytes(text));
        return Convert.ToHexStringLower(digest);
    }

    /// <summary>
    /// Source-generated regex matching the literal "Chapter" followed by two digits, which are
    /// captured in the named group "ChapterNumber".
    /// </summary>
    [System.Text.RegularExpressions.GeneratedRegex(@"Chapter(?<ChapterNumber>\d{2})")]
    private static partial System.Text.RegularExpressions.Regex ChapterNumberRegex();
}