-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathBookContentChunk.cs
More file actions
63 lines (54 loc) · 2.36 KB
/
BookContentChunk.cs
File metadata and controls
63 lines (54 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
using Microsoft.Extensions.VectorData;
namespace EssentialCSharp.Chat.Common.Models;
/// <summary>
/// Represents a single chunk of book content stored as one record for vector search.
/// </summary>
public sealed class BookContentChunk
{
    /// <summary>
    /// Unique identifier for the chunk; marked as the vector store key.
    /// </summary>
    [VectorStoreKey]
    public string Id { get; set; } = string.Empty;

    /// <summary>
    /// Name of the original source file this chunk was taken from.
    /// </summary>
    [VectorStoreData]
    public string FileName { get; set; } = string.Empty;

    /// <summary>
    /// Heading or title of the markdown chunk.
    /// </summary>
    [VectorStoreData]
    public string Heading { get; set; } = string.Empty;

    /// <summary>
    /// The actual markdown content text for this chunk.
    /// </summary>
    [VectorStoreData]
    public string ChunkText { get; set; } = string.Empty;

    /// <summary>
    /// Chapter number extracted from the file name (e.g., "Chapter01.md" -> 1).
    /// </summary>
    /// <remarks>
    /// Nullable; presumably left <c>null</c> when the file name carries no chapter number
    /// (the extraction logic lives outside this class).
    /// </remarks>
    [VectorStoreData]
    public int? ChapterNumber { get; set; }

    /// <summary>
    /// Zero-based ordinal of this chunk within its source file.
    /// Together with <see cref="FileName"/>, forms the basis for the deterministic <see cref="Id"/>.
    /// </summary>
    [VectorStoreData]
    public int ChunkIndex { get; set; }

    /// <summary>
    /// SHA256 hash of the chunk content, used for change detection.
    /// </summary>
    [VectorStoreData]
    public string ContentHash { get; set; } = string.Empty;

    /// <summary>
    /// Vector embedding for <see cref="ChunkText"/>; generated by the embedding service
    /// and <c>null</c> by default until it is assigned.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Uses 1536 dimensions for Azure OpenAI text-embedding-3-small.
    /// </para>
    /// <para>
    /// Note: the HNSW index in the Semantic Kernel PostgreSQL connector supports a maximum of
    /// 2000 dimensions. See https://github.com/pgvector/pgvector/issues/461
    /// </para>
    /// <para>
    /// Cosine similarity is used as the distance function because the embeddings come from
    /// text-embedding-3. See https://platform.openai.com/docs/guides/embeddings#which-distance-function-should-i-use
    /// </para>
    /// <para>
    /// Postgres supports only the HNSW index kind. See
    /// https://learn.microsoft.com/en-us/semantic-kernel/concepts/vector-store-connectors/out-of-the-box-connectors/postgres-connector?pivots=programming-language-csharp&amp;WT.mc_id=8B97120A00B57354
    /// </para>
    /// </remarks>
    [VectorStoreVector(Dimensions: 1536, DistanceFunction = DistanceFunction.CosineSimilarity, IndexKind = IndexKind.Hnsw)]
    public ReadOnlyMemory<float>? TextEmbedding { get; set; }
}