@@ -20,6 +20,7 @@ import (
2020 "fmt"
2121 "os"
2222 "path/filepath"
23+ "sort"
2324 "strings"
2425
2526 "github.com/cloudwego/abcoder/lang/log"
@@ -41,20 +42,33 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine {
4142 rel = filepath .Base (loc .URI .File ())
4243 }
4344 fileURI := string (loc .URI )
44- if c .cli == nil {
45- return uniast.FileLine {File : rel , Line : loc .Range .Start .Line + 1 }
46- }
47- f := c .cli .GetFile (loc .URI )
45+ filePath := loc .URI .File ()
46+
4847 text := ""
49- if f != nil {
50- text = f .Text
51- } else {
52- fd , err := os .ReadFile (loc .URI .File ())
48+ // 1. Try LSP client files
49+ if c .cli != nil {
50+ if f := c .cli .GetFile (loc .URI ); f != nil {
51+ text = f .Text
52+ }
53+ }
54+
55+ // 2. Try internal cache
56+ if text == "" {
57+ if cached , ok := c .fileContentCache [filePath ]; ok {
58+ text = cached
59+ }
60+ }
61+
62+ // 3. Fallback to OS ReadFile and update cache
63+ if text == "" {
64+ fd , err := os .ReadFile (filePath )
5365 if err != nil {
5466 return uniast.FileLine {File : rel , Line : loc .Range .Start .Line + 1 }
5567 }
5668 text = string (fd )
69+ c .fileContentCache [filePath ] = text
5770 }
71+
5872 return uniast.FileLine {
5973 File : rel ,
6074 Line : loc .Range .Start .Line + 1 ,
@@ -96,14 +110,32 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) {
96110 }
97111
98112 // not allow local symbols inside another symbol
99- c .filterLocalSymbols ()
113+ log .Info ("Export: filtering local symbols...\n " )
114+
115+ //c.filterLocalSymbols()
116+ c .filterLocalSymbolsByCache ()
117+
118+ // Pre-compute receivers map to avoid O(N^2) complexity in exportSymbol recursion
119+ log .Info ("Export: pre-computing receivers map...\n " )
120+ c .receivers = make (map [* DocumentSymbol ][]* DocumentSymbol , len (c .funcs )/ 4 )
121+ for method , rec := range c .funcs {
122+ if (method .Kind == SKMethod ) && rec .Method != nil && rec .Method .Receiver .Symbol != nil {
123+ c .receivers [rec .Method .Receiver .Symbol ] = append (c .receivers [rec .Method .Receiver .Symbol ], method )
124+ }
125+
126+ if (method .Kind == SKFunction && c .Language == uniast .Java ) && rec .Method != nil && rec .Method .Receiver .Symbol != nil {
127+ c .receivers [rec .Method .Receiver .Symbol ] = append (c .receivers [rec .Method .Receiver .Symbol ], method )
128+ }
129+ }
100130
101131 // export symbols
132+ log .Info ("Export: exporting %d symbols...\n " , len (c .syms ))
102133 visited := make (map [* DocumentSymbol ]* uniast.Identity )
103134 for _ , symbol := range c .syms {
104135 _ , _ = c .exportSymbol (& repo , symbol , "" , visited )
105136 }
106137
138+ log .Info ("Export: connecting files to packages...\n " )
107139 for fp , f := range c .files {
108140 rel , err := filepath .Rel (c .repo , fp )
109141 if err != nil {
@@ -162,6 +194,69 @@ func (c *Collector) filterLocalSymbols() {
162194 }
163195}
164196
197+ func (c * Collector ) filterLocalSymbolsByCache () {
198+ if len (c .syms ) == 0 {
199+ return
200+ }
201+
202+ // Group symbols by file URI to reduce comparison scope
203+ symsByFile := make (map [DocumentURI ][]* DocumentSymbol )
204+ for loc , sym := range c .syms {
205+ symsByFile [loc .URI ] = append (symsByFile [loc .URI ], sym )
206+ }
207+
208+ for _ , fileSyms := range symsByFile {
209+ if len (fileSyms ) <= 1 {
210+ continue
211+ }
212+
213+ // Sort symbols in the same file:
214+ // 1. By start offset (ascending)
215+ // 2. By end offset (descending) - larger range first
216+ // This ensures that if symbol A contains symbol B, A appears before B.
217+ sort .Slice (fileSyms , func (i , j int ) bool {
218+ locI , locJ := fileSyms [i ].Location , fileSyms [j ].Location
219+ if locI .Range .Start .Line != locJ .Range .Start .Line {
220+ return locI .Range .Start .Line < locJ .Range .Start .Line
221+ }
222+ if locI .Range .Start .Character != locJ .Range .Start .Character {
223+ return locI .Range .Start .Character < locJ .Range .Start .Character
224+ }
225+ if locI .Range .End .Line != locJ .Range .End .Line {
226+ return locI .Range .End .Line > locJ .Range .End .Line
227+ }
228+ return locI .Range .End .Character > locJ .Range .End .Character
229+ })
230+
231+ // Use a stack-like approach or simple active parent tracking
232+ // Since we sorted by start ASC and end DESC, a candidate parent always comes first.
233+ var activeParents []* DocumentSymbol
234+ for _ , sym := range fileSyms {
235+ isNested := false
236+ // Check if current symbol is nested within any of the active parents
237+ // We only need to check the most recent ones that could still contain it
238+ for i := len (activeParents ) - 1 ; i >= 0 ; i -- {
239+ parent := activeParents [i ]
240+ if parent .Location .Include (sym .Location ) {
241+ if ! utils .Contains (c .spec .ProtectedSymbolKinds (), sym .Kind ) {
242+ isNested = true
243+ break
244+ }
245+ } else if parent .Location .Range .End .Less (sym .Location .Range .Start ) {
246+ // This parent can no longer contain any future symbols (since we're sorted by start)
247+ // But we don't necessarily need to remove it from the slice here for correctness.
248+ }
249+ }
250+
251+ if isNested {
252+ delete (c .syms , sym .Location )
253+ } else {
254+ activeParents = append (activeParents , sym )
255+ }
256+ }
257+ }
258+ }
259+
165260func (c * Collector ) exportSymbol (repo * uniast.Repository , symbol * DocumentSymbol , refName string , visited map [* DocumentSymbol ]* uniast.Identity ) (id * uniast.Identity , e error ) {
166261 defer func () {
167262 if e != nil && e != ErrStdSymbol && e != ErrExternalSymbol {
@@ -207,29 +302,18 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
207302 return
208303 }
209304
210- // Java IPC mode: external/JDK/third-party symbols are exported as one-layer stub identities,
211- // and MUST NOT create module/package entries in repo.
305+ //// Java IPC mode: external/JDK/third-party symbols
306+ //// For external symbols, we set the module and continue with normal export flow
212307 isJavaIPC := c .Language == uniast .Java && c .javaIPC != nil
308+
213309 if isJavaIPC && ! c .internal (symbol .Location ) {
214- name := symbol .Name
215- if name == "" {
216- if refName == "" {
217- e = fmt .Errorf ("both symbol %v name and refname is empty" , symbol )
218- return
219- }
220- name = refName
221- }
222- m := "@external"
310+ // Determine module name based on URI path
223311 fp := symbol .Location .URI .File ()
224312 if strings .Contains (fp , "abcoder-jdk" ) {
225- m = "@ jdk"
226- } else if strings .Contains (fp , "abcoder-third " ) {
227- m = "@third "
313+ mod = "jdk"
314+ } else if strings .Contains (fp , "abcoder-unknown " ) {
315+ mod = "unknown "
228316 }
229- tmp := uniast .NewIdentity (m , "external" , name )
230- id = & tmp
231- visited [symbol ] = id
232- return id , nil
233317 }
234318 if ! c .NeedStdSymbol && mod == "" {
235319 e = ErrStdSymbol
@@ -290,12 +374,8 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
290374 }
291375
292376 // map receiver to methods
293- receivers := make (map [* DocumentSymbol ][]* DocumentSymbol , len (c .funcs )/ 4 )
294- for method , rec := range c .funcs {
295- if method .Kind == SKMethod && rec .Method != nil && rec .Method .Receiver .Symbol != nil {
296- receivers [rec .Method .Receiver .Symbol ] = append (receivers [rec .Method .Receiver .Symbol ], method )
297- }
298- }
377+ // Using pre-computed receivers map from c.receivers
378+ receivers := c .receivers
299379
300380 switch k := symbol .Kind ; k {
301381 // Function
@@ -442,6 +522,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
442522 Exported : public ,
443523 }
444524 // collect deps
525+ // collect deps
445526 if deps := c .deps [symbol ]; deps != nil {
446527 for _ , dep := range deps {
447528 tok := ""
0 commit comments