@@ -23,10 +23,13 @@ import (
2323 "slices"
2424 "sort"
2525 "strings"
26+ "sync"
2627 "unicode"
2728
29+ "golang.org/x/sync/errgroup"
2830 sitter "github.com/smacker/go-tree-sitter"
2931
32+ "github.com/cloudwego/abcoder/lang/cpp"
3033 "github.com/cloudwego/abcoder/lang/cxx"
3134 "github.com/cloudwego/abcoder/lang/java"
3235 javaipc "github.com/cloudwego/abcoder/lang/java/ipc"
@@ -125,6 +128,8 @@ func switchSpec(l uniast.Language, repo string) LanguageSpec {
125128 return python .NewPythonSpec ()
126129 case uniast .Java :
127130 return java .NewJavaSpec (repo )
131+ case uniast .Cpp :
132+ return cpp .NewCppSpec ()
128133 default :
129134 panic (fmt .Sprintf ("unsupported language %s" , l ))
130135 }
@@ -184,6 +189,8 @@ func (c *Collector) Collect(ctx context.Context) error {
184189 if err != nil {
185190 return err
186191 }
192+ } else if c .Language == uniast .Cpp {
193+ root_syms = c .ScannerFileForConCurrentCPPScan (ctx )
187194 } else {
188195 root_syms = c .ScannerFile (ctx )
189196 }
@@ -1125,6 +1132,129 @@ func (c *Collector) ScannerFile(ctx context.Context) []*DocumentSymbol {
11251132 return root_syms
11261133}
11271134
1135+ func (c * Collector ) ScannerFileForConCurrentCPPScan (ctx context.Context ) []* DocumentSymbol {
1136+ c .configureLSP (ctx )
1137+ excludes := make ([]string , len (c .Excludes ))
1138+ for i , e := range c .Excludes {
1139+ if ! filepath .IsAbs (e ) {
1140+ excludes [i ] = filepath .Join (c .repo , e )
1141+ } else {
1142+ excludes [i ] = e
1143+ }
1144+ }
1145+
1146+ var paths []string
1147+ scanner := func (path string , info os.FileInfo , err error ) error {
1148+ if err != nil {
1149+ return err
1150+ }
1151+ if info .IsDir () {
1152+ return nil
1153+ }
1154+ for _ , e := range excludes {
1155+ if strings .HasPrefix (path , e ) {
1156+ return nil
1157+ }
1158+ }
1159+
1160+ if c .spec .ShouldSkip (path ) {
1161+ return nil
1162+ }
1163+
1164+ paths = append (paths , path )
1165+ return nil
1166+ }
1167+
1168+ if err := filepath .Walk (c .repo , scanner ); err != nil {
1169+ log .Error ("scan files failed: %v" , err )
1170+ }
1171+
1172+ // pre-open all files sequentially to avoid concurrent map writes in cli.files
1173+ for _ , path := range paths {
1174+ _ , err := c .cli .DidOpen (ctx , NewURI (path ))
1175+ if err != nil {
1176+ log .Error ("open file failed: %v" , err )
1177+ }
1178+ }
1179+
1180+ var root_syms []* DocumentSymbol
1181+ var mu sync.Mutex
1182+
1183+ var eg errgroup.Group
1184+ // Limit concurrency to not overwhelm the LSP server
1185+ eg .SetLimit (32 )
1186+
1187+ for _ , path := range paths {
1188+ path := path // capture loop variable
1189+ eg .Go (func () error {
1190+ mu .Lock ()
1191+ file := c .files [path ]
1192+ if file == nil {
1193+ rel , err := filepath .Rel (c .repo , path )
1194+ if err == nil {
1195+ file = uniast .NewFile (rel )
1196+ c .files [path ] = file
1197+ }
1198+ }
1199+ mu .Unlock ()
1200+
1201+ if file == nil {
1202+ return nil
1203+ }
1204+
1205+ // 解析use语句
1206+ content , err := os .ReadFile (path )
1207+ if err != nil {
1208+ return nil
1209+ }
1210+ uses , err := c .spec .FileImports (content )
1211+ if err != nil {
1212+ log .Error ("parse file %s use statements failed: %v" , path , err )
1213+ } else {
1214+ mu .Lock ()
1215+ file .Imports = uses
1216+ mu .Unlock ()
1217+ }
1218+
1219+ // collect symbols
1220+ uri := NewURI (path )
1221+ symbols , err := c .cli .DocumentSymbols (ctx , uri )
1222+ if err != nil {
1223+ return nil
1224+ }
1225+
1226+ var local_syms []* DocumentSymbol
1227+ for _ , sym := range symbols {
1228+ // collect content
1229+ symContent , err := c .cli .Locate (sym .Location )
1230+ if err != nil {
1231+ continue
1232+ }
1233+ // collect tokens
1234+ tokens , err := c .cli .SemanticTokens (ctx , sym .Location )
1235+ if err != nil {
1236+ continue
1237+ }
1238+ sym .Text = symContent
1239+ sym .Tokens = tokens
1240+ local_syms = append (local_syms , sym )
1241+ }
1242+
1243+ mu .Lock ()
1244+ for _ , sym := range local_syms {
1245+ c .addSymbol (sym .Location , sym )
1246+ root_syms = append (root_syms , sym )
1247+ }
1248+ mu .Unlock ()
1249+
1250+ return nil
1251+ })
1252+ }
1253+
1254+ _ = eg .Wait ()
1255+ return root_syms
1256+ }
1257+
11281258func (c * Collector ) ScannerByTreeSitter (ctx context.Context ) ([]* DocumentSymbol , error ) {
11291259 var modulePaths []string
11301260 // Java uses parsing pom method to obtain hierarchical relationships
@@ -1837,11 +1967,13 @@ func (c *Collector) getSymbolByLocation(ctx context.Context, loc Location, depth
18371967 // return sym, nil
18381968 // }
18391969
1840- // 1. already loaded
1841- // Optimization: only search in symbols of the same file
1842- if fileSyms , ok := c .symsByFile [loc .URI ]; ok {
1843- if sym := c .findMatchingSymbolIn (loc , fileSyms ); sym != nil {
1844- return sym , nil
1970+ if ! (from .Type == "typeParameter" && c .Language == uniast .Cpp ) {
1971+ // 1. already loaded
1972+ // Optimization: only search in symbols of the same file
1973+ if fileSyms , ok := c .symsByFile [loc .URI ]; ok {
1974+ if sym := c .findMatchingSymbolIn (loc , fileSyms ); sym != nil {
1975+ return sym , nil
1976+ }
18451977 }
18461978 }
18471979
@@ -2071,11 +2203,11 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
20712203
20722204 // function info: type params, inputs, outputs, receiver (if !needImpl)
20732205 if sym .Kind == SKFunction || sym .Kind == SKMethod {
2074- var rsym * dependency
2206+ var rd * dependency
20752207 rec , tps , ips , ops := c .spec .FunctionSymbol (* sym )
2076-
2077- if ! hasImpl && rec >= 0 {
2208+ if (! hasImpl || c .Language == uniast .Cpp ) && rec >= 0 {
20782209 rsym , err := c .getSymbolByTokenWithLimit (ctx , sym .Tokens [rec ], depth )
2210+ rd = & dependency {sym .Tokens [rec ].Location , rsym }
20792211 if err != nil || rsym == nil {
20802212 log .Error ("get receiver symbol for token %v failed: %v\n " , rec , err )
20812213 }
@@ -2084,6 +2216,18 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
20842216 ipsyms , is := c .getDepsWithLimit (ctx , sym , ips , depth - 1 )
20852217 opsyms , os := c .getDepsWithLimit (ctx , sym , ops , depth - 1 )
20862218
2219+ // filter tsym is type parameter
2220+ if c .Language == uniast .Cpp {
2221+ tsFiltered := make ([]dependency , 0 , len (ts ))
2222+ for _ , d := range ts {
2223+ if d .Symbol == nil || d .Symbol .Kind == SKTypeParameter {
2224+ continue
2225+ }
2226+ tsFiltered = append (tsFiltered , d )
2227+ }
2228+ ts = tsFiltered
2229+ }
2230+
20872231 //get last token of params for get signature
20882232 lastToken := rec
20892233 for _ , t := range tps {
@@ -2102,18 +2246,28 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
21022246 }
21032247 }
21042248
2105- c .updateFunctionInfo (sym , tsyms , ipsyms , opsyms , ts , is , os , rsym , lastToken )
2249+ c .updateFunctionInfo (sym , tsyms , ipsyms , opsyms , ts , is , os , rd , lastToken )
21062250 }
21072251
21082252 // variable info: type
21092253 if sym .Kind == SKVariable || sym .Kind == SKConstant {
21102254 i := c .spec .DeclareTokenOfSymbol (* sym )
2255+ // in cpp, search from back to front to find the first entity token
21112256 // find first entity token
2112- for i = i + 1 ; i < len (sym .Tokens ); i ++ {
2113- if c .spec .IsEntityToken (sym .Tokens [i ]) {
2114- break
2257+ if c .Language == uniast .Cpp {
2258+ for i = i - 1 ; i >= 0 ; i -- {
2259+ if c .spec .IsEntityToken (sym .Tokens [i ]) {
2260+ break
2261+ }
2262+ }
2263+ } else {
2264+ for i = i + 1 ; i < len (sym .Tokens ); i ++ {
2265+ if c .spec .IsEntityToken (sym .Tokens [i ]) {
2266+ break
2267+ }
21152268 }
21162269 }
2270+
21172271 if i < 0 || i >= len (sym .Tokens ) {
21182272 log .Error ("get type token of variable symbol %s failed\n " , sym )
21192273 return
0 commit comments