Skip to content

Commit a6e045f

Browse files
zyl1314li-jin-gou
andauthored
Feat/cpp (#177)
* feat: cpp init * feat: cpp testdata * feat: cxx cpp -> ci * fix: 修复cpp class method路径问题 * feat: ci setup clangd * Merge pull request #1 from zyl1314/feat_cpp_ll feat: add ScannerFileForConCurrentCPPScan empty func --------- Co-authored-by: kinggo <1963359402@qq.com>
1 parent 372cada commit a6e045f

File tree

11 files changed

+817
-17
lines changed

11 files changed

+817
-17
lines changed

.github/workflows/regression.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
runs-on: ubuntu-latest
1414
#if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')"
1515
env:
16-
LANGS: "go rust python typescript"
16+
LANGS: "go rust python typescript cxx cpp"
1717
# ignore package version for Go e.g. 'a.b/c@506fb8ece467f3a71c29322169bef9b0bc92d554'
1818
DIFFJSON_IGNORE: >
1919
['id']
@@ -64,6 +64,11 @@ jobs:
6464
with:
6565
node-version: '22'
6666

67+
- name: Setup clangd-18
68+
run: |
69+
sudo apt-get update
70+
sudo apt-get install -y clangd-18
71+
6772
- name: Compile both binaries
6873
run: |
6974
(cd main_repo && go build -o ../abcoder_old)

lang/collect/collect.go

Lines changed: 166 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,13 @@ import (
2323
"slices"
2424
"sort"
2525
"strings"
26+
"sync"
2627
"unicode"
2728

29+
"golang.org/x/sync/errgroup"
2830
sitter "github.com/smacker/go-tree-sitter"
2931

32+
"github.com/cloudwego/abcoder/lang/cpp"
3033
"github.com/cloudwego/abcoder/lang/cxx"
3134
"github.com/cloudwego/abcoder/lang/java"
3235
javaipc "github.com/cloudwego/abcoder/lang/java/ipc"
@@ -125,6 +128,8 @@ func switchSpec(l uniast.Language, repo string) LanguageSpec {
125128
return python.NewPythonSpec()
126129
case uniast.Java:
127130
return java.NewJavaSpec(repo)
131+
case uniast.Cpp:
132+
return cpp.NewCppSpec()
128133
default:
129134
panic(fmt.Sprintf("unsupported language %s", l))
130135
}
@@ -184,6 +189,8 @@ func (c *Collector) Collect(ctx context.Context) error {
184189
if err != nil {
185190
return err
186191
}
192+
} else if c.Language == uniast.Cpp {
193+
root_syms = c.ScannerFileForConCurrentCPPScan(ctx)
187194
} else {
188195
root_syms = c.ScannerFile(ctx)
189196
}
@@ -1125,6 +1132,129 @@ func (c *Collector) ScannerFile(ctx context.Context) []*DocumentSymbol {
11251132
return root_syms
11261133
}
11271134

1135+
func (c *Collector) ScannerFileForConCurrentCPPScan(ctx context.Context) []*DocumentSymbol {
1136+
c.configureLSP(ctx)
1137+
excludes := make([]string, len(c.Excludes))
1138+
for i, e := range c.Excludes {
1139+
if !filepath.IsAbs(e) {
1140+
excludes[i] = filepath.Join(c.repo, e)
1141+
} else {
1142+
excludes[i] = e
1143+
}
1144+
}
1145+
1146+
var paths []string
1147+
scanner := func(path string, info os.FileInfo, err error) error {
1148+
if err != nil {
1149+
return err
1150+
}
1151+
if info.IsDir() {
1152+
return nil
1153+
}
1154+
for _, e := range excludes {
1155+
if strings.HasPrefix(path, e) {
1156+
return nil
1157+
}
1158+
}
1159+
1160+
if c.spec.ShouldSkip(path) {
1161+
return nil
1162+
}
1163+
1164+
paths = append(paths, path)
1165+
return nil
1166+
}
1167+
1168+
if err := filepath.Walk(c.repo, scanner); err != nil {
1169+
log.Error("scan files failed: %v", err)
1170+
}
1171+
1172+
// pre-open all files sequentially to avoid concurrent map writes in cli.files
1173+
for _, path := range paths {
1174+
_, err := c.cli.DidOpen(ctx, NewURI(path))
1175+
if err != nil {
1176+
log.Error("open file failed: %v", err)
1177+
}
1178+
}
1179+
1180+
var root_syms []*DocumentSymbol
1181+
var mu sync.Mutex
1182+
1183+
var eg errgroup.Group
1184+
// Limit concurrency to not overwhelm the LSP server
1185+
eg.SetLimit(32)
1186+
1187+
for _, path := range paths {
1188+
path := path // capture loop variable
1189+
eg.Go(func() error {
1190+
mu.Lock()
1191+
file := c.files[path]
1192+
if file == nil {
1193+
rel, err := filepath.Rel(c.repo, path)
1194+
if err == nil {
1195+
file = uniast.NewFile(rel)
1196+
c.files[path] = file
1197+
}
1198+
}
1199+
mu.Unlock()
1200+
1201+
if file == nil {
1202+
return nil
1203+
}
1204+
1205+
// 解析use语句
1206+
content, err := os.ReadFile(path)
1207+
if err != nil {
1208+
return nil
1209+
}
1210+
uses, err := c.spec.FileImports(content)
1211+
if err != nil {
1212+
log.Error("parse file %s use statements failed: %v", path, err)
1213+
} else {
1214+
mu.Lock()
1215+
file.Imports = uses
1216+
mu.Unlock()
1217+
}
1218+
1219+
// collect symbols
1220+
uri := NewURI(path)
1221+
symbols, err := c.cli.DocumentSymbols(ctx, uri)
1222+
if err != nil {
1223+
return nil
1224+
}
1225+
1226+
var local_syms []*DocumentSymbol
1227+
for _, sym := range symbols {
1228+
// collect content
1229+
symContent, err := c.cli.Locate(sym.Location)
1230+
if err != nil {
1231+
continue
1232+
}
1233+
// collect tokens
1234+
tokens, err := c.cli.SemanticTokens(ctx, sym.Location)
1235+
if err != nil {
1236+
continue
1237+
}
1238+
sym.Text = symContent
1239+
sym.Tokens = tokens
1240+
local_syms = append(local_syms, sym)
1241+
}
1242+
1243+
mu.Lock()
1244+
for _, sym := range local_syms {
1245+
c.addSymbol(sym.Location, sym)
1246+
root_syms = append(root_syms, sym)
1247+
}
1248+
mu.Unlock()
1249+
1250+
return nil
1251+
})
1252+
}
1253+
1254+
_ = eg.Wait()
1255+
return root_syms
1256+
}
1257+
11281258
func (c *Collector) ScannerByTreeSitter(ctx context.Context) ([]*DocumentSymbol, error) {
11291259
var modulePaths []string
11301260
// Java uses parsing pom method to obtain hierarchical relationships
@@ -1837,11 +1967,13 @@ func (c *Collector) getSymbolByLocation(ctx context.Context, loc Location, depth
18371967
// return sym, nil
18381968
// }
18391969

1840-
// 1. already loaded
1841-
// Optimization: only search in symbols of the same file
1842-
if fileSyms, ok := c.symsByFile[loc.URI]; ok {
1843-
if sym := c.findMatchingSymbolIn(loc, fileSyms); sym != nil {
1844-
return sym, nil
1970+
if !(from.Type == "typeParameter" && c.Language == uniast.Cpp) {
1971+
// 1. already loaded
1972+
// Optimization: only search in symbols of the same file
1973+
if fileSyms, ok := c.symsByFile[loc.URI]; ok {
1974+
if sym := c.findMatchingSymbolIn(loc, fileSyms); sym != nil {
1975+
return sym, nil
1976+
}
18451977
}
18461978
}
18471979

@@ -2071,11 +2203,11 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
20712203

20722204
// function info: type params, inputs, outputs, receiver (if !needImpl)
20732205
if sym.Kind == SKFunction || sym.Kind == SKMethod {
2074-
var rsym *dependency
2206+
var rd *dependency
20752207
rec, tps, ips, ops := c.spec.FunctionSymbol(*sym)
2076-
2077-
if !hasImpl && rec >= 0 {
2208+
if (!hasImpl || c.Language == uniast.Cpp) && rec >= 0 {
20782209
rsym, err := c.getSymbolByTokenWithLimit(ctx, sym.Tokens[rec], depth)
2210+
rd = &dependency{sym.Tokens[rec].Location, rsym}
20792211
if err != nil || rsym == nil {
20802212
log.Error("get receiver symbol for token %v failed: %v\n", rec, err)
20812213
}
@@ -2084,6 +2216,18 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
20842216
ipsyms, is := c.getDepsWithLimit(ctx, sym, ips, depth-1)
20852217
opsyms, os := c.getDepsWithLimit(ctx, sym, ops, depth-1)
20862218

2219+
// drop entries of ts whose symbol is nil or is a type parameter
2220+
if c.Language == uniast.Cpp {
2221+
tsFiltered := make([]dependency, 0, len(ts))
2222+
for _, d := range ts {
2223+
if d.Symbol == nil || d.Symbol.Kind == SKTypeParameter {
2224+
continue
2225+
}
2226+
tsFiltered = append(tsFiltered, d)
2227+
}
2228+
ts = tsFiltered
2229+
}
2230+
20872231
//get last token of params for get signature
20882232
lastToken := rec
20892233
for _, t := range tps {
@@ -2102,18 +2246,28 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
21022246
}
21032247
}
21042248

2105-
c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rsym, lastToken)
2249+
c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rd, lastToken)
21062250
}
21072251

21082252
// variable info: type
21092253
if sym.Kind == SKVariable || sym.Kind == SKConstant {
21102254
i := c.spec.DeclareTokenOfSymbol(*sym)
2255+
// in cpp, search from back to front to find the first entity token
21112256
// find first entity token
2112-
for i = i + 1; i < len(sym.Tokens); i++ {
2113-
if c.spec.IsEntityToken(sym.Tokens[i]) {
2114-
break
2257+
if c.Language == uniast.Cpp {
2258+
for i = i - 1; i >= 0; i-- {
2259+
if c.spec.IsEntityToken(sym.Tokens[i]) {
2260+
break
2261+
}
2262+
}
2263+
} else {
2264+
for i = i + 1; i < len(sym.Tokens); i++ {
2265+
if c.spec.IsEntityToken(sym.Tokens[i]) {
2266+
break
2267+
}
21152268
}
21162269
}
2270+
21172271
if i < 0 || i >= len(sym.Tokens) {
21182272
log.Error("get type token of variable symbol %s failed\n", sym)
21192273
return

0 commit comments

Comments
 (0)