Skip to content

Commit 4c3bcc6

Browse files
authored
feat: cpp init (#172)
* feat: cpp init * feat: cpp testdata * feat: cxx cpp -> ci
1 parent 492d69e commit 4c3bcc6

File tree

12 files changed

+668
-76
lines changed

12 files changed

+668
-76
lines changed

.github/workflows/regression.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
runs-on: ubuntu-latest
1414
#if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')"
1515
env:
16-
LANGS: "go rust python typescript"
16+
LANGS: "go rust python typescript cxx cpp"
1717
# ignore package version for Go e.g. 'a.b/c@506fb8ece467f3a71c29322169bef9b0bc92d554'
1818
DIFFJSON_IGNORE: >
1919
['id']

go.sum

Lines changed: 0 additions & 63 deletions
Large diffs are not rendered by default.

lang/collect/collect.go

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
sitter "github.com/smacker/go-tree-sitter"
2929

30+
"github.com/cloudwego/abcoder/lang/cpp"
3031
"github.com/cloudwego/abcoder/lang/cxx"
3132
"github.com/cloudwego/abcoder/lang/java"
3233
javaipc "github.com/cloudwego/abcoder/lang/java/ipc"
@@ -113,6 +114,8 @@ func switchSpec(l uniast.Language, repo string) LanguageSpec {
113114
return python.NewPythonSpec()
114115
case uniast.Java:
115116
return java.NewJavaSpec(repo)
117+
case uniast.Cpp:
118+
return cpp.NewCppSpec()
116119
default:
117120
panic(fmt.Sprintf("unsupported language %s", l))
118121
}
@@ -1698,9 +1701,11 @@ func (c *Collector) getSymbolByLocation(ctx context.Context, loc Location, depth
16981701
// return sym, nil
16991702
// }
17001703

1701-
// 1. already loaded
1702-
if sym := c.findMatchingSymbolIn(loc, slices.Collect(maps.Values(c.syms))); sym != nil {
1703-
return sym, nil
1704+
if !(from.Type == "typeParameter" && c.Language == uniast.Cpp) {
1705+
// 1. already loaded
1706+
if sym := c.findMatchingSymbolIn(loc, slices.Collect(maps.Values(c.syms))); sym != nil {
1707+
return sym, nil
1708+
}
17041709
}
17051710

17061711
if c.LoadExternalSymbol && !c.internal(loc) && (c.NeedStdSymbol || !c.spec.IsStdToken(from)) {
@@ -1929,11 +1934,11 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
19291934

19301935
// function info: type params, inputs, outputs, receiver (if !needImpl)
19311936
if sym.Kind == SKFunction || sym.Kind == SKMethod {
1932-
var rsym *dependency
1937+
var rd *dependency
19331938
rec, tps, ips, ops := c.spec.FunctionSymbol(*sym)
1934-
1935-
if !hasImpl && rec >= 0 {
1939+
if (!hasImpl || c.Language == uniast.Cpp) && rec >= 0 {
19361940
rsym, err := c.getSymbolByTokenWithLimit(ctx, sym.Tokens[rec], depth)
1941+
rd = &dependency{sym.Tokens[rec].Location, rsym}
19371942
if err != nil || rsym == nil {
19381943
log.Error("get receiver symbol for token %v failed: %v\n", rec, err)
19391944
}
@@ -1942,6 +1947,18 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
19421947
ipsyms, is := c.getDepsWithLimit(ctx, sym, ips, depth-1)
19431948
opsyms, os := c.getDepsWithLimit(ctx, sym, ops, depth-1)
19441949

1950+
// drop type-parameter dependencies that are unresolved or are themselves type parameters
1951+
if c.Language == uniast.Cpp {
1952+
tsFiltered := make([]dependency, 0, len(ts))
1953+
for _, d := range ts {
1954+
if d.Symbol == nil || d.Symbol.Kind == SKTypeParameter {
1955+
continue
1956+
}
1957+
tsFiltered = append(tsFiltered, d)
1958+
}
1959+
ts = tsFiltered
1960+
}
1961+
19451962
//get last token of params for get signature
19461963
lastToken := rec
19471964
for _, t := range tps {
@@ -1960,18 +1977,28 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
19601977
}
19611978
}
19621979

1963-
c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rsym, lastToken)
1980+
c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rd, lastToken)
19641981
}
19651982

19661983
// variable info: type
19671984
if sym.Kind == SKVariable || sym.Kind == SKConstant {
19681985
i := c.spec.DeclareTokenOfSymbol(*sym)
1986+
// in cpp, search backwards from the declared name to find the first entity token
19691987
// find first entity token
1970-
for i = i + 1; i < len(sym.Tokens); i++ {
1971-
if c.spec.IsEntityToken(sym.Tokens[i]) {
1972-
break
1988+
if c.Language == uniast.Cpp {
1989+
for i = i - 1; i >= 0; i-- {
1990+
if c.spec.IsEntityToken(sym.Tokens[i]) {
1991+
break
1992+
}
1993+
}
1994+
} else {
1995+
for i = i + 1; i < len(sym.Tokens); i++ {
1996+
if c.spec.IsEntityToken(sym.Tokens[i]) {
1997+
break
1998+
}
19731999
}
19742000
}
2001+
19752002
if i < 0 || i >= len(sym.Tokens) {
19762003
log.Error("get type token of variable symbol %s failed\n", sym)
19772004
return

lang/collect/export.go

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"strings"
2424

2525
"github.com/cloudwego/abcoder/lang/log"
26+
"github.com/cloudwego/abcoder/lang/lsp"
2627
. "github.com/cloudwego/abcoder/lang/lsp"
2728
"github.com/cloudwego/abcoder/lang/uniast"
2829
"github.com/cloudwego/abcoder/lang/utils"
@@ -252,6 +253,21 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
252253
// NOTICE: use refName as id when symbol name is missing
253254
name = refName
254255
}
256+
257+
if c.Language == uniast.Cpp {
258+
// for function overloads, use the call signature as id
259+
if symbol.Kind == SKMethod || symbol.Kind == SKFunction {
260+
name = c.extractCppCallSig(symbol)
261+
}
262+
263+
// join name with namespace
264+
if ns := c.scopePrefix(symbol); ns != "" {
265+
if !strings.HasPrefix(name, ns+"::") {
266+
name = ns + "::" + name
267+
}
268+
}
269+
}
270+
255271
tmp := uniast.NewIdentity(mod, path, name)
256272
id = &tmp
257273
// Save to visited ONLY WHEN no errors occur
@@ -381,6 +397,18 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
381397
id.Name = iid.Name + "<" + id.Name + ">"
382398
}
383399
}
400+
401+
// cpp get method name without class name
402+
if c.Language == uniast.Cpp && rid != nil {
403+
rec := strings.TrimSpace(rid.Name)
404+
if rec != "" {
405+
searchStr := rec + "::"
406+
if idx := strings.Index(name, searchStr); idx >= 0 {
407+
name = name[idx+len(searchStr):]
408+
}
409+
}
410+
}
411+
384412
if k == SKFunction {
385413
// NOTICE: class static method name is: type::method
386414
id.Name += "::" + name
@@ -523,3 +551,130 @@ func mapKind(kind SymbolKind) uniast.TypeKind {
523551
panic(fmt.Sprintf("unexpected kind %v", kind))
524552
}
525553
}
554+
555+
func (c *Collector) scopePrefix(sym *DocumentSymbol) string {
556+
parts := []string{}
557+
cur := sym
558+
for {
559+
p := c.cli.GetParent(cur)
560+
if p == nil {
561+
break
562+
}
563+
if p.Kind == lsp.SKNamespace {
564+
if p.Name != "" {
565+
parts = append([]string{p.Name}, parts...)
566+
}
567+
}
568+
cur = p
569+
}
570+
return strings.Join(parts, "::") // "a::b"
571+
}
572+
573+
func (c *Collector) cppBaseName(n string) string {
574+
n = strings.TrimSpace(n)
575+
if i := strings.LastIndex(n, "::"); i >= 0 {
576+
n = n[i+2:]
577+
}
578+
n = strings.TrimSpace(n)
579+
// optional: strip template args on the function name itself: foo<T> -> foo
580+
if j := strings.IndexByte(n, '<'); j >= 0 {
581+
n = n[:j]
582+
}
583+
return strings.TrimSpace(n)
584+
}
585+
586+
// extractCppCallSig returns "sym.Name(params)" where params is extracted from sym.Text.
587+
func (c *Collector) extractCppCallSig(sym *lsp.DocumentSymbol) (ret string) {
588+
name := strings.TrimSpace(sym.Name)
589+
if name == "" {
590+
return ""
591+
}
592+
text := sym.Text
593+
if text == "" {
594+
return name + "()"
595+
}
596+
597+
want := c.cppBaseName(name)
598+
if want == "" {
599+
want = name
600+
}
601+
fallback := name + "()"
602+
603+
isIdent := func(b byte) bool {
604+
return (b >= 'a' && b <= 'z') ||
605+
(b >= 'A' && b <= 'Z') ||
606+
(b >= '0' && b <= '9') ||
607+
b == '_'
608+
}
609+
isWholeIdentAt := func(s string, pos int, w string) bool {
610+
if pos < 0 || pos+len(w) > len(s) || s[pos:pos+len(w)] != w {
611+
return false
612+
}
613+
if pos > 0 && isIdent(s[pos-1]) {
614+
return false
615+
}
616+
if pos+len(w) < len(s) && isIdent(s[pos+len(w)]) {
617+
return false
618+
}
619+
return true
620+
}
621+
findMatchingParenIn := func(s string, openIdx int, end int) int {
622+
if openIdx < 0 || openIdx >= len(s) || s[openIdx] != '(' {
623+
return -1
624+
}
625+
if end > len(s) {
626+
end = len(s)
627+
}
628+
depth := 0
629+
for i := openIdx; i < end; i++ {
630+
switch s[i] {
631+
case '(':
632+
depth++
633+
case ')':
634+
depth--
635+
if depth == 0 {
636+
return i
637+
}
638+
}
639+
}
640+
return -1
641+
}
642+
643+
headerEnd := len(text)
644+
if i := strings.IndexByte(text, '{'); i >= 0 && i < headerEnd {
645+
headerEnd = i
646+
}
647+
if i := strings.IndexByte(text, ';'); i >= 0 && i < headerEnd {
648+
headerEnd = i
649+
}
650+
header := text[:headerEnd]
651+
652+
namePos := -1
653+
for i := 0; i+len(want) <= len(header); i++ {
654+
if isWholeIdentAt(header, i, want) {
655+
namePos = i
656+
break
657+
}
658+
}
659+
if namePos < 0 {
660+
return fallback
661+
}
662+
663+
openIdx := -1
664+
for i := namePos + len(want); i < len(header); i++ {
665+
if header[i] == '(' {
666+
openIdx = i
667+
break
668+
}
669+
}
670+
if openIdx < 0 {
671+
return fallback
672+
}
673+
674+
closeIdx := findMatchingParenIn(header, openIdx, len(header))
675+
if closeIdx < 0 {
676+
return fallback
677+
}
678+
679+
return name + header[openIdx:closeIdx+1]
680+
}

lang/cpp/lib.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright 2025 CloudWeGo Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package cpp
16+
17+
import (
18+
"fmt"
19+
"time"
20+
21+
"github.com/cloudwego/abcoder/lang/uniast"
22+
"github.com/cloudwego/abcoder/lang/utils"
23+
)
24+
25+
// MaxWaitDuration is the upper bound on the wait time returned by CheckRepo.
const MaxWaitDuration = 5 * time.Minute
26+
27+
// InstallLanguageServer performs no installation. It always returns an empty
// path and an error telling the user to install clangd-18 manually.
func InstallLanguageServer() (string, error) {
	const manualInstallHint = "please install clangd-18 manually. See https://releases.llvm.org/ (clangd is in clang-extra)"
	return "", fmt.Errorf(manualInstallHint)
}
30+
31+
func GetDefaultLSP() (lang uniast.Language, name string) {
32+
return uniast.Cpp, "clangd-18 --background-index=false -j=2 --clang-tidy=false"
33+
}
34+
35+
func CheckRepo(repo string) (string, time.Duration) {
36+
openfile := ""
37+
// TODO: check if the project compiles.
38+
39+
// NOTICE: wait for Rust projects based on code files
40+
_, size := utils.CountFiles(repo, ".cpp", "build/")
41+
wait := 2*time.Second + time.Second*time.Duration(size/1024)
42+
if wait > MaxWaitDuration {
43+
wait = MaxWaitDuration
44+
}
45+
return openfile, wait
46+
}

0 commit comments

Comments
 (0)