Skip to content

Commit 6c6f76b

Browse files
Huabuxiu and li-jin-gou authored
java parser (#171)
* add test case
* feat: java ExternalSymbol with maven download
* feat: add java parser exec java home
* feat: update maven java home
* feat: java local method support Java basic types, as well as generics, parameter types have inheritance, and implement interfaces
* feat: java support lombok
* rm
* fix path
* chore: revert json file
* fix : check diff detail
* fix : fix ut
* fix : fix only java

---------

Co-authored-by: kinggo <1963359402@qq.com>
Co-authored-by: lilong.21 <lilong.21@bytedance.com>
1 parent bbf312d commit 6c6f76b

File tree

13 files changed

+15446
-15159
lines changed

13 files changed

+15446
-15159
lines changed

lang/collect/collect.go

Lines changed: 185 additions & 44 deletions
Large diffs are not rendered by default.

lang/collect/export.go

Lines changed: 114 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"fmt"
2121
"os"
2222
"path/filepath"
23+
"sort"
2324
"strings"
2425

2526
"github.com/cloudwego/abcoder/lang/log"
@@ -41,20 +42,33 @@ func (c *Collector) fileLine(loc Location) uniast.FileLine {
4142
rel = filepath.Base(loc.URI.File())
4243
}
4344
fileURI := string(loc.URI)
44-
if c.cli == nil {
45-
return uniast.FileLine{File: rel, Line: loc.Range.Start.Line + 1}
46-
}
47-
f := c.cli.GetFile(loc.URI)
45+
filePath := loc.URI.File()
46+
4847
text := ""
49-
if f != nil {
50-
text = f.Text
51-
} else {
52-
fd, err := os.ReadFile(loc.URI.File())
48+
// 1. Try LSP client files
49+
if c.cli != nil {
50+
if f := c.cli.GetFile(loc.URI); f != nil {
51+
text = f.Text
52+
}
53+
}
54+
55+
// 2. Try internal cache
56+
if text == "" {
57+
if cached, ok := c.fileContentCache[filePath]; ok {
58+
text = cached
59+
}
60+
}
61+
62+
// 3. Fallback to OS ReadFile and update cache
63+
if text == "" {
64+
fd, err := os.ReadFile(filePath)
5365
if err != nil {
5466
return uniast.FileLine{File: rel, Line: loc.Range.Start.Line + 1}
5567
}
5668
text = string(fd)
69+
c.fileContentCache[filePath] = text
5770
}
71+
5872
return uniast.FileLine{
5973
File: rel,
6074
Line: loc.Range.Start.Line + 1,
@@ -96,14 +110,32 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) {
96110
}
97111

98112
// not allow local symbols inside another symbol
99-
c.filterLocalSymbols()
113+
log.Info("Export: filtering local symbols...\n")
114+
115+
//c.filterLocalSymbols()
116+
c.filterLocalSymbolsByCache()
117+
118+
// Pre-compute receivers map to avoid O(N^2) complexity in exportSymbol recursion
119+
log.Info("Export: pre-computing receivers map...\n")
120+
c.receivers = make(map[*DocumentSymbol][]*DocumentSymbol, len(c.funcs)/4)
121+
for method, rec := range c.funcs {
122+
if (method.Kind == SKMethod) && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
123+
c.receivers[rec.Method.Receiver.Symbol] = append(c.receivers[rec.Method.Receiver.Symbol], method)
124+
}
125+
126+
if (method.Kind == SKFunction && c.Language == uniast.Java) && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
127+
c.receivers[rec.Method.Receiver.Symbol] = append(c.receivers[rec.Method.Receiver.Symbol], method)
128+
}
129+
}
100130

101131
// export symbols
132+
log.Info("Export: exporting %d symbols...\n", len(c.syms))
102133
visited := make(map[*DocumentSymbol]*uniast.Identity)
103134
for _, symbol := range c.syms {
104135
_, _ = c.exportSymbol(&repo, symbol, "", visited)
105136
}
106137

138+
log.Info("Export: connecting files to packages...\n")
107139
for fp, f := range c.files {
108140
rel, err := filepath.Rel(c.repo, fp)
109141
if err != nil {
@@ -162,6 +194,69 @@ func (c *Collector) filterLocalSymbols() {
162194
}
163195
}
164196

197+
func (c *Collector) filterLocalSymbolsByCache() {
198+
if len(c.syms) == 0 {
199+
return
200+
}
201+
202+
// Group symbols by file URI to reduce comparison scope
203+
symsByFile := make(map[DocumentURI][]*DocumentSymbol)
204+
for loc, sym := range c.syms {
205+
symsByFile[loc.URI] = append(symsByFile[loc.URI], sym)
206+
}
207+
208+
for _, fileSyms := range symsByFile {
209+
if len(fileSyms) <= 1 {
210+
continue
211+
}
212+
213+
// Sort symbols in the same file:
214+
// 1. By start offset (ascending)
215+
// 2. By end offset (descending) - larger range first
216+
// This ensures that if symbol A contains symbol B, A appears before B.
217+
sort.Slice(fileSyms, func(i, j int) bool {
218+
locI, locJ := fileSyms[i].Location, fileSyms[j].Location
219+
if locI.Range.Start.Line != locJ.Range.Start.Line {
220+
return locI.Range.Start.Line < locJ.Range.Start.Line
221+
}
222+
if locI.Range.Start.Character != locJ.Range.Start.Character {
223+
return locI.Range.Start.Character < locJ.Range.Start.Character
224+
}
225+
if locI.Range.End.Line != locJ.Range.End.Line {
226+
return locI.Range.End.Line > locJ.Range.End.Line
227+
}
228+
return locI.Range.End.Character > locJ.Range.End.Character
229+
})
230+
231+
// Use a stack-like approach or simple active parent tracking
232+
// Since we sorted by start ASC and end DESC, a candidate parent always comes first.
233+
var activeParents []*DocumentSymbol
234+
for _, sym := range fileSyms {
235+
isNested := false
236+
// Check if current symbol is nested within any of the active parents
237+
// We only need to check the most recent ones that could still contain it
238+
for i := len(activeParents) - 1; i >= 0; i-- {
239+
parent := activeParents[i]
240+
if parent.Location.Include(sym.Location) {
241+
if !utils.Contains(c.spec.ProtectedSymbolKinds(), sym.Kind) {
242+
isNested = true
243+
break
244+
}
245+
} else if parent.Location.Range.End.Less(sym.Location.Range.Start) {
246+
// This parent can no longer contain any future symbols (since we're sorted by start)
247+
// But we don't necessarily need to remove it from the slice here for correctness.
248+
}
249+
}
250+
251+
if isNested {
252+
delete(c.syms, sym.Location)
253+
} else {
254+
activeParents = append(activeParents, sym)
255+
}
256+
}
257+
}
258+
}
259+
165260
func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol, refName string, visited map[*DocumentSymbol]*uniast.Identity) (id *uniast.Identity, e error) {
166261
defer func() {
167262
if e != nil && e != ErrStdSymbol && e != ErrExternalSymbol {
@@ -207,29 +302,18 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
207302
return
208303
}
209304

210-
// Java IPC mode: external/JDK/third-party symbols are exported as one-layer stub identities,
211-
// and MUST NOT create module/package entries in repo.
305+
//// Java IPC mode: external/JDK/third-party symbols
306+
//// For external symbols, we set the module and continue with normal export flow
212307
isJavaIPC := c.Language == uniast.Java && c.javaIPC != nil
308+
213309
if isJavaIPC && !c.internal(symbol.Location) {
214-
name := symbol.Name
215-
if name == "" {
216-
if refName == "" {
217-
e = fmt.Errorf("both symbol %v name and refname is empty", symbol)
218-
return
219-
}
220-
name = refName
221-
}
222-
m := "@external"
310+
// Determine module name based on URI path
223311
fp := symbol.Location.URI.File()
224312
if strings.Contains(fp, "abcoder-jdk") {
225-
m = "@jdk"
226-
} else if strings.Contains(fp, "abcoder-third") {
227-
m = "@third"
313+
mod = "jdk"
314+
} else if strings.Contains(fp, "abcoder-unknown") {
315+
mod = "unknown"
228316
}
229-
tmp := uniast.NewIdentity(m, "external", name)
230-
id = &tmp
231-
visited[symbol] = id
232-
return id, nil
233317
}
234318
if !c.NeedStdSymbol && mod == "" {
235319
e = ErrStdSymbol
@@ -290,12 +374,8 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
290374
}
291375

292376
// map receiver to methods
293-
receivers := make(map[*DocumentSymbol][]*DocumentSymbol, len(c.funcs)/4)
294-
for method, rec := range c.funcs {
295-
if method.Kind == SKMethod && rec.Method != nil && rec.Method.Receiver.Symbol != nil {
296-
receivers[rec.Method.Receiver.Symbol] = append(receivers[rec.Method.Receiver.Symbol], method)
297-
}
298-
}
377+
// Using pre-computed receivers map from c.receivers
378+
receivers := c.receivers
299379

300380
switch k := symbol.Kind; k {
301381
// Function
@@ -442,6 +522,7 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
442522
Exported: public,
443523
}
444524
// collect deps
525+
// collect deps
445526
if deps := c.deps[symbol]; deps != nil {
446527
for _, dep := range deps {
447528
tok := ""
4.25 MB
Binary file not shown.

lang/java/ipc/converter.go

Lines changed: 33 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,6 @@ func NewConverter(repoPath string, moduleName string) *Converter {
5858
return c
5959
}
6060

61-
// ConvertResponses 将 Java Parser 的流式响应列表转换为 UniAST Repository。
62-
func ConvertResponses(repoPath string, moduleName string, responses []*pb.AnalyzeResponse) (*uniast.Repository, error) {
63-
conv := NewConverter(repoPath, moduleName)
64-
for _, resp := range responses {
65-
if err := conv.ProcessResponse(resp); err != nil {
66-
return conv.Repository(), err
67-
}
68-
}
69-
return conv.Repository(), nil
70-
}
71-
7261
// Repository returns the converted UniAST repository
7362
func (c *Converter) Repository() *uniast.Repository {
7463
return c.repo
@@ -125,42 +114,47 @@ func (c *Converter) processClassInfo(info *pb.ClassInfo) error {
125114
if err != nil {
126115
return err
127116
}
128-
for _, dep := range info.Dependencies {
129-
if dep.SourceType == pb.SourceType_SOURCE_TYPE_JDK && dep.ClassName != "" {
130-
if _, ok := c.JdkClassCache[dep.ClassName]; !ok {
131-
depPoint := &pb.ClassInfo{
132-
ClassName: dep.ClassName,
133-
Source: &pb.SourceInfo{
134-
Type: pb.SourceType_SOURCE_TYPE_JDK,
135-
},
117+
return nil
118+
}
119+
120+
func (c *Converter) ProcessClassDepInfo() error {
121+
for _, info := range c.LocalClassCache {
122+
for _, dep := range info.Dependencies {
123+
if dep.SourceType == pb.SourceType_SOURCE_TYPE_JDK && dep.ClassName != "" {
124+
if _, ok := c.JdkClassCache[dep.ClassName]; !ok {
125+
depPoint := &pb.ClassInfo{
126+
ClassName: dep.ClassName,
127+
Source: &pb.SourceInfo{
128+
Type: pb.SourceType_SOURCE_TYPE_JDK,
129+
},
130+
}
131+
putCache(depPoint, c)
136132
}
137-
putCache(depPoint, c)
138133
}
139-
}
140-
if dep.SourceType == pb.SourceType_SOURCE_TYPE_UNKNOWN && dep.ClassName != "" {
141-
if _, ok := c.UnknowClassCache[dep.ClassName]; !ok {
142-
depPoint := &pb.ClassInfo{
143-
ClassName: dep.ClassName,
144-
Source: &pb.SourceInfo{
145-
Type: pb.SourceType_SOURCE_TYPE_UNKNOWN,
146-
},
134+
if dep.SourceType == pb.SourceType_SOURCE_TYPE_UNKNOWN && dep.ClassName != "" {
135+
if _, ok := c.UnknowClassCache[dep.ClassName]; !ok {
136+
depPoint := &pb.ClassInfo{
137+
ClassName: dep.ClassName,
138+
Source: &pb.SourceInfo{
139+
Type: pb.SourceType_SOURCE_TYPE_UNKNOWN,
140+
},
141+
}
142+
putCache(depPoint, c)
147143
}
148-
putCache(depPoint, c)
149144
}
150-
}
151-
if (dep.SourceType == pb.SourceType_SOURCE_TYPE_MAVEN || dep.SourceType == pb.SourceType_SOURCE_TYPE_EXTERNAL_JAR) && dep.ClassName != "" {
152-
if _, ok := c.ThirdPartClassCache[dep.ClassName]; !ok {
153-
depPoint := &pb.ClassInfo{
154-
ClassName: dep.ClassName,
155-
Source: &pb.SourceInfo{
156-
Type: pb.SourceType_SOURCE_TYPE_MAVEN,
157-
},
145+
if (dep.SourceType == pb.SourceType_SOURCE_TYPE_MAVEN || dep.SourceType == pb.SourceType_SOURCE_TYPE_EXTERNAL_JAR) && dep.ClassName != "" {
146+
if _, ok := c.ThirdPartClassCache[dep.ClassName]; !ok {
147+
depPoint := &pb.ClassInfo{
148+
ClassName: dep.ClassName,
149+
Source: &pb.SourceInfo{
150+
Type: pb.SourceType_SOURCE_TYPE_MAVEN,
151+
},
152+
}
153+
putCache(depPoint, c)
158154
}
159-
putCache(depPoint, c)
160155
}
161156
}
162157
}
163-
164158
return nil
165159
}
166160

lang/java/ipc/server.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ func (s *JavaParserServer) Start(ctx context.Context, repoPath string, analyzerC
111111
return nil, fmt.Errorf("failed to create socket listener: %w", err)
112112
}
113113

114-
// Step 2: Start Java subprocess
114+
//Step 2: Start Java subprocess
115115
if err := s.startJavaProcess(ctx); err != nil {
116116
s.cleanup()
117117
return nil, fmt.Errorf("failed to start Java process: %w", err)

lang/java/lib_ipc.go

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func DefaultParserConfig() *ParserConfig {
7676
}
7777

7878
return &ParserConfig{
79-
ResolveMavenDependencies: true,
79+
ResolveMavenDependencies: false,
8080
IncludeExternalClasses: false,
8181
Debug: false,
8282
JarPath: jarPath,
@@ -104,9 +104,52 @@ func ParseRepositoryByIpc(ctx context.Context, repoPath string, config *ParserCo
104104
// Create analyzer config
105105
analyzerConfig := &pb.AnalyzerConfig{
106106
ResolveMavenDependencies: config.ResolveMavenDependencies,
107-
M2RepositoryPath: config.M2RepositoryPath,
108107
ExtraJarPaths: config.ExtraJarPaths,
109108
IncludeExternalClasses: config.IncludeExternalClasses,
109+
ExtraConfig: make(map[string]string),
110+
}
111+
112+
if config.ResolveMavenDependencies {
113+
m2RepositoryPath := os.Getenv("MAVEN_M2_REPOSITORY_PATH")
114+
settingsFilePath := os.Getenv("MAVEN_SETTINGS_FILE_PATH")
115+
java8Home := os.Getenv("JAVA_8_HOME_PATH")
116+
java11Home := os.Getenv("JAVA_11_HOME_PATH")
117+
java17Home := os.Getenv("JAVA_17_HOME_PATH")
118+
java21Home := os.Getenv("JAVA_21_HOME_PATH")
119+
java25Home := os.Getenv("JAVA_25_HOME_PATH")
120+
121+
analyzerConfig.ExtraConfig["maven.enabled"] = "true"
122+
if m2RepositoryPath != "" {
123+
analyzerConfig.ExtraConfig["maven.m2RepositoryPath"] = m2RepositoryPath
124+
}
125+
if settingsFilePath != "" {
126+
analyzerConfig.ExtraConfig["maven.settingsFilePath"] = settingsFilePath
127+
}
128+
if java8Home != "" {
129+
analyzerConfig.ExtraConfig["maven.java8Home"] = java8Home
130+
}
131+
if java11Home != "" {
132+
analyzerConfig.ExtraConfig["maven.java11Home"] = java11Home
133+
}
134+
if java17Home != "" {
135+
analyzerConfig.ExtraConfig["maven.java17Home"] = java17Home
136+
}
137+
if java21Home != "" {
138+
analyzerConfig.ExtraConfig["maven.java21Home"] = java21Home
139+
}
140+
if java25Home != "" {
141+
analyzerConfig.ExtraConfig["maven.java25Home"] = java25Home
142+
}
143+
analyzerConfig.ExtraConfig["maven.timeoutSeconds"] = "600"
144+
analyzerConfig.ExtraConfig["maven.includeScopes"] = "compile,runtime"
145+
analyzerConfig.ExtraConfig["maven.excludeScopes"] = "test,provided"
146+
analyzerConfig.ExtraConfig["maven.offlineMode"] = "false"
147+
analyzerConfig.ExtraConfig["maven.skipTests"] = "true"
148+
analyzerConfig.ExtraConfig["maven.installBeforeResolve"] = "true"
149+
}
150+
151+
if config.Debug {
152+
analyzerConfig.ExtraConfig["maven.verbose"] = "true"
110153
}
111154

112155
// Create server and start analysis
@@ -127,6 +170,10 @@ func ParseRepositoryByIpc(ctx context.Context, repoPath string, config *ParserCo
127170
log.Printf("Warning: error processing response: %v", err)
128171
}
129172
}
173+
// Process class dependencies
174+
if err := converter.ProcessClassDepInfo(); err != nil {
175+
return nil, fmt.Errorf("failed to process class dependencies: %w", err)
176+
}
130177

131178
return converter, nil
132179
}

0 commit comments

Comments
 (0)