Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the performance of includes scanning #1735

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 168 additions & 79 deletions legacy/builder/container_find_includes.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@ import (
"encoding/json"
"fmt"
"os/exec"
"runtime"
"time"
"sync"
"strings"
"sync/atomic"

"github.com/arduino/arduino-cli/arduino/globals"
"github.com/arduino/arduino-cli/arduino/libraries"
Expand All @@ -107,6 +111,9 @@ import (
"github.com/pkg/errors"
)

var libMux sync.Mutex
var fileMux sync.Mutex

type ContainerFindIncludes struct{}

func (s *ContainerFindIncludes) Run(ctx *types.Context) error {
Expand All @@ -125,9 +132,11 @@ func (s *ContainerFindIncludes) findIncludes(ctx *types.Context) error {
cachePath := ctx.BuildPath.Join("includes.cache")
cache := readCache(cachePath)

appendIncludeFolder(ctx, cache, nil, "", ctx.BuildProperties.GetPath("build.core.path"))
// Entries with the "no_resolve" prefix are ignored in the preload phase,
// since they have already been added to the context here
appendIncludeFolder(ctx, cache, nil, "no_resolve_1", ctx.BuildProperties.GetPath("build.core.path"))
if ctx.BuildProperties.Get("build.variant.path") != "" {
appendIncludeFolder(ctx, cache, nil, "", ctx.BuildProperties.GetPath("build.variant.path"))
appendIncludeFolder(ctx, cache, nil, "no_resolve_2", ctx.BuildProperties.GetPath("build.variant.path"))
}

sketch := ctx.Sketch
Expand All @@ -144,16 +153,69 @@ func (s *ContainerFindIncludes) findIncludes(ctx *types.Context) error {
queueSourceFilesFromFolder(ctx, sourceFilePaths, sketch, srcSubfolderPath, true /* recurse */)
}

for !sourceFilePaths.Empty() {
err := findIncludesUntilDone(ctx, cache, sourceFilePaths.Pop())
preloadCachedFolder(ctx, cache)

// The first source file is the main .ino.cpp
// handle it first to setup environment for other files
findIncludesUntilDone(ctx, cache, sourceFilePaths.Pop())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the main.ino.cpp is processed separately? If the processing order of the files doesn't matter, this should not be needed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my original design, I also think the main.ino.cpp has no difference with other files. And it works in most cases.
But for some special cases, e.g. from the commits history of this PR we can see, the test case
assert run_command(["compile", "-b", "adafruit:samd:adafruit_feather_m4", sketch_path])
in test_compile_part_1.py will fail.
Due to the library found mechanism in arduino-cli, have to set main.ino.cpp as the root of the scanning, then start other scanning from it.


var errorsList []error
var errorsMux sync.Mutex

queue := make(chan types.SourceFile)
job := func(source types.SourceFile) {
// Find all includes
err := findIncludesUntilDone(ctx, cache, source)
if err != nil {
cachePath.Remove()
return errors.WithStack(err)
errorsMux.Lock()
errorsList = append(errorsList, err)
errorsMux.Unlock()
}
}

// Spawn jobs runners to make the scan faster
var wg sync.WaitGroup
jobs := ctx.Jobs
if jobs == 0 {
jobs = runtime.NumCPU()
}
var unhandled int32 = 0
for i := 0; i < jobs; i++ {
wg.Add(1)
go func() {
for source := range queue {
job(source)
atomic.AddInt32(&unhandled, -1)
}
wg.Done()
}()
}

// Loop until all files are handled
for (!sourceFilePaths.Empty() || unhandled != 0 ) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this unhandled variable redundant here? the WaitGroup should already take care of waiting for all the processes to finish.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unhandled is not redundant here. During the scanning process, we may find new source files. For example, suppose we have only 4 files to scan in the beginning, but we have 8 cores. Then the sourceFilePaths will be empty after the first goroutines fetch their task and the loop will be ended. But during the scanning against the 4 files may append more new found files to the sourceFilePaths but they won't be handled. The variable unhandled is to make sure sourceFilePaths is empty only after all goroutines finish the jobs.

errorsMux.Lock()
gotError := len(errorsList) > 0
errorsMux.Unlock()
if gotError {
break
}
if(!sourceFilePaths.Empty()){
fileMux.Lock()
queue <- sourceFilePaths.Pop()
fileMux.Unlock()
atomic.AddInt32(&unhandled, 1)
}
}
close(queue)
wg.Wait()

if len(errorsList) > 0 {
// output the first error
cachePath.Remove()
return errors.WithStack(errorsList[0])
}

// Finalize the cache
cache.ExpectEnd()
err = writeCache(cache, cachePath)
if err != nil {
return errors.WithStack(err)
Expand All @@ -173,8 +235,19 @@ func (s *ContainerFindIncludes) findIncludes(ctx *types.Context) error {
// and should be the empty string for the default include folders, like
// the core or variant.
func appendIncludeFolder(ctx *types.Context, cache *includeCache, sourceFilePath *paths.Path, include string, folder *paths.Path) {
	// This function is called concurrently from the scanning goroutines.
	// Guard BOTH the shared IncludeFolders slice and the cache mutations with
	// libMux: cache.entries is a sync.Map and is safe on its own, but the
	// plain `cache.valid = false` bool write would otherwise be an
	// unsynchronized data race between workers (flagged by `go test -race`).
	libMux.Lock()
	ctx.IncludeFolders = append(ctx.IncludeFolders, folder)
	entry := &includeCacheEntry{Sourcefile: sourceFilePath, Include: include, Includepath: folder}
	cache.entries.Store(include, entry)
	// A new folder was discovered, so the on-disk cache is stale and must be
	// rewritten at the end of the scan.
	cache.valid = false
	libMux.Unlock()
}

// Append the given folder to the include path without touching the cache,
// because the corresponding entry is already present there (the folder was
// resolved from a cached entry during the preload phase).
// The include parameter is the #include directive that maps to this folder;
// it is kept for symmetry with appendIncludeFolder.
func appendIncludeFolderWoCache(ctx *types.Context, include string, folder *paths.Path) {
	libMux.Lock()
	ctx.IncludeFolders = append(ctx.IncludeFolders, folder)
	// NOTE: the original version also called
	// `cache.ExpectEntry(sourceFilePath, include, folder)` here, but neither
	// `cache` nor `sourceFilePath` is in scope in this function, and
	// ExpectEntry was removed from includeCache by this same change — the
	// line could not compile and has been dropped.
	libMux.Unlock()
}

func runCommand(ctx *types.Context, command types.Command) error {
Expand Down Expand Up @@ -204,56 +277,7 @@ func (entry *includeCacheEntry) Equals(other *includeCacheEntry) bool {
type includeCache struct {
// Are the cache contents valid so far?
valid bool
// Index into entries of the next entry to be processed. Unused
// when the cache is invalid.
next int
entries []*includeCacheEntry
}

// Return the next cache entry. Should only be called when the cache is
// valid and a next entry is available (the latter can be checked with
// ExpectFile). Does not advance the cache.
func (cache *includeCache) Next() *includeCacheEntry {
return cache.entries[cache.next]
}

// Check that the next cache entry is about the given file. If it is
// not, or no entry is available, the cache is invalidated. Does not
// advance the cache.
func (cache *includeCache) ExpectFile(sourcefile *paths.Path) {
if cache.valid && (cache.next >= len(cache.entries) || !cache.Next().Sourcefile.EqualsTo(sourcefile)) {
cache.valid = false
cache.entries = cache.entries[:cache.next]
}
}

// Check that the next entry matches the given values. If so, advance
// the cache. If not, the cache is invalidated. If the cache is
// invalidated, or was already invalid, an entry with the given values
// is appended.
func (cache *includeCache) ExpectEntry(sourcefile *paths.Path, include string, librarypath *paths.Path) {
entry := &includeCacheEntry{Sourcefile: sourcefile, Include: include, Includepath: librarypath}
if cache.valid {
if cache.next < len(cache.entries) && cache.Next().Equals(entry) {
cache.next++
} else {
cache.valid = false
cache.entries = cache.entries[:cache.next]
}
}

if !cache.valid {
cache.entries = append(cache.entries, entry)
}
}

// Check that the cache is completely consumed. If not, the cache is
// invalidated.
func (cache *includeCache) ExpectEnd() {
if cache.valid && cache.next < len(cache.entries) {
cache.valid = false
cache.entries = cache.entries[:cache.next]
}
entries sync.Map
}

// Read the cache from the given file
Expand All @@ -264,10 +288,18 @@ func readCache(path *paths.Path) *includeCache {
return &includeCache{}
}
result := &includeCache{}
err = json.Unmarshal(bytes, &result.entries)
var entries []*includeCacheEntry
err = json.Unmarshal(bytes, &entries)
if err != nil {
// Return an empty, invalid cache
return &includeCache{}
} else {
for _, entry := range entries {
if entry.Include != "" || entry.Includepath != nil {
// Put the entry into cache
result.entries.Store(entry.Include, entry)
}
}
}
result.valid = true
return result
Expand All @@ -283,7 +315,14 @@ func writeCache(cache *includeCache, path *paths.Path) error {
if cache.valid {
path.Chtimes(time.Now(), time.Now())
} else {
bytes, err := json.MarshalIndent(cache.entries, "", " ")
var entries []*includeCacheEntry
cache.entries.Range(func(k, v interface{}) bool {
if entry, ok := v.(*includeCacheEntry); ok {
entries = append(entries, entry)
}
return true
})
bytes, err := json.MarshalIndent(entries, "", " ")
if err != nil {
return errors.WithStack(err)
}
Expand All @@ -295,6 +334,46 @@ func writeCache(cache *includeCache, path *paths.Path) error {
return nil
}

// Preload the cached include files and libraries
func preloadCachedFolder(ctx *types.Context, cache *includeCache) {
var entryToRemove []string
cache.entries.Range(func(include, entry interface{}) bool {

header, ok := include.(string)
if(ok) {
// Ignore the pre-added folder
if(!strings.HasPrefix(header, "no_resolve")) {
library, imported := ResolveLibrary(ctx, header)
if library == nil {
if !imported {
// Cannot find the library and it is not imported, is it gone? Remove it later
entryToRemove = append(entryToRemove, header)
cache.valid = false
}
} else {

// Add this library to the list of libraries, the
// include path and queue its source files for further
// include scanning
ctx.ImportedLibraries = append(ctx.ImportedLibraries, library)
// Since it is already in cache, append the include folder only
appendIncludeFolderWoCache(ctx, header, library.SourceDir)
sourceDirs := library.SourceDirs()
for _, sourceDir := range sourceDirs {
queueSourceFilesFromFolder(ctx, ctx.CollectedSourceFiles, library, sourceDir.Dir, sourceDir.Recurse)
}
}
}
}
return true
})
Comment on lines +343 to +369
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I read this correctly, you're basically reusing all the previous set of include paths, but without keeping the history of how you obtained that set. This won't work in all cases, in particular, if the cache is invalidated you must redo all the work again to be sure to not pick libraries that are not needed anymore.

Copy link
Author

@shaoyie shaoyie Jun 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, now the strategy only cares missing files but doesn't care the redundant entries in cache in case it won't break the build.
In the particular case you mentioned, if need to clear the useless libraries, a clean build is required to do the job. And most of the compiling tool have "Clean build" besides "Build", this case can utilize the differences between these two kind of builds.
Not sure is there a better way to handle this case so we can have a better balance between the performance and accurate.

// Remove the invalid entry
for entry := range entryToRemove {
cache.entries.Delete(entry)
}
}

// For the uncached/updated source file, find the include files
func findIncludesUntilDone(ctx *types.Context, cache *includeCache, sourceFile types.SourceFile) error {
sourcePath := sourceFile.SourcePath(ctx)
targetFilePath := paths.NullPath()
Expand All @@ -321,7 +400,6 @@ func findIncludesUntilDone(ctx *types.Context, cache *includeCache, sourceFile t
first := true
for {
var include string
cache.ExpectFile(sourcePath)

includes := ctx.IncludeFolders
if library, ok := sourceFile.Origin.(*libraries.Library); ok && library.UtilityDir != nil {
Expand All @@ -342,11 +420,11 @@ func findIncludesUntilDone(ctx *types.Context, cache *includeCache, sourceFile t
var preproc_err error
var preproc_stderr []byte

if unchanged && cache.valid {
include = cache.Next().Include
if unchanged {
if first && ctx.Verbose {
ctx.Info(tr("Using cached library dependencies for file: %[1]s", sourcePath))
}
return nil
} else {
preproc_stderr, preproc_err = GCCPreprocRunnerForDiscoveringIncludes(ctx, sourcePath, targetFilePath, includes)
// Unwrap error and see if it is an ExitError.
Expand All @@ -369,34 +447,42 @@ func findIncludesUntilDone(ctx *types.Context, cache *includeCache, sourceFile t

if include == "" {
// No missing includes found, we're done
cache.ExpectEntry(sourcePath, "", nil)
return nil
}

library := ResolveLibrary(ctx, include)
libMux.Lock()
library, imported := ResolveLibrary(ctx, include)
if library == nil {
// Library could not be resolved, show error
// err := runCommand(ctx, &GCCPreprocRunner{SourceFilePath: sourcePath, TargetFileName: paths.New(constants.FILE_CTAGS_TARGET_FOR_GCC_MINUS_E), Includes: includes})
// return errors.WithStack(err)
if preproc_err == nil || preproc_stderr == nil {
// Filename came from cache, so run preprocessor to obtain error to show
preproc_stderr, preproc_err = GCCPreprocRunnerForDiscoveringIncludes(ctx, sourcePath, targetFilePath, includes)
if preproc_err == nil {
// If there is a missing #include in the cache, but running
// gcc does not reproduce that, there is something wrong.
// Returning an error here will cause the cache to be
// deleted, so hopefully the next compilation will succeed.
return errors.New(tr("Internal error in cache"))
if imported {
// Added by others
libMux.Unlock()
continue
} else {
defer libMux.Unlock()
// Library could not be resolved, show error
// err := runCommand(ctx, &GCCPreprocRunner{SourceFilePath: sourcePath, TargetFileName: paths.New(constants.FILE_CTAGS_TARGET_FOR_GCC_MINUS_E), Includes: includes})
// return errors.WithStack(err)
if preproc_err == nil || preproc_stderr == nil {
// Filename came from cache, so run preprocessor to obtain error to show
preproc_stderr, preproc_err = GCCPreprocRunnerForDiscoveringIncludes(ctx, sourcePath, targetFilePath, includes)
if preproc_err == nil {
// If there is a missing #include in the cache, but running
// gcc does not reproduce that, there is something wrong.
// Returning an error here will cause the cache to be
// deleted, so hopefully the next compilation will succeed.
return errors.New(tr("Internal error in cache"))
}
}
ctx.Stderr.Write(preproc_stderr)
return errors.WithStack(preproc_err)
}
ctx.Stderr.Write(preproc_stderr)
return errors.WithStack(preproc_err)
}

// Add this library to the list of libraries, the
// include path and queue its source files for further
// include scanning
ctx.ImportedLibraries = append(ctx.ImportedLibraries, library)
libMux.Unlock()
appendIncludeFolder(ctx, cache, sourcePath, include, library.SourceDir)
sourceDirs := library.SourceDirs()
for _, sourceDir := range sourceDirs {
Expand All @@ -421,7 +507,10 @@ func queueSourceFilesFromFolder(ctx *types.Context, queue *types.UniqueSourceFil
if err != nil {
return errors.WithStack(err)
}

fileMux.Lock()
queue.Push(sourceFile)
fileMux.Unlock()
}

return nil
Expand Down
8 changes: 4 additions & 4 deletions legacy/builder/resolve_library.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
"github.com/arduino/arduino-cli/legacy/builder/types"
)

func ResolveLibrary(ctx *types.Context, header string) *libraries.Library {
func ResolveLibrary(ctx *types.Context, header string) (*libraries.Library, bool) {
resolver := ctx.LibrariesResolver
importedLibraries := ctx.ImportedLibraries

Expand All @@ -35,12 +35,12 @@ func ResolveLibrary(ctx *types.Context, header string) *libraries.Library {
}

if candidates == nil || len(candidates) == 0 {
return nil
return nil, false
}

for _, candidate := range candidates {
if importedLibraries.Contains(candidate) {
return nil
return nil, true
}
}

Expand All @@ -63,7 +63,7 @@ func ResolveLibrary(ctx *types.Context, header string) *libraries.Library {
NotUsedLibraries: filterOutLibraryFrom(candidates, selected),
}

return selected
return selected, false
}

func filterOutLibraryFrom(libs libraries.List, libraryToRemove *libraries.Library) libraries.List {
Expand Down