diff --git a/API.Tests/Extensions/SeriesExtensionsTests.cs b/API.Tests/Extensions/SeriesExtensionsTests.cs index 38e5f0001..adaecfba5 100644 --- a/API.Tests/Extensions/SeriesExtensionsTests.cs +++ b/API.Tests/Extensions/SeriesExtensionsTests.cs @@ -185,6 +185,35 @@ public void GetCoverImage_JustVolumes() Assert.Equal("Volume 1 Chapter 1", series.GetCoverImage()); } + [Fact] + public void GetCoverImage_JustVolumes_ButVolume0() + { + var series = new SeriesBuilder("Test 1") + .WithFormat(MangaFormat.Archive) + + .WithVolume(new VolumeBuilder("0") + .WithName("Volume 0") + .WithChapter(new ChapterBuilder(Parser.DefaultChapter) + .WithCoverImage("Volume 0") + .Build()) + .Build()) + + .WithVolume(new VolumeBuilder("1") + .WithName("Volume 1") + .WithChapter(new ChapterBuilder(Parser.DefaultChapter) + .WithCoverImage("Volume 1") + .Build()) + .Build()) + .Build(); + + foreach (var vol in series.Volumes) + { + vol.CoverImage = vol.Chapters.MinBy(x => x.SortOrder, ChapterSortComparerDefaultFirst.Default)?.CoverImage; + } + + Assert.Equal("Volume 1", series.GetCoverImage()); + } + [Fact] public void GetCoverImage_JustSpecials_WithDecimal() { diff --git a/API.Tests/Services/DirectoryServiceTests.cs b/API.Tests/Services/DirectoryServiceTests.cs index 9844e7766..737779f0f 100644 --- a/API.Tests/Services/DirectoryServiceTests.cs +++ b/API.Tests/Services/DirectoryServiceTests.cs @@ -6,6 +6,7 @@ using System.Text; using System.Threading.Tasks; using API.Services; +using Kavita.Common.Helpers; using Microsoft.Extensions.Logging; using NSubstitute; using Xunit; @@ -745,6 +746,12 @@ public void FindHighestDirectoriesFromFilesTest(string[] rootDirectories, string [InlineData(new [] {"/manga"}, new [] {"/manga/Love Hina/Vol. 01.cbz", "/manga/Love Hina/Specials/Sp01.cbz"}, "/manga/Love Hina")] + [InlineData(new [] {"/manga"}, + new [] {"/manga/Love Hina/Hina/Vol. 
01.cbz", "/manga/Love Hina/Specials/Sp01.cbz"}, + "/manga/Love Hina")] + [InlineData(new [] {"/manga"}, + new [] {"/manga/Dress Up Darling/Dress Up Darling Ch 01.cbz", "/manga/Dress Up Darling/Dress Up Darling/Dress Up Darling Vol 01.cbz"}, + "/manga/Dress Up Darling")] public void FindLowestDirectoriesFromFilesTest(string[] rootDirectories, string[] files, string expectedDirectory) { var fileSystem = new MockFileSystem(); @@ -920,8 +927,9 @@ public Task ScanFiles_ShouldFindNoFiles_AllAreIgnored() var ds = new DirectoryService(Substitute.For>(), fileSystem); - - var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions); + var globMatcher = new GlobMatcher(); + globMatcher.AddExclude("*.*"); + var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions, globMatcher); Assert.Empty(allFiles); @@ -945,7 +953,9 @@ public Task ScanFiles_ShouldFindNoNestedFiles_IgnoreNestedFiles() var ds = new DirectoryService(Substitute.For>(), fileSystem); - var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions); + var globMatcher = new GlobMatcher(); + globMatcher.AddExclude("**/Accel World/*"); + var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions, globMatcher); Assert.Single(allFiles); // Ignore files are not counted in files, only valid extensions @@ -974,7 +984,10 @@ public Task ScanFiles_NestedIgnore_IgnoreNestedFilesInOneDirectoryOnly() var ds = new DirectoryService(Substitute.For>(), fileSystem); - var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions); + var globMatcher = new GlobMatcher(); + globMatcher.AddExclude("**/Accel World/*"); + globMatcher.AddExclude("**/ArtBooks/*"); + var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions, globMatcher); Assert.Equal(2, allFiles.Count); // Ignore files are not counted in files, only valid extensions diff --git a/API.Tests/Services/ParseScannedFilesTests.cs b/API.Tests/Services/ParseScannedFilesTests.cs index 9fbb76ec3..ff4868a8c 100644 --- a/API.Tests/Services/ParseScannedFilesTests.cs +++ b/API.Tests/Services/ParseScannedFilesTests.cs @@ -206,24 +206,6 @@ public async Task ScanLibrariesForSeries_ShouldFindFiles() var psf = new ParseScannedFiles(Substitute.For>(), ds, new MockReadingItemService(ds, Substitute.For()), Substitute.For()); - // var parsedSeries = new Dictionary>(); - // - // Task TrackFiles(Tuple> parsedInfo) - // { - // var skippedScan = parsedInfo.Item1; - // var parsedFiles = parsedInfo.Item2; - // if (parsedFiles.Count == 0) return Task.CompletedTask; - // - // var foundParsedSeries = new ParsedSeries() - // { - // Name = parsedFiles.First().Series, - // NormalizedName = parsedFiles.First().Series.ToNormalized(), - // Format = parsedFiles.First().Format - // }; - // - // parsedSeries.Add(foundParsedSeries, parsedFiles); - // return Task.CompletedTask; - // } var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1, @@ -273,7 +255,7 @@ public async Task ProcessFiles_ForLibraryMode_OnlyCallsFolderActionForEachTopLev var directoriesSeen = new HashSet(); var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1, LibraryIncludes.Folders | LibraryIncludes.FileTypes); - var scanResults = await psf.ProcessFiles("C:/Data/", true, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library); + var scanResults = await psf.ScanFiles("C:/Data/", true, 
await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library); foreach (var scanResult in scanResults) { directoriesSeen.Add(scanResult.Folder); @@ -295,7 +277,7 @@ public async Task ProcessFiles_ForNonLibraryMode_CallsFolderActionOnce() Assert.NotNull(library); var directoriesSeen = new HashSet(); - var scanResults = await psf.ProcessFiles("C:/Data/", false, + var scanResults = await psf.ScanFiles("C:/Data/", false, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library); foreach (var scanResult in scanResults) @@ -328,7 +310,7 @@ public async Task ProcessFiles_ShouldCallFolderActionTwice() var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1, LibraryIncludes.Folders | LibraryIncludes.FileTypes); Assert.NotNull(library); - var scanResults = await psf.ProcessFiles("C:/Data", true, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library); + var scanResults = await psf.ScanFiles("C:/Data", true, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library); Assert.Equal(2, scanResults.Count); } @@ -357,7 +339,7 @@ public async Task ProcessFiles_ShouldCallFolderActionOnce() var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1, LibraryIncludes.Folders | LibraryIncludes.FileTypes); Assert.NotNull(library); - var scanResults = await psf.ProcessFiles("C:/Data", false, + var scanResults = await psf.ScanFiles("C:/Data", false, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library); Assert.Single(scanResults); diff --git a/API.Tests/Services/ScannerServiceTests.cs b/API.Tests/Services/ScannerServiceTests.cs index 67b93273b..6d38d197d 100644 --- a/API.Tests/Services/ScannerServiceTests.cs +++ b/API.Tests/Services/ScannerServiceTests.cs @@ -51,79 +51,50 @@ protected override async Task ResetDb() } [Fact] - public void FindSeriesNotOnDisk_Should_Remove1() + public async Task ScanLibrary_ComicVine_PublisherFolder() { - var infos = new Dictionary>(); - - ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Darker than Black", Volumes = "1", Format = MangaFormat.Archive}); - //AddToParsedInfo(infos, new ParserInfo() {Series = "Darker than Black", Volumes = "1", Format = MangaFormat.Epub}); - - var existingSeries = new List - { - new SeriesBuilder("Darker Than Black") - .WithFormat(MangaFormat.Epub) - - .WithVolume(new VolumeBuilder("1") - .WithName("1") - .Build()) - .WithLocalizedName("Darker Than Black") - .Build() - }; + var testcase = "Publisher - ComicVine.json"; + var postLib = await GenerateScannerData(testcase); - Assert.Single(ScannerService.FindSeriesNotOnDisk(existingSeries, infos)); + Assert.NotNull(postLib); + Assert.Equal(4, postLib.Series.Count); } [Fact] - public void FindSeriesNotOnDisk_Should_RemoveNothing_Test() + public async Task ScanLibrary_ShouldCombineNestedFolder() { - var infos = new Dictionary>(); - - ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Darker than Black", Format = MangaFormat.Archive}); - ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Cage of Eden", Volumes = "1", Format = MangaFormat.Archive}); - ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Cage of Eden", Volumes = "10", Format = MangaFormat.Archive}); + var testcase = "Series and Series-Series Combined - Manga.json"; + var postLib = await GenerateScannerData(testcase); - var existingSeries = new List - { - new SeriesBuilder("Cage of Eden") - .WithFormat(MangaFormat.Archive) - - .WithVolume(new VolumeBuilder("1") - .WithName("1") - .Build()) - 
.WithLocalizedName("Darker Than Black") - .Build(), - new SeriesBuilder("Darker Than Black") - .WithFormat(MangaFormat.Archive) - .WithVolume(new VolumeBuilder("1") - .WithName("1") - .Build()) - .WithLocalizedName("Darker Than Black") - .Build(), - }; - - Assert.Empty(ScannerService.FindSeriesNotOnDisk(existingSeries, infos)); + Assert.NotNull(postLib); + Assert.Single(postLib.Series); + Assert.Equal(2, postLib.Series.First().Volumes.Count); } + [Fact] - public async Task ScanLibrary_ComicVine_PublisherFolder() + public async Task ScanLibrary_FlatSeries() { - var testcase = "Publisher - ComicVine.json"; + var testcase = "Flat Series - Manga.json"; var postLib = await GenerateScannerData(testcase); Assert.NotNull(postLib); - Assert.Equal(4, postLib.Series.Count); + Assert.Single(postLib.Series); + Assert.Equal(3, postLib.Series.First().Volumes.Count); + + // TODO: Trigger a deletion of ch 10 } [Fact] - public async Task ScanLibrary_ShouldCombineNestedFolder() + public async Task ScanLibrary_FlatSeriesWithSpecial() { - var testcase = "Series and Series-Series Combined - Manga.json"; + var testcase = "Flat Series with Specials - Manga.json"; var postLib = await GenerateScannerData(testcase); Assert.NotNull(postLib); Assert.Single(postLib.Series); - Assert.Single(postLib.Series); - Assert.Equal(2, postLib.Series.First().Volumes.Count); + Assert.Equal(4, postLib.Series.First().Volumes.Count); + Assert.NotNull(postLib.Series.First().Volumes.FirstOrDefault(v => v.Chapters.FirstOrDefault(c => c.IsSpecial) != null)); } private async Task GenerateScannerData(string testcase) @@ -145,13 +116,23 @@ private async Task GenerateScannerData(string testcase) _unitOfWork.LibraryRepository.Add(library); await _unitOfWork.CommitAsync(); + var scanner = CreateServices(); + + await scanner.ScanLibrary(library.Id); + + var postLib = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(library.Id, LibraryIncludes.Series); + return postLib; + } + + private ScannerService CreateServices() + { var ds = new DirectoryService(Substitute.For>(), new FileSystem()); var mockReadingService = new MockReadingItemService(ds, Substitute.For()); var processSeries = new ProcessSeries(_unitOfWork, Substitute.For>(), Substitute.For(), ds, Substitute.For(), mockReadingService, Substitute.For(), Substitute.For(), - Substitute.For(), Substitute.For(), + Substitute.For(), Substitute.For(), Substitute.For(), new TagManagerService(_unitOfWork, Substitute.For>())); @@ -159,11 +140,7 @@ private async Task GenerateScannerData(string testcase) Substitute.For(), Substitute.For(), Substitute.For(), ds, mockReadingService, processSeries, Substitute.For()); - - await scanner.ScanLibrary(library.Id); - - var postLib = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(library.Id, LibraryIncludes.Series); - return postLib; + return scanner; } private static (string Publisher, LibraryType Type) SplitPublisherAndLibraryType(string input) diff --git a/API.Tests/Services/Test Data/ScannerService/TestCases/Flat Series - Manga.json b/API.Tests/Services/Test Data/ScannerService/TestCases/Flat Series - Manga.json new file mode 100644 index 000000000..6b4b70160 --- /dev/null +++ b/API.Tests/Services/Test Data/ScannerService/TestCases/Flat Series - Manga.json @@ -0,0 +1,5 @@ +[ + "My Dress-Up Darling/My Dress-Up Darling v01.cbz", + "My Dress-Up Darling/My Dress-Up Darling v02.cbz", + "My Dress-Up Darling/My Dress-Up Darling ch 10.cbz" +] \ No newline at end of file diff --git a/API.Tests/Services/Test Data/ScannerService/TestCases/Flat Series 
with Specials - Manga.json b/API.Tests/Services/Test Data/ScannerService/TestCases/Flat Series with Specials - Manga.json new file mode 100644 index 000000000..12e80ea95 --- /dev/null +++ b/API.Tests/Services/Test Data/ScannerService/TestCases/Flat Series with Specials - Manga.json @@ -0,0 +1,6 @@ +[ + "My Dress-Up Darling/My Dress-Up Darling v01.cbz", + "My Dress-Up Darling/My Dress-Up Darling v02.cbz", + "My Dress-Up Darling/My Dress-Up Darling ch 10.cbz", + "My Dress-Up Darling/Specials/Official Anime Fanbook SP05 (2024) (Digital).cbz" +] \ No newline at end of file diff --git a/API.Tests/Services/Test Data/ScannerService/TestCases/Nested Chapters - Manga.json b/API.Tests/Services/Test Data/ScannerService/TestCases/Nested Chapters - Manga.json new file mode 100644 index 000000000..586ae90f5 --- /dev/null +++ b/API.Tests/Services/Test Data/ScannerService/TestCases/Nested Chapters - Manga.json @@ -0,0 +1,4 @@ +[ + "My Dress-Up Darling/Chapter 1/01.cbz", + "My Dress-Up Darling/Chapter 2/02.cbz" +] \ No newline at end of file diff --git a/API/Data/ManualMigrations/MigrateLowestSeriesFolderPath2.cs b/API/Data/ManualMigrations/MigrateLowestSeriesFolderPath2.cs new file mode 100644 index 000000000..bb79c3359 --- /dev/null +++ b/API/Data/ManualMigrations/MigrateLowestSeriesFolderPath2.cs @@ -0,0 +1,51 @@ +using System; +using System.Linq; +using System.Threading.Tasks; +using API.Entities; +using API.Services.Tasks.Scanner.Parser; +using Kavita.Common.EnvironmentInfo; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Logging; + +namespace API.Data.ManualMigrations; + +/// +/// v0.8.3 still had a bug around LowestSeriesPath. This resets it for all users. +/// +public static class MigrateLowestSeriesFolderPath2 +{ + public static async Task Migrate(DataContext dataContext, IUnitOfWork unitOfWork, ILogger logger) + { + if (await dataContext.ManualMigrationHistory.AnyAsync(m => m.Name == "MigrateLowestSeriesFolderPath2")) + { + return; + } + + logger.LogCritical( + "Running MigrateLowestSeriesFolderPath2 migration - Please be patient, this may take some time. This is not an error"); + + var series = await dataContext.Series.Where(s => !string.IsNullOrEmpty(s.LowestFolderPath)).ToListAsync(); + foreach (var s in series) + { + s.LowestFolderPath = string.Empty; + unitOfWork.SeriesRepository.Update(s); + } + + // Save changes after processing all series + if (dataContext.ChangeTracker.HasChanges()) + { + await dataContext.SaveChangesAsync(); + } + + dataContext.ManualMigrationHistory.Add(new ManualMigrationHistory() + { + Name = "MigrateLowestSeriesFolderPath2", + ProductVersion = BuildInfo.Version.ToString(), + RanAt = DateTime.UtcNow + }); + + await dataContext.SaveChangesAsync(); + logger.LogCritical( + "Running MigrateLowestSeriesFolderPath2 migration - Completed. 
This is not an error"); + } +} diff --git a/API/Data/Repositories/GenreRepository.cs b/API/Data/Repositories/GenreRepository.cs index a68c3c548..211892f94 100644 --- a/API/Data/Repositories/GenreRepository.cs +++ b/API/Data/Repositories/GenreRepository.cs @@ -6,6 +6,7 @@ using API.Entities; using API.Extensions; using API.Extensions.QueryExtensions; +using API.Services.Tasks.Scanner.Parser; using AutoMapper; using AutoMapper.QueryableExtensions; using Microsoft.EntityFrameworkCore; @@ -24,6 +25,7 @@ public interface IGenreRepository Task GetCountAsync(); Task GetRandomGenre(); Task GetGenreById(int id); + Task> GetAllGenresNotInListAsync(ICollection genreNames); } public class GenreRepository : IGenreRepository @@ -133,4 +135,31 @@ public async Task> GetAllGenreDtosForLibrariesAsync(int userI .ProjectTo(_mapper.ConfigurationProvider) .ToListAsync(); } + + /// + /// Gets all genres that are not already present in the system. + /// Normalizes genres for lookup, but returns non-normalized names for creation. + /// + /// The list of genre names (non-normalized). + /// A list of genre names that do not exist in the system. + public async Task> GetAllGenresNotInListAsync(ICollection genreNames) + { + // Create a dictionary mapping normalized names to non-normalized names + var normalizedToOriginalMap = genreNames.Distinct() + .ToDictionary(Parser.Normalize, genre => genre); + + var normalizedGenreNames = normalizedToOriginalMap.Keys.ToList(); + + // Query the database for existing genres using the normalized names + var existingGenres = await _context.Genre + .Where(g => normalizedGenreNames.Contains(g.NormalizedTitle)) // Assuming you have a normalized field + .Select(g => g.NormalizedTitle) + .ToListAsync(); + + // Find the normalized genres that do not exist in the database + var missingGenres = normalizedGenreNames.Except(existingGenres).ToList(); + + // Return the original non-normalized genres for the missing ones + return missingGenres.Select(normalizedName => normalizedToOriginalMap[normalizedName]).ToList(); + } } diff --git a/API/Data/Repositories/PersonRepository.cs b/API/Data/Repositories/PersonRepository.cs index 5633d7403..eeb0d28f5 100644 --- a/API/Data/Repositories/PersonRepository.cs +++ b/API/Data/Repositories/PersonRepository.cs @@ -106,7 +106,8 @@ public async Task> GetAllPeople() public async Task> GetAllPersonDtosAsync(int userId) { var ageRating = await _context.AppUser.GetUserAgeRestriction(userId); - var libraryIds = await _context.Library.GetUserLibraries(userId).ToListAsync(); + // TODO: Figure out how to fix this lack of RBS + //var libraryIds = await _context.Library.GetUserLibraries(userId).ToListAsync(); return await _context.Person .OrderBy(p => p.Name) .RestrictAgainstAgeRestriction(ageRating) diff --git a/API/Data/Repositories/SeriesRepository.cs b/API/Data/Repositories/SeriesRepository.cs index 88b9b0a75..f06f81eee 100644 --- a/API/Data/Repositories/SeriesRepository.cs +++ b/API/Data/Repositories/SeriesRepository.cs @@ -1749,45 +1749,36 @@ public async Task> GetAllSeriesByAnyName(string seriesName, string /// public async Task> RemoveSeriesNotInList(IList seenSeries, int libraryId) { - if (seenSeries.Count == 0) return Array.Empty(); + if (!seenSeries.Any()) return Array.Empty(); + + // Get all series from DB in one go, based on libraryId + var dbSeries = await _context.Series + .Where(s => s.LibraryId == libraryId) + .ToListAsync(); + + // Get a set of matching series ids for the given parsedSeries + var ids = new HashSet(); - var ids = new List(); foreach 
(var parsedSeries in seenSeries) { - try - { - var seriesId = await _context.Series - .Where(s => s.Format == parsedSeries.Format && s.NormalizedName == parsedSeries.NormalizedName && - s.LibraryId == libraryId) - .Select(s => s.Id) - .SingleOrDefaultAsync(); - if (seriesId > 0) - { - ids.Add(seriesId); - } - } - catch (Exception) + var matchingSeries = dbSeries + .Where(s => s.Format == parsedSeries.Format && s.NormalizedName == parsedSeries.NormalizedName) + .OrderBy(s => s.Id) // Sort to handle potential duplicates + .ToList(); + + // Prefer the first match or handle duplicates by choosing the last one + if (matchingSeries.Any()) { - // This is due to v0.5.6 introducing bugs where we could have multiple series get duplicated and no way to delete them - // This here will delete the 2nd one as the first is the one to likely be used. - var sId = await _context.Series - .Where(s => s.Format == parsedSeries.Format && s.NormalizedName == parsedSeries.NormalizedName && - s.LibraryId == libraryId) - .Select(s => s.Id) - .OrderBy(s => s) - .LastAsync(); - if (sId > 0) - { - ids.Add(sId); - } + ids.Add(matchingSeries.Last().Id); } } - var seriesToRemove = await _context.Series - .Where(s => s.LibraryId == libraryId) + // Filter out series that are not in the seenSeries + var seriesToRemove = dbSeries .Where(s => !ids.Contains(s.Id)) - .ToListAsync(); + .ToList(); + // Remove series in bulk _context.Series.RemoveRange(seriesToRemove); return seriesToRemove; diff --git a/API/Data/Repositories/TagRepository.cs b/API/Data/Repositories/TagRepository.cs index 2fdb8377e..c0bf04786 100644 --- a/API/Data/Repositories/TagRepository.cs +++ b/API/Data/Repositories/TagRepository.cs @@ -5,6 +5,7 @@ using API.Entities; using API.Extensions; using API.Extensions.QueryExtensions; +using API.Services.Tasks.Scanner.Parser; using AutoMapper; using AutoMapper.QueryableExtensions; using Microsoft.EntityFrameworkCore; @@ -20,6 +21,7 @@ public interface ITagRepository Task> GetAllTagDtosAsync(int userId); Task RemoveAllTagNoLongerAssociated(); Task> GetAllTagDtosForLibrariesAsync(int userId, IList? 
libraryIds = null);
+    Task<IList<string>> GetAllTagsNotInListAsync(ICollection<string> tags);
 }

 public class TagRepository : ITagRepository
@@ -79,6 +81,27 @@ public async Task<IList<TagDto>> GetAllTagDtosForLibrariesAsync(int userId, ILis
             .ToListAsync();
     }

+    public async Task<IList<string>> GetAllTagsNotInListAsync(ICollection<string> tags)
+    {
+        // Create a dictionary mapping normalized names to non-normalized names
+        var normalizedToOriginalMap = tags.Distinct()
+            .ToDictionary(Parser.Normalize, tag => tag);
+
+        var normalizedTagNames = normalizedToOriginalMap.Keys.ToList();
+
+        // Query the database for existing tags using the normalized names
+        var existingTags = await _context.Tag
+            .Where(g => normalizedTagNames.Contains(g.NormalizedTitle)) // Assuming you have a normalized field
+            .Select(g => g.NormalizedTitle)
+            .ToListAsync();
+
+        // Find the normalized tags that do not exist in the database
+        var missingTags = normalizedTagNames.Except(existingTags).ToList();
+
+        // Return the original non-normalized tags for the missing ones
+        return missingTags.Select(normalizedName => normalizedToOriginalMap[normalizedName]).ToList();
+    }
+
     public async Task<IList<Tag>> GetAllTagsAsync()
     {
         return await _context.Tag.ToListAsync();
diff --git a/API/Extensions/SeriesExtensions.cs b/API/Extensions/SeriesExtensions.cs
index 83f6ecfdf..01ae718c7 100644
--- a/API/Extensions/SeriesExtensions.cs
+++ b/API/Extensions/SeriesExtensions.cs
@@ -28,6 +28,12 @@ public static class SeriesExtensions
             firstVolume = volumes[1];
         }

+        // If the first volume is 0, then use Volume 1
+        if (firstVolume.MinNumber.Is(0f) && volumes.Count > 1)
+        {
+            firstVolume = volumes[1];
+        }
+
         var chapters = firstVolume.Chapters
             .OrderBy(c => c.SortOrder)
             .ToList();
diff --git a/API/Services/DirectoryService.cs b/API/Services/DirectoryService.cs
index c19914668..1c56c65fc 100644
--- a/API/Services/DirectoryService.cs
+++ b/API/Services/DirectoryService.cs
@@ -55,7 +55,7 @@ public interface IDirectoryService
     bool CopyDirectoryToDirectory(string? sourceDirName, string destDirName, string searchPattern = "");
     Dictionary<string, string> FindHighestDirectoriesFromFiles(IEnumerable<string> libraryFolders, IList<string> filePaths);
-    string? FindLowestDirectoriesFromFiles(IEnumerable<string> libraryFolders,
+    string? FindLowestDirectoriesFromFiles(IList<string> libraryFolders,
         IList<string> filePaths);
     IEnumerable<string> GetFoldersTillRoot(string rootPath, string fullPath);
     IEnumerable<string> GetFiles(string path, string fileNameRegex = "", SearchOption searchOption = SearchOption.TopDirectoryOnly);
@@ -69,14 +69,13 @@ IEnumerable<string> GetFilesWithCertainExtensions(string path,
         SearchOption searchOption = SearchOption.TopDirectoryOnly);
     IEnumerable<string> GetDirectories(string folderPath);
     IEnumerable<string> GetDirectories(string folderPath, GlobMatcher? matcher);
+    IEnumerable<string> GetAllDirectories(string folderPath, GlobMatcher? matcher = null);
     string GetParentDirectoryName(string fileOrFolder);
-    IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null);
+    IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null, SearchOption searchOption = SearchOption.AllDirectories);
     DateTime GetLastWriteTime(string folderPath);
-    GlobMatcher?
CreateMatcherFromFile(string filePath); } public class DirectoryService : IDirectoryService { - public const string KavitaIgnoreFile = ".kavitaignore"; public IFileSystem FileSystem { get; } public string CacheDirectory { get; } public string CoverImageDirectory { get; } @@ -95,11 +94,9 @@ public class DirectoryService : IDirectoryService private static readonly Regex ExcludeDirectories = new Regex( @"@eaDir|\.DS_Store|\.qpkg|__MACOSX|@Recently-Snapshot|@recycle|\.@__thumb|\.caltrash|#recycle|\.yacreaderlibrary", - MatchOptions, - Tasks.Scanner.Parser.Parser.RegexTimeout); + MatchOptions, Parser.RegexTimeout); private static readonly Regex FileCopyAppend = new Regex(@"\(\d+\)", - MatchOptions, - Tasks.Scanner.Parser.Parser.RegexTimeout); + MatchOptions, Parser.RegexTimeout); public static readonly string BackupDirectory = Path.Join(Directory.GetCurrentDirectory(), "config", "backups"); public DirectoryService(ILogger logger, IFileSystem fileSystem) @@ -136,22 +133,38 @@ public DirectoryService(ILogger logger, IFileSystem fileSystem /// /// This will always exclude patterns /// Directory to search - /// Regex version of search pattern (ie \.mp3|\.mp4). Defaults to * meaning all files. + /// Regex version of search pattern (e.g., \.mp3|\.mp4). Defaults to * meaning all files. /// SearchOption to use, defaults to TopDirectoryOnly /// List of file paths public IEnumerable GetFilesWithCertainExtensions(string path, string searchPatternExpression = "", SearchOption searchOption = SearchOption.TopDirectoryOnly) { - if (!FileSystem.Directory.Exists(path)) return ImmutableList.Empty; - var reSearchPattern = new Regex(searchPatternExpression, RegexOptions.IgnoreCase, Tasks.Scanner.Parser.Parser.RegexTimeout); + // If directory doesn't exist, exit the iterator with no results + if (!FileSystem.Directory.Exists(path)) + yield break; - return FileSystem.Directory.EnumerateFiles(path, "*", searchOption) - .Where(file => - reSearchPattern.IsMatch(FileSystem.Path.GetExtension(file)) && !FileSystem.Path.GetFileName(file).StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith)); + // Compile the regex pattern for faster repeated matching + var reSearchPattern = new Regex(searchPatternExpression, + RegexOptions.IgnoreCase | RegexOptions.Compiled, + Parser.RegexTimeout); + + // Enumerate files in the directory and apply filters + foreach (var file in FileSystem.Directory.EnumerateFiles(path, "*", searchOption)) + { + var fileName = FileSystem.Path.GetFileName(file); + var fileExtension = FileSystem.Path.GetExtension(file); + + // Check if the file matches the pattern and exclude macOS metadata files + if (reSearchPattern.IsMatch(fileExtension) && !fileName.StartsWith(Parser.MacOsMetadataFileStartsWith)) + { + yield return file; + } + } } + /// /// Returns a list of folders from end of fullPath to rootPath. If a file is passed at the end of the fullPath, it will be ignored. /// @@ -173,8 +186,6 @@ public IEnumerable GetFoldersTillRoot(string rootPath, string fullPath) rootPath = rootPath.Replace(FileSystem.Path.DirectorySeparatorChar, FileSystem.Path.AltDirectorySeparatorChar); } - - var path = fullPath.EndsWith(separator) ? fullPath.Substring(0, fullPath.Length - 1) : fullPath; var root = rootPath.EndsWith(separator) ? 
rootPath.Substring(0, rootPath.Length - 1) : rootPath; var paths = new List(); @@ -215,25 +226,34 @@ public bool Exists(string directory) /// public IEnumerable GetFiles(string path, string fileNameRegex = "", SearchOption searchOption = SearchOption.TopDirectoryOnly) { - if (!FileSystem.Directory.Exists(path)) return ImmutableList.Empty; + if (!FileSystem.Directory.Exists(path)) + yield break; // Use yield break to exit the iterator early - if (fileNameRegex != string.Empty) + Regex? reSearchPattern = null; + if (!string.IsNullOrEmpty(fileNameRegex)) { - var reSearchPattern = new Regex(fileNameRegex, RegexOptions.IgnoreCase, - Tasks.Scanner.Parser.Parser.RegexTimeout); - return FileSystem.Directory.EnumerateFiles(path, "*", searchOption) - .Where(file => - { - var fileName = FileSystem.Path.GetFileName(file); - return reSearchPattern.IsMatch(fileName) && - !fileName.StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith); - }); + // Compile the regex for better performance when used frequently + reSearchPattern = new Regex(fileNameRegex, RegexOptions.IgnoreCase | RegexOptions.Compiled, Tasks.Scanner.Parser.Parser.RegexTimeout); } - return FileSystem.Directory.EnumerateFiles(path, "*", searchOption).Where(file => - !FileSystem.Path.GetFileName(file).StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith)); + // Enumerate files lazily + foreach (var file in FileSystem.Directory.EnumerateFiles(path, "*", searchOption)) + { + var fileName = FileSystem.Path.GetFileName(file); + + // Exclude macOS metadata files + if (fileName.StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith)) + continue; + + // If a regex is provided, match the file name against it + if (reSearchPattern != null && !reSearchPattern.IsMatch(fileName)) + continue; + + yield return file; // Yield each matching file as it's found + } } + /// /// Copies a file into a directory. Does not maintain parent folder of file. /// Will create target directory if doesn't exist. Automatically overwrites what is there. @@ -329,7 +349,7 @@ public string[] GetFilesWithExtension(string path, string searchPatternExpressio return GetFilesWithCertainExtensions(path, searchPatternExpression).ToArray(); } - return !FileSystem.Directory.Exists(path) ? Array.Empty() : FileSystem.Directory.GetFiles(path); + return !FileSystem.Directory.Exists(path) ? [] : FileSystem.Directory.GetFiles(path); } /// @@ -391,10 +411,12 @@ public void ClearDirectory(string directoryPath) { foreach (var file in di.EnumerateFiles()) { + if (!file.Exists) continue; file.Delete(); } foreach (var dir in di.EnumerateDirectories()) { + if (!dir.Exists) continue; dir.Delete(true); } } @@ -594,46 +616,60 @@ public Dictionary FindHighestDirectoriesFromFiles(IEnumerable /// Finds the lowest directory from a set of file paths. Does not return the root path, will always select the lowest non-root path. /// - /// If the file paths do not contain anything from libraryFolders, this returns an empty dictionary back + /// If the file paths do not contain anything from libraryFolders, this returns null. /// List of top level folders which files belong to /// List of file paths that belong to libraryFolders - /// - public string? FindLowestDirectoriesFromFiles(IEnumerable libraryFolders, IList filePaths) + /// Lowest non-root path, or null if not found + public string? 
FindLowestDirectoriesFromFiles(IList libraryFolders, IList filePaths) { - var dirs = new Dictionary(); + // Normalize the file paths only once var normalizedFilePaths = filePaths.Select(Parser.NormalizePath).ToList(); - foreach (var folder in libraryFolders.Select(Parser.NormalizePath)) + // Use a list to store all directories for comparison + var dirs = new List(); + + // Iterate through each library folder and collect matching directories + foreach (var normalizedFolder in libraryFolders.Select(Parser.NormalizePath)) { foreach (var file in normalizedFilePaths) { - if (!file.Contains(folder)) continue; + // If the file path contains the folder path, get its directory + if (!file.Contains(normalizedFolder)) continue; - var lowestPath = Path.GetDirectoryName(file); + var lowestPath = Path.GetDirectoryName(file); if (!string.IsNullOrEmpty(lowestPath)) { - dirs.TryAdd(Parser.NormalizePath(lowestPath), string.Empty); + dirs.Add(Parser.NormalizePath(lowestPath)); // Add to list } - } } - if (dirs.Keys.Count == 1) return dirs.Keys.First(); - if (dirs.Keys.Count > 1) + if (dirs.Count == 0) { - // For each key, validate that each file exists in the key path - foreach (var folder in dirs.Keys) - { - if (normalizedFilePaths.TrueForAll(filePath => filePath.Contains(Parser.NormalizePath(folder)))) - { - return folder; - } - } + return null; // No directories found } - return null; + // Now find the deepest common directory among all paths + var commonPath = dirs.Aggregate(GetDeepestCommonPath); // Use new method to get deepest path + + // Return the common path if it exists and is not one of the root directories + return libraryFolders.Any(folder => commonPath == Parser.NormalizePath(folder)) ? null : commonPath; } + public static string GetDeepestCommonPath(string path1, string path2) + { + var parts1 = path1.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); + var parts2 = path2.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); + + // Get the longest matching parts, ensuring that deeper parts in hierarchy are considered + var commonParts = parts1.Zip(parts2, (p1, p2) => p1 == p2 ? p1 : null) + .TakeWhile(part => part != null) + .ToArray(); + + return Parser.NormalizePath(string.Join(Path.DirectorySeparatorChar.ToString(), commonParts)); + } + + /// /// Gets a set of directories from the folder path. Automatically excludes directories that shouldn't be in scope. /// @@ -665,8 +701,9 @@ public IEnumerable GetDirectories(string folderPath, GlobMatcher? matche /// Returns all directories, including subdirectories. Automatically excludes directories that shouldn't be in scope. /// /// + /// /// - public IEnumerable GetAllDirectories(string folderPath) + public IEnumerable GetAllDirectories(string folderPath, GlobMatcher? matcher = null) { if (!FileSystem.Directory.Exists(folderPath)) return ImmutableArray.Empty; var directories = new List(); @@ -675,7 +712,7 @@ public IEnumerable GetAllDirectories(string folderPath) foreach (var foundDir in foundDirs) { directories.Add(foundDir); - directories.AddRange(GetAllDirectories(foundDir)); + directories.AddRange(GetAllDirectories(foundDir, matcher)); } return directories; @@ -699,93 +736,82 @@ public string GetParentDirectoryName(string fileOrFolder) } /// - /// Scans a directory by utilizing a recursive folder search. If a .kavitaignore file is found, will ignore matching patterns + /// Scans a directory by utilizing a recursive folder search. 
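For reference, here is a minimal, self-contained sketch of how the new GetDeepestCommonPath helper above resolves the paths exercised by FindLowestDirectoriesFromFilesTest. The Main harness and hard-coded paths are illustrative assumptions only, and Parser.NormalizePath is swapped for a plain forward-slash join so the sketch compiles on its own:

using System;
using System.Linq;

public static class DeepestCommonPathDemo
{
    // Same Zip/TakeWhile logic as GetDeepestCommonPath above, minus Parser.NormalizePath
    private static string GetDeepestCommonPath(string path1, string path2)
    {
        var commonParts = path1.Split('/', '\\')
            .Zip(path2.Split('/', '\\'), (p1, p2) => p1 == p2 ? p1 : null)
            .TakeWhile(part => part != null);

        return string.Join("/", commonParts);
    }

    public static void Main()
    {
        // Sibling series folders resolve to their shared parent
        Console.WriteLine(GetDeepestCommonPath("/manga/Love Hina/Hina", "/manga/Love Hina/Specials"));
        // -> /manga/Love Hina

        // A nested duplicate folder resolves to the shallower path
        Console.WriteLine(GetDeepestCommonPath("/manga/Dress Up Darling", "/manga/Dress Up Darling/Dress Up Darling"));
        // -> /manga/Dress Up Darling
    }
}

FindLowestDirectoriesFromFiles then rejects the result when it equals a library root, which is why the aggregate over all candidate directories, rather than any single file's parent, decides the outcome.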
/// </summary>
    /// <param name="folderPath"></param>
    /// <param name="fileTypes"></param>
    /// <param name="matcher"></param>
+    /// <param name="searchOption">Pass TopDirectoryOnly to restrict the scan to the immediate folder</param>
    /// <returns></returns>
-    public IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null)
+    public IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null,
+        SearchOption searchOption = SearchOption.AllDirectories)
     {
         _logger.LogTrace("[ScanFiles] called on {Path}", folderPath);
         var files = new List<string>();
+        if (!Exists(folderPath)) return files;

-        var potentialIgnoreFile = FileSystem.Path.Join(folderPath, KavitaIgnoreFile);
-        if (matcher == null)
+        if (searchOption == SearchOption.AllDirectories)
         {
-            matcher = CreateMatcherFromFile(potentialIgnoreFile);
-        }
-        else
-        {
-            matcher.Merge(CreateMatcherFromFile(potentialIgnoreFile));
-        }
-
-        var directories = GetDirectories(folderPath, matcher);
+            // Stack to hold directories to process
+            var directoriesToProcess = new Stack<string>();
+            directoriesToProcess.Push(folderPath);

-        foreach (var directory in directories)
-        {
-            files.AddRange(ScanFiles(directory, fileTypes, matcher));
-        }
+            while (directoriesToProcess.Count > 0)
+            {
+                var currentDirectory = directoriesToProcess.Pop();
+                // Get files from the current directory
+                var filesInCurrentDirectory = GetFilesWithCertainExtensions(currentDirectory, fileTypes);
+                files.AddRange(filesInCurrentDirectory);

-        // Get the matcher from either ignore or global (default setup)
-        if (matcher == null)
-        {
-            files.AddRange(GetFilesWithCertainExtensions(folderPath, fileTypes));
+                // Get subdirectories and add them to the stack
+                var subdirectories = GetDirectories(currentDirectory, matcher);
+                foreach (var subdirectory in subdirectories)
+                {
+                    directoriesToProcess.Push(subdirectory);
+                }
+            }
         }
         else
         {
-            var foundFiles = GetFilesWithCertainExtensions(folderPath,
-                fileTypes)
-                .Where(file => !matcher.ExcludeMatches(FileSystem.FileInfo.New(file).Name));
-            files.AddRange(foundFiles);
+            // If TopDirectoryOnly is specified, only get files in the specified folder
+            var filesInCurrentDirectory = GetFilesWithCertainExtensions(folderPath, fileTypes);
+            files.AddRange(filesInCurrentDirectory);
+        }
+
+        // Filter out unwanted files based on matcher if provided
+        if (matcher != null)
+        {
+            files = files.Where(file => !matcher.ExcludeMatches(FileSystem.FileInfo.New(file).Name)).ToList();
         }

         return files;
     }

+
     /// <summary>
     /// Recursively scans a folder and returns the max last write time on any folders and files
     /// </summary>
     /// <remarks>
-    /// If the folder is empty or non-existant, this will return MaxValue for a DateTime
+    /// If the folder is empty or non-existent, this will return MaxValue for a DateTime
     /// </remarks>
     /// <returns>Max Last Write Time</returns>
     public DateTime GetLastWriteTime(string folderPath)
     {
         if (!FileSystem.Directory.Exists(folderPath)) return DateTime.MaxValue;
+
         var fileEntries = FileSystem.Directory.GetFileSystemEntries(folderPath, "*.*", SearchOption.AllDirectories);
         if (fileEntries.Length == 0) return DateTime.MaxValue;

-        return fileEntries.Max(path => FileSystem.File.GetLastWriteTime(path));
-    }
-
-    /// <summary>
-    /// Generates a GlobMatcher from a .kavitaignore file found at path. Returns null otherwise.
-    /// </summary>
-    /// <param name="filePath"></param>
-    /// <returns></returns>
-    public GlobMatcher?
CreateMatcherFromFile(string filePath) - { - if (!FileSystem.File.Exists(filePath)) - { - return null; - } - // Read file in and add each line to Matcher - var lines = FileSystem.File.ReadAllLines(filePath); - if (lines.Length == 0) - { - return null; - } + // Find the max last write time of the files + var maxFiles = fileEntries.Max(path => FileSystem.File.GetLastWriteTime(path)); - GlobMatcher matcher = new(); - foreach (var line in lines.Where(s => !string.IsNullOrEmpty(s))) - { - matcher.AddExclude(line); - } + // Get the last write time of the directory itself + var directoryLastWriteTime = FileSystem.Directory.GetLastWriteTime(folderPath); - return matcher; + // Use comparison to get the max DateTime value + return directoryLastWriteTime > maxFiles ? directoryLastWriteTime : maxFiles; } diff --git a/API/Services/Tasks/Scanner/ParseScannedFiles.cs b/API/Services/Tasks/Scanner/ParseScannedFiles.cs index 16586bc9d..3bbfea352 100644 --- a/API/Services/Tasks/Scanner/ParseScannedFiles.cs +++ b/API/Services/Tasks/Scanner/ParseScannedFiles.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; +using System.Diagnostics; using System.Globalization; using System.IO; using System.Linq; @@ -121,7 +122,7 @@ public ParseScannedFiles(ILogger logger, IDirectoryService directoryService, /// A dictionary mapping a normalized path to a list of to help scanner skip I/O /// A library folder or series folder /// If we should bypass any folder last write time checks on the scan and force I/O - public async Task> ProcessFiles(string folderPath, bool scanDirectoryByDirectory, + public async Task> ScanFiles(string folderPath, bool scanDirectoryByDirectory, IDictionary> seriesPaths, Library library, bool forceCheck = false) { var fileExtensions = string.Join("|", library.LibraryFileTypes.Select(l => l.FileTypeGroup.GetRegex())); @@ -138,69 +139,128 @@ public async Task> ProcessFiles(string folderPath, bool scanDi return await ScanSingleDirectory(folderPath, seriesPaths, library, forceCheck, result, fileExtensions, matcher); } - private async Task> ScanDirectories(string folderPath, IDictionary> seriesPaths, Library library, bool forceCheck, - GlobMatcher matcher, List result, string fileExtensions) + private async Task> ScanDirectories(string folderPath, IDictionary> seriesPaths, + Library library, bool forceCheck, GlobMatcher matcher, List result, string fileExtensions) { - var directories = _directoryService.GetDirectories(folderPath, matcher).Select(Parser.Parser.NormalizePath); - foreach (var directory in directories) + var allDirectories = _directoryService.GetAllDirectories(folderPath, matcher) + .Select(Parser.Parser.NormalizePath) + .OrderByDescending(d => d.Length) + .ToList(); + + var processedDirs = new HashSet(); + + _logger.LogDebug("[ScannerService] Step 1.C Found {DirectoryCount} directories to process for {FolderPath}", allDirectories.Count, folderPath); + foreach (var directory in allDirectories) { + // Don't process any folders where we've already scanned everything below + if (processedDirs.Any(d => d.StartsWith(directory + Path.AltDirectorySeparatorChar) || d.Equals(directory))) + { + // Skip this directory as we've already processed a parent unless there are loose files at that directory + CheckSurfaceFiles(result, directory, folderPath, fileExtensions, matcher); + continue; + } + + // Skip directories ending with "Specials", let the parent handle it + if (directory.EndsWith("Specials", StringComparison.OrdinalIgnoreCase)) + { + // Log or 
handle that we are skipping this directory + _logger.LogDebug("Skipping {Directory} as it ends with 'Specials'", directory); + continue; + } + await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent(directory, library.Name, ProgressEventType.Updated)); - if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, directory, forceCheck)) { - if (result.Exists(r => r.Folder == directory)) - { - _logger.LogDebug("[ProcessFiles] Skipping adding {Directory} as it's already added", directory); - continue; - } - _logger.LogDebug("[ProcessFiles] Skipping {Directory} as it hasn't changed since last scan", directory); - result.Add(CreateScanResult(directory, folderPath, false, ArraySegment.Empty)); + HandleUnchangedFolder(result, folderPath, directory); } - else if (!forceCheck && seriesPaths.TryGetValue(directory, out var series) - && series.Count > 1 && series.All(s => !string.IsNullOrEmpty(s.LowestFolderPath))) + else { - // If there are multiple series inside this path, let's check each of them to see which was modified and only scan those - // This is very helpful for ComicVine libraries by Publisher + PerformFullScan(result, directory, folderPath, fileExtensions, matcher); + } - // TODO: BUG: We might miss new folders this way. Likely need to get all folder names and see if there are any that aren't in known series list + processedDirs.Add(directory); + } - _logger.LogDebug("[ProcessFiles] {Directory} is dirty and has multiple series folders, checking if we can avoid a full scan", directory); - foreach (var seriesModified in series) - { - var hasFolderChangedSinceLastScan = seriesModified.LastScanned.Truncate(TimeSpan.TicksPerSecond) < - _directoryService - .GetLastWriteTime(seriesModified.LowestFolderPath!) - .Truncate(TimeSpan.TicksPerSecond); + return result; + } - await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, - MessageFactory.FileScanProgressEvent(seriesModified.LowestFolderPath!, library.Name, ProgressEventType.Updated)); + /// + /// Checks against all folder paths on file if the last scanned is >= the directory's last write time, down to the second + /// + /// + /// This should be normalized + /// + /// + private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary> seriesPaths, string directory, bool forceCheck) + { + // With the bottom-up approach, this can report a false positive where a nested folder will get scanned even though a parent is the series + // This can't really be avoided. This is more likely to happen on Image chapter folder library layouts. 
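// Illustration of the comparison below, assuming Kavita's DateTime.Truncate extension
        // drops sub-resolution ticks (dt.AddTicks(-(dt.Ticks % TimeSpan.TicksPerSecond))):
        //   lastScanned 12:00:05.900 -> 12:00:05
        //   lastWrite   12:00:05.100 -> 12:00:05
        // seriesLastScanned < lastWriteTime is then false, so the folder counts as unchanged;
        // comparing at second precision keeps filesystem timestamp jitter from forcing rescans.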
+ if (forceCheck || !seriesPaths.TryGetValue(directory, out var seriesList)) + { + return false; + } - if (!hasFolderChangedSinceLastScan) - { - _logger.LogDebug("[ProcessFiles] {Directory} subfolder {Folder} did not change since last scan, adding entry to skip", directory, seriesModified.LowestFolderPath); - result.Add(CreateScanResult(seriesModified.LowestFolderPath!, folderPath, false, ArraySegment.Empty)); - } - else - { - _logger.LogDebug("[ProcessFiles] {Directory} subfolder {Folder} changed for Series {SeriesName}", directory, seriesModified.LowestFolderPath, seriesModified.SeriesName); - result.Add(CreateScanResult(directory, folderPath, true, - _directoryService.ScanFiles(seriesModified.LowestFolderPath!, fileExtensions, matcher))); - } - } - } - else + foreach (var series in seriesList) + { + var lastWriteTime = _directoryService.GetLastWriteTime(series.LowestFolderPath!).Truncate(TimeSpan.TicksPerSecond); + var seriesLastScanned = series.LastScanned.Truncate(TimeSpan.TicksPerSecond); + if (seriesLastScanned < lastWriteTime) { - _logger.LogDebug("[ProcessFiles] Performing file scan on {Directory}", directory); - var files = _directoryService.ScanFiles(directory, fileExtensions, matcher); - result.Add(CreateScanResult(directory, folderPath, true, files)); + return false; } } - return result; + return true; + } + + /// + /// Handles directories that haven't changed since the last scan. + /// + private void HandleUnchangedFolder(List result, string folderPath, string directory) + { + if (result.Exists(r => r.Folder == directory)) + { + _logger.LogDebug("[ProcessFiles] Skipping adding {Directory} as it's already added, this indicates a bad layout issue", directory); + } + else + { + _logger.LogDebug("[ProcessFiles] Skipping {Directory} as it hasn't changed since last scan", directory); + result.Add(CreateScanResult(directory, folderPath, false, ArraySegment.Empty)); + } + } + + /// + /// Performs a full scan of the directory and adds it to the result. + /// + private void PerformFullScan(List result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher) + { + _logger.LogDebug("[ProcessFiles] Performing full scan on {Directory}", directory); + var files = _directoryService.ScanFiles(directory, fileExtensions, matcher); + if (files.Count == 0) + { + _logger.LogDebug("[ProcessFiles] Empty directory: {Directory}. Keeping empty will cause Kavita to scan this each time", directory); + } + result.Add(CreateScanResult(directory, folderPath, true, files)); + } + + /// + /// Performs a full scan of the directory and adds it to the result. + /// + private void CheckSurfaceFiles(List result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher) + { + var files = _directoryService.ScanFiles(directory, fileExtensions, matcher, SearchOption.TopDirectoryOnly); + if (files.Count == 0) + { + return; + } + result.Add(CreateScanResult(directory, folderPath, true, files)); } + /// + /// Scans a single directory and processes the scan result. + /// private async Task> ScanSingleDirectory(string folderPath, IDictionary> seriesPaths, Library library, bool forceCheck, List result, string fileExtensions, GlobMatcher matcher) { @@ -249,6 +309,33 @@ private static ScanResult CreateScanResult(string folderPath, string libraryRoot }; } + /// + /// Processes scanResults to track all series across the combined results. + /// Ensures series are correctly grouped even if they span multiple folders. 
+ /// + /// A collection of scan results + /// A concurrent dictionary to store the tracked series + private void TrackSeriesAcrossScanResults(IList scanResults, ConcurrentDictionary> scannedSeries) + { + // Flatten all ParserInfos from scanResults + var allInfos = scanResults.SelectMany(sr => sr.ParserInfos).ToList(); + + // Iterate through each ParserInfo and track the series + foreach (var info in allInfos) + { + if (info == null) continue; + + try + { + TrackSeries(scannedSeries, info); + } + catch (Exception ex) + { + _logger.LogError(ex, "[ScannerService] Exception occurred during tracking {FilePath}. Skipping this file", info?.FullFilePath); + } + } + } + /// /// Attempts to either add a new instance of a series mapping to the _scannedSeries bag or adds to an existing. @@ -299,7 +386,7 @@ private void TrackSeries(ConcurrentDictionary> sc || ps.NormalizedName.Equals(normalizedLocalizedSeries) || ps.NormalizedName.Equals(normalizedSortSeries)))) { - _logger.LogCritical("[ScannerService] Matches: {SeriesName} matches on {SeriesKey}", info.Series, seriesKey.Name); + _logger.LogCritical("[ScannerService] Matches: '{SeriesName}' matches on '{SeriesKey}'", info.Series, seriesKey.Name); } } } @@ -343,6 +430,7 @@ private string MergeName(ConcurrentDictionary> sc (p.Key.NormalizedName.ToNormalized() == normalizedSeries || p.Key.NormalizedName.ToNormalized() == normalizedLocalSeries) && p.Key.Format == info.Format); + foreach (var pair in values) { _logger.LogCritical("[ScannerService] Duplicate Series in DB matches with {SeriesName}: {DuplicateName}", info.Series, pair.Key.Name); @@ -353,7 +441,6 @@ private string MergeName(ConcurrentDictionary> sc return info.Series; } - /// /// This will process series by folder groups. This is used solely by ScanSeries /// @@ -364,151 +451,308 @@ private string MergeName(ConcurrentDictionary> sc /// Defaults to false /// public async Task> ScanLibrariesForSeries(Library library, - IEnumerable folders, bool isLibraryScan, + IList folders, bool isLibraryScan, IDictionary> seriesPaths, bool forceCheck = false) { - await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started)); + await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, + MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started)); _logger.LogDebug("[ScannerService] Library {LibraryName} Step 1.A: Process {FolderCount} folders", library.Name, folders.Count()); - var processedScannedSeries = new List(); - //var processedScannedSeries = new ConcurrentBag(); - foreach (var folderPath in folders) + var processedScannedSeries = new ConcurrentBag(); + + foreach (var folder in folders) { try { - _logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.B: Scan files in {Folder}", library.Name, folderPath); - var scanResults = await ProcessFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck); - - _logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.C: Process files in {Folder}", library.Name, folderPath); - foreach (var scanResult in scanResults) - { - await ParseAndTrackSeries(library, seriesPaths, scanResult, processedScannedSeries); - } - - // This reduced a 1.1k series networked scan by a little more than 1 hour, but the order series were added to Kavita was not alphabetical - // await Task.WhenAll(scanResults.Select(async scanResult => - // { - // await ParseAndTrackSeries(library, seriesPaths, 
scanResult, processedScannedSeries); - // })); - + await ScanAndParseFolder(folder, library, isLibraryScan, seriesPaths, processedScannedSeries, forceCheck); } catch (ArgumentException ex) { - _logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folderPath); + _logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folder); } } - await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended)); + await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, + MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended)); return processedScannedSeries.ToList(); - } - private async Task ParseAndTrackSeries(Library library, IDictionary> seriesPaths, ScanResult scanResult, - List processedScannedSeries) + /// + /// Helper method to scan and parse a folder + /// + /// + /// + /// + /// + /// + /// + private async Task ScanAndParseFolder(string folderPath, Library library, + bool isLibraryScan, IDictionary> seriesPaths, + ConcurrentBag processedScannedSeries, bool forceCheck) { - // scanResult is updated with the parsed infos - await ProcessScanResult(scanResult, seriesPaths, library); // NOTE: This may be able to be parallelized + _logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.B: Scan files in {Folder}", library.Name, folderPath); + var scanResults = await ScanFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck); - // We now have all the parsed infos from the scan result, perform any merging that is necessary and post processing steps + // Aggregate the scanned series across all scanResults var scannedSeries = new ConcurrentDictionary>(); - // Merge any series together (like Nagatoro/nagator.cbz, japanesename.cbz) -> Nagator series - MergeLocalizedSeriesWithSeries(scanResult.ParserInfos); - - // Combine everything into scannedSeries - foreach (var info in scanResult.ParserInfos) + _logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.C: Process files in {Folder}", library.Name, folderPath); + foreach (var scanResult in scanResults) { - try - { - TrackSeries(scannedSeries, info); - } - catch (Exception ex) - { - _logger.LogError(ex, - "[ScannerService] There was an exception that occurred during tracking {FilePath}. Skipping this file", - info?.FullFilePath); - } + await ParseFiles(scanResult, seriesPaths, library); } + _logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.D: Merge any localized series with series {Folder}", library.Name, folderPath); + scanResults = MergeLocalizedSeriesAcrossScanResults(scanResults); + + _logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.E: Group all parsed data into logical Series", library.Name); + TrackSeriesAcrossScanResults(scanResults, scannedSeries); + + + // Now transform and add to processedScannedSeries AFTER everything is processed + _logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.F: Generate Sort Order for Series and Finalize", library.Name); + GenerateProcessedScannedSeries(scannedSeries, scanResults, processedScannedSeries); + } + + /// + /// Processes and generates the final results for processedScannedSeries after updating sort order. 
+ /// + /// A concurrent dictionary of tracked series and their parsed infos + /// List of all scan results, used to determine if any series has changed + /// A thread-safe concurrent bag of processed series results + private void GenerateProcessedScannedSeries(ConcurrentDictionary> scannedSeries, IList scanResults, ConcurrentBag processedScannedSeries) + { + // First, update the sort order for all series + UpdateSeriesSortOrder(scannedSeries); + + // Now, generate the final processed scanned series results + CreateFinalSeriesResults(scannedSeries, scanResults, processedScannedSeries); + } + + /// + /// Updates the sort order for all series in the scannedSeries dictionary. + /// + /// A concurrent dictionary of tracked series and their parsed infos + private void UpdateSeriesSortOrder(ConcurrentDictionary> scannedSeries) + { foreach (var series in scannedSeries.Keys) { if (scannedSeries[series].Count <= 0) continue; try { - UpdateSortOrder(scannedSeries, series); + UpdateSortOrder(scannedSeries, series); // Call to method that updates sort order } catch (Exception ex) { - _logger.LogError(ex, "There was an issue setting IssueOrder"); + _logger.LogError(ex, "[ScannerService] Issue occurred while setting IssueOrder for series {SeriesName}", series.Name); } + } + } + /// + /// Generates the final processed scanned series results after processing the sort order. + /// + /// A concurrent dictionary of tracked series and their parsed infos + /// List of all scan results, used to determine if any series has changed + /// The list where processed results will be added + private static void CreateFinalSeriesResults(ConcurrentDictionary> scannedSeries, + IList scanResults, ConcurrentBag processedScannedSeries) + { + foreach (var series in scannedSeries.Keys) + { + if (scannedSeries[series].Count <= 0) continue; - processedScannedSeries.Add(new ScannedSeriesResult() + processedScannedSeries.Add(new ScannedSeriesResult { - HasChanged = scanResult.HasChanged, + HasChanged = scanResults.Any(sr => sr.HasChanged), // Combine HasChanged flag across all scanResults ParsedSeries = series, ParsedInfos = scannedSeries[series] }); } } + /// + /// Merges localized series with the series field across all scan results. + /// Combines ParserInfos from all scanResults and processes them collectively + /// to ensure consistent series names. 
+ /// + /// + /// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration" + /// World of Acceleration v02.cbz has Series "World of Acceleration" + /// After running this code, we'd have: + /// World of Acceleration v02.cbz having Series "Accel World" and Localized Series of "World of Acceleration" + /// + /// A collection of scan results + /// A new list of scan results with merged series + private IList MergeLocalizedSeriesAcrossScanResults(IList scanResults) + { + // Flatten all ParserInfos across scanResults + var allInfos = scanResults.SelectMany(sr => sr.ParserInfos).ToList(); + + // Filter relevant infos (non-special and with localized series) + var relevantInfos = GetRelevantInfos(allInfos); + + if (relevantInfos.Count == 0) return scanResults; + + // Get distinct localized series and process each one + var distinctLocalizedSeries = relevantInfos + .Select(i => i.LocalizedSeries) + .Distinct() + .ToList(); + + foreach (var localizedSeries in distinctLocalizedSeries) + { + if (string.IsNullOrEmpty(localizedSeries)) continue; + + // Process the localized series for merging + ProcessLocalizedSeries(scanResults, allInfos, relevantInfos, localizedSeries); + } + + // Remove or clear any scan results that now have no ParserInfos after merging + return scanResults.Where(sr => sr.ParserInfos.Any()).ToList(); + } + + private static List GetRelevantInfos(List allInfos) + { + return allInfos + .Where(i => !i.IsSpecial && !string.IsNullOrEmpty(i.LocalizedSeries)) + .GroupBy(i => i.Format) + .SelectMany(g => g.ToList()) + .ToList(); + } + + private void ProcessLocalizedSeries(IList scanResults, List allInfos, List relevantInfos, string localizedSeries) + { + var seriesForLocalized = GetSeriesForLocalized(relevantInfos, localizedSeries); + if (seriesForLocalized.Count == 0) return; + + var nonLocalizedSeries = GetNonLocalizedSeries(seriesForLocalized, localizedSeries); + if (nonLocalizedSeries == null) return; + + // Remap and update relevant ParserInfos + RemapSeries(scanResults, allInfos, localizedSeries, nonLocalizedSeries); + + } + + private static List GetSeriesForLocalized(List relevantInfos, string localizedSeries) + { + return relevantInfos + .Where(i => i.LocalizedSeries == localizedSeries) + .DistinctBy(r => r.Series) + .Select(r => r.Series) + .ToList(); + } + + private string? GetNonLocalizedSeries(List seriesForLocalized, string localizedSeries) + { + switch (seriesForLocalized.Count) + { + case 1: + return seriesForLocalized[0]; + case <= 2: + return seriesForLocalized.FirstOrDefault(s => !s.Equals(Parser.Parser.Normalize(localizedSeries))); + default: + _logger.LogError( + "[ScannerService] Multiple series detected across scan results that contain localized series. " + + "This will cause them to group incorrectly. 
+                    "This will cause them to group incorrectly. Please separate series into their own dedicated folder: {LocalizedSeries}",
+                    string.Join(", ", seriesForLocalized)
+                );
+                return null;
+        }
+    }
+
+    private void RemapSeries(IList<ScanResult> scanResults, List<ParserInfo> allInfos, string localizedSeries, string nonLocalizedSeries)
+    {
+        // Find all infos that need to be remapped from the localized series to the non-localized series
+        var seriesToBeRemapped = allInfos.Where(i => i.Series.Equals(localizedSeries)).ToList();
+
+        foreach (var infoNeedingMapping in seriesToBeRemapped)
+        {
+            infoNeedingMapping.Series = nonLocalizedSeries;
+
+            // Find the scan result containing the localized info
+            var localizedScanResult = scanResults.FirstOrDefault(sr => sr.ParserInfos.Contains(infoNeedingMapping));
+            if (localizedScanResult == null) continue;
+
+            // Remove the localized series from this scan result
+            localizedScanResult.ParserInfos.Remove(infoNeedingMapping);
+
+            // Find the scan result that should be merged with
+            var nonLocalizedScanResult = scanResults.FirstOrDefault(sr => sr.ParserInfos.Any(pi => pi.Series == nonLocalizedSeries));
+            if (nonLocalizedScanResult == null) continue;
+
+            // Add the remapped info to the non-localized scan result
+            nonLocalizedScanResult.ParserInfos.Add(infoNeedingMapping);
+
+            // Assign the higher folder path (i.e., the one closer to the root)
+            //nonLocalizedScanResult.Folder = DirectoryService.GetDeepestCommonPath(localizedScanResult.Folder, nonLocalizedScanResult.Folder);
+        }
+    }
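To make the remap concrete, here is a walk-through of the Accel World example from the `<example>` block above (values illustrative; only fields visible in this diff are assumed):

    // Before: two scan results, one per folder.
    //   sr1.ParserInfos = [ { Series: "Accel World", LocalizedSeries: "World of Acceleration" } ]
    //   sr2.ParserInfos = [ { Series: "World of Acceleration" } ]
    // RemapSeries(scanResults, allInfos, "World of Acceleration", "Accel World") then:
    //   1. rewrites the second info's Series to "Accel World",
    //   2. removes that info from sr2, and
    //   3. appends it to sr1, so both files stack under a single series.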
+
     /// <summary>
     /// For a given ScanResult, sets the ParserInfos on the result
     /// </summary>
     /// <param name="result"></param>
     /// <param name="seriesPaths"></param>
     /// <param name="library"></param>
-    private async Task ProcessScanResult(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
+    private async Task ParseFiles(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
     {
-        // TODO: This should return the result as we are modifying it as a side effect
-
-        // If the folder hasn't changed, generate fake ParserInfos for the Series that were in that folder.
         var normalizedFolder = Parser.Parser.NormalizePath(result.Folder);
+
+        // If folder hasn't changed, generate fake ParserInfos
         if (!result.HasChanged)
         {
             result.ParserInfos = seriesPaths[normalizedFolder]
-                .Select(fp => new ParserInfo()
-                {
-                    Series = fp.SeriesName,
-                    Format = fp.Format,
-                })
+                .Select(fp => new ParserInfo { Series = fp.SeriesName, Format = fp.Format })
                 .ToList();

-            _logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed since last scan", normalizedFolder);
+            _logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed", normalizedFolder);
             await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
-                MessageFactory.FileScanProgressEvent("Skipped " + normalizedFolder, library.Name, ProgressEventType.Updated));
+                MessageFactory.FileScanProgressEvent($"Skipped {normalizedFolder}", library.Name, ProgressEventType.Updated));
             return;
         }

         var files = result.Files;
+        var fileCount = files.Count;

-        // When processing files for a folder and we do enter, we need to parse the information and combine parser infos
-        // NOTE: We might want to move the merge step later in the process, like return and combine.
-
-        if (files.Count == 0)
+        if (fileCount == 0)
         {
-            _logger.LogInformation("[ScannerService] {Folder} is empty, no longer in this location, or has no file types that match Library File Types", normalizedFolder);
+            _logger.LogInformation("[ScannerService] {Folder} is empty or has no matching file types", normalizedFolder);
             result.ParserInfos = ArraySegment<ParserInfo>.Empty;
             return;
         }

         _logger.LogDebug("[ScannerService] Found {Count} files for {Folder}", files.Count, normalizedFolder);
         await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
-            MessageFactory.FileScanProgressEvent($"{files.Count} files in {normalizedFolder}", library.Name, ProgressEventType.Updated));
+            MessageFactory.FileScanProgressEvent($"{fileCount} files in {normalizedFolder}", library.Name, ProgressEventType.Updated));
+
+        // Parse files into ParserInfos
+        if (fileCount < 100)
+        {
+            // Process files sequentially
+            result.ParserInfos = files
+                .Select(file => _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type))
+                .Where(info => info != null)
+                .ToList()!;
+        }
+        else
+        {
+            // Process files in parallel
+            var tasks = files.Select(file => Task.Run(() =>
+                _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type)));

-        // Multiple Series can exist within a folder. We should instead put these infos on the result and perform merging above
-        IList<ParserInfo> infos = files
-            .Select(file => _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type))
-            .Where(info => info != null)
-            .ToList()!;
+            var infos = await Task.WhenAll(tasks);
+            result.ParserInfos = infos.Where(info => info != null).ToList()!;
+        }

-        result.ParserInfos = infos;
+        _logger.LogDebug("[ScannerService] Parsed {Count} files for {Folder}", result.ParserInfos.Count, normalizedFolder);
     }
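The sequential/parallel split hinges on an arbitrary 100-file cutoff; below it, `Task.Run` fan-out costs more than it saves. A stripped-down sketch of the same pattern (the generic `parse` delegate is hypothetical; the real code calls `_readingItemService.ParseFile`, and the usual `System.Linq`/`System.Threading.Tasks` usings are assumed):

    static async Task<List<T>> ParseAllAsync<T>(IReadOnlyList<string> files, Func<string, T?> parse) where T : class
    {
        if (files.Count < 100)
        {
            // Small folders: parse on the calling thread; thread-pool overhead isn't worth it.
            return files.Select(parse).Where(i => i != null).Cast<T>().ToList();
        }
        // Large folders: fan out to the thread pool and await all results.
        var results = await Task.WhenAll(files.Select(f => Task.Run(() => parse(f))));
        return results.Where(i => i != null).Cast<T>().ToList();
    }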
-    public static void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)
+    private static void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)
     {
         // Set the Sort order per Volume
         var volumes = scannedSeries[series].GroupBy(info => info.Volumes);
@@ -586,96 +830,4 @@ public static void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)

-    private bool HasAllSeriesFolderNotChangedSinceLastScan(IList<SeriesModified> seriesFolders,
-        string normalizedFolder)
-    {
-        return seriesFolders.All(f => HasSeriesFolderNotChangedSinceLastScan(f, normalizedFolder));
-    }
-
-    /// <summary>
-    /// Checks against all folder paths on file if the last scanned is >= the directory's last write down to the second
-    /// </summary>
-    /// <param name="seriesPaths"></param>
-    /// <param name="normalizedFolder"></param>
-    /// <param name="forceCheck"></param>
-    /// <returns></returns>
-    private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string normalizedFolder, bool forceCheck = false)
-    {
-        if (forceCheck) return false;
-
-        if (seriesPaths.TryGetValue(normalizedFolder, out var v))
-        {
-            return HasAllSeriesFolderNotChangedSinceLastScan(v, normalizedFolder);
-        }
-
-        return false;
-    }
-
-    private bool HasSeriesFolderNotChangedSinceLastScan(SeriesModified seriesModified, string normalizedFolder)
-    {
-        return seriesModified.LastScanned.Truncate(TimeSpan.TicksPerSecond) >=
-               _directoryService.GetLastWriteTime(normalizedFolder)
-                   .Truncate(TimeSpan.TicksPerSecond);
-    }
-
-    /// <summary>
-    /// Checks if there are any ParserInfos that have a Series that matches the LocalizedSeries field in any other info. If so,
-    /// rewrites the infos with series name instead of the localized name, so they stack.
-    /// </summary>
-    /// <example>
-    /// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration"
-    /// World of Acceleration v02.cbz has Series "World of Acceleration"
-    /// After running this code, we'd have:
-    /// World of Acceleration v02.cbz having Series "Accel World" and Localized Series of "World of Acceleration"
-    /// </example>
-    /// <param name="infos">A collection of ParserInfos</param>
-    private void MergeLocalizedSeriesWithSeries(IList<ParserInfo> infos)
-    {
-        var hasLocalizedSeries = infos.Any(i => !string.IsNullOrEmpty(i.LocalizedSeries));
-        if (!hasLocalizedSeries) return;
-
-        var localizedSeries = infos
-            .Where(i => !i.IsSpecial)
-            .Select(i => i.LocalizedSeries)
-            .Distinct()
-            .FirstOrDefault(i => !string.IsNullOrEmpty(i));
-        if (string.IsNullOrEmpty(localizedSeries)) return;
-
-        // NOTE: If we have multiple series in a folder with a localized title, then this will fail. It will group into one series. User needs to fix this themselves.
-        string? nonLocalizedSeries;
-        // Normalize this as many of the cases is a capitalization difference
-        var nonLocalizedSeriesFound = infos
-            .Where(i => !i.IsSpecial)
-            .Select(i => i.Series)
-            .DistinctBy(Parser.Parser.Normalize)
-            .ToList();
-
-        if (nonLocalizedSeriesFound.Count == 1)
-        {
-            nonLocalizedSeries = nonLocalizedSeriesFound[0];
-        }
-        else
-        {
-            // There can be a case where there are multiple series in a folder that causes merging.
-            if (nonLocalizedSeriesFound.Count > 2)
-            {
-                _logger.LogError("[ScannerService] There are multiple series within one folder that contain localized series. This will cause them to group incorrectly. Please separate series into their own dedicated folder or ensure there is only 2 potential series (localized and series): {LocalizedSeries}", string.Join(", ", nonLocalizedSeriesFound));
-            }
-            nonLocalizedSeries = nonLocalizedSeriesFound.Find(s => !s.Equals(localizedSeries));
-        }
-
-        if (nonLocalizedSeries == null) return;
-
-        var normalizedNonLocalizedSeries = nonLocalizedSeries.ToNormalized();
-        foreach (var infoNeedingMapping in infos.Where(i =>
-                     !i.Series.ToNormalized().Equals(normalizedNonLocalizedSeries)))
-        {
-            infoNeedingMapping.Series = nonLocalizedSeries;
-            infoNeedingMapping.LocalizedSeries = localizedSeries;
-        }
-    }
 }
diff --git a/API/Services/Tasks/Scanner/Parser/BasicParser.cs b/API/Services/Tasks/Scanner/Parser/BasicParser.cs
index 98264faf8..4a3448dbf 100644
--- a/API/Services/Tasks/Scanner/Parser/BasicParser.cs
+++ b/API/Services/Tasks/Scanner/Parser/BasicParser.cs
@@ -79,7 +79,13 @@ public class BasicParser(IDirectoryService directoryService, IDefaultParser imageParser)
         // NOTE: This uses rootPath. LibraryRoot works better for manga, but it's not always that way.
         // It might be worth writing some logic if the file is a special, to take the folder above the Specials/
         // if present
-        ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
+        var tempRootPath = rootPath;
+        if (rootPath.EndsWith("Specials") || rootPath.EndsWith("Specials/"))
+        {
+            tempRootPath = rootPath.Replace("Specials", string.Empty).TrimEnd('/');
+        }
+
+        ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret);
     }

     if (string.IsNullOrEmpty(ret.Series))
diff --git a/API/Services/Tasks/Scanner/Parser/PdfParser.cs b/API/Services/Tasks/Scanner/Parser/PdfParser.cs
index d589a9914..696a61867 100644
--- a/API/Services/Tasks/Scanner/Parser/PdfParser.cs
+++ b/API/Services/Tasks/Scanner/Parser/PdfParser.cs
@@ -59,7 +59,13 @@ public override ParserInfo Parse(string filePath, string rootPath, string libraryRoot,
         ret.Chapters = Parser.DefaultChapter;
         ret.Volumes = Parser.SpecialVolume;

-        ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
+        var tempRootPath = rootPath;
+        if (rootPath.EndsWith("Specials") || rootPath.EndsWith("Specials/"))
+        {
+            tempRootPath = rootPath.Replace("Specials", string.Empty).TrimEnd('/');
+        }
+
+        ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret);
     }

     if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && type == LibraryType.Book)
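Both parsers now strip a trailing `Specials` segment before falling back to folder-based parsing. An illustration of the transform (paths hypothetical); note that `string.Replace` removes every occurrence of "Specials", which is fine for a trailing segment but worth keeping in mind for series whose names contain the word:

    var rootPath = "/manga/Love Hina/Specials";
    var tempRootPath = rootPath.Replace("Specials", string.Empty).TrimEnd('/');
    // tempRootPath == "/manga/Love Hina", so the fallback series name is derived
    // from "Love Hina" instead of the literal folder name "Specials".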
diff --git a/API/Services/Tasks/Scanner/ProcessSeries.cs b/API/Services/Tasks/Scanner/ProcessSeries.cs
index 0f1340d4e..eb4959bf0 100644
--- a/API/Services/Tasks/Scanner/ProcessSeries.cs
+++ b/API/Services/Tasks/Scanner/ProcessSeries.cs
@@ -2,6 +2,7 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Globalization;
+using System.IO;
 using System.Linq;
 using System.Threading.Tasks;
 using API.Data;
@@ -34,6 +35,18 @@ public interface IProcessSeries
     void Reset();
     Task ProcessSeriesAsync(IList<ParserInfo> parsedInfos, Library library, int totalToProcess, bool forceUpdate = false);

+    /// <summary>
+    /// Given a list of all Genres, generates new Genre entries for any that do not exist.
+    /// Does not delete anything, that will be handled by nightly task
+    /// </summary>
+    /// <param name="genres"></param>
+    Task CreateAllGenresAsync(ICollection<string> genres);
+    /// <summary>
+    /// Given a list of all Tags, generates new Tag entries for any that do not exist.
+    /// Does not delete anything, that will be handled by nightly task
+    /// </summary>
+    /// <param name="tags"></param>
+    Task CreateAllTagsAsync(ICollection<string> tags);
 }

 /// <summary>
@@ -50,7 +63,6 @@ public class ProcessSeries : IProcessSeries
     private readonly IFileService _fileService;
     private readonly IMetadataService _metadataService;
     private readonly IWordCountAnalyzerService _wordCountAnalyzerService;
-    private readonly ICollectionTagService _collectionTagService;
     private readonly IReadingListService _readingListService;
     private readonly IExternalMetadataService _externalMetadataService;
     private readonly ITagManagerService _tagManagerService;
@@ -58,8 +70,7 @@ public class ProcessSeries : IProcessSeries
     public ProcessSeries(IUnitOfWork unitOfWork, ILogger<ProcessSeries> logger, IEventHub eventHub,
         IDirectoryService directoryService, ICacheHelper cacheHelper, IReadingItemService readingItemService,
-        IFileService fileService, IMetadataService metadataService, IWordCountAnalyzerService wordCountAnalyzerService,
-        ICollectionTagService collectionTagService, IReadingListService readingListService,
+        IFileService fileService, IMetadataService metadataService, IWordCountAnalyzerService wordCountAnalyzerService, IReadingListService readingListService,
         IExternalMetadataService externalMetadataService, ITagManagerService tagManagerService)
     {
         _unitOfWork = unitOfWork;
@@ -71,7 +82,6 @@ public ProcessSeries(IUnitOfWork unitOfWork, ILogger<ProcessSeries> logger, IEventHub eventHub,
         _fileService = fileService;
         _metadataService = metadataService;
         _wordCountAnalyzerService = wordCountAnalyzerService;
-        _collectionTagService = collectionTagService;
         _readingListService = readingListService;
         _externalMetadataService = externalMetadataService;
         _tagManagerService = tagManagerService;
@@ -192,6 +202,7 @@ await _unitOfWork.SeriesRepository.GetFullSeriesByAnyName(firstInfo.Series, firstInfo.LocalizedSeries,
     }
     catch (DbUpdateConcurrencyException ex)
     {
+        // Note to self: I've seen this trigger for an AppUser before, indicating the way we are spawning ProcessSeries isn't getting its own DbContext.
         foreach (var entry in ex.Entries)
         {
             if (entry.Entity is Series)
@@ -245,8 +256,6 @@ await _eventHub.SendMessageAsync(MessageFactory.Error,
         // See if any recommendations can link up to the series and pre-fetch external metadata for the series
         _logger.LogInformation("Linking up External Recommendations new series (if applicable)");
-        // BackgroundJob.Enqueue(() =>
-        //     _externalMetadataService.GetNewSeriesData(series.Id, series.Library.Type));
         await _externalMetadataService.GetNewSeriesData(series.Id, series.Library.Type);

         await _eventHub.SendMessageAsync(MessageFactory.SeriesAdded,
@@ -268,11 +277,49 @@ await _eventHub.SendMessageAsync(MessageFactory.SeriesAdded,
         var settings = await _unitOfWork.SettingsRepository.GetSettingsDtoAsync();
         await _metadataService.GenerateCoversForSeries(series, settings.EncodeMediaAs, settings.CoverImageSize);
-        // BackgroundJob.Enqueue(() => _wordCountAnalyzerService.ScanSeries(series.LibraryId, series.Id, forceUpdate));
         await _wordCountAnalyzerService.ScanSeries(series.LibraryId, series.Id, forceUpdate);
     }

+    public async Task CreateAllGenresAsync(ICollection<string> genres)
+    {
+        // Pass the non-normalized genres directly to the repository
+        var nonExistingGenres = await _unitOfWork.GenreRepository.GetAllGenresNotInListAsync(genres);
+
+        // Create and attach new genres using the non-normalized names
+        foreach (var genre in nonExistingGenres)
+        {
+            var newGenre = new GenreBuilder(genre).Build();
+            _unitOfWork.GenreRepository.Attach(newGenre);
+        }
+
+        // Commit changes
+        if (nonExistingGenres.Count > 0)
+        {
+            await _unitOfWork.CommitAsync();
+        }
+    }
+
+    public async Task CreateAllTagsAsync(ICollection<string> tags)
+    {
+        // Pass the non-normalized tags directly to the repository
+        var nonExistingTags = await _unitOfWork.TagRepository.GetAllTagsNotInListAsync(tags);
+
+        // Create and attach new tags using the non-normalized names
+        foreach (var tag in nonExistingTags)
+        {
+            var newTag = new TagBuilder(tag).Build();
+            _unitOfWork.TagRepository.Attach(newTag);
+        }
+
+        // Commit changes
+        if (nonExistingTags.Count > 0)
+        {
+            await _unitOfWork.CommitAsync();
+        }
+    }
+
+
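A hedged call-site sketch for the two hooks above (the `infos` collection and a `processSeries` instance are assumed; the shape of `ComicInfo.Genre` follows its use elsewhere in this diff):

    // Gather every genre mentioned across the parsed files, then create the missing
    // Genre rows in one round-trip before any per-series processing begins. This avoids
    // concurrent series processors racing to insert the same shared entity.
    var allGenres = infos
        .SelectMany(i => i.ComicInfo?.Genre?.Split(',', StringSplitOptions.RemoveEmptyEntries) ?? [])
        .Select(g => g.Trim())
        .Where(g => g.Length > 0)
        .ToList();
    await processSeries.CreateAllGenresAsync(allGenres);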
     private async Task ReportDuplicateSeriesLookup(Library library, ParserInfo firstInfo, Exception ex)
     {
         var seriesCollisions = await _unitOfWork.SeriesRepository.GetAllSeriesByAnyName(firstInfo.LocalizedSeries, string.Empty, library.Id, firstInfo.Format);
@@ -777,26 +824,115 @@ private void UpdateChapters(Series series, Volume volume, IList<ParserInfo> parsedInfos)

         // Remove chapters that aren't in parsedInfos or have no files linked
+        // var existingChapters = volume.Chapters.ToList();
+        // foreach (var existingChapter in existingChapters)
+        // {
+        //     if (existingChapter.Files.Count == 0 || !parsedInfos.HasInfo(existingChapter))
+        //     {
+        //         _logger.LogDebug("[ScannerService] Removed chapter {Chapter} for Volume {VolumeNumber} on {SeriesName}",
+        //             existingChapter.Range, volume.Name, parsedInfos[0].Series);
+        //         volume.Chapters.Remove(existingChapter);
+        //     }
+        //     else
+        //     {
+        //         // Ensure we remove any files that no longer exist AND order
+        //         existingChapter.Files = existingChapter.Files
+        //             .Where(f => parsedInfos.Any(p => Parser.Parser.NormalizePath(p.FullFilePath) == Parser.Parser.NormalizePath(f.FilePath)))
+        //             .OrderByNatural(f => f.FilePath)
+        //             .ToList();
+        //         existingChapter.Pages = existingChapter.Files.Sum(f => f.Pages);
+        //     }
+        // }

+        // // This represents the directories that were part of the current scan.
+        // var scannedDirectories = parsedInfos
+        //     .Select(p => Path.GetDirectoryName(p.FullFilePath))
+        //     .Distinct()
+        //     .ToList();
+        //
+        // var existingChapters = volume.Chapters.ToList();
+        // foreach (var existingChapter in existingChapters)
+        // {
+        //     // Check if this chapter has files from directories that were part of the current scan
+        //     var chapterFileDirectories = existingChapter.Files
+        //         .Select(f => Path.GetDirectoryName(f.FilePath))
+        //         .Distinct()
+        //         .ToList();
+        //
+        //     // Only remove the chapter if its files are from a directory that was scanned and it's not in parserInfos anymore
+        //     if (existingChapter.Files.Count == 0 ||
+        //         chapterFileDirectories.Exists(dir => scannedDirectories.Contains(dir)) && !parsedInfos.HasInfo(existingChapter))
+        //     {
+        //         _logger.LogDebug("[ScannerService] Removed chapter {Chapter} for Volume {VolumeNumber} on {SeriesName}",
+        //             existingChapter.Range, volume.Name, parsedInfos[0].Series);
+        //         volume.Chapters.Remove(existingChapter);
+        //     }
+        //     else
+        //     {
+        //         // Ensure we remove any files that no longer exist AND reorder the remaining files
+        //         existingChapter.Files = existingChapter.Files
+        //             .Where(f => parsedInfos.Any(p => Parser.Parser.NormalizePath(p.FullFilePath) == Parser.Parser.NormalizePath(f.FilePath)))
+        //             .OrderByNatural(f => f.FilePath)
+        //             .ToList();
+        //
+        //         // Update the page count after filtering the files
+        //         existingChapter.Pages = existingChapter.Files.Sum(f => f.Pages);
+        //     }
+        // }

         var existingChapters = volume.Chapters.ToList();
-        foreach (var existingChapter in existingChapters)
+
+        // Extract the directories (without filenames) from parserInfos
+        var parsedDirectories = parsedInfos
+            .Select(p => Path.GetDirectoryName(p.FullFilePath)) // Get directory path
+            .Distinct()
+            .ToList();
+
+        foreach (var existingChapter in existingChapters)
+        {
+            // Get the directories for the files in the current chapter
+            var chapterFileDirectories = existingChapter.Files
+                .Select(f => Path.GetDirectoryName(f.FilePath)) // Get directory path minus the filename
+                .Distinct()
+                .ToList();
+
+            // Check if any of the chapter's file directories match the parsedDirectories
+            var hasMatchingDirectory = chapterFileDirectories.Exists(dir => parsedDirectories.Contains(dir));
+
+            if (hasMatchingDirectory)
+            {
+                // Ensure we remove any files that no longer exist AND order the remaining files
+                existingChapter.Files = existingChapter.Files
+                    .Where(f => parsedInfos.Any(p => Parser.Parser.NormalizePath(p.FullFilePath) == Parser.Parser.NormalizePath(f.FilePath)))
+                    .OrderByNatural(f => f.FilePath)
+                    .ToList();
+
+                // Update the chapter's page count after filtering the files
+                existingChapter.Pages = existingChapter.Files.Sum(f => f.Pages);
+
+                // If no files remain after filtering, remove the chapter
+                if (existingChapter.Files.Count == 0)
                 {
-            if (existingChapter.Files.Count == 0 || !parsedInfos.HasInfo(existingChapter))
-            {
-                _logger.LogDebug("[ScannerService] Removed chapter {Chapter} for Volume {VolumeNumber} on {SeriesName}",
-                    existingChapter.Range, volume.Name, parsedInfos[0].Series);
-                volume.Chapters.Remove(existingChapter);
-            }
-            else
-            {
-                // Ensure we remove any files that no longer exist AND order
-                existingChapter.Files = existingChapter.Files
-                    .Where(f => parsedInfos.Any(p => Parser.Parser.NormalizePath(p.FullFilePath) == Parser.Parser.NormalizePath(f.FilePath)))
-                    .OrderByNatural(f => f.FilePath)
-                    .ToList();
-                existingChapter.Pages = existingChapter.Files.Sum(f => f.Pages);
-            }
+                    _logger.LogDebug("[ScannerService] Removed chapter {Chapter} for Volume {VolumeNumber} on {SeriesName}",
+                        existingChapter.Range, volume.Name, parsedInfos[0].Series);
+                    volume.Chapters.Remove(existingChapter);
+                }
             }
+            else
+            {
+                // If there are no matching directories in the current scan, check if the files still exist on disk
+                var filesExist = existingChapter.Files.Any(f => File.Exists(f.FilePath));
+
+                // If no files exist, remove the chapter
+                if (!filesExist)
+                {
+                    _logger.LogDebug("[ScannerService] Removed chapter {Chapter} for Volume {VolumeNumber} on {SeriesName} as no files exist",
+                        existingChapter.Range, volume.Name, parsedInfos[0].Series);
+                    volume.Chapters.Remove(existingChapter);
+                }
+            }
+        }
+
     }
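The rewritten removal pass splits chapters into two buckets by directory; a hedged summary with hypothetical paths:

    // Case 1: a chapter's files live under a directory that WAS part of this scan,
    //   e.g. "/manga/Accel World/Vol 01": prune files missing from parsedInfos,
    //   recount pages, and drop the chapter only if no files survive.
    // Case 2: the files live under a directory that was NOT scanned,
    //   e.g. "/manga/Accel World/Extras": leave the chapter alone unless
    //   File.Exists shows its files are gone from disk entirely.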
diff --git a/API/Services/Tasks/ScannerService.cs b/API/Services/Tasks/ScannerService.cs
index 5fbc2602d..eec3417d5 100644
--- a/API/Services/Tasks/ScannerService.cs
+++ b/API/Services/Tasks/ScannerService.cs
@@ -258,17 +258,17 @@ public async Task ScanSeries(int seriesId, bool bypassFolderOptimizationChecks = false)
     }

     // If the series path doesn't exist anymore, it was either moved or renamed. We need to essentially delete it
-    var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
+    //var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();

     await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
         MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Started, series.Name, 1));

     _logger.LogInformation("Beginning file scan on {SeriesName}", series.Name);
-    var (scanElapsedTime, processedSeries) = await ScanFiles(library, new []{ folderPath },
+    var (scanElapsedTime, parsedSeries) = await ScanFiles(library, [folderPath],
         false, true);

-    // Transform seen series into the parsedSeries (I think we can actually just have processedSeries be used instead
-    TrackFoundSeriesAndFiles(parsedSeries, processedSeries);
+    // // Transform seen series into the parsedSeries (I think we can actually just have processedSeries be used instead
+    // var parsedSeries = TrackFoundSeriesAndFiles(processedSeries);

     _logger.LogInformation("ScanFiles for {Series} took {Time} milliseconds", series.Name, scanElapsedTime);
@@ -347,13 +347,16 @@ await _eventHub.SendMessageAsync(MessageFactory.ScanSeries,
         BackgroundJob.Enqueue(() => _directoryService.ClearDirectory(_directoryService.CacheDirectory));
     }

-    private void TrackFoundSeriesAndFiles(Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, IList<ScannedSeriesResult> seenSeries)
+    private static Dictionary<ParsedSeries, IList<ParserInfo>> TrackFoundSeriesAndFiles(IList<ScannedSeriesResult> seenSeries)
     {
-        foreach (var series in seenSeries.Where(s => s.ParsedInfos.Count > 0))
+        var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
+        foreach (var series in seenSeries.Where(s => s.ParsedInfos.Count > 0 && s.HasChanged))
         {
             var parsedFiles = series.ParsedInfos;
             parsedSeries.Add(series.ParsedSeries, parsedFiles);
         }
+
+        return parsedSeries;
     }

     private async Task<bool> ShouldScanSeries(int seriesId, Library library, IList<string> libraryPaths, Series series, bool bypassFolderChecks = false)
@@ -530,23 +533,18 @@ public async Task ScanLibrary(int libraryId, bool forceUpdate = false, bool isSingleScan = true)
     }

-    _logger.LogDebug("[ScannerService] Library {LibraryName} Step 1: Scan Files", library.Name);
-    var (scanElapsedTime, processedSeries) = await ScanFiles(library, libraryFolderPaths,
+    _logger.LogDebug("[ScannerService] Library {LibraryName} Step 1: Scan & Parse Files", library.Name);
+    var (scanElapsedTime, parsedSeries) = await ScanFiles(library, libraryFolderPaths,
         shouldUseLibraryScan, forceUpdate);

-    _logger.LogDebug("[ScannerService] Library {LibraryName} Step 2: Track Found Series", library.Name);
-    var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
-    TrackFoundSeriesAndFiles(parsedSeries, processedSeries);

     // We need to remove any keys where there is no actual parser info
-    _logger.LogDebug("[ScannerService] Library {LibraryName} Step 3: Process Parsed Series", library.Name);
+    _logger.LogDebug("[ScannerService] Library {LibraryName} Step 2: Process and Update Database", library.Name);
     var totalFiles = await ProcessParsedSeries(forceUpdate, parsedSeries, library, scanElapsedTime);

     UpdateLastScanned(library);
-
-    _unitOfWork.LibraryRepository.Update(library);
-    _logger.LogDebug("[ScannerService] Library {LibraryName} Step 4: Save Library", library.Name);
+
+    _logger.LogDebug("[ScannerService] Library {LibraryName} Step 3: Save Library", library.Name);
     if (await _unitOfWork.CommitAsync())
     {
         if (isSingleScan)
@@ -587,55 +585,90 @@ private async Task RemoveSeriesNotFound(Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, Library library)
-        _logger.LogDebug("[ScannerService] Removing Series that were not found during the scan: {SeriesList}",
-            string.Join(", ", removedSeries.Select(s => s.Name)));
-        _logger.LogDebug("[ScannerService] Removing Series that were not found during the scan - complete");
+        _logger.LogDebug("[ScannerService] Found {Count} series to remove: {SeriesList}",
+            removedSeries.Count, string.Join(", ", removedSeries.Select(s => s.Name)));

+        // Commit the changes
         await _unitOfWork.CommitAsync();

-        foreach (var s in removedSeries)
+        // Notify for each removed series
+        foreach (var series in removedSeries)
         {
-            await _eventHub.SendMessageAsync(MessageFactory.SeriesRemoved,
-                MessageFactory.SeriesRemovedEvent(s.Id, s.Name, s.LibraryId), false);
+            await _eventHub.SendMessageAsync(
+                MessageFactory.SeriesRemoved,
+                MessageFactory.SeriesRemovedEvent(series.Id, series.Name, series.LibraryId),
+                false
+            );
         }
+
+        _logger.LogDebug("[ScannerService] Series removal process completed");
     }
     catch (Exception ex)
     {
-        _logger.LogCritical(ex, "[ScannerService] There was an issue deleting series for cleanup. Please check logs and rescan");
+        _logger.LogCritical(ex, "[ScannerService] Error during series cleanup. Please check logs and rescan");
     }
 }

 private async Task<int> ProcessParsedSeries(bool forceUpdate, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, Library library, long scanElapsedTime)
 {
-    var toProcess = parsedSeries.Keys
-        .Where(k => parsedSeries[k].Any() && !string.IsNullOrEmpty(parsedSeries[k][0].Filename))
-        .ToList();
+    // Iterate over the dictionary and remove only the ParserInfos that don't need processing
+    var toProcess = new Dictionary<ParsedSeries, IList<ParserInfo>>();
+
+    foreach (var series in parsedSeries)
+    {
+        // Filter out ParserInfos where Filename is empty (i.e., folder not modified)
+        var validInfos = series.Value.Where(info => !string.IsNullOrEmpty(info.Filename)).ToList();
+
+        if (validInfos.Count != 0)
+        {
+            toProcess[series.Key] = validInfos;
+        }
+    }

     if (toProcess.Count > 0)
     {
-        // This grabs all the shared entities, like tags, genre, people. To be solved later in this refactor on how to not have blocking access.
+        // For all Genres in the ParserInfos, do a bulk check against the DB on what is not in the DB and create them.
+        // This will ensure all Genres are pre-created and allow our Genre lookup (and Priming) to be much simpler. It will be slower, but more consistent.
+        var allGenres = toProcess
+            .SelectMany(s => s.Value
+                .SelectMany(p => p.ComicInfo?.Genre?
+                    .Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
+                    .Select(g => g.Trim()) // Trim each genre
+                    .Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty genres
+                    ?? [])); // Handle null Genre or ComicInfo safely
+
+        await _processSeries.CreateAllGenresAsync(allGenres.ToList());
+
+        var allTags = toProcess
+            .SelectMany(s => s.Value
+                .SelectMany(p => p.ComicInfo?.Tags?
+                    .Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
+                    .Select(g => g.Trim()) // Trim each tag
+                    .Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty tags
+                    ?? [])); // Handle null Tags or ComicInfo safely
+
+        await _processSeries.CreateAllTagsAsync(allTags.ToList());
+
+        // TODO: Do the above for People as well (until we overhaul the People code)
+
+        // Prime shared entities if there are any series to process
         await _processSeries.Prime();
     }

     var totalFiles = 0;
-    //var tasks = new List<Task>();
     var seriesLeftToProcess = toProcess.Count;
+
     foreach (var pSeries in toProcess)
     {
-        totalFiles += parsedSeries[pSeries].Count;
-        //tasks.Add(_processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, forceUpdate));
-        // We can't do Task.WhenAll because of concurrency issues.
-        await _processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, seriesLeftToProcess, forceUpdate);
+        totalFiles += pSeries.Value.Count;
+        await _processSeries.ProcessSeriesAsync(pSeries.Value, library, seriesLeftToProcess, forceUpdate);
         seriesLeftToProcess--;
     }

-    //await Task.WhenAll(tasks);
-
     await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
         MessageFactory.FileScanProgressEvent(string.Empty, library.Name, ProgressEventType.Ended));

@@ -644,6 +677,7 @@ await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
     return totalFiles;
 }

+
 private static void UpdateLastScanned(Library library)
 {
     var time = DateTime.Now;
@@ -655,7 +689,7 @@ private static void UpdateLastScanned(Library library)
     library.UpdateLastScanned(time);
 }

-    private async Task<Tuple<long, IList<ScannedSeriesResult>>> ScanFiles(Library library, IEnumerable<string> dirs,
+    private async Task<Tuple<long, Dictionary<ParsedSeries, IList<ParserInfo>>>> ScanFiles(Library library, IList<string> dirs,
         bool isLibraryScan, bool forceChecks = false)
     {
         var scanner = new ParseScannedFiles(_logger, _directoryService, _readingItemService, _eventHub);
@@ -666,12 +700,8 @@ private async Task<Tuple<long, IList<ScannedSeriesResult>>> ScanFiles(Library library,
         var scanElapsedTime = scanWatch.ElapsedMilliseconds;

-        return Tuple.Create(scanElapsedTime, processedSeries);
-    }
+        var parsedSeries = TrackFoundSeriesAndFiles(processedSeries);

-    public static IEnumerable<Series> FindSeriesNotOnDisk(IEnumerable<Series> existingSeries, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries)
-    {
-        return existingSeries.Where(es => !ParserInfoHelpers.SeriesHasMatchingParserInfoFormat(es, parsedSeries));
+        return Tuple.Create(scanElapsedTime, parsedSeries);
     }
-}
diff --git a/API/Startup.cs b/API/Startup.cs
index 37f2efbbf..103ded813 100644
--- a/API/Startup.cs
+++ b/API/Startup.cs
@@ -271,6 +271,9 @@ public void Configure(IApplicationBuilder app, IBackgroundJobClient backgroundJobClient,
     await MigrateInitialInstallData.Migrate(dataContext, logger, directoryService);
     await MigrateSeriesLowestFolderPath.Migrate(dataContext, logger, directoryService);

+    // v0.8.4
+    await MigrateLowestSeriesFolderPath2.Migrate(dataContext, unitOfWork, logger);
+
     // Update the version in the DB after all migrations are run
     var installVersion = await unitOfWork.SettingsRepository.GetSettingAsync(ServerSettingKey.InstallVersion);
     installVersion.Value = BuildInfo.Version.ToString();
diff --git a/UI/Web/src/app/nav/_components/grouped-typeahead/grouped-typeahead.component.ts b/UI/Web/src/app/nav/_components/grouped-typeahead/grouped-typeahead.component.ts
index a01e066a8..d805d4941 100644
--- a/UI/Web/src/app/nav/_components/grouped-typeahead/grouped-typeahead.component.ts
+++ b/UI/Web/src/app/nav/_components/grouped-typeahead/grouped-typeahead.component.ts
@@ -119,9 +119,7 @@ export class GroupedTypeaheadComponent implements OnInit {

   @HostListener('window:click', ['$event'])
   handleDocumentClick(event: MouseEvent) {
-    console.log('click: ', event)
     this.close();
-
   }

   @HostListener('window:keydown', ['$event'])
diff --git a/UI/Web/src/app/sidenav/_components/sidenav-stream-list-item/sidenav-stream-list-item.component.ts b/UI/Web/src/app/sidenav/_components/sidenav-stream-list-item/sidenav-stream-list-item.component.ts
index 7b69a0371..f42b48d10 100644
--- a/UI/Web/src/app/sidenav/_components/sidenav-stream-list-item/sidenav-stream-list-item.component.ts
+++ b/UI/Web/src/app/sidenav/_components/sidenav-stream-list-item/sidenav-stream-list-item.component.ts
@@ -20,9 +20,4 @@ export class SidenavStreamListItemComponent {
   @Output() hide: EventEmitter = new EventEmitter();
   protected readonly SideNavStreamType = SideNavStreamType;
   protected readonly baseUrl = inject(APP_BASE_HREF);
-
-  constructor() {
-    console.log('baseUrl', this.baseUrl);
-  }
-
 }