Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scanner Refactor #3240

Merged
merged 22 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
aef006e
Removed old .kavitaignore logic from ScanFiles and optimized ScanFiles.
majora2007 Sep 23, 2024
79876c0
More optimizations in the DirectoryService.
majora2007 Sep 23, 2024
852a4cf
Lots of scanner changes, but this has an issue where HasSeriesFolderN…
majora2007 Sep 23, 2024
49ba28c
Updated GetAllDirectories to take a GlobMatcher
majora2007 Sep 23, 2024
808c6bf
Finally a breakthrough! New approach is bottom-up scanning of library…
majora2007 Sep 24, 2024
35034fc
Fixed up specials getting parsed as their own series.
majora2007 Sep 24, 2024
fa3905f
We are close, special parsing still needs some work.
majora2007 Sep 24, 2024
5042251
Removed some dead code. New scanner does work, I do notice some dupli…
majora2007 Sep 25, 2024
cd3395e
I think we are making progress, I'm seeing good results on regular ma…
majora2007 Sep 25, 2024
e0425fa
Fixed an issue with skipping things like chapter 1.1 because chapter …
majora2007 Sep 25, 2024
6bd3b1b
This might be a breaking commit. Working out how to handle deletion o…
majora2007 Sep 26, 2024
dfd590e
Fixed an edge case in LowestSeriesFolder code:
majora2007 Sep 27, 2024
dd9aedd
Started to work on merging scan results at a library level rather th…
majora2007 Sep 28, 2024
5538324
LocalizedSeries Merge is working well now.
majora2007 Sep 28, 2024
505e485
Code cleanup
majora2007 Sep 28, 2024
0bd5504
Fixed a bug with directory normalization in the scanner.
majora2007 Sep 28, 2024
21bcce0
Fixed another case of GetLowestFolder
majora2007 Sep 28, 2024
3f43b6e
Fixed a scanner case
majora2007 Sep 29, 2024
3c11223
Fixed a bug where Series Cover image could choose a Volume 0 instead …
majora2007 Sep 29, 2024
389aa4e
Fixed up some bugs around localized series merges.
majora2007 Sep 29, 2024
2cdbcef
Prime all the tags ahead of time with library scan to avoid potential…
majora2007 Sep 29, 2024
ad2c2e3
Added a migration to ensure users get their LowestFolderPath reset du…
majora2007 Sep 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions API.Tests/Extensions/SeriesExtensionsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,35 @@ public void GetCoverImage_JustVolumes()
Assert.Equal("Volume 1 Chapter 1", series.GetCoverImage());
}

/// <summary>
/// A series made up of a "Volume 0" and a "Volume 1" is expected to report the
/// cover image of Volume 1 as the series cover.
/// </summary>
[Fact]
public void GetCoverImage_JustVolumes_ButVolume0()
{
    // Each volume holds a single default chapter whose cover is named after the volume.
    var volumeZero = new VolumeBuilder("0")
        .WithName("Volume 0")
        .WithChapter(new ChapterBuilder(Parser.DefaultChapter)
            .WithCoverImage("Volume 0")
            .Build())
        .Build();

    var volumeOne = new VolumeBuilder("1")
        .WithName("Volume 1")
        .WithChapter(new ChapterBuilder(Parser.DefaultChapter)
            .WithCoverImage("Volume 1")
            .Build())
        .Build();

    var series = new SeriesBuilder("Test 1")
        .WithFormat(MangaFormat.Archive)
        .WithVolume(volumeZero)
        .WithVolume(volumeOne)
        .Build();

    // Give every volume the cover of its lowest-sorted chapter before asking the series for its cover.
    foreach (var volume in series.Volumes)
    {
        var firstChapter = volume.Chapters.MinBy(chapter => chapter.SortOrder, ChapterSortComparerDefaultFirst.Default);
        volume.CoverImage = firstChapter?.CoverImage;
    }

    Assert.Equal("Volume 1", series.GetCoverImage());
}

[Fact]
public void GetCoverImage_JustSpecials_WithDecimal()
{
Expand Down
21 changes: 17 additions & 4 deletions API.Tests/Services/DirectoryServiceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Text;
using System.Threading.Tasks;
using API.Services;
using Kavita.Common.Helpers;
using Microsoft.Extensions.Logging;
using NSubstitute;
using Xunit;
Expand Down Expand Up @@ -745,6 +746,12 @@ public void FindHighestDirectoriesFromFilesTest(string[] rootDirectories, string
[InlineData(new [] {"/manga"},
new [] {"/manga/Love Hina/Vol. 01.cbz", "/manga/Love Hina/Specials/Sp01.cbz"},
"/manga/Love Hina")]
[InlineData(new [] {"/manga"},
new [] {"/manga/Love Hina/Hina/Vol. 01.cbz", "/manga/Love Hina/Specials/Sp01.cbz"},
"/manga/Love Hina")]
[InlineData(new [] {"/manga"},
new [] {"/manga/Dress Up Darling/Dress Up Darling Ch 01.cbz", "/manga/Dress Up Darling/Dress Up Darling/Dress Up Darling Vol 01.cbz"},
"/manga/Dress Up Darling")]
public void FindLowestDirectoriesFromFilesTest(string[] rootDirectories, string[] files, string expectedDirectory)
{
var fileSystem = new MockFileSystem();
Expand Down Expand Up @@ -920,8 +927,9 @@ public Task ScanFiles_ShouldFindNoFiles_AllAreIgnored()

var ds = new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), fileSystem);


var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions);
var globMatcher = new GlobMatcher();
globMatcher.AddExclude("*.*");
var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions, globMatcher);

Assert.Empty(allFiles);

Expand All @@ -945,7 +953,9 @@ public Task ScanFiles_ShouldFindNoNestedFiles_IgnoreNestedFiles()

var ds = new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), fileSystem);

var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions);
var globMatcher = new GlobMatcher();
globMatcher.AddExclude("**/Accel World/*");
var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions, globMatcher);

Assert.Single(allFiles); // Ignore files are not counted in files, only valid extensions

Expand Down Expand Up @@ -974,7 +984,10 @@ public Task ScanFiles_NestedIgnore_IgnoreNestedFilesInOneDirectoryOnly()

var ds = new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), fileSystem);

var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions);
var globMatcher = new GlobMatcher();
globMatcher.AddExclude("**/Accel World/*");
globMatcher.AddExclude("**/ArtBooks/*");
var allFiles = ds.ScanFiles("C:/Data/", API.Services.Tasks.Scanner.Parser.Parser.SupportedExtensions, globMatcher);

Assert.Equal(2, allFiles.Count); // Ignore files are not counted in files, only valid extensions

Expand Down
26 changes: 4 additions & 22 deletions API.Tests/Services/ParseScannedFilesTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -206,24 +206,6 @@ public async Task ScanLibrariesForSeries_ShouldFindFiles()
var psf = new ParseScannedFiles(Substitute.For<ILogger<ParseScannedFiles>>(), ds,
new MockReadingItemService(ds, Substitute.For<IBookService>()), Substitute.For<IEventHub>());

// var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
//
// Task TrackFiles(Tuple<bool, IList<ParserInfo>> parsedInfo)
// {
// var skippedScan = parsedInfo.Item1;
// var parsedFiles = parsedInfo.Item2;
// if (parsedFiles.Count == 0) return Task.CompletedTask;
//
// var foundParsedSeries = new ParsedSeries()
// {
// Name = parsedFiles.First().Series,
// NormalizedName = parsedFiles.First().Series.ToNormalized(),
// Format = parsedFiles.First().Format
// };
//
// parsedSeries.Add(foundParsedSeries, parsedFiles);
// return Task.CompletedTask;
// }

var library =
await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1,
Expand Down Expand Up @@ -273,7 +255,7 @@ public async Task ProcessFiles_ForLibraryMode_OnlyCallsFolderActionForEachTopLev
var directoriesSeen = new HashSet<string>();
var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1,
LibraryIncludes.Folders | LibraryIncludes.FileTypes);
var scanResults = await psf.ProcessFiles("C:/Data/", true, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library);
var scanResults = await psf.ScanFiles("C:/Data/", true, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library);
foreach (var scanResult in scanResults)
{
directoriesSeen.Add(scanResult.Folder);
Expand All @@ -295,7 +277,7 @@ public async Task ProcessFiles_ForNonLibraryMode_CallsFolderActionOnce()
Assert.NotNull(library);

var directoriesSeen = new HashSet<string>();
var scanResults = await psf.ProcessFiles("C:/Data/", false,
var scanResults = await psf.ScanFiles("C:/Data/", false,
await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library);

foreach (var scanResult in scanResults)
Expand Down Expand Up @@ -328,7 +310,7 @@ public async Task ProcessFiles_ShouldCallFolderActionTwice()
var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1,
LibraryIncludes.Folders | LibraryIncludes.FileTypes);
Assert.NotNull(library);
var scanResults = await psf.ProcessFiles("C:/Data", true, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library);
var scanResults = await psf.ScanFiles("C:/Data", true, await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library);

Assert.Equal(2, scanResults.Count);
}
Expand Down Expand Up @@ -357,7 +339,7 @@ public async Task ProcessFiles_ShouldCallFolderActionOnce()
var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(1,
LibraryIncludes.Folders | LibraryIncludes.FileTypes);
Assert.NotNull(library);
var scanResults = await psf.ProcessFiles("C:/Data", false,
var scanResults = await psf.ScanFiles("C:/Data", false,
await _unitOfWork.SeriesRepository.GetFolderPathMap(1), library);

Assert.Single(scanResults);
Expand Down
91 changes: 34 additions & 57 deletions API.Tests/Services/ScannerServiceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,79 +51,50 @@ protected override async Task ResetDb()
}

[Fact]
public void FindSeriesNotOnDisk_Should_Remove1()
public async Task ScanLibrary_ComicVine_PublisherFolder()
{
var infos = new Dictionary<ParsedSeries, IList<ParserInfo>>();

ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Darker than Black", Volumes = "1", Format = MangaFormat.Archive});
//AddToParsedInfo(infos, new ParserInfo() {Series = "Darker than Black", Volumes = "1", Format = MangaFormat.Epub});

var existingSeries = new List<Series>
{
new SeriesBuilder("Darker Than Black")
.WithFormat(MangaFormat.Epub)

.WithVolume(new VolumeBuilder("1")
.WithName("1")
.Build())
.WithLocalizedName("Darker Than Black")
.Build()
};
var testcase = "Publisher - ComicVine.json";
var postLib = await GenerateScannerData(testcase);

Assert.Single(ScannerService.FindSeriesNotOnDisk(existingSeries, infos));
Assert.NotNull(postLib);
Assert.Equal(4, postLib.Series.Count);
}

[Fact]
public void FindSeriesNotOnDisk_Should_RemoveNothing_Test()
public async Task ScanLibrary_ShouldCombineNestedFolder()
{
var infos = new Dictionary<ParsedSeries, IList<ParserInfo>>();

ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Darker than Black", Format = MangaFormat.Archive});
ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Cage of Eden", Volumes = "1", Format = MangaFormat.Archive});
ParserInfoFactory.AddToParsedInfo(infos, new ParserInfo() {Series = "Cage of Eden", Volumes = "10", Format = MangaFormat.Archive});
var testcase = "Series and Series-Series Combined - Manga.json";
var postLib = await GenerateScannerData(testcase);

var existingSeries = new List<Series>
{
new SeriesBuilder("Cage of Eden")
.WithFormat(MangaFormat.Archive)

.WithVolume(new VolumeBuilder("1")
.WithName("1")
.Build())
.WithLocalizedName("Darker Than Black")
.Build(),
new SeriesBuilder("Darker Than Black")
.WithFormat(MangaFormat.Archive)
.WithVolume(new VolumeBuilder("1")
.WithName("1")
.Build())
.WithLocalizedName("Darker Than Black")
.Build(),
};

Assert.Empty(ScannerService.FindSeriesNotOnDisk(existingSeries, infos));
Assert.NotNull(postLib);
Assert.Single(postLib.Series);
Assert.Equal(2, postLib.Series.First().Volumes.Count);
}


[Fact]
public async Task ScanLibrary_ComicVine_PublisherFolder()
public async Task ScanLibrary_FlatSeries()
{
var testcase = "Publisher - ComicVine.json";
var testcase = "Flat Series - Manga.json";
var postLib = await GenerateScannerData(testcase);

Assert.NotNull(postLib);
Assert.Equal(4, postLib.Series.Count);
Assert.Single(postLib.Series);
Assert.Equal(3, postLib.Series.First().Volumes.Count);

// TODO: Trigger a deletion of ch 10
}

[Fact]
public async Task ScanLibrary_ShouldCombineNestedFolder()
public async Task ScanLibrary_FlatSeriesWithSpecial()
{
var testcase = "Series and Series-Series Combined - Manga.json";
var testcase = "Flat Series with Specials - Manga.json";
var postLib = await GenerateScannerData(testcase);

Assert.NotNull(postLib);
Assert.Single(postLib.Series);
Assert.Single(postLib.Series);
Assert.Equal(2, postLib.Series.First().Volumes.Count);
Assert.Equal(4, postLib.Series.First().Volumes.Count);
Assert.NotNull(postLib.Series.First().Volumes.FirstOrDefault(v => v.Chapters.FirstOrDefault(c => c.IsSpecial) != null));
}

private async Task<Library> GenerateScannerData(string testcase)
Expand All @@ -145,25 +116,31 @@ private async Task<Library> GenerateScannerData(string testcase)
_unitOfWork.LibraryRepository.Add(library);
await _unitOfWork.CommitAsync();

var scanner = CreateServices();

await scanner.ScanLibrary(library.Id);

var postLib = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(library.Id, LibraryIncludes.Series);
return postLib;
}

private ScannerService CreateServices()
{
var ds = new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), new FileSystem());
var mockReadingService = new MockReadingItemService(ds, Substitute.For<IBookService>());
var processSeries = new ProcessSeries(_unitOfWork, Substitute.For<ILogger<ProcessSeries>>(),
Substitute.For<IEventHub>(),
ds, Substitute.For<ICacheHelper>(), mockReadingService, Substitute.For<IFileService>(),
Substitute.For<IMetadataService>(),
Substitute.For<IWordCountAnalyzerService>(), Substitute.For<ICollectionTagService>(),
Substitute.For<IWordCountAnalyzerService>(),
Substitute.For<IReadingListService>(),
Substitute.For<IExternalMetadataService>(), new TagManagerService(_unitOfWork, Substitute.For<ILogger<TagManagerService>>()));

var scanner = new ScannerService(_unitOfWork, Substitute.For<ILogger<ScannerService>>(),
Substitute.For<IMetadataService>(),
Substitute.For<ICacheService>(), Substitute.For<IEventHub>(), ds,
mockReadingService, processSeries, Substitute.For<IWordCountAnalyzerService>());

await scanner.ScanLibrary(library.Id);

var postLib = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(library.Id, LibraryIncludes.Series);
return postLib;
return scanner;
}

private static (string Publisher, LibraryType Type) SplitPublisherAndLibraryType(string input)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
"My Dress-Up Darling/My Dress-Up Darling v01.cbz",
"My Dress-Up Darling/My Dress-Up Darling v02.cbz",
"My Dress-Up Darling/My Dress-Up Darling ch 10.cbz"
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[
"My Dress-Up Darling/My Dress-Up Darling v01.cbz",
"My Dress-Up Darling/My Dress-Up Darling v02.cbz",
"My Dress-Up Darling/My Dress-Up Darling ch 10.cbz",
"My Dress-Up Darling/Specials/Official Anime Fanbook SP05 (2024) (Digital).cbz"
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
"My Dress-Up Darling/Chapter 1/01.cbz",
"My Dress-Up Darling/Chapter 2/02.cbz"
]
51 changes: 51 additions & 0 deletions API/Data/ManualMigrations/MigrateLowestSeriesFolderPath2.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
using System;
using System.Linq;
using System.Threading.Tasks;
using API.Entities;
using API.Services.Tasks.Scanner.Parser;
using Kavita.Common.EnvironmentInfo;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;

namespace API.Data.ManualMigrations;

/// <summary>
/// v0.8.3 still had a bug around LowestSeriesPath. This resets it for all users.
/// </summary>
/// <remarks>
/// The migration is guarded by a row in <c>ManualMigrationHistory</c>: once a row with this
/// migration's name exists, <see cref="Migrate"/> returns without touching any data.
/// </remarks>
public static class MigrateLowestSeriesFolderPath2
{
    /// <summary>
    /// Name stored in (and looked up from) <c>ManualMigrationHistory</c>.
    /// Kept as a literal rather than <c>nameof</c> so that renaming the class
    /// cannot change the persisted value and cause the migration to run again.
    /// </summary>
    private const string MigrationName = "MigrateLowestSeriesFolderPath2";

    /// <summary>
    /// Clears <c>LowestFolderPath</c> on every series that currently has one and records
    /// that the migration has run.
    /// </summary>
    /// <param name="dataContext">Context used to query series, check/record migration history and save.</param>
    /// <param name="unitOfWork">Provides the series repository used to flag each series as updated.</param>
    /// <param name="logger">Receives the start and completion messages.</param>
    public static async Task Migrate(DataContext dataContext, IUnitOfWork unitOfWork, ILogger<Program> logger)
    {
        // Already recorded in the history table: nothing to do.
        if (await dataContext.ManualMigrationHistory.AnyAsync(m => m.Name == MigrationName))
        {
            return;
        }

        // Logged at Critical level even though, as the message says, this is not an error.
        logger.LogCritical(
            "Running MigrateLowestSeriesFolderPath2 migration - Please be patient, this may take some time. This is not an error");

        // Only series that actually have a value need to be loaded and reset.
        var seriesToReset = await dataContext.Series
            .Where(s => !string.IsNullOrEmpty(s.LowestFolderPath))
            .ToListAsync();

        foreach (var s in seriesToReset)
        {
            s.LowestFolderPath = string.Empty;
            unitOfWork.SeriesRepository.Update(s);
        }

        dataContext.ManualMigrationHistory.Add(new ManualMigrationHistory()
        {
            Name = MigrationName,
            ProductVersion = BuildInfo.Version.ToString(),
            RanAt = DateTime.UtcNow
        });

        // Single save: the series resets and the history row are submitted together, so the
        // history row is never written without the resets (and vice versa) in separate calls.
        await dataContext.SaveChangesAsync();

        logger.LogCritical(
            "Running MigrateLowestSeriesFolderPath2 migration - Completed. This is not an error");
    }
}
Loading
Loading