Skip to content

Commit

Permalink
feat: classify emails by importance based on subjects
Browse files Browse the repository at this point in the history
Signed-off-by: Richard Steinmetz <[email protected]>
  • Loading branch information
st3iny committed Dec 18, 2024
1 parent ade90a2 commit 1907ebc
Show file tree
Hide file tree
Showing 20 changed files with 1,024 additions and 556 deletions.
3 changes: 2 additions & 1 deletion appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The rating depends on the installed text processing backend. See [the rating ove
Learn more about the Nextcloud Ethical AI Rating [in our blog](https://nextcloud.com/blog/nextcloud-ethical-ai-rating/).
]]></description>
<version>4.2.0-alpha.0</version>
<version>4.2.0-alpha.1</version>
<licence>agpl</licence>
<author homepage="https://github.com/ChristophWurst">Christoph Wurst</author>
<author homepage="https://github.com/GretaD">GretaD</author>
Expand Down Expand Up @@ -90,6 +90,7 @@ Learn more about the Nextcloud Ethical AI Rating [in our blog](https://nextcloud
<command>OCA\Mail\Command\TrainAccount</command>
<command>OCA\Mail\Command\UpdateAccount</command>
<command>OCA\Mail\Command\UpdateSystemAutoresponders</command>
<command>OCA\Mail\Command\RunMetaEstimator</command>
</commands>
<settings>
<admin>OCA\Mail\Settings\AdminSettings</admin>
Expand Down
2 changes: 0 additions & 2 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
use OCA\Mail\Listener\MessageCacheUpdaterListener;
use OCA\Mail\Listener\MessageKnownSinceListener;
use OCA\Mail\Listener\MoveJunkListener;
use OCA\Mail\Listener\NewMessageClassificationListener;
use OCA\Mail\Listener\NewMessagesNotifier;
use OCA\Mail\Listener\OauthTokenRefreshListener;
use OCA\Mail\Listener\OptionalIndicesListener;
Expand Down Expand Up @@ -130,7 +129,6 @@ public function register(IRegistrationContext $context): void {
$context->registerEventListener(MessageDeletedEvent::class, MessageCacheUpdaterListener::class);
$context->registerEventListener(MessageSentEvent::class, AddressCollectionListener::class);
$context->registerEventListener(MessageSentEvent::class, InteractionListener::class);
$context->registerEventListener(NewMessagesSynchronized::class, NewMessageClassificationListener::class);
$context->registerEventListener(NewMessagesSynchronized::class, MessageKnownSinceListener::class);
$context->registerEventListener(NewMessagesSynchronized::class, NewMessagesNotifier::class);
$context->registerEventListener(SynchronizationEvent::class, AccountSynchronizedThreadUpdaterListener::class);
Expand Down
5 changes: 1 addition & 4 deletions lib/BackgroundJob/TrainImportanceClassifierJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@ protected function run($argument) {
}

try {
$this->classifier->train(
$account,
$this->logger
);
$this->classifier->train($account, $this->logger);

Check warning on line 72 in lib/BackgroundJob/TrainImportanceClassifierJob.php

View check run for this annotation

Codecov / codecov/patch

lib/BackgroundJob/TrainImportanceClassifierJob.php#L72

Added line #L72 was not covered by tests
} catch (Throwable $e) {
$this->logger->error('Cron importance classifier training failed: ' . $e->getMessage(), [
'exception' => $e,
Expand Down
23 changes: 14 additions & 9 deletions lib/Command/PredictImportance.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use OCA\Mail\Db\Message;
use OCA\Mail\Service\AccountService;
use OCA\Mail\Service\Classification\ImportanceClassifier;
use OCA\Mail\Support\ConsoleLoggerDecorator;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\IConfig;
use Psr\Log\LoggerInterface;
Expand All @@ -25,6 +26,7 @@
class PredictImportance extends Command {
public const ARGUMENT_ACCOUNT_ID = 'account-id';
public const ARGUMENT_SENDER = 'sender';
public const ARGUMENT_SUBJECT = 'subject';

private AccountService $accountService;
private ImportanceClassifier $classifier;
Expand All @@ -43,26 +45,27 @@ public function __construct(AccountService $service,
$this->config = $config;
}

/**
* @return void
*/
protected function configure() {
protected function configure(): void {

Check warning on line 48 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L48

Added line #L48 was not covered by tests
$this->setName('mail:predict-importance');
$this->setDescription('Predict importance of an incoming message');
$this->addArgument(self::ARGUMENT_ACCOUNT_ID, InputArgument::REQUIRED);
$this->addArgument(self::ARGUMENT_SENDER, InputArgument::REQUIRED);
$this->addArgument(self::ARGUMENT_SUBJECT, InputArgument::OPTIONAL);

Check warning on line 53 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L53

Added line #L53 was not covered by tests
}

public function isEnabled() {
public function isEnabled(): bool {

Check warning on line 56 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L56

Added line #L56 was not covered by tests
return $this->config->getSystemValueBool('debug');
}

/**
* @return int
*/
protected function execute(InputInterface $input, OutputInterface $output): int {
$accountId = (int)$input->getArgument(self::ARGUMENT_ACCOUNT_ID);
$sender = $input->getArgument(self::ARGUMENT_SENDER);
$subject = $input->getArgument(self::ARGUMENT_SUBJECT) ?? '';

Check warning on line 63 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L63

Added line #L63 was not covered by tests

$consoleLogger = new ConsoleLoggerDecorator(
$this->logger,
$output
);

Check warning on line 68 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L65-L68

Added lines #L65 - L68 were not covered by tests

try {
$account = $this->accountService->findById($accountId);
Expand All @@ -73,9 +76,11 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$fakeMessage = new Message();
$fakeMessage->setUid(0);
$fakeMessage->setFrom(AddressList::parse("Name <$sender>"));
$fakeMessage->setSubject($subject);

Check warning on line 79 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L79

Added line #L79 was not covered by tests
[$prediction] = $this->classifier->classifyImportance(
$account,
[$fakeMessage]
[$fakeMessage],
$consoleLogger

Check warning on line 83 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L82-L83

Added lines #L82 - L83 were not covered by tests
);
if ($prediction) {
$output->writeln('Message is important');
Expand Down
117 changes: 117 additions & 0 deletions lib/Command/RunMetaEstimator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\Mail\Command;

use OCA\Mail\Service\AccountService;
use OCA\Mail\Service\Classification\ImportanceClassifier;
use OCA\Mail\Support\ConsoleLoggerDecorator;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\IConfig;
use Psr\Log\LoggerInterface;
use Rubix\ML\Backends\Amp;
use Rubix\ML\Classifiers\KNearestNeighbors;
use Rubix\ML\CrossValidation\KFold;
use Rubix\ML\CrossValidation\Metrics\FBeta;
use Rubix\ML\GridSearch;
use Rubix\ML\Kernels\Distance\Euclidean;
use Rubix\ML\Kernels\Distance\Jaccard;
use Rubix\ML\Kernels\Distance\Manhattan;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command `mail:account:run-meta-estimator`.
 *
 * Builds a grid search over KNearestNeighbors hyper-parameters, hands it to
 * ImportanceClassifier::train() for one account and prints the best estimator
 * found plus the peak memory usage. The command is only enabled while the
 * system config value `debug` is true (see isEnabled()).
 */
class RunMetaEstimator extends Command {
// Name of the required positional argument that carries the account id.
public const ARGUMENT_ACCOUNT_ID = 'account-id';
// Name of the shuffle flag. Despite the ARGUMENT_ prefix it is registered
// as an option (addOption) in configure(), not as a positional argument.
public const ARGUMENT_SHUFFLE = 'shuffle';

private AccountService $accountService;
private LoggerInterface $logger;
private ImportanceClassifier $classifier;
private IConfig $config;

/**
 * Stores the injected collaborators. The parent constructor is called
 * without a name; the command name is set in configure().
 */
public function __construct(

Check warning on line 40 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L40

Added line #L40 was not covered by tests
AccountService $accountService,
LoggerInterface $logger,
ImportanceClassifier $classifier,
IConfig $config,
) {
parent::__construct();

Check warning on line 46 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L46

Added line #L46 was not covered by tests

$this->accountService = $accountService;
$this->logger = $logger;
$this->classifier = $classifier;
$this->config = $config;

Check warning on line 51 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L48-L51

Added lines #L48 - L51 were not covered by tests
}

/**
 * Declares the command name, its description, the required account id
 * argument and the shuffle option.
 */
protected function configure(): void {
$this->setName('mail:account:run-meta-estimator');
$this->setDescription('Run the meta estimator for an account');
$this->addArgument(self::ARGUMENT_ACCOUNT_ID, InputArgument::REQUIRED);
// Shortcut and mode are both null; a null mode falls back to Symfony's
// default (InputOption::VALUE_NONE), i.e. a flag that takes no value.
$this->addOption(self::ARGUMENT_SHUFFLE, null, null, 'Shuffle data set before training');

Check warning on line 58 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L54-L58

Added lines #L54 - L58 were not covered by tests
}

/**
 * Reports the command as enabled only when the system config value `debug`
 * is true.
 */
public function isEnabled(): bool {
return $this->config->getSystemValueBool('debug');

Check warning on line 62 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L61-L62

Added lines #L61 - L62 were not covered by tests
}

/**
 * Runs the grid search for the requested account.
 *
 * @return int 0 after the training call returned, 1 if the account does not exist
 */
protected function execute(InputInterface $input, OutputInterface $output): int {
$accountId = (int)$input->getArgument(self::ARGUMENT_ACCOUNT_ID);
$shuffle = (bool)$input->getOption(self::ARGUMENT_SHUFFLE);

Check warning on line 67 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L65-L67

Added lines #L65 - L67 were not covered by tests

// Abort early with exit code 1 when the account id is unknown.
try {
$account = $this->accountService->findById($accountId);
} catch (DoesNotExistException $e) {
$output->writeln("<error>Account $accountId does not exist</error>");
return 1;

Check warning on line 73 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L70-L73

Added lines #L70 - L73 were not covered by tests
}

// Decorates the injected logger together with the console output;
// presumably log records are mirrored to the console - confirm in
// ConsoleLoggerDecorator.
$consoleLogger = new ConsoleLoggerDecorator(
$this->logger,
$output
);

Check warning on line 79 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L76-L79

Added lines #L76 - L79 were not covered by tests

// Factory closure handed to train(): it creates the GridSearch meta
// estimator on demand. Declared static because it does not use $this.
$estimator = static function () use ($consoleLogger) {
// Hyper-parameter grid: one list of candidate values per constructor
// argument of KNearestNeighbors (see the inline labels).
$params = [
[5, 10, 15, 20, 25, 30, 35, 40], // Neighbors
[true, false], // Weighted?
[new Euclidean(), new Manhattan(), new Jaccard()], // Kernel
];

Check warning on line 86 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L81-L86

Added lines #L81 - L86 were not covered by tests

// Candidates are scored with the FBeta metric using a KFold(5)
// cross validator.
$estimator = new GridSearch(
KNearestNeighbors::class,
$params,
new FBeta(),
new KFold(5)
);
$estimator->setLogger($consoleLogger);
// Uses Rubix ML's Amp backend for the search.
$estimator->setBackend(new Amp());
return $estimator;
};

Check warning on line 97 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L88-L97

Added lines #L88 - L97 were not covered by tests

// NOTE(review): the trailing `false` is presumably the "persist" flag, so
// the result of the search is not stored - verify against the signature
// of ImportanceClassifier::train(), which is not visible here.
$pipeline = $this->classifier->train(
$account,
$consoleLogger,
$estimator,
$shuffle,
false,
);

Check warning on line 105 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L99-L105

Added lines #L99 - L105 were not covered by tests

// The nullsafe call tolerates a null $pipeline; in that case no "best
// estimator" line is printed.
/** @var GridSearch $metaEstimator */
$metaEstimator = $pipeline?->getEstimator();
if ($metaEstimator !== null) {
// base() is interpolated into the string, so its return value must be
// convertible to string.
$output->writeln("<info>Best estimator: {$metaEstimator->base()}</info>");

Check warning on line 110 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L108-L110

Added lines #L108 - L110 were not covered by tests
}

// Peak memory usage of this PHP process, converted from bytes to whole
// megabytes.
$mbs = (int)(memory_get_peak_usage() / 1024 / 1024);
$output->writeln('<info>' . $mbs . 'MB of memory used</info>');
return 0;

Check warning on line 115 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L113-L115

Added lines #L113 - L115 were not covered by tests
}
}
38 changes: 28 additions & 10 deletions lib/Command/TrainAccount.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2019 Nextcloud GmbH and Nextcloud contributors
* SPDX-FileCopyrightText: 2019-2024 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

Expand All @@ -23,6 +23,9 @@

class TrainAccount extends Command {
public const ARGUMENT_ACCOUNT_ID = 'account-id';
public const ARGUMENT_SHUFFLE = 'shuffle';
public const ARGUMENT_DRY_RUN = 'dry-run';
public const ARGUMENT_FORCE = 'force';

private AccountService $accountService;
private ImportanceClassifier $classifier;
Expand All @@ -41,28 +44,39 @@ public function __construct(AccountService $service,
$this->classificationSettingsService = $classificationSettingsService;
}

/**
* @return void
*/
protected function configure() {
protected function configure(): void {

Check warning on line 47 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L47

Added line #L47 was not covered by tests
$this->setName('mail:account:train');
$this->setDescription('Train the classifier of new messages');
$this->addArgument(self::ARGUMENT_ACCOUNT_ID, InputArgument::REQUIRED);
$this->addOption(self::ARGUMENT_SHUFFLE, null, null, 'Shuffle data set before training');
$this->addOption(
self::ARGUMENT_DRY_RUN,
null,
null,
'Don\'t persist classifier after training'
);
$this->addOption(
self::ARGUMENT_FORCE,
null,
null,
'Train an estimator even if the classification is disabled by the user'
);

Check warning on line 63 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L51-L63

Added lines #L51 - L63 were not covered by tests
}

/**
* @return int
*/
protected function execute(InputInterface $input, OutputInterface $output): int {
$accountId = (int)$input->getArgument(self::ARGUMENT_ACCOUNT_ID);
$shuffle = (bool)$input->getOption(self::ARGUMENT_SHUFFLE);
$dryRun = (bool)$input->getOption(self::ARGUMENT_DRY_RUN);
$force = (bool)$input->getOption(self::ARGUMENT_FORCE);

Check warning on line 70 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L68-L70

Added lines #L68 - L70 were not covered by tests

try {
$account = $this->accountService->findById($accountId);
} catch (DoesNotExistException $e) {
$output->writeln("<error>account $accountId does not exist</error>");
return 1;
}
if (!$this->classificationSettingsService->isClassificationEnabled($account->getUserId())) {

if (!$force && !$this->classificationSettingsService->isClassificationEnabled($account->getUserId())) {

Check warning on line 79 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L79

Added line #L79 was not covered by tests
$output->writeln("<info>classification is turned off for account $accountId</info>");
return 2;
}
Expand All @@ -71,9 +85,13 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$this->logger,
$output
);

$this->classifier->train(
$account,
$consoleLogger
$consoleLogger,
null,
$shuffle,
!$dryRun

Check warning on line 94 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L91-L94

Added lines #L91 - L94 were not covered by tests
);

$mbs = (int)(memory_get_peak_usage() / 1024 / 1024);
Expand Down
Loading

0 comments on commit 1907ebc

Please sign in to comment.