diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/adjuster/adjusters/CopyAdjuster.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/adjuster/adjusters/CopyAdjuster.java index 24bd37fc2e6..b7141457926 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/adjuster/adjusters/CopyAdjuster.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/adjuster/adjusters/CopyAdjuster.java @@ -136,7 +136,8 @@ private synchronized void createTask(PoolManagerPoolInformation targetInfo, Stri false, // compute checksum on update; should not happen false, // force copy even if pool is not readable true, // maintain atime - 1); // only one copy per task + 1, // only one copy per task + true); // wait for new targets if necessary createTask(taskParameters, source); diff --git a/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java b/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java index b01a37d55e9..7f0aafdbeb5 100644 --- a/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java +++ b/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java @@ -380,7 +380,8 @@ public Task handleMakeOneCopy(FileAttributes attributes) { false, // compute checksum on update; should not happen false, // force copy even if pool not readable true, // maintain atime - 1); + 1, // only one copy per task + true); // wait for new targets if necessary Task task = new Task(taskParameters, completionHandler, source, pnfsId, ReplicaState.CACHED, ONLINE_STICKY_RECORD, diff --git a/modules/dcache/src/main/java/org/dcache/pool/migration/Job.java b/modules/dcache/src/main/java/org/dcache/pool/migration/Job.java index 1f609bb52bd..3679f42e37a 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/migration/Job.java +++ b/modules/dcache/src/main/java/org/dcache/pool/migration/Job.java @@ -81,7 +81,7 @@ enum State { private final Set _queued = new LinkedHashSet<>(); private final Map _sizes = new HashMap<>(); private final Map _running = new HashMap<>(); - private final BlockingQueue _errors = new ArrayBlockingQueue<>(15); + private final BlockingQueue _notes = new ArrayBlockingQueue<>(15); private final Map _cancelRequests = new HashMap<>(); @@ -109,7 +109,7 @@ public Job(MigrationContext context, JobDefinition definition) { context.getExecutor(), definition.selectionStrategy, definition.poolList, definition.isEager, definition.isMetaOnly, definition.computeChecksumOnUpdate, definition.forceSourceMode, - definition.maintainAtime, definition.replicas); + definition.maintainAtime, definition.replicas, definition.waitForTargets); _pinPrefix = context.getPinManagerStub().getDestinationPath().getDestinationAddress() .getCellName(); @@ -189,11 +189,11 @@ public void setConcurrency(int concurrency) { } } - public void addError(Error error) { + public void addNote(Note note) { _lock.lock(); try { - while (!_errors.offer(error)) { - _errors.poll(); + while (!_notes.offer(note)) { + _notes.poll(); } } finally { _lock.unlock(); @@ -255,10 +255,10 @@ public void getInfo(PrintWriter pw) { task.getInfo(pw); } - if (!_errors.isEmpty()) { - pw.println("Most recent errors:"); - for (Error error : _errors) { - pw.println(error); + if (!_notes.isEmpty()) { + pw.println("Most recent notes:"); + for (Note note : _notes) { + pw.println(note); } } } finally { @@ -516,7 +516,7 @@ private void schedule() { PnfsId pnfsId = i.next(); if (!_context.lock(pnfsId)) { - addError(new Error(0, pnfsId, "File is locked")); + addNote(new Note(0, pnfsId, "File is locked")); continue; } @@ -742,7 +742,7 @@ public void taskFailed(Task task, int rc, String msg) { schedule(); } - addError(new Error(task.getId(), pnfsId, msg)); + addNote(new Note(task.getId(), pnfsId, msg)); } finally { _lock.unlock(); } @@ -761,7 +761,7 @@ public void taskFailedPermanently(Task task, int rc, String msg) { _context.unlock(pnfsId); schedule(); - addError(new Error(task.getId(), pnfsId, msg)); + addNote(new Note(task.getId(), pnfsId, msg)); } finally { _lock.unlock(); } @@ -785,6 +785,18 @@ public void taskCompleted(Task task) { } } + @Override + public void taskCompletedWithNote(Task task, String msg) { + _lock.lock(); + try { + taskCompleted(task); + + addNote(new Note(task.getId(), task.getPnfsId(), msg)); + } finally { + _lock.unlock(); + } + } + public Object messageArrived(CellMessage envelope, PoolMigrationJobCancelMessage message) { DelayedReply reply = new DelayedReply(); _lock.lock(); @@ -939,23 +951,23 @@ public boolean evaluateLifetimePredicate(Expression expression) { return expression.evaluateBoolean(symbols); } - protected static class Error { + protected static class Note { private final long _id; private final long _time; private final PnfsId _pnfsId; - private final String _error; + private final String _note; - public Error(long id, PnfsId pnfsId, String error) { + public Note(long id, PnfsId pnfsId, String note) { _id = id; _time = System.currentTimeMillis(); _pnfsId = pnfsId; - _error = error; + _note = note; } public String toString() { return String.format("%tT [%d] %s: %s", - _time, _id, _pnfsId, _error); + _time, _id, _pnfsId, _note); } } } diff --git a/modules/dcache/src/main/java/org/dcache/pool/migration/JobDefinition.java b/modules/dcache/src/main/java/org/dcache/pool/migration/JobDefinition.java index f7ede7f078c..c8ab4bfa5be 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/migration/JobDefinition.java +++ b/modules/dcache/src/main/java/org/dcache/pool/migration/JobDefinition.java @@ -103,6 +103,12 @@ public class JobDefinition { */ public final int replicas; + /** + * Whether to wait for targets to become available to satisfy number of replicas. + */ + public final boolean waitForTargets; + + public JobDefinition(Predicate filter, CacheEntryMode sourceMode, CacheEntryMode targetMode, @@ -120,7 +126,8 @@ public JobDefinition(Predicate filter, boolean maintainAtime, Expression pauseWhen, Expression stopWhen, - boolean forceSourceMode) { + boolean forceSourceMode, + boolean waitForTargets) { this.filter = filter; this.sourceMode = sourceMode; this.targetMode = targetMode; @@ -139,5 +146,6 @@ public JobDefinition(Predicate filter, this.pauseWhen = pauseWhen; this.stopWhen = stopWhen; this.forceSourceMode = forceSourceMode; + this.waitForTargets = waitForTargets; } } diff --git a/modules/dcache/src/main/java/org/dcache/pool/migration/MigrationModule.java b/modules/dcache/src/main/java/org/dcache/pool/migration/MigrationModule.java index db419a5cdea..556f3177b99 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/migration/MigrationModule.java +++ b/modules/dcache/src/main/java/org/dcache/pool/migration/MigrationModule.java @@ -537,6 +537,16 @@ public class MigrationCopyCommand implements Callable { usage = "Enables the transfer of files from a disabled pool.") boolean forceSourceMode; + @Option(name = "target-deficit-mode", metaVar = "tdmode", + values = { "wait", "limit" }, + category = "Transfer options", + usage = "Behaviour when requested replicas exceeds available targets:\n" + + "wait:\n" + + " wait for new targets to become available.\n" + + "limit:\n" + + " limit the number of replicas to that of the currently-available targets.\n") + String targetDeficitMode = "wait"; + @Argument(metaVar = "target", required = false, usage = "Required unless -target=pgroup is supplied, in which case we" + @@ -838,7 +848,8 @@ public String call() throws IllegalArgumentException { maintainAtime, createLifetimePredicate(pauseWhen), createLifetimePredicate(stopWhen), - forceSourceMode); + forceSourceMode, + targetDeficitMode.equals("wait")); if (definition.targetMode.state == CacheEntryMode.State.DELETE || definition.targetMode.state == CacheEntryMode.State.REMOVABLE) { diff --git a/modules/dcache/src/main/java/org/dcache/pool/migration/Task.java b/modules/dcache/src/main/java/org/dcache/pool/migration/Task.java index 1ee2cedaaec..605eb4d533d 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/migration/Task.java +++ b/modules/dcache/src/main/java/org/dcache/pool/migration/Task.java @@ -241,6 +241,10 @@ synchronized boolean needsMoreReplicas() { return _replicas.size() < _parameters.replicas; } + synchronized boolean moreReplicasPossible() { + return _parameters.waitForTargets || (_replicas.size() < _parameters.poolList.getPools().size()); + } + /** * FSM Action */ @@ -347,6 +351,16 @@ void notifyCompleted() { new FireAndForgetTask(() -> _callbackHandler.taskCompleted(Task.this))); } + /** + * FSM Action + */ + void notifyCompletedWithInsufficientReplicas() { + _parameters.executor.execute( + new FireAndForgetTask(() -> _callbackHandler.taskCompletedWithNote(Task.this, + String.format("File replicas truncated at %s due to pool availability (%s requested)", + _replicas.size(), _parameters.replicas)))); + } + /** * FSM Action */ diff --git a/modules/dcache/src/main/java/org/dcache/pool/migration/TaskCompletionHandler.java b/modules/dcache/src/main/java/org/dcache/pool/migration/TaskCompletionHandler.java index 8ed0083ce1d..547be54f6a5 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/migration/TaskCompletionHandler.java +++ b/modules/dcache/src/main/java/org/dcache/pool/migration/TaskCompletionHandler.java @@ -26,4 +26,5 @@ public interface TaskCompletionHandler { * The task completed without error. */ void taskCompleted(Task task); + default void taskCompletedWithNote(Task task, String msg) { taskCompleted(task); } } diff --git a/modules/dcache/src/main/java/org/dcache/pool/migration/TaskParameters.java b/modules/dcache/src/main/java/org/dcache/pool/migration/TaskParameters.java index f9c9748c01a..ab48ddf2f35 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/migration/TaskParameters.java +++ b/modules/dcache/src/main/java/org/dcache/pool/migration/TaskParameters.java @@ -88,11 +88,16 @@ public class TaskParameters { */ public final int replicas; + /** + * Whether to wait for targets to become available to satisfy number of replicas. + */ + public final boolean waitForTargets; + public TaskParameters(CellStub pool, CellStub pnfs, CellStub pinManager, ScheduledExecutorService executor, PoolSelectionStrategy selectionStrategy, RefreshablePoolList poolList, boolean isEager, boolean isMetaOnly, boolean computeChecksumOnUpdate, boolean forceSourceMode, - boolean maintainAtime, int replicas) { + boolean maintainAtime, int replicas, boolean waitForTargets) { this.pool = pool; this.pnfs = pnfs; this.pinManager = pinManager; @@ -105,5 +110,6 @@ public TaskParameters(CellStub pool, CellStub pnfs, CellStub pinManager, this.forceSourceMode = forceSourceMode; this.maintainAtime = maintainAtime; this.replicas = replicas; + this.waitForTargets = waitForTargets; } } diff --git a/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm index f1031bd485e..c977c389e54 100644 --- a/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm +++ b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm @@ -181,6 +181,7 @@ Entry messageArrived(message: PoolMigrationCopyFinishedMessage) Done { + notifyCompleted(); } } @@ -266,6 +267,7 @@ Entry messageArrived(message: PoolMigrationCopyFinishedMessage) Done { + notifyCompleted(); } } @@ -359,6 +361,12 @@ Exit UpdatingExistingFile { } + messageArrived(message: PoolMigrationCopyFinishedMessage) + [ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) ] + Done + { + notifyCompletedWithInsufficientReplicas(); + } messageArrived(message: PoolMigrationCopyFinishedMessage) [ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ] InitiatingCopy @@ -378,6 +386,7 @@ Exit messageArrived(message: PoolMigrationCopyFinishedMessage) Done { + notifyCompleted(); } cancel Cancelling @@ -435,6 +444,12 @@ Entry UpdatingExistingFile { } + messageArrived(message: PoolMigrationCopyFinishedMessage) + [ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) ] + Done + { + notifyCompletedWithInsufficientReplicas(); + } messageArrived(message: PoolMigrationCopyFinishedMessage) [ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ] InitiatingCopy @@ -454,6 +469,7 @@ Entry messageArrived(message: PoolMigrationCopyFinishedMessage) Done { + notifyCompleted(); } cancel Cancelling @@ -527,6 +543,12 @@ Exit UpdatingExistingFile { } + messageArrived(message: PoolMigrationCopyFinishedMessage) + [ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) ] + Done + { + notifyCompletedWithInsufficientReplicas(); + } messageArrived(message: PoolMigrationCopyFinishedMessage) [ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ] InitiatingCopy @@ -546,6 +568,7 @@ Exit messageArrived(message: PoolMigrationCopyFinishedMessage) Done { + notifyCompleted(); } cancel Cancelling @@ -595,6 +618,12 @@ Exit UpdatingExistingFile { } + messageArrived(message: PoolMigrationCopyFinishedMessage) + [ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) ] + Done + { + notifyCompletedWithInsufficientReplicas(); + } messageArrived(message: PoolMigrationCopyFinishedMessage) [ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ] InitiatingCopy @@ -614,6 +643,7 @@ Exit messageArrived(message: PoolMigrationCopyFinishedMessage) Done { + notifyCompleted(); } } @@ -626,6 +656,7 @@ Entry move_success Done { + notifyCompleted(); } move_failure(rc: Integer, cause: Object) Failed @@ -689,6 +720,7 @@ Exit [ message.getReturnCode() == 0 ] Done { + notifyCompleted(); } cancel_success nil @@ -731,7 +763,6 @@ Failed Done Entry { - notifyCompleted(); } { } diff --git a/modules/dcache/src/main/smc/org/dcache/pool/migration/Task_sm.dot b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task_sm.dot new file mode 100644 index 00000000000..43d4128e399 --- /dev/null +++ b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task_sm.dot @@ -0,0 +1,410 @@ +digraph Task { + + node + [shape=Mrecord width=1.5]; + + subgraph cluster_TASK { + + label="TASK"; + + // + // States (Nodes) + // + + "TASK::Queued" + [label="{Queued|Default()/\l}"]; + + "TASK::GettingLocations" + [label="{GettingLocations|Entry/\l   queryLocations();\l|Default()/\l}"]; + + "TASK::UpdatingExistingFile" + [label="{UpdatingExistingFile|Entry/\l   updateExistingReplica();\l|copy_noroute()\l\[ ctxt.hasMoreLocations() \]/\l   updateExistingReplica();\lcopy_failure(rc: Integer, cause: Object)\l\[ ctxt.hasMoreLocations() \]/\l   updateExistingReplica();\lDefault()/\l}"]; + + "TASK::InitiatingCopy" + [label="{InitiatingCopy|Entry/\l   initiateCopy();\l|Default()/\l}"]; + + "TASK::WaitingForCopyReplicaReply" + [label="{WaitingForCopyReplicaReply|Default()/\l}"]; + + "TASK::Copying" + [label="{Copying|Entry/\l   startTimer(ctxt.getPingPeriod());\lExit/\l   stopTimer();\l|Default()/\l}"]; + + "TASK::Pinging" + [label="{Pinging|Entry/\l   ping();\l|Default()/\l}"]; + + "TASK::NoResponse" + [label="{NoResponse|Entry/\l   startTimer(ctxt.getNoResponseTimeout());\lExit/\l   stopTimer();\l|timer()/\l   ping();\lDefault()/\l}"]; + + "TASK::WaitingForCopyFinishedMessage" + [label="{WaitingForCopyFinishedMessage|Entry/\l   startTimer(ctxt.getTaskDeadTimeout());\lExit/\l   stopTimer();\l|Default()/\l}"]; + + "TASK::MovingPin" + [label="{MovingPin|Entry/\l   movePin();\l|Default()/\l}"]; + + "TASK::Cancelling" + [label="{Cancelling|Entry/\l   startTimer(ctxt.getTaskDeadTimeout());\l   cancelCopy();\lExit/\l   stopTimer();\l|cancel_success()/\lcancel_failure(rc: Integer, cause: Object)/\lcancel_timeout()/\lDefault()/\l}"]; + + "TASK::Cancelled" + [label="{Cancelled|Entry/\l   notifyCancelled();\l|Default()/\l}"]; + + "TASK::Failed" + [label="{Failed|Default()/\l}"]; + + "TASK::Done" + [label="{Done|Entry/\l|Default()/\l}"]; + + "%start" + [label="" shape=circle style=filled fillcolor=black width=0.25]; + + // + // Transitions (Edges) + // + + "TASK::Queued" -> "TASK::GettingLocations" + [label="startWithoutLocations()/\l"]; + + "TASK::Queued" -> "TASK::UpdatingExistingFile" + [label="startWithLocations()\l\[ ctxt.hasMoreLocations() \]/\l"]; + + "TASK::Queued" -> "TASK::InitiatingCopy" + [label="startWithLocations()\l\[ !ctxt.isMetaOnly() \]/\l"]; + + "TASK::Queued" -> "TASK::Failed" + [label="startWithLocations()/\lfailPermanently(FILE_NOT_IN_REPOSITORY, \"File skipped because it has no existing replicas\");\l"]; + + "TASK::GettingLocations" -> "TASK::Failed" + [label="query_timeout()/\lfail(TIMEOUT, \"PnfsManager failed (no response)\");\l"]; + + "TASK::GettingLocations" -> "TASK::Failed" + [label="query_noroute()/\lfail(SERVICE_UNAVAILABLE, \"PnfsManager failed (no route to cell)\");\l"]; + + "TASK::GettingLocations" -> "TASK::Failed" + [label="query_failure(rc: Integer, cause: Object)\l\[ rc == FILE_NOT_FOUND \]/\lfailPermanently(FILE_NOT_FOUND, \"File does not exist, skipped\");\l"]; + + "TASK::GettingLocations" -> "TASK::Failed" + [label="query_failure(rc: Integer, cause: Object)/\lfail(rc, \"PnfsManager failed (\" + cause + \")\");\l"]; + + "TASK::GettingLocations" -> "TASK::Cancelled" + [label="cancel()/\l"]; + + "TASK::GettingLocations" -> "TASK::UpdatingExistingFile" + [label="query_success()\l\[ ctxt.hasMoreLocations() \]/\l"]; + + "TASK::GettingLocations" -> "TASK::InitiatingCopy" + [label="query_success()\l\[ !ctxt.isMetaOnly() \]/\l"]; + + "TASK::GettingLocations" -> "TASK::Failed" + [label="query_success()/\lfailPermanently(FILE_NOT_IN_REPOSITORY, \"File skipped because it has no existing replicas\");\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::UpdatingExistingFile" + [label="copy_timeout()\l\[ ctxt.hasMoreLocations() \]/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::InitiatingCopy" + [label="copy_timeout()\l\[ ctxt.isEager() && !ctxt.isMetaOnly() \]/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Failed" + [label="copy_timeout()/\lfail(TIMEOUT, \"Remote pool failed (no response)\");\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::InitiatingCopy" + [label="copy_noroute()\l\[ ctxt.isEager() && !ctxt.isMetaOnly() \]/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Failed" + [label="copy_noroute()/\lfail(SERVICE_UNAVAILABLE, \"Remote pool failed (no route to cell)\");\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Failed" + [label="copy_failure(rc: Integer, cause: Object)\l\[ rc == LOCKED \]/\lfail(rc, \"Replica is locked on target pool\");\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::InitiatingCopy" + [label="copy_failure(rc: Integer, cause: Object)\l\[ !ctxt.isMetaOnly() \]/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Failed" + [label="copy_failure(rc: Integer, cause: Object)/\lfail(rc, String.format(\"Transfer to %s failed (%s)\", ctxt.getTarget(), cause));\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Copying" + [label="copy_success()/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Cancelling" + [label="cancel()/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == FILE_NOT_FOUND \]/\lfailPermanently(message.getReturnCode(), String.format(\"Transfer to %s failed (%s); will not be retried\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() != 0 \]/\lfail(message.getReturnCode(), String.format(\"Transfer to %s failed (%s)\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::WaitingForCopyReplicaReply" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() \]/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::MovingPin" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.getMustMovePins() \]/\l"]; + + "TASK::UpdatingExistingFile" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)/\lnotifyCompleted();\l"]; + + "TASK::InitiatingCopy" -> "TASK::Copying" + [label="copy_success()/\l"]; + + "TASK::InitiatingCopy" -> "TASK::Failed" + [label="copy_nopools()/\lfail(NO_POOL_ONLINE, \"No targets\");\l"]; + + "TASK::InitiatingCopy" -> "TASK::Failed" + [label="copy_noroute()/\lfail(SERVICE_UNAVAILABLE, String.format(\"Pool %s failed (no route to cell)\", + ctxt.getTarget()));\l"]; + + "TASK::InitiatingCopy" -> "TASK::Failed" + [label="copy_failure(rc: Integer, cause: Object)\l\[ rc == FILE_CORRUPTED \]/\lfailPermanently(rc, String.format(\"Pool %s failed (%s)\", + ctxt.getTarget(), cause));\l"]; + + "TASK::InitiatingCopy" -> "TASK::Failed" + [label="copy_failure(rc: Integer, cause: Object)/\lfail(rc, String.format(\"Pool %s failed (%s)\", + ctxt.getTarget(), cause));\l"]; + + "TASK::InitiatingCopy" -> "TASK::WaitingForCopyFinishedMessage" + [label="copy_timeout()/\lcancelCopy(\"Timeout waiting for target pool \" + + ctxt.getTarget());\l"]; + + "TASK::InitiatingCopy" -> "TASK::Cancelling" + [label="cancel()/\l"]; + + "TASK::InitiatingCopy" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == FILE_NOT_FOUND || message.getReturnCode() == FILE_CORRUPTED \]/\lfailPermanently(message.getReturnCode(), String.format(\"Transfer to %s failed (%s); will not be retried\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::InitiatingCopy" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() != 0 \]/\lfail(message.getReturnCode(), String.format(\"Transfer to %s failed (%s)\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::InitiatingCopy" -> "TASK::WaitingForCopyReplicaReply" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() \]/\l"]; + + "TASK::InitiatingCopy" -> "TASK::MovingPin" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.getMustMovePins() \]/\l"]; + + "TASK::InitiatingCopy" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)/\lnotifyCompleted();\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::UpdatingExistingFile" + [label="copy_success()\l\[ ctxt.hasMoreLocations() \]/\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::InitiatingCopy" + [label="copy_success()\l\[ !ctxt.isMetaOnly() \]/\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::Failed" + [label="copy_success()/\lfailPermanently(FILE_NOT_IN_REPOSITORY, \"File skipped because it does not have enough existing replicas\");\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::Failed" + [label="copy_nopools()/\lfail(NO_POOL_ONLINE, \"No targets\");\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::Failed" + [label="copy_noroute()/\lfail(SERVICE_UNAVAILABLE, String.format(\"Pool %s failed (no route to cell)\", + ctxt.getTarget()));\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::Failed" + [label="copy_failure(rc: Integer, cause: Object)/\lfail(rc, String.format(\"Pool %s failed (%s)\", + ctxt.getTarget(), cause));\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::Failed" + [label="copy_timeout()/\lfail(TIMEOUT, String.format(\"Pool %s failed (no response)\", + ctxt.getTarget()));\l"]; + + "TASK::WaitingForCopyReplicaReply" -> "TASK::Cancelled" + [label="cancel()/\l"]; + + "TASK::Copying" -> "TASK::Pinging" + [label="timer()/\l"]; + + "TASK::Copying" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == FILE_NOT_FOUND || message.getReturnCode() == FILE_CORRUPTED \]/\lfailPermanently(message.getReturnCode(), String.format(\"Transfer to %s failed (%s); will not be retried\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::Copying" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() != 0 \]/\lfail(message.getReturnCode(), String.format(\"Transfer to %s failed (%s)\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::Copying" -> "TASK::UpdatingExistingFile" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ctxt.hasMoreLocations() \]/\l"]; + + "TASK::Copying" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) \]/\lnotifyCompletedWithInsufficientReplicas();\l"]; + + "TASK::Copying" -> "TASK::InitiatingCopy" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() \]/\l"]; + + "TASK::Copying" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() \]/\lfailPermanently(FILE_NOT_IN_REPOSITORY, \"File skipped because it does not have enough existing replicas\");\l"]; + + "TASK::Copying" -> "TASK::MovingPin" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.getMustMovePins() \]/\l"]; + + "TASK::Copying" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)/\lnotifyCompleted();\l"]; + + "TASK::Copying" -> "TASK::Cancelling" + [label="cancel()/\l"]; + + "TASK::Pinging" -> "TASK::Copying" + [label="ping_success()/\l"]; + + "TASK::Pinging" -> "TASK::WaitingForCopyFinishedMessage" + [label="ping_failure(rc: Integer, cause: Object)/\l"]; + + "TASK::Pinging" -> "TASK::NoResponse" + [label="ping_noroute()/\l"]; + + "TASK::Pinging" -> "TASK::NoResponse" + [label="ping_timeout()/\l"]; + + "TASK::Pinging" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == FILE_NOT_FOUND \]/\lfailPermanently(message.getReturnCode(), String.format(\"Transfer to %s failed (%s); will not be retried\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::Pinging" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() != 0 \]/\lfail(message.getReturnCode(), String.format(\"Transfer to %s failed (%s)\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::Pinging" -> "TASK::UpdatingExistingFile" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ctxt.hasMoreLocations() \]/\l"]; + + "TASK::Pinging" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) \]/\lnotifyCompletedWithInsufficientReplicas();\l"]; + + "TASK::Pinging" -> "TASK::InitiatingCopy" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() \]/\l"]; + + "TASK::Pinging" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() \]/\lfailPermanently(FILE_NOT_IN_REPOSITORY, \"File skipped because it does not have enough existing replicas\");\l"]; + + "TASK::Pinging" -> "TASK::MovingPin" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.getMustMovePins() \]/\l"]; + + "TASK::Pinging" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)/\lnotifyCompleted();\l"]; + + "TASK::Pinging" -> "TASK::Cancelling" + [label="cancel()/\l"]; + + "TASK::NoResponse" -> "TASK::Copying" + [label="ping_success()/\l"]; + + "TASK::NoResponse" -> "TASK::WaitingForCopyFinishedMessage" + [label="ping_failure(rc: Integer, cause: Object)/\l"]; + + "TASK::NoResponse" -> "TASK::Failed" + [label="ping_noroute()/\lfail(SERVICE_UNAVAILABLE, String.format(\"Pool %s failed (no route to cell)\", + ctxt.getTarget()));\l"]; + + "TASK::NoResponse" -> "TASK::Failed" + [label="ping_timeout()/\lfail(TIMEOUT, String.format(\"Pool %s failed (no response)\", + ctxt.getTarget()));\l"]; + + "TASK::NoResponse" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == FILE_NOT_FOUND \]/\lfailPermanently(message.getReturnCode(), String.format(\"Transfer to %s failed (%s); will not be retried\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::NoResponse" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() != 0 \]/\lfail(message.getReturnCode(), String.format(\"Transfer to %s failed (%s)\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::NoResponse" -> "TASK::UpdatingExistingFile" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ctxt.hasMoreLocations() \]/\l"]; + + "TASK::NoResponse" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) \]/\lnotifyCompletedWithInsufficientReplicas();\l"]; + + "TASK::NoResponse" -> "TASK::InitiatingCopy" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() \]/\l"]; + + "TASK::NoResponse" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() \]/\lfailPermanently(FILE_NOT_IN_REPOSITORY, \"File skipped because it does not have enough existing replicas\");\l"]; + + "TASK::NoResponse" -> "TASK::MovingPin" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.getMustMovePins() \]/\l"]; + + "TASK::NoResponse" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)/\lnotifyCompleted();\l"]; + + "TASK::NoResponse" -> "TASK::Cancelling" + [label="cancel()/\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::Failed" + [label="timer()/\lfail(TIMEOUT, String.format(\"Pool %s failed (no response)\", + ctxt.getTarget()));\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == FILE_NOT_FOUND \]/\lfailPermanently(message.getReturnCode(), String.format(\"Transfer to %s failed (%s); will not be retried\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() != 0 \]/\lfail(message.getReturnCode(), String.format(\"Transfer to %s failed (%s)\", + ctxt.getTarget(), + message.getErrorObject()));\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::UpdatingExistingFile" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ctxt.hasMoreLocations() \]/\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && ! (ctxt.moreReplicasPossible() || ctxt.isMetaOnly()) \]/\lnotifyCompletedWithInsufficientReplicas();\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::InitiatingCopy" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() \]/\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.needsMoreReplicas() \]/\lfailPermanently(FILE_NOT_IN_REPOSITORY, \"File skipped because it does not have enough existing replicas\");\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::MovingPin" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ ctxt.getMustMovePins() \]/\l"]; + + "TASK::WaitingForCopyFinishedMessage" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)/\lnotifyCompleted();\l"]; + + "TASK::MovingPin" -> "TASK::Done" + [label="move_success()/\lnotifyCompleted();\l"]; + + "TASK::MovingPin" -> "TASK::Failed" + [label="move_failure(rc: Integer, cause: Object)/\lfail(rc, \"Pin manager failed (\" + cause + \")\");\l"]; + + "TASK::MovingPin" -> "TASK::Failed" + [label="move_noroute()/\lfail(SERVICE_UNAVAILABLE, \"Pin manager failed (no route to cell)\");\l"]; + + "TASK::MovingPin" -> "TASK::Failed" + [label="move_timeout()/\lfail(TIMEOUT, \"Pin manager failed (timeout)\");\l"]; + + "TASK::MovingPin" -> "TASK::Failed" + [label="cancel()/\lfail(DEFAULT_ERROR_CODE, String.format(\"Cancelling task (%s) failed (data migrated but pin movement still underway)\", + ctxt.getCancelReason()));\l"]; + + "TASK::Cancelling" -> "TASK::Failed" + [label="timer()/\lfail(TIMEOUT, String.format(\"Cancelling task (%s) failed (no response from %s)\", + ctxt.getCancelReason(), ctxt.getTarget()));\l"]; + + "TASK::Cancelling" -> "TASK::Cancelled" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() != 0 \]/\l"]; + + "TASK::Cancelling" -> "TASK::Failed" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == 0 && ctxt.getMustMovePins() \]/\lfail(DEFAULT_ERROR_CODE, String.format(\"Cancelling task (%s) failed (data migrated but still pins to move)\", + ctxt.getCancelReason()));\l"]; + + "TASK::Cancelling" -> "TASK::Done" + [label="messageArrived(message: PoolMigrationCopyFinishedMessage)\l\[ message.getReturnCode() == 0 \]/\lnotifyCompleted();\l"]; + + "TASK::Cancelling" -> "TASK::Failed" + [label="cancel_noroute()/\lfail(SERVICE_UNAVAILABLE, String.format(\"Cancelling task (%s) failed (no route)\", + ctxt.getCancelReason()));\l"]; + + "%start" -> "TASK::Queued" + } + +} diff --git a/modules/dcache/src/main/smc/org/dcache/pool/migration/Task_sm.dot.png b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task_sm.dot.png new file mode 100644 index 00000000000..f3f88e645a2 Binary files /dev/null and b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task_sm.dot.png differ