diff --git a/silk-core/src/main/scala/org/silkframework/config/Port.scala b/silk-core/src/main/scala/org/silkframework/config/Port.scala index c07e4242b7..d6c25d51a4 100644 --- a/silk-core/src/main/scala/org/silkframework/config/Port.scala +++ b/silk-core/src/main/scala/org/silkframework/config/Port.scala @@ -64,4 +64,8 @@ case class FixedNumberOfInputs(ports: Seq[Port]) extends InputPorts */ case class FlexibleNumberOfInputs(portDefinition: Port = FlexibleSchemaPort, min: Int = 0, - max: Option[Int] = None) extends InputPorts \ No newline at end of file + max: Option[Int] = None) extends InputPorts + +object InputPorts { + final val NoInputPorts = FixedNumberOfInputs(Seq.empty) +} \ No newline at end of file diff --git a/silk-core/src/main/scala/org/silkframework/config/SilkVocab.scala b/silk-core/src/main/scala/org/silkframework/config/SilkVocab.scala index 13ccf444fb..154995b42d 100644 --- a/silk-core/src/main/scala/org/silkframework/config/SilkVocab.scala +++ b/silk-core/src/main/scala/org/silkframework/config/SilkVocab.scala @@ -27,6 +27,9 @@ object SilkVocab { val TripleSchemaType: String = namespace + "TripleSchemaType" val QuadSchemaType: String = namespace + "QuadSchemaType" + // Clear dataset + val ClearDatasetType: String = namespace + "ClearDatasetType" + // Empty table val EmptySchemaType: String = namespace + "EmptySchemaType" diff --git a/silk-core/src/main/scala/org/silkframework/dataset/CombinedEntitySink.scala b/silk-core/src/main/scala/org/silkframework/dataset/CombinedEntitySink.scala index 4982ce3483..705766e57e 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/CombinedEntitySink.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/CombinedEntitySink.scala @@ -28,9 +28,9 @@ class CombinedEntitySink(val sinks: Seq[EntitySink]) extends EntitySink { } } - override def clear()(implicit userContext: UserContext): Unit = { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { for(sink <- 
sinks) { - sink.clear() + sink.clear(force) } } diff --git a/silk-core/src/main/scala/org/silkframework/dataset/DataSink.scala b/silk-core/src/main/scala/org/silkframework/dataset/DataSink.scala index 88e7cd8ab7..bfd4a99bd6 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/DataSink.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/DataSink.scala @@ -9,6 +9,8 @@ trait DataSink extends CloseableDataset { /** * Makes sure that the next write will start from an empty dataset. + * + * @param force If set to true, it should clear the dataset no matter what the config is. */ - def clear()(implicit userContext: UserContext): Unit + def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { } } diff --git a/silk-core/src/main/scala/org/silkframework/dataset/DatasetSpec.scala b/silk-core/src/main/scala/org/silkframework/dataset/DatasetSpec.scala index fa0b66c627..e94c6cb38f 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/DatasetSpec.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/DatasetSpec.scala @@ -78,7 +78,7 @@ case class DatasetSpec[+DatasetType <: Dataset](plugin: DatasetType, /** Datasets don't define input schemata, because any data can be written to them. */ override def inputPorts: InputPorts = { if(readOnly || characteristics.readOnly) { - FixedNumberOfInputs(Seq.empty) + InputPorts.NoInputPorts } else if(characteristics.supportsMultipleWrites) { FlexibleNumberOfInputs() } else { @@ -291,7 +291,12 @@ object DatasetSpec { /** * Makes sure that the next write will start from an empty dataset. 
*/ - override def clear()(implicit userContext: UserContext): Unit = entitySink.clear() + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { + if(datasetSpec.readOnly) { + throw new RuntimeException(s"Cannot clear dataset, because it is configured as read-only.") + } + entitySink.clear(force) + } @inline private def prependUri(uri: String, values: IndexedSeq[Seq[String]]): IndexedSeq[Seq[String]] = { @@ -354,7 +359,12 @@ object DatasetSpec { /** * Makes sure that the next write will start from an empty dataset. */ - override def clear()(implicit userContext: UserContext): Unit = linkSink.clear() + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { + if(datasetSpec.readOnly) { + throw new RuntimeException(s"Cannot clear dataset, because it is configured as read-only.") + } + linkSink.clear(force) + } } /** diff --git a/silk-core/src/main/scala/org/silkframework/dataset/DirtyTrackingFileDataSink.scala b/silk-core/src/main/scala/org/silkframework/dataset/DirtyTrackingFileDataSink.scala index f6ffb7d896..eca558249c 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/DirtyTrackingFileDataSink.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/DirtyTrackingFileDataSink.scala @@ -15,6 +15,11 @@ trait DirtyTrackingFileDataSink extends DataSink { DirtyTrackingFileDataSink.addUpdatedFile(resource.name) super.close() } + + abstract override def clear(force: Boolean)(implicit userContext: UserContext): Unit = { + DirtyTrackingFileDataSink.addUpdatedFile(resource.name) + super.clear(force) + } } object DirtyTrackingFileDataSink { diff --git a/silk-core/src/main/scala/org/silkframework/dataset/EmptyDataset.scala b/silk-core/src/main/scala/org/silkframework/dataset/EmptyDataset.scala index b1e5b750a5..ce7ce12f97 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/EmptyDataset.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/EmptyDataset.scala @@ -43,7 +43,7 
@@ object EmptyDataset extends Dataset with Serializable { * Makes sure that the next write will start from an empty dataset. * Does nothing as this dataset is always empty */ - override def clear()(implicit userContext: UserContext): Unit = {} + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = {} } /** @@ -67,7 +67,7 @@ object EmptyDataset extends Dataset with Serializable { * Makes sure that the next write will start from an empty dataset. * Does nothing as this dataset is always empty */ - override def clear()(implicit userContext: UserContext): Unit = {} + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = {} } override def characteristics: DatasetCharacteristics = DatasetCharacteristics.attributesOnly() diff --git a/silk-core/src/main/scala/org/silkframework/dataset/FilteredLinkSink.scala b/silk-core/src/main/scala/org/silkframework/dataset/FilteredLinkSink.scala index 07c426f635..d69cac4259 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/FilteredLinkSink.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/FilteredLinkSink.scala @@ -30,5 +30,5 @@ case class FilteredLinkSink(linkSink: LinkSink, filterFn: Link => Boolean) exten /** * Makes sure that the next write will start from an empty dataset. 
*/ - override def clear()(implicit userContext: UserContext): Unit = linkSink.clear() + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = linkSink.clear(force) } diff --git a/silk-core/src/main/scala/org/silkframework/dataset/SafeModeDataSource.scala b/silk-core/src/main/scala/org/silkframework/dataset/SafeModeDataSource.scala index b30c621212..26d0546dad 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/SafeModeDataSource.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/SafeModeDataSource.scala @@ -37,7 +37,7 @@ object SafeModeDataSource extends DataSource { object SafeModeSink extends DataSink with LinkSink with EntitySink { - override def clear()(implicit userContext: UserContext): Unit = { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { SafeModeException.throwSafeModeException() } diff --git a/silk-core/src/main/scala/org/silkframework/dataset/TableLinkSink.scala b/silk-core/src/main/scala/org/silkframework/dataset/TableLinkSink.scala index 44bd2c5e14..1d8434895e 100644 --- a/silk-core/src/main/scala/org/silkframework/dataset/TableLinkSink.scala +++ b/silk-core/src/main/scala/org/silkframework/dataset/TableLinkSink.scala @@ -27,8 +27,8 @@ class TableLinkSink(entitySink: EntitySink) extends LinkSink { entitySink.close() } - override def clear()(implicit userContext: UserContext): Unit = { - entitySink.clear() + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { + entitySink.clear(force) } } diff --git a/silk-core/src/main/scala/org/silkframework/dataset/operations/ClearDatasetOperator.scala b/silk-core/src/main/scala/org/silkframework/dataset/operations/ClearDatasetOperator.scala new file mode 100644 index 0000000000..91b052d1ac --- /dev/null +++ b/silk-core/src/main/scala/org/silkframework/dataset/operations/ClearDatasetOperator.scala @@ -0,0 +1,35 @@ +package org.silkframework.dataset.operations + +import 
org.silkframework.config._ +import org.silkframework.entity.EntitySchema +import org.silkframework.execution.EmptyEntityHolder +import org.silkframework.execution.local.LocalEntities +import org.silkframework.runtime.plugin.annotations.Plugin + +@Plugin( + id = "clearDataset", + label = "Clear dataset", + description = + """Clears the dataset that is connected to the output of this operator.""" +) +case class ClearDatasetOperator() extends CustomTask { + + /** + * The input ports and their schemata. + */ + override def inputPorts: InputPorts = InputPorts.NoInputPorts + + /** + * The output port and its schema. + * None, if this operator does not generate any output. + */ + override def outputPort: Option[Port] = Some(FixedSchemaPort(ClearDatasetOperator.clearDatasetSchema)) +} + +object ClearDatasetOperator { + private val clearDatasetSchema = EntitySchema(SilkVocab.ClearDatasetType, IndexedSeq.empty) + + case class ClearDatasetTable(task: Task[TaskSpec]) extends LocalEntities with EmptyEntityHolder { + override def entitySchema: EntitySchema = clearDatasetSchema + } +} \ No newline at end of file diff --git a/silk-core/src/main/scala/org/silkframework/dataset/operations/ClearDatasetOperatorLocalExecutor.scala b/silk-core/src/main/scala/org/silkframework/dataset/operations/ClearDatasetOperatorLocalExecutor.scala new file mode 100644 index 0000000000..7c94e27ed7 --- /dev/null +++ b/silk-core/src/main/scala/org/silkframework/dataset/operations/ClearDatasetOperatorLocalExecutor.scala @@ -0,0 +1,47 @@ +package org.silkframework.dataset.operations + +import org.silkframework.config.{Task, TaskSpec} +import org.silkframework.dataset.operations.ClearDatasetOperator.ClearDatasetTable +import org.silkframework.execution.local.{LocalEntities, LocalExecution, LocalExecutor} +import org.silkframework.execution.{ExecutionReport, ExecutionReportUpdater, ExecutorOutput, SimpleExecutionReport} +import org.silkframework.runtime.activity.ActivityContext +import 
org.silkframework.runtime.plugin.PluginContext + +/** Executes a clear dataset operator. */ +case class ClearDatasetOperatorLocalExecutor() extends LocalExecutor[ClearDatasetOperator] { + + override def execute(task: Task[ClearDatasetOperator], + inputs: Seq[LocalEntities], + output: ExecutorOutput, + execution: LocalExecution, + context: ActivityContext[ExecutionReport]) + (implicit pluginContext: PluginContext): Option[LocalEntities] = { + context.value.update(SimpleExecutionReport( + task = task, + summary = Seq.empty, + warnings = Seq.empty, + error = None, + isDone = true, + entityCount = 1, + operation = Some("generate clear instruction"), + operationDesc = "clear instruction generated" + )) + Some(ClearDatasetTable(task)) + } +} + +case class ClearDatasetOperatorExecutionReportUpdater(task: Task[TaskSpec], + context: ActivityContext[ExecutionReport]) extends ExecutionReportUpdater { + + override def operationLabel: Option[String] = Some("clear dataset") + + override def entityLabelSingle: String = "dataset" + override def entityLabelPlural: String = "datasets" + override def entityProcessVerb: String = "cleared" + + override def minEntitiesBetweenUpdates: Int = 1 + + override def additionalFields(): Seq[(String, String)] = Seq( + "Cleared dataset" -> task.fullLabel + ) +} \ No newline at end of file diff --git a/silk-core/src/main/scala/org/silkframework/execution/local/LocalDatasetExecutor.scala b/silk-core/src/main/scala/org/silkframework/execution/local/LocalDatasetExecutor.scala index 6996fc1669..cd1c1ca83d 100644 --- a/silk-core/src/main/scala/org/silkframework/execution/local/LocalDatasetExecutor.scala +++ b/silk-core/src/main/scala/org/silkframework/execution/local/LocalDatasetExecutor.scala @@ -4,6 +4,8 @@ import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.dataset.CloseableDataset.using import org.silkframework.dataset.DatasetSpec.{EntitySinkWrapper, GenericDatasetSpec} import org.silkframework.dataset._ +import 
org.silkframework.dataset.operations.ClearDatasetOperator.ClearDatasetTable +import org.silkframework.dataset.operations.ClearDatasetOperatorExecutionReportUpdater import org.silkframework.dataset.rdf._ import org.silkframework.entity._ import org.silkframework.execution._ @@ -147,6 +149,8 @@ abstract class LocalDatasetExecutor[DatasetType <: Dataset] extends DatasetExecu uploadFilesViaGraphStore(dataset, graphStoreFiles, reportUpdater) case sparqlUpdateTable: SparqlUpdateEntityTable => executeSparqlUpdateQueries(dataset, sparqlUpdateTable, execution) + case _: ClearDatasetTable => + executeClearDataset(dataset) case et: LocalEntities => writeGenericLocalEntities(dataset, et, execution) } @@ -229,6 +233,17 @@ abstract class LocalDatasetExecutor[DatasetType <: Dataset] extends DatasetExecu } } + private def executeClearDataset(dataset: Task[DatasetSpec[DatasetType]]) + (implicit userContext: UserContext, context: ActivityContext[ExecutionReport]): Unit = { + if(dataset.readOnly) { + throw new RuntimeException(s"Cannot clear dataset '${dataset.fullLabel}', because it is configured as read-only.") + } + val executionReport = ClearDatasetOperatorExecutionReportUpdater(dataset, context) + dataset.entitySink.clear(force = true) + executionReport.increaseEntityCounter() + executionReport.executionDone() + } + /** Buffers queries to make prediction about how many queries will be executed. 
* * @param bufferSize max size of queries that should be buffered diff --git a/silk-core/src/main/scala/org/silkframework/plugins/CorePlugins.scala b/silk-core/src/main/scala/org/silkframework/plugins/CorePlugins.scala index c6a7d9a632..4541c1fca0 100644 --- a/silk-core/src/main/scala/org/silkframework/plugins/CorePlugins.scala +++ b/silk-core/src/main/scala/org/silkframework/plugins/CorePlugins.scala @@ -19,6 +19,7 @@ import org.silkframework.config.Task.GenericTaskFormat import org.silkframework.config.TaskSpec.TaskSpecXmlFormat import org.silkframework.dataset.DatasetSpec.{DatasetSpecFormat, DatasetTaskXmlFormat} import org.silkframework.dataset.VariableDataset +import org.silkframework.dataset.operations.{ClearDatasetOperator, ClearDatasetOperatorLocalExecutor} import org.silkframework.entity.EntitySchema.EntitySchemaFormat import org.silkframework.entity.ValueType import org.silkframework.execution.local.LocalExecutionManager @@ -32,13 +33,19 @@ import scala.language.existentials */ class CorePlugins extends PluginModule { - override def pluginClasses: Seq[Class[_ <: AnyPlugin]] = datasets ++ serializers ++ valueTypes :+ classOf[LocalExecutionManager] + override def pluginClasses: Seq[Class[_ <: AnyPlugin]] = datasets ++ datasetOperations ++ serializers ++ valueTypes :+ classOf[LocalExecutionManager] private def datasets: Seq[Class[_ <: AnyPlugin]] = classOf[InternalDataset] :: classOf[VariableDataset] :: Nil + private def datasetOperations: Seq[Class[_ <: AnyPlugin]] = { + classOf[ClearDatasetOperator] :: + classOf[ClearDatasetOperatorLocalExecutor] :: + Nil + } + private def serializers: Seq[Class[_ <: AnyPlugin]] = TaskSpecXmlFormat.getClass :: GenericTaskFormat.getClass :: diff --git a/silk-core/src/test/scala/org/silkframework/dataset/MockDataset.scala b/silk-core/src/test/scala/org/silkframework/dataset/MockDataset.scala index ccc5364137..25428553aa 100644 --- a/silk-core/src/test/scala/org/silkframework/dataset/MockDataset.scala +++ 
b/silk-core/src/test/scala/org/silkframework/dataset/MockDataset.scala @@ -64,7 +64,7 @@ case class DummyLinkSink(writeLinkFn: (Link, String) => Unit, writeLinkFn(link, predicateUri) } - override def clear()(implicit userContext: UserContext): Unit = { clearFn() } + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { clearFn() } override def close()(implicit userContext: UserContext): Unit = {} } @@ -79,7 +79,7 @@ case class DummyEntitySink(writeEntityFn: (String, Seq[Seq[String]]) => Unit, writeEntityFn(subject, values) } - override def clear()(implicit userContext: UserContext): Unit = { clearFn() } + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { clearFn() } override def closeTable()(implicit userContext: UserContext): Unit = {} diff --git a/silk-plugins/silk-persistent-caching/src/main/scala/org/silkframework/plugins/dataset/hierarchical/HierarchicalSink.scala b/silk-plugins/silk-persistent-caching/src/main/scala/org/silkframework/plugins/dataset/hierarchical/HierarchicalSink.scala index dd62fc3baa..762285a507 100644 --- a/silk-plugins/silk-persistent-caching/src/main/scala/org/silkframework/plugins/dataset/hierarchical/HierarchicalSink.scala +++ b/silk-plugins/silk-persistent-caching/src/main/scala/org/silkframework/plugins/dataset/hierarchical/HierarchicalSink.scala @@ -92,7 +92,7 @@ abstract class HierarchicalSink extends EntitySink { /** * Makes sure that the next write will start from an empty dataset. */ - override def clear()(implicit userContext: UserContext): Unit = { } + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { } /** * Outputs all entities in the cache to a HierarchicalEntityWriter. 
diff --git a/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvDataset.scala b/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvDataset.scala index f02d779a66..cbf96cb105 100644 --- a/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvDataset.scala +++ b/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvDataset.scala @@ -45,8 +45,8 @@ case class CsvDataset ( quoteEscapeCharacter: String = "\"", @Param(label = "ZIP file regex", value = "If the input resource is a ZIP file, files inside the file are filtered via this regex.", advanced = true) override val zipFileRegex: String = CsvDataset.defaultZipFileRegex, - @Param(label = "Delete file before workflow execution", - value = "If set to true this will clear the specified file before executing a workflow that writes to it.", + @Param(label = "Delete file before workflow execution (deprecated)", + value = "This is deprecated, use the 'Clear dataset' operator instead to clear a dataset in a workflow. 
If set to true this will clear the specified file before executing a workflow that writes to it.", advanced = true) clearBeforeExecution: Boolean = false) extends Dataset with DatasetPluginAutoConfigurable[CsvDataset] with CsvDatasetTrait with TextBulkResourceBasedDataset with WritableResourceDataset { diff --git a/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvSink.scala b/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvSink.scala index e506dc7b6a..72ba36c200 100644 --- a/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvSink.scala +++ b/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/csv/CsvSink.scala @@ -9,7 +9,7 @@ import org.silkframework.util.Uri import java.io.{File, IOException} import java.util.logging.Logger -class CsvSink(val resource: WritableResource, settings: CsvSettings) extends DataSink with DirtyTrackingFileDataSink { +class CsvSink(val resource: WritableResource, settings: CsvSettings) extends DirtyTrackingFileDataSink { private val log: Logger = Logger.getLogger(getClass.getName) @volatile @@ -41,8 +41,8 @@ class CsvSink(val resource: WritableResource, settings: CsvSettings) extends Dat /** * Makes sure that the next write will start from an empty dataset. 
*/ - override def clear()(implicit userContext: UserContext): Unit = { - if(settings.clearBeforeExecution) { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { + if(settings.clearBeforeExecution || force) { val resourceFile = new File(resource.path).getAbsoluteFile val resourcePath = resourceFile.toPath val crcFile = new File(resourcePath.getParent.toFile, s".${resourcePath.getFileName.toString}.crc") @@ -56,6 +56,7 @@ class CsvSink(val resource: WritableResource, settings: CsvSettings) extends Dat case e: IOException => log.warning("IO exception occurred when deleting CRC file: " + e.getMessage) } + super.clear(force) } } } diff --git a/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/text/TextFileSink.scala b/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/text/TextFileSink.scala index c46028d2d0..23fffdc6c4 100644 --- a/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/text/TextFileSink.scala +++ b/silk-plugins/silk-plugins-csv/src/main/scala/org/silkframework/plugins/dataset/text/TextFileSink.scala @@ -40,7 +40,7 @@ class TextFileSink(ds: TextFileDataset) extends EntitySink with LinkSink { writeEntity("", IndexedSeq(Seq(link.source), Seq(link.target))) } - override def clear()(implicit userContext: UserContext): Unit = { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { ds.file.writeString("", codec = ds.codec) } diff --git a/silk-plugins/silk-plugins-json/src/main/scala/org/silkframework/plugins/dataset/json/JsonSink.scala b/silk-plugins/silk-plugins-json/src/main/scala/org/silkframework/plugins/dataset/json/JsonSink.scala index 12f55cc9b5..ffc8f1df4c 100644 --- a/silk-plugins/silk-plugins-json/src/main/scala/org/silkframework/plugins/dataset/json/JsonSink.scala +++ b/silk-plugins/silk-plugins-json/src/main/scala/org/silkframework/plugins/dataset/json/JsonSink.scala @@ -25,8 +25,9 @@ class 
JsonSink (val resource: WritableResource, /** * Makes sure that the next write will start from an empty dataset. */ - override def clear()(implicit userContext: UserContext): Unit = { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { resource.delete() + super.clear(force) } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/GraphStoreSink.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/GraphStoreSink.scala index 5a6d16beac..8bfdf786fa 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/GraphStoreSink.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/GraphStoreSink.scala @@ -138,8 +138,8 @@ case class GraphStoreSink(graphStore: GraphStoreTrait, writeStatement(subject, predicate, obj, valueType) } - override def clear()(implicit userContext: UserContext): Unit = { - if(dropGraphOnClear) { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { + if(dropGraphOnClear || force) { log.fine("Clearing graph " + graphUri) graphStore.deleteGraph(graphUri) } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/SparqlSink.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/SparqlSink.scala index d3551c5d47..d7e0c7353b 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/SparqlSink.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/access/SparqlSink.scala @@ -66,8 +66,8 @@ class SparqlSink(params: SparqlParams, } } - override def clear()(implicit userContext: UserContext): Unit = { - if(dropGraphOnClear) { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { + if(dropGraphOnClear 
|| force) { params.graph match { case Some(graph) => endpoint.update(s"DROP SILENT GRAPH <$graph>") diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/InMemoryDataset.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/InMemoryDataset.scala index 7bc7529a0c..96597fce64 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/InMemoryDataset.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/InMemoryDataset.scala @@ -14,9 +14,10 @@ import org.silkframework.runtime.plugin.annotations.{Param, Plugin} categories = Array(DatasetCategories.embedded), description = "A Dataset that holds all data in-memory." ) -case class InMemoryDataset(@Param(label = "Clear graph before workflow execution", - value = "If set to true this will clear this dataset before it is used in a workflow execution.") - clearGraphBeforeExecution: Boolean = true) extends RdfDataset with TripleSinkDataset { +case class InMemoryDataset(@Param(label = "Clear graph before workflow execution (deprecated)", + value = "This is deprecated, use the 'Clear dataset' operator instead to clear a dataset in a workflow. 
If set to true this will clear this dataset before it is used in a workflow execution.", + advanced = true) + clearGraphBeforeExecution: Boolean = false) extends RdfDataset with TripleSinkDataset { private val model = ModelFactory.createDefaultModel() diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/RdfInMemoryDataset.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/RdfInMemoryDataset.scala index 05a06b4b45..2b774964f9 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/RdfInMemoryDataset.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/RdfInMemoryDataset.scala @@ -20,9 +20,10 @@ import org.silkframework.runtime.plugin.annotations.{Param, Plugin} @Plugin(id = "rdf", label = "RDF in-memory", description = "A Dataset where all entities are given directly in the configuration.") case class RdfInMemoryDataset(data: String, format: String, - @Param(label = "Clear graph before workflow execution", - value = "If set to true this will clear the specified graph before executing a workflow that writes to it.") - clearBeforeExecution: Boolean = true) extends RdfDataset with TripleSinkDataset { + @Param(label = "Clear graph before workflow execution (deprecated)", + value = "This is deprecated, use the 'Clear dataset' operator instead to clear a dataset in a workflow. 
If set to true this will clear the specified graph before executing a workflow that writes to it.", + advanced = true) + clearBeforeExecution: Boolean = false) extends RdfDataset with TripleSinkDataset { private lazy val model = ModelFactory.createDefaultModel model.read(new StringReader(data), null, format) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/SparqlDataset.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/SparqlDataset.scala index f25a076754..5e3f9b93de 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/SparqlDataset.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/datasets/SparqlDataset.scala @@ -38,8 +38,9 @@ case class SparqlDataset( strategy: EntityRetrieverStrategy = EntityRetrieverStrategy.parallel, @Param("Include useOrderBy in queries to enforce correct order of values.") useOrderBy: Boolean = true, - @Param(label = "Clear graph before workflow execution", - value = "If set to true this will clear the specified graph before executing a workflow that writes to it.") + @Param(label = "Clear graph before workflow execution (deprecated)", + value = "This is deprecated, use the 'Clear dataset' operator instead to clear a dataset in a workflow. 
If set to true this will clear the specified graph before executing a workflow that writes to it.", + advanced = true) clearGraphBeforeExecution: Boolean = false, @Param( label = "SPARQL query timeout (ms)", diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedEntitySink.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedEntitySink.scala index 12a98dafbd..ea22b5b96a 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedEntitySink.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedEntitySink.scala @@ -71,5 +71,5 @@ class FormattedEntitySink(resource: WritableResource, formatter: EntityFormatter /** * Makes sure that the next write will start from an empty dataset. */ - override def clear()(implicit userContext: UserContext): Unit = resource.delete() + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = resource.delete() } \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedJenaLinkSink.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedJenaLinkSink.scala index 03b87e123b..6f16dc29f6 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedJenaLinkSink.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedJenaLinkSink.scala @@ -34,7 +34,7 @@ class FormattedJenaLinkSink(model: Model, /** * Makes sure that the next write will start from an empty dataset. 
*/ - override def clear()(implicit userContext: UserContext): Unit = { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { model.removeAll() } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedLinkSink.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedLinkSink.scala index eda9adeff1..c0203f5f38 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedLinkSink.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/formatters/FormattedLinkSink.scala @@ -58,7 +58,7 @@ class FormattedLinkSink (resource: WritableResource, formatter: LinkFormatter) e /** * Makes sure that the next write will start from an empty dataset. */ - override def clear()(implicit userContext: UserContext): Unit = { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { resource.delete() } } \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 60fd80e717..b37525b099 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -46,7 +46,7 @@ case class SparqlUpdateCustomTask(@Param(label = "SPARQL update query", value = override def inputPorts: InputPorts = { if(isStaticTemplate) { - FixedNumberOfInputs(Seq.empty) + InputPorts.NoInputPorts } else { FixedNumberOfInputs(Seq(FixedSchemaPort(expectedInputSchema))) } diff --git 
a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index 7ff29d546a..df0db83c4a 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -125,7 +125,7 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW class DummyTaskSpec(params: Map[String, String]) extends CustomTask { - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def outputPort: Option[Port] = None diff --git a/silk-plugins/silk-plugins-xml/src/main/scala/org/silkframework/plugins/dataset/xml/XmlSink.scala b/silk-plugins/silk-plugins-xml/src/main/scala/org/silkframework/plugins/dataset/xml/XmlSink.scala index 148d1a12ef..589f4c97bc 100644 --- a/silk-plugins/silk-plugins-xml/src/main/scala/org/silkframework/plugins/dataset/xml/XmlSink.scala +++ b/silk-plugins/silk-plugins-xml/src/main/scala/org/silkframework/plugins/dataset/xml/XmlSink.scala @@ -27,7 +27,8 @@ class XmlSink(val resource: WritableResource, /** * Makes sure that the next write will start from an empty dataset. 
*/ - override def clear()(implicit userContext: UserContext): Unit = { + override def clear(force: Boolean = false)(implicit userContext: UserContext): Unit = { resource.delete() + super.clear(force) } } \ No newline at end of file diff --git a/silk-workbench/silk-workbench-core/test/controllers/core/PluginApiTest.scala b/silk-workbench/silk-workbench-core/test/controllers/core/PluginApiTest.scala index 0117d83761..81ea091d79 100644 --- a/silk-workbench/silk-workbench-core/test/controllers/core/PluginApiTest.scala +++ b/silk-workbench/silk-workbench-core/test/controllers/core/PluginApiTest.scala @@ -96,7 +96,7 @@ case class AutoCompletableTestPlugin(@Param(value = "Some param", autoCompletion autoCompleteValueWithLabels = true, allowOnlyAutoCompletedValues = true, autoCompletionDependsOnParameters = Array("otherParam")) completableParam: String, otherParam: String) extends CustomTask { - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def outputPort: Option[Port] = None } diff --git a/silk-workbench/silk-workbench-workflow/test/controllers/workflowApi/WorkflowApiTest.scala b/silk-workbench/silk-workbench-workflow/test/controllers/workflowApi/WorkflowApiTest.scala index 4445bf13ad..32a13692ee 100644 --- a/silk-workbench/silk-workbench-workflow/test/controllers/workflowApi/WorkflowApiTest.scala +++ b/silk-workbench/silk-workbench-workflow/test/controllers/workflowApi/WorkflowApiTest.scala @@ -146,7 +146,7 @@ object BlockingTask { /** Task that blocks until externally released. 
*/ case class BlockingTask() extends CustomTask { - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def outputPort: Option[Port] = None } diff --git a/silk-workbench/silk-workbench-workspace/app/controllers/workspace/DatasetApi.scala b/silk-workbench/silk-workbench-workspace/app/controllers/workspace/DatasetApi.scala index b2c8a21fd0..cbfb59b817 100644 --- a/silk-workbench/silk-workbench-workspace/app/controllers/workspace/DatasetApi.scala +++ b/silk-workbench/silk-workbench-workspace/app/controllers/workspace/DatasetApi.scala @@ -26,7 +26,7 @@ import org.silkframework.rule.TransformSpec import org.silkframework.runtime.activity.UserContext import org.silkframework.runtime.plugin.{ParameterValues, PluginContext} import org.silkframework.runtime.serialization.ReadContext -import org.silkframework.runtime.validation.{BadUserInputException, RequestException} +import org.silkframework.runtime.validation.{BadUserInputException, ConflictRequestException, RequestException} import org.silkframework.util.Uri import org.silkframework.workbench.Context import org.silkframework.workbench.utils.ErrorResult @@ -265,6 +265,49 @@ class LegacyDatasetApi @Inject() (implicit workspaceReact: WorkspaceReact) exten NoContent } + @Operation( + summary = "Clear dataset", + description = "Clears the data/content of a dataset.", + responses = Array( + new ApiResponse( + responseCode = "204", + description = "If the dataset has cleared." + ), + new ApiResponse( + responseCode = "404", + description = "If the specified project or dataset has not been found." + ), + new ApiResponse( + responseCode = "409", + description = "If the dataset is currently configured as read-only. The user needs to change the config before trying again." 
+ ) + ) + ) + def clearDataset(@Parameter( + name = "projectId", + description = "The project identifier", + required = true, + in = ParameterIn.PATH, + schema = new Schema(implementation = classOf[String]) + ) + projectId: String, + @Parameter( + name = "datasetId", + description = "The dataset identifier", + required = true, + in = ParameterIn.PATH, + schema = new Schema(implementation = classOf[String]) + ) + datasetId: String): Action[AnyContent] = UserContextAction { implicit userContext => + val dataset = task[GenericDatasetSpec](projectId, datasetId) + if(dataset.readOnly) { + throw ConflictRequestException(s"Dataset '${dataset.fullLabel}' is set to read-only and cannot be cleared!") + } else { + dataset.data.entitySink.clear(force = true) + NoContent + } + } + def datasetDialog(projectName: String, datasetName: String, title: String = "Edit Dataset", diff --git a/silk-workbench/silk-workbench-workspace/conf/workspace.routes b/silk-workbench/silk-workbench-workspace/conf/workspace.routes index f567b27984..578d02ccd7 100644 --- a/silk-workbench/silk-workbench-workspace/conf/workspace.routes +++ b/silk-workbench/silk-workbench-workspace/conf/workspace.routes @@ -46,6 +46,7 @@ GET /projects/:project/datasets/:name/file PUT /projects/:project/datasets/:name/file controllers.workspace.LegacyDatasetApi.uploadFile(project: String, name: String) GET /projects/:project/datasets/:name/mappingCoverage controllers.workspace.LegacyDatasetApi.getMappingCoverage(project: String, name: String) POST /projects/:project/datasets/:name/mappingCoverage/values controllers.workspace.LegacyDatasetApi.getMappingValueCoverage(project: String, name: String) +POST /projects/:projectId/datasets/:datasetId/clear controllers.workspace.LegacyDatasetApi.clearDataset(projectId: String, datasetId: String) ############################################################################################################################################ # CustomTasks diff --git 
a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/ActivityApiTest.scala b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/ActivityApiTest.scala index 9b16167d4f..e678b9954e 100644 --- a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/ActivityApiTest.scala +++ b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/ActivityApiTest.scala @@ -178,7 +178,7 @@ class ActivityApiTest extends PlaySpec with ConfigTestTrait with IntegrationTest } case class MessageTask(message: String) extends CustomTask { - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def outputPort: Option[Port] = None } diff --git a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/WorkflowExecutionIntegrationTest.scala b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/WorkflowExecutionIntegrationTest.scala index 5ae35dd3ed..cc51572e93 100644 --- a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/WorkflowExecutionIntegrationTest.scala +++ b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/WorkflowExecutionIntegrationTest.scala @@ -55,7 +55,7 @@ object CountingTask { } /** Task that counts its executions. 
*/ case class CountingTask() extends CustomTask { - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def outputPort: Option[Port] = None } diff --git a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala index b9b29aa166..c6e79c629d 100644 --- a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala +++ b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala @@ -716,7 +716,7 @@ trait WorkspaceProviderTestTrait extends AnyFlatSpec with Matchers with MockitoS @Plugin(id = "test", label = "test task") case class TestCustomTask(stringParam: String, numberParam: Int) extends CustomTask { - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def outputPort: Option[Port] = None } @@ -733,7 +733,7 @@ object WorkspaceProviderTestPlugins { throw new FailingTaskException("Failed!") } - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def outputPort: Option[Port] = None } diff --git a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceTest.scala b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceTest.scala index 33053f35aa..5097fa4e41 100644 --- a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceTest.scala +++ b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceTest.scala @@ -152,7 +152,7 @@ object WorkspaceTest { } case class TestTask(testParam: String = "test value") extends CustomTask { - override def inputPorts: InputPorts = FixedNumberOfInputs(Seq.empty) + override def inputPorts: InputPorts = InputPorts.NoInputPorts override def 
outputPort: Option[Port] = None } diff --git a/workspace/src/app/views/shared/modals/DeleteModal.tsx b/workspace/src/app/views/shared/modals/DeleteModal.tsx index 814aa94deb..53f971f34d 100644 --- a/workspace/src/app/views/shared/modals/DeleteModal.tsx +++ b/workspace/src/app/views/shared/modals/DeleteModal.tsx @@ -31,6 +31,8 @@ export interface IDeleteModalOptions extends TestableComponent { //optional prop to disable the delete button deleteDisabled?: boolean; alternativeCancelButtonLabel?: string; + /** An alternative button text than the 'Delete' text. */ + alternativeDeleteButtonText?: string; } export default function DeleteModal({ @@ -46,6 +48,7 @@ export default function DeleteModal({ submitOnEnter = true, deleteDisabled, alternativeCancelButtonLabel, + alternativeDeleteButtonText, ...otherProps }: IDeleteModalOptions) { const [isConfirmed, setIsConfirmed] = useState(false); @@ -88,7 +91,7 @@ export default function DeleteModal({ disabled={(confirmationRequired && !isConfirmed) || deleteDisabled} data-test-id={"remove-item-button"} > - {t("common.action.delete", "Delete")} + {alternativeDeleteButtonText ?? t("common.action.delete", "Delete")} ,