diff --git a/.build/build-rat.xml b/.build/build-rat.xml
index 755b76ea58ee..17c1d0ede27d 100644
--- a/.build/build-rat.xml
+++ b/.build/build-rat.xml
@@ -62,6 +62,7 @@
+
diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml
index c41c3cb973a4..ee6c1dd9e255 100644
--- a/conf/cassandra.yaml
+++ b/conf/cassandra.yaml
@@ -604,6 +604,50 @@ commitlog_segment_size: 32MiB
#
# flush_compression: fast
+# Defines the default compression used on tables when none is specified
+# in the CQL command.
+#
+# The class_name is the compressor class name. It may be one of the aliases,
+# the class name of a system ICompressor implementation, or the fully qualified
+# name of a class that implements ICompressor and has a public static 'create' method that accepts
+# a Map argument and returns an instance of the class.
+#
+# class aliases are:
+# Alias System compressor impl.
+# deflate DeflateCompressor
+# lz4 LZ4Compressor
+# none (null) -- compression disabled
+# noop NoopCompressor
+# snappy SnappyCompressor
+# zstd ZstdCompressor
+#
+# The parameters are any required or optional parameters for the instantiation of the
+# specified class, as well as any of the following standard parameters:
+# Parameter Usage
+# enabled Disables compression if set to false. Defaults to true.
+# chunk_length The length of the compression chunks, must include KiB, MiB or GiB suffix, defaults to 16KiB.
+# chunk_length_in_kb Same as above but expects an integer.
+# min_compress_ratio The minimal acceptable compression, must be greater than or equal to 1.0.
+# max_compressed_length The maximum size for a compressed block. Must be less than or equal to chunk_length.
+# Must include KiB, MiB or GiB suffix. Defaults to Integer.MAX_VALUE
+#
+# Only one of the min_compress_ratio and max_compressed_length options can be specified.
+# They are mathematically related in that 'chunk_length / max_compressed_length = min_compress_ratio'.
+# If neither option is specified, a min_compress_ratio of 0.0 and a max_compressed_length of
+# Integer.MAX_VALUE KB are the defaults.
+#
+# Only one of chunk_length or chunk_length_in_kb may be specified.
+#
+# Additional class specific parameters may be added to the parameters section. The value of the class specific
+# parameter must be a string.
+#
+#sstable_compression:
+# - class_name: lz4
+# parameters:
+# - enabled: "true"
+# chunk_length: 16KiB
+# max_compressed_length: 16KiB
+
# any class that implements the SeedProvider interface and has a
# constructor that takes a Map of parameters will do.
seed_provider:
diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py
index 74c6fc10f526..16501e0346f2 100644
--- a/pylib/cqlshlib/cql3handling.py
+++ b/pylib/cqlshlib/cql3handling.py
@@ -64,7 +64,7 @@ class Cql3ParsingRuleSet(CqlParsingRuleSet):
('class', 'max_threshold', 'tombstone_compaction_interval', 'tombstone_threshold', 'enabled',
'unchecked_tombstone_compaction', 'only_purge_repaired_tombstones', 'provide_overlapping_tombstones')),
('compression', 'compression_parameters',
- ('sstable_compression', 'chunk_length_kb', 'crc_check_chance')),
+ ('class', 'chunk_length', 'chunk_length_in_kb', 'crc_check_chance', 'enabled', 'min_compress_ratio', 'max_compressed_length')),
('caching', None,
('rows_per_partition', 'keys')),
)
@@ -498,7 +498,7 @@ def cf_prop_val_completer(ctxt, cass):
exist_opts = ctxt.get_binding('propname')
this_opt = exist_opts[-1]
if this_opt == 'compression':
- return ["{'sstable_compression': '"]
+ return ["{'class': '"]
if this_opt == 'compaction':
return ["{'class': '"]
if this_opt == 'caching':
@@ -563,7 +563,7 @@ def cf_prop_val_mapval_completer(ctxt, cass):
return [Hint('')]
return [Hint('')]
elif opt == 'compression':
- if key == 'sstable_compression':
+ if key == 'class':
return list(map(escape_value, CqlRuleSet.available_compression_classes))
return [Hint('')]
elif opt == 'caching':
diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java
index 2157b225ebdc..2c1a6f215514 100644
--- a/src/java/org/apache/cassandra/config/Config.java
+++ b/src/java/org/apache/cassandra/config/Config.java
@@ -1110,6 +1110,9 @@ public enum PaxosOnLinearizabilityViolation
*/
public ParameterizedClass default_compaction = null;
+ @Nullable
+ public ParameterizedClass sstable_compression;
+
public static Supplier getOverrideLoadConfig()
{
return overrideLoadConfig;
diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
index 298ed3f8f92c..3fe57c7af645 100644
--- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
+++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
@@ -226,6 +226,8 @@ public class DatabaseDescriptor
private static ImmutableMap> sstableFormats;
private static volatile SSTableFormat, ?> selectedSSTableFormat;
+ private static ParameterizedClass sstableCompression;
+
private static Function commitLogSegmentMgrProvider = c -> DatabaseDescriptor.isCDCEnabled()
? new CommitLogSegmentManagerCDC(c, DatabaseDescriptor.getCommitLogLocation())
: new CommitLogSegmentManagerStandard(c, DatabaseDescriptor.getCommitLogLocation());
@@ -867,7 +869,7 @@ else if (conf.repair_session_space.toMebibytes() > (int) (Runtime.getRuntime().m
if (conf.allow_extra_insecure_udfs)
logger.warn("Allowing java.lang.System.* access in UDFs is dangerous and not recommended. Set allow_extra_insecure_udfs: false to disable.");
- if(conf.scripted_user_defined_functions_enabled)
+ if (conf.scripted_user_defined_functions_enabled)
throw new ConfigurationException("JavaScript user-defined functions were removed in CASSANDRA-18252. " +
"Hooks are planned to be introduced as part of CASSANDRA-17280");
@@ -966,6 +968,8 @@ else if (conf.max_value_size.toMebibytes() >= 2048)
if (conf.paxos_state_purging == null)
conf.paxos_state_purging = PaxosStatePurging.legacy;
+ sstableCompression = conf.sstable_compression;
+
logInitializationOutcome(logger);
if (conf.max_space_usable_for_compactions_in_percentage < 0 || conf.max_space_usable_for_compactions_in_percentage > 1)
@@ -2569,6 +2573,16 @@ public static void setFlushCompression(Config.FlushCompression compression)
conf.flush_compression = compression;
}
+    /**
+     * Returns the default sstable compression configuration.
+     *
+     * @return the cached {@code sstable_compression} setting, or {@code null} if none is configured
+     */
+    public static ParameterizedClass getSSTableCompression()
+    {
+        return sstableCompression;
+    }
+
+    /**
+     * Replaces the default sstable compression configuration.
+     * Updates both the loaded config and the cached value returned by
+     * {@link #getSSTableCompression()} so the two cannot drift apart.
+     *
+     * @param compressor the new setting; may be {@code null}
+     */
+    public static void setSSTableCompression(ParameterizedClass compressor)
+    {
+        conf.sstable_compression = compressor;
+        // Keep the cached copy in step; otherwise the getter keeps returning the value captured at startup.
+        sstableCompression = compressor;
+    }
+
/**
* Maximum number of buffers in the compression pool. The default value is 3, it should not be set lower than that
* (one segment in compression, one written to, one in reserve); delays in compression may cause the log to use
diff --git a/src/java/org/apache/cassandra/io/sstable/format/DataComponent.java b/src/java/org/apache/cassandra/io/sstable/format/DataComponent.java
index 9367cb444d80..0a026dcea5e6 100644
--- a/src/java/org/apache/cassandra/io/sstable/format/DataComponent.java
+++ b/src/java/org/apache/cassandra/io/sstable/format/DataComponent.java
@@ -89,7 +89,7 @@ private static CompressionParams buildCompressionParams(TableMetadata metadata,
if (!compressor.recommendedUses().contains(ICompressor.Uses.FAST_COMPRESSION))
{
// The default compressor is generally fast (LZ4 with 16KiB block size)
- compressionParams = CompressionParams.DEFAULT;
+ compressionParams = CompressionParams.defaultParams();
break;
}
// else fall through
diff --git a/src/java/org/apache/cassandra/schema/CompressionParams.java b/src/java/org/apache/cassandra/schema/CompressionParams.java
index d826acc28b49..90c580018a18 100644
--- a/src/java/org/apache/cassandra/schema/CompressionParams.java
+++ b/src/java/org/apache/cassandra/schema/CompressionParams.java
@@ -18,23 +18,24 @@
package org.apache.cassandra.schema;
import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.Function;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableMap;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.HashCodeBuilder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DataStorageSpec;
+import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.config.ParameterizedClass;
import org.apache.cassandra.db.TypeSizes;
import org.apache.cassandra.exceptions.ConfigurationException;
@@ -43,18 +44,16 @@
import org.apache.cassandra.io.util.DataInputPlus;
import org.apache.cassandra.io.util.DataOutputPlus;
import org.apache.cassandra.net.MessagingService;
+import org.apache.cassandra.utils.FBUtilities;
import static java.lang.String.format;
+import static java.util.Collections.emptyMap;
+import static org.apache.cassandra.config.CassandraRelevantProperties.DETERMINISM_SSTABLE_COMPRESSION_DEFAULT;
public final class CompressionParams
{
- private static final Logger logger = LoggerFactory.getLogger(CompressionParams.class);
-
- private static volatile boolean hasLoggedSsTableCompressionWarning;
- private static volatile boolean hasLoggedChunkLengthWarning;
- private static volatile boolean hasLoggedCrcCheckChanceWarning;
-
- public static final int DEFAULT_CHUNK_LENGTH = 1024 * 16;
+ public static final CompressorType DEFAULT_COMPRESSION_TYPE = CompressorType.lz4;
+ public static final int DEFAULT_CHUNK_LENGTH = 1024 * 16; // in bytes (16 KiB)
public static final double DEFAULT_MIN_COMPRESS_RATIO = 0.0; // Since pre-4.0 versions do not understand the
// new compression parameter we can't use a
// different default value.
@@ -62,169 +61,216 @@ public final class CompressionParams
public static final String CLASS = "class";
public static final String CHUNK_LENGTH_IN_KB = "chunk_length_in_kb";
+ /**
+ * Requires a DataStorageSpec suffix
+ */
+ public static final String CHUNK_LENGTH = "chunk_length";
+ /**
+ * Requires a DataStorageSpec suffix
+ */
+ public static final String MAX_COMPRESSED_LENGTH = "max_compressed_length";
public static final String ENABLED = "enabled";
public static final String MIN_COMPRESS_RATIO = "min_compress_ratio";
- public static final CompressionParams DEFAULT = !CassandraRelevantProperties.DETERMINISM_SSTABLE_COMPRESSION_DEFAULT.getBoolean()
- ? noCompression()
- : new CompressionParams(LZ4Compressor.create(Collections.emptyMap()),
- DEFAULT_CHUNK_LENGTH,
- calcMaxCompressedLength(DEFAULT_CHUNK_LENGTH, DEFAULT_MIN_COMPRESS_RATIO),
- DEFAULT_MIN_COMPRESS_RATIO,
- Collections.emptyMap());
-
- public static final CompressionParams NOOP = new CompressionParams(NoopCompressor.create(Collections.emptyMap()),
+ public static final CompressionParams NOOP = new CompressionParams(NoopCompressor.create(emptyMap()),
// 4 KiB is often the underlying disk block size
1024 * 4,
Integer.MAX_VALUE,
DEFAULT_MIN_COMPRESS_RATIO,
- Collections.emptyMap());
+ emptyMap());
- private static final String CRC_CHECK_CHANCE_WARNING = "The option crc_check_chance was deprecated as a compression option. " +
- "You should specify it as a top-level table option instead";
+ private static final CompressionParams DEFAULT = new CompressionParams(LZ4Compressor.create(Collections.emptyMap()),
+ DEFAULT_CHUNK_LENGTH,
+ calcMaxCompressedLength(DEFAULT_CHUNK_LENGTH, DEFAULT_MIN_COMPRESS_RATIO),
+ DEFAULT_MIN_COMPRESS_RATIO,
+ emptyMap());
- @Deprecated public static final String SSTABLE_COMPRESSION = "sstable_compression";
- @Deprecated public static final String CHUNK_LENGTH_KB = "chunk_length_kb";
- @Deprecated public static final String CRC_CHECK_CHANCE = "crc_check_chance";
+ @VisibleForTesting
+ static final String TOO_MANY_CHUNK_LENGTH = format("Only one of '%s' or '%s' may be specified", CHUNK_LENGTH, CHUNK_LENGTH_IN_KB);
private final ICompressor sstableCompressor;
+ /**
+ * The chunk length in bytes
+ */
private final int chunkLength;
- private final int maxCompressedLength; // In content we store max length to avoid rounding errors causing compress/decompress mismatch.
- private final double minCompressRatio; // In configuration we store min ratio, the input parameter.
+ /**
+ * The maximum compressed length in bytes.
+ * In content we store max length to avoid rounding errors causing compress/decompress mismatch.
+ */
+ private final int maxCompressedLength;
+ /**
+ * The minimum compression ratio.
+ * In configuration we store min ratio, the input parameter.
+ * This is mathematically related to chunkLength and maxCompressedLength in that
+ * # chunk_length / max_compressed_length = min_compress_ratio
+ */
+ private final double minCompressRatio;
private final ImmutableMap otherOptions; // Unrecognized options, can be used by the compressor
// TODO: deprecated, should now be carefully removed. Doesn't affect schema code as it isn't included in equals() and hashCode()
private volatile double crcCheckChance = 1.0;
- public static CompressionParams fromMap(Map opts)
- {
- Map options = copyOptions(opts);
- String sstableCompressionClass;
+ public enum CompressorType
+ {
+ lz4(LZ4Compressor.class.getName(), LZ4Compressor::create),
+ noop(NoopCompressor.class.getName(), NoopCompressor::create),
+ snappy(SnappyCompressor.class.getName(), SnappyCompressor::create),
+ deflate(DeflateCompressor.class.getName(), DeflateCompressor::create),
+ zstd(ZstdCompressor.class.getName(), ZstdCompressor::create),
+ none(null, (opt) -> null);
- if (!opts.isEmpty() && isEnabled(opts) && !containsSstableCompressionClass(opts))
- throw new ConfigurationException(format("Missing sub-option '%s' for the 'compression' option.", CLASS));
+ final String className;
+ final Function