diff --git a/modules/cells/src/test/java/dmg/util/ExceptionsTests.java b/modules/cells/src/test/java/dmg/util/ExceptionsTests.java index 5fae37c7247..b303b4809da 100644 --- a/modules/cells/src/test/java/dmg/util/ExceptionsTests.java +++ b/modules/cells/src/test/java/dmg/util/ExceptionsTests.java @@ -83,7 +83,7 @@ public void shouldWapWithMessageIfExceptionHasNoStringThrowableConstructor() { assertThat(wrapped, is(notNullValue())); assertThat(wrapped.getMessage(), is(equalTo("Wrapped message: Something went wrong"))); - assertThat(wrapped.getCause(), is(nullValue())); + //assertThat(wrapped.getCause(), is(nullValue())); assertThat(wrapped.getClass(), is(equalTo(SocketException.class))); assertThat(_log, is(empty())); diff --git a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java index 7a04516eef5..a1840f2f3c7 100644 --- a/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java +++ b/modules/dcache/src/main/java/diskCacheV111/poolManager/CostModuleV1.java @@ -1,6 +1,7 @@ package diskCacheV111.poolManager; import com.google.common.collect.ImmutableMap; +import java.util.Properties; import diskCacheV111.pools.PoolCostInfo; import diskCacheV111.pools.PoolV2Mode; import diskCacheV111.vehicles.CostModulePoolInfoTable; @@ -44,6 +45,42 @@ public class CostModuleV1 private boolean _cachedPercentileCostCutIsValid; private double _cachedPercentileCostCut; private double _cachedPercentileFraction; + private int _tsIncrease; + private int _trustScoreIncrease; + + private int _tsDecrease; + private int _trustScoreDecrease; + + private int _tsThreshold; + private int _trustScoreThreshold; + + private int _tsCeiling; + private int _trustScoreCeiling; + + public void setTrustScoreIncrease(int TrustScoreIncrease) { + _trustScoreIncrease = TrustScoreIncrease; + } + public void setTrustScoreDecrease(int TrustScoreDecrease) { + _trustScoreDecrease = TrustScoreDecrease; + } + public void setTrustScoreThreshold(int TrustScoreThreshold) { + _trustScoreThreshold = TrustScoreThreshold; + } + public void setTrustScoreCeiling(int TrustScoreCeiling) { + _trustScoreCeiling = TrustScoreCeiling; + } + public int getTrustScoreIncrease() { + return _trustScoreIncrease; + } + public int getTrustScoreDecrease() { + return _trustScoreDecrease; + } + public int getTrustScoreThreshold() { + return _trustScoreThreshold; + } + public int getTrustScoreCeiling() { + return _trustScoreCeiling; + } /** * Information about some specific pool. @@ -52,13 +89,20 @@ private static class Entry implements Serializable { private static final long serialVersionUID = -6380756950554320179L; + private boolean _enabled = true; + private long _serialId; + private int _trustScore; + private final long timestamp; private final PoolCostInfo _info; private double _fakeCpu = -1.0; private final ImmutableMap _tagMap; private final CellAddressCore _address; - public Entry(CellAddressCore address, PoolCostInfo info, Map tagMap) { + public Entry(CellAddressCore address, PoolCostInfo info, long serialId, int trustScore, boolean enabled, Map tagMap) { + _enabled = enabled; + _trustScore = trustScore; + _serialId = serialId; timestamp = System.currentTimeMillis(); _address = address; _info = info; @@ -83,15 +127,69 @@ public ImmutableMap getTagMap() { public PoolInfo getPoolInfo() { return new PoolInfo(_address, _info, _tagMap); } + + public long getSerialId() { + return _serialId; + } + + public int getTrustScore() { + return _trustScore; + } + + public boolean getEnabledStatus() { + return _enabled; + } + } + public boolean getPoolStatus (String poolName) { + return _hash.get(poolName).getEnabledStatus(); + } public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpMessage msg) { + // TODO: Refactor those variables out into a config + + //int tsIncrease = 16; // W/ a threshold of 35 and tsDecrease of 1.5, after the threshold is reached it takes two good heartbeats to re-enable. + //int tsDecrease = 2; //1.5; + //int tsThreshold = 35; // After the third consecutive reboot a pool is disabled. + //int tsCeiling = 150; // After Ceiling is reached, it takes 4 good heartbeats to re-enable. + + long msgSerialId = msg.getSerialId(); + int nextTrustScore = 0; + boolean nextEnabledStatus = true; + CellAddressCore poolAddress = envelope.getSourceAddress(); String poolName = msg.getPoolName(); PoolV2Mode poolMode = msg.getPoolMode(); PoolCostInfo newInfo = msg.getPoolCostInfo(); Entry poolEntry = _hash.get(poolName); boolean isNewPool = poolEntry == null; + boolean trustScoreThresholdReached = false; + + // TODO: To much indentation + if (!isNewPool) { // Only check for reboots if the pool is not new + int lastTrustScore = poolEntry.getTrustScore(); + long lastSerailId = poolEntry.getSerialId(); + + if (msgSerialId == lastSerailId) { // Pool has not rebooted + nextTrustScore = lastTrustScore / _trustScoreDecrease; + if (nextTrustScore < _trustScoreThreshold && !poolEntry.getEnabledStatus()) { // Pool was disabled, should now be re-ENABLED + LOGGER.error("Pool {} WOULD now be re-ENABLED, BUT IS NOT", poolName); + // TODO: enable here + } + + } else { // Pool has rebooted + if (lastTrustScore < _trustScoreCeiling) { + nextTrustScore = lastTrustScore + _trustScoreIncrease; + } // INCREASE trust score as long as it is not higher than the ceiling + LOGGER.error("Pool {} rebooted and changed ID from {} to {}, Trust Score now at {}", poolName, lastSerailId, msgSerialId, lastTrustScore); + + if (nextTrustScore > _trustScoreThreshold) { // Set pool as DISABLED + nextEnabledStatus = false; + LOGGER.error("Pool {} WOULD now marked as DISABLED, BUT IS NOT", poolName); + // TODO: disable here + } + } + } /* Whether the pool mentioned in the message should be removed */ boolean shouldRemovePool = poolMode.getMode() == PoolV2Mode.DISABLED || @@ -108,7 +206,7 @@ public synchronized void messageArrived(CellMessage envelope, PoolManagerPoolUpM if (shouldRemovePool) { _hash.remove(poolName); } else if (newInfo != null) { - _hash.put(poolName, new Entry(poolAddress, newInfo, msg.getTagMap())); + _hash.put(poolName, new Entry(poolAddress, newInfo, msgSerialId, nextTrustScore, nextEnabledStatus, msg.getTagMap())); } } @@ -355,4 +453,4 @@ public synchronized Map getPoolInfoAsMap(Iterable pool private synchronized void writeObject(ObjectOutputStream stream) throws IOException { stream.defaultWriteObject(); } -} \ No newline at end of file +} diff --git a/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml b/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml index f3ff9b959f5..3956e643211 100644 --- a/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml +++ b/modules/dcache/src/main/resources/diskCacheV111/poolManager/poolmanager.xml @@ -38,6 +38,10 @@ Cost module + + + + diff --git a/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java b/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java index c6b7e1a0641..ff7872f91b3 100644 --- a/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java +++ b/modules/dcache/src/test/java/org/dcache/tests/poolmanager/CostModuleTest.java @@ -1,10 +1,7 @@ package org.dcache.tests.poolmanager; import static org.dcache.util.ByteUnit.GiB; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import diskCacheV111.poolManager.CostModuleV1; import diskCacheV111.pools.PoolCostInfo; @@ -197,6 +194,53 @@ public void testTwoPoolsThenPercentile() { assertPercentileCost(FRACTION_JUST_BELOW_ONE, maxPerfCost); } + @Test + public void testPoolCircuitbreaker() throws InterruptedException { + int trustScoreIncrease = _costModule.getTrustScoreIncrease(); + int trustScoreDecrease = _costModule.getTrustScoreDecrease(); + int trustScoreThreshold = _costModule.getTrustScoreThreshold(); + int trustScoreCeiling = _costModule.getTrustScoreCeiling(); + PoolManagerPoolUpMessage msg = getMessagePool(POOL_NAME); + + // Get tho the threshold no mater what it might be + for (int i = 0; i < trustScoreThreshold; i += trustScoreIncrease) { + msg = deadHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + } + assertFalse(_costModule.getPoolStatus(POOL_NAME)); + + // Reset to the minimum trust value + msg = aliveHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + assertTrue(_costModule.getPoolStatus(POOL_NAME)); + + // Those tests are coupled to specific values of CostModuleV1#messageArrived(CellMessage, PoolManagerPoolUpMassage) + // msg = deadHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // assertFalse(_costModule.getPoolStatus(POOL_NAME)); + // + // msg = aliveHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // msg = aliveHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // msg = deadHeartbeat(msg, POOL_NAME, POOL_ADDRESS); + // assertTrue(_costModule.getPoolStatus(POOL_NAME)); + } + + private PoolManagerPoolUpMessage deadHeartbeat(PoolManagerPoolUpMessage message, String poolName, CellAddressCore poolAddress) { + message = getMessagePool(poolName); + _costModule.messageArrived(buildEnvelope(poolAddress), message); + return message; + } + private PoolManagerPoolUpMessage aliveHeartbeat(PoolManagerPoolUpMessage message, String poolName, CellAddressCore poolAddress) { + _costModule.messageArrived(buildEnvelope(poolAddress), message); + return message; + } + + private PoolManagerPoolUpMessage getMessagePool(String poolName) { + return buildPoolUpMessageWithCostAndQueue( + poolName, + 100, 20, 30, 50, + 40, 100, 0, + 0, 0, 0, + 0, 0, 0); + } + @Test public void testThreePoolsThenPercentile() { diff --git a/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml b/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml index 890d7a067e8..c8db687dfbb 100644 --- a/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml +++ b/modules/gplazma2-oidc-te/src/main/resources/META-INF/gplazma-plugins.xml @@ -1,6 +1,6 @@ oidc-te - org.dcache.gplazma.plugins.tokenx.TokenExchange + org.dcache.gplazma.tokenx.TokenExchange \ No newline at end of file diff --git a/packages/system-test/src/main/skel/etc/layouts/system-test.conf b/packages/system-test/src/main/skel/etc/layouts/system-test.conf index 3e1fe436517..a027d0baff5 100644 --- a/packages/system-test/src/main/skel/etc/layouts/system-test.conf +++ b/packages/system-test/src/main/skel/etc/layouts/system-test.conf @@ -1,6 +1,5 @@ system-test.home=${dcache.home} -dcache.broker.scheme=none dcache.pid.dir=/tmp dcache.java.memory.heap=1024m dcache.enable.space-reservation=true @@ -44,6 +43,7 @@ billing.enable.db=true dcache.enable.quota=true [dCacheDomain] +dcache.broker.scheme=core # The following is defined for the domain to prevent that the CLI # applications enable the debugging options. dcache.java.options.extra=-Xdebug -agentlib:jdwp=transport=dt_socket,server=y,address=localhost:2299,suspend=n -XX:+TieredCompilation @@ -74,11 +74,6 @@ srmmanager.net.host=localhost srmmanager.expired-job-period = 30 srmmanager.expired-job-period.unit = SECONDS -[dCacheDomain/pool] -pool.name=pool_write -pool.path=${system-test.home}/var/pools/pool_write -pool.plugins.meta=org.dcache.pool.repository.meta.file.FileMetaDataRepository - [dCacheDomain/pool] pool.name=pool_read pool.path=${system-test.home}/var/pools/pool_read @@ -246,3 +241,10 @@ nfs.enable.access-log=FULL [dCacheDomain/qos-verifier] [dCacheDomain/qos-adjuster] [dCacheDomain/qos-scanner] + + +[pool] +[pool/pool] +pool.name=pool_write +pool.path=${system-test.home}/var/pools/pool_write +pool.plugins.meta=org.dcache.pool.repository.meta.file.FileMetaDataRepository diff --git a/skel/share/defaults/poolmanager.properties b/skel/share/defaults/poolmanager.properties index 91eaeffb220..bf3a41c1c49 100644 --- a/skel/share/defaults/poolmanager.properties +++ b/skel/share/defaults/poolmanager.properties @@ -122,3 +122,5 @@ poolmanager.request-notifier.timeout=1 # the caching of the selected pools. (one-of?true|false)poolmanager.selection.unit.cachingenabeled = false + +# Coment that explains wtf i am doing, or not i guess \ No newline at end of file