1 package org.gffs.network;
3 import java.util.HashMap;
5 import org.apache.commons.lang3.builder.HashCodeBuilder;
6 import org.apache.commons.logging.Log;
7 import org.apache.commons.logging.LogFactory;
9 //import edu.virginia.vcgr.genii.client.ClientProperties;
12 * Manages a list of hosts that have proven to be down recently. Has support to not immediately fail the host, as this can lead to too quick
13 * an assumption that the host is down, but after N tries, the host is out. It will then be tested again periodically so we know when it comes
16 public class DeadHostChecker
18 static private Log _logger = LogFactory.getLog(DeadHostChecker.class);
20 //need better substitute for cli props.
21 public static class ClientProperties {
22 int timeout = 2 * 60 * 1000; // 2 minutes timeout by default, in ms.
27 static public ClientProperties props;
30 // this value specifies how many attempts can fail before the host is considered down.
31 static private final int HOW_MANY_DOWNS_ALLOWED = 1;
34 * this is the longest amount of time between checking of dead hosts that we'll ever pause. exponential backoff will occur up until this
35 * delay time, and then stay at this delay time afterwards.
37 static private final int MAXIMUM_ALLOWABLE_CHECKING_DELAY = 60 * 1000 * 5; // current is 5 minutes max for exponential backoff on retries.
39 public static class HostKey
41 public String hostname;
44 HostKey(String hostname, int port)
46 this.hostname = hostname;
53 return new HashCodeBuilder(37, 839). // two randomly chosen prime numbers
54 // if deriving: appendSuper(super.hashCode()).
55 append(hostname).append(port).toHashCode();
59 public boolean equals(Object o)
61 if (!(o instanceof HostKey))
62 return false; // wrong object.
63 HostKey realo = (HostKey) o;
64 return realo.hostname.equals(hostname) && (realo.port == port);
68 public String toString()
70 return hostname + ":" + port;
74 static final HashMap<HostKey, RetryInfo> deadHosts = new HashMap<HostKey, RetryInfo>();
76 public static class RetryInfo
80 public int downCount = 0;
84 // We just failed, so base a delay on the overall timeout to delay our next attempt.
85 delay = initialDelay();
86 nextTime = System.currentTimeMillis() + delay;
91 return props.timeout / 2;
94 boolean isThisHostDead()
96 if (downCount < HOW_MANY_DOWNS_ALLOWED) {
99 if (System.currentTimeMillis() > nextTime) {
100 // this host is being allowed a retry.
101 nextTime = System.currentTimeMillis() + delay;
114 * checks the host in our records and returns true if it is considered alive and false if it is considered dead.
116 public static boolean evaluateHostAlive(String host, int port)
118 HostKey key = new HostKey(host, port);
120 // Added July 14, 2015 by ASG to deal with dead hosts and not bother trying to talk to them. The timeouts kill us.
121 synchronized (deadHosts) {
122 if (deadHosts.containsKey(host)) {
123 RetryInfo inf = deadHosts.get(key);
125 _logger.warn("logic error: dead hosts list said it had host " + key + " was listed but we got a null record for it.");
128 return !inf.isThisHostDead();
130 // up as far as we know; no record exists.
131 if (_logger.isTraceEnabled())
132 _logger.debug("host " + key + " is fine as far as we know.");
138 public static void addHostToDeadPool(String host, int port)
140 HostKey key = new HostKey(host, port);
142 synchronized (deadHosts) {
143 RetryInfo inf = deadHosts.get(key);
145 // Not there, set it up and add it.
146 inf = new RetryInfo();
147 deadHosts.put(key, inf);
150 boolean alreadyDead = false;
151 if (inf.isThisHostDead()) {
152 // this one is already down so expand the timeout.
153 if (_logger.isDebugEnabled())
154 _logger.warn("host " + key + " is considered dead already; increasing delay.");
156 inf.nextTime = System.currentTimeMillis() + inf.delay;
158 if (inf.delay > MAXIMUM_ALLOWABLE_CHECKING_DELAY) {
159 inf.delay = MAXIMUM_ALLOWABLE_CHECKING_DELAY;
161 // flag this so we don't say something again below.
165 // we definitely saw this host as down at least once, so record that now.
168 if (!inf.isThisHostDead()) {
169 // still up, although we needed to record that failure.
170 if (_logger.isDebugEnabled())
171 _logger.debug("host " + key + " is not dead yet but suffered a connection problem.");
173 // this is dead now. say something about it if we didn't already.
174 if (!alreadyDead && _logger.isDebugEnabled())
175 _logger.warn("host " + key + " is newly considered dead due to communication problems.");
180 public static void removeHostFromDeadPool(String host, int port)
182 HostKey key = new HostKey(host, port);
184 // Well, the host was reported alive again, so remove if it is in deadHosts.
185 synchronized (deadHosts) {
186 if (deadHosts.containsKey(key)) {
187 if (_logger.isDebugEnabled()) {
188 // if it's not present, we don't say anything.
189 _logger.debug("host " + key + " is being removed from dead host pool.");
191 // drop it from the list.
192 deadHosts.remove(key);