feisty meow concerns codebase  2.140
DeadHostChecker.java
Go to the documentation of this file.
1 package org.gffs.network;
2 
3 import java.util.HashMap;
4 
5 import org.apache.commons.lang3.builder.HashCodeBuilder;
6 import org.apache.commons.logging.Log;
7 import org.apache.commons.logging.LogFactory;
8 
9 //import edu.virginia.vcgr.genii.client.ClientProperties;
10 
16 public class DeadHostChecker
17 {
18  static private Log _logger = LogFactory.getLog(DeadHostChecker.class);
19 
20  //need better substitute for cli props.
21  public static class ClientProperties {
22  int timeout = 2 * 60 * 1000; // 2 minutes timeout by default, in ms.
23 
24  ClientProperties() {
25  }
26  }
27  static public ClientProperties props;
28 
29 
30  // this value specifies how many attempts can fail before the host is considered down.
31  static private final int HOW_MANY_DOWNS_ALLOWED = 1;
32 
33  /*
34  * this is the longest amount of time between checking of dead hosts that we'll ever pause. exponential backoff will occur up until this
35  * delay time, and then stay at this delay time afterwards.
36  */
37  static private final int MAXIMUM_ALLOWABLE_CHECKING_DELAY = 60 * 1000 * 5; // current is 5 minutes max for exponential backoff on retries.
38 
39  public static class HostKey
40  {
41  public String hostname;
42  public int port;
43 
44  HostKey(String hostname, int port)
45  {
46  this.hostname = hostname;
47  this.port = port;
48  }
49 
50  @Override
51  public int hashCode()
52  {
53  return new HashCodeBuilder(37, 839). // two randomly chosen prime numbers
54  // if deriving: appendSuper(super.hashCode()).
55  append(hostname).append(port).toHashCode();
56  }
57 
58  @Override
59  public boolean equals(Object o)
60  {
61  if (!(o instanceof HostKey))
62  return false; // wrong object.
63  HostKey realo = (HostKey) o;
64  return realo.hostname.equals(hostname) && (realo.port == port);
65  }
66 
67  @Override
68  public String toString()
69  {
70  return hostname + ":" + port;
71  }
72  }
73 
74  static final HashMap<HostKey, RetryInfo> deadHosts = new HashMap<HostKey, RetryInfo>();
75 
76  public static class RetryInfo
77  {
78  public long nextTime;
79  public int delay;
80  public int downCount = 0;
81 
82  public RetryInfo()
83  {
84  // We just failed, so base a delay on the overall timeout to delay our next attempt.
85  delay = initialDelay();
86  nextTime = System.currentTimeMillis() + delay;
87  }
88 
89  int initialDelay()
90  {
91  return props.timeout / 2;
92  }
93 
94  boolean isThisHostDead()
95  {
96  if (downCount < HOW_MANY_DOWNS_ALLOWED) {
97  return false;
98  }
99  if (System.currentTimeMillis() > nextTime) {
100  // this host is being allowed a retry.
101  nextTime = System.currentTimeMillis() + delay;
102  return false;
103  }
104  return true;
105  }
106 
107  void recordDown()
108  {
109  downCount++;
110  }
111  }
112 
116  public static boolean evaluateHostAlive(String host, int port)
117  {
118  HostKey key = new HostKey(host, port);
119 
120  // Added July 14, 2015 by ASG to deal with dead hosts and not bother trying to talk to them. The timeouts kill us.
121  synchronized (deadHosts) {
122  if (deadHosts.containsKey(host)) {
123  RetryInfo inf = deadHosts.get(key);
124  if (inf == null) {
125  _logger.warn("logic error: dead hosts list said it had host " + key + " was listed but we got a null record for it.");
126  return true;
127  }
128  return !inf.isThisHostDead();
129  } else {
130  // up as far as we know; no record exists.
131  if (_logger.isTraceEnabled())
132  _logger.debug("host " + key + " is fine as far as we know.");
133  return true;
134  }
135  }
136  }
137 
138  public static void addHostToDeadPool(String host, int port)
139  {
140  HostKey key = new HostKey(host, port);
141 
142  synchronized (deadHosts) {
143  RetryInfo inf = deadHosts.get(key);
144  if (inf == null) {
145  // Not there, set it up and add it.
146  inf = new RetryInfo();
147  deadHosts.put(key, inf);
148  }
149 
150  boolean alreadyDead = false;
151  if (inf.isThisHostDead()) {
152  // this one is already down so expand the timeout.
153  if (_logger.isDebugEnabled())
154  _logger.warn("host " + key + " is considered dead already; increasing delay.");
155  inf.delay *= 2;
156  inf.nextTime = System.currentTimeMillis() + inf.delay;
157 
158  if (inf.delay > MAXIMUM_ALLOWABLE_CHECKING_DELAY) {
159  inf.delay = MAXIMUM_ALLOWABLE_CHECKING_DELAY;
160  }
161  // flag this so we don't say something again below.
162  alreadyDead = true;
163  }
164 
165  // we definitely saw this host as down at least once, so record that now.
166  inf.recordDown();
167 
168  if (!inf.isThisHostDead()) {
169  // still up, although we needed to record that failure.
170  if (_logger.isDebugEnabled())
171  _logger.debug("host " + key + " is not dead yet but suffered a connection problem.");
172  } else {
173  // this is dead now. say something about it if we didn't already.
174  if (!alreadyDead && _logger.isDebugEnabled())
175  _logger.warn("host " + key + " is newly considered dead due to communication problems.");
176  }
177  }
178  }
179 
180  public static void removeHostFromDeadPool(String host, int port)
181  {
182  HostKey key = new HostKey(host, port);
183 
184  // Well, the host was reported alive again, so remove if it is in deadHosts.
185  synchronized (deadHosts) {
186  if (deadHosts.containsKey(key)) {
187  if (_logger.isDebugEnabled()) {
188  // if it's not present, we don't say anything.
189  _logger.debug("host " + key + " is being removed from dead host pool.");
190  }
191  // drop it from the list.
192  deadHosts.remove(key);
193  }
194  }
195  }
196 
197 }
static void addHostToDeadPool(String host, int port)
static ClientProperties props
static boolean evaluateHostAlive(String host, int port)
static void removeHostFromDeadPool(String host, int port)
bool append
Definition: makedep.cpp:110