feisty meow concerns codebase 2.140
DeadHostChecker.java
Go to the documentation of this file.
1package org.gffs.network;
2
3import java.util.HashMap;
4
5import org.apache.commons.lang3.builder.HashCodeBuilder;
6import org.apache.commons.logging.Log;
7import org.apache.commons.logging.LogFactory;
8
9//import edu.virginia.vcgr.genii.client.ClientProperties;
10
16public class DeadHostChecker
17{
18 static private Log _logger = LogFactory.getLog(DeadHostChecker.class);
19
20 //need better substitute for cli props.
21 public static class ClientProperties {
22 int timeout = 2 * 60 * 1000; // 2 minutes timeout by default, in ms.
23
24 ClientProperties() {
25 }
26 }
27 static public ClientProperties props;
28
29
30 // this value specifies how many attempts can fail before the host is considered down.
31 static private final int HOW_MANY_DOWNS_ALLOWED = 1;
32
33 /*
34 * this is the longest amount of time between checking of dead hosts that we'll ever pause. exponential backoff will occur up until this
35 * delay time, and then stay at this delay time afterwards.
36 */
37 static private final int MAXIMUM_ALLOWABLE_CHECKING_DELAY = 60 * 1000 * 5; // current is 5 minutes max for exponential backoff on retries.
38
39 public static class HostKey
40 {
41 public String hostname;
42 public int port;
43
44 HostKey(String hostname, int port)
45 {
46 this.hostname = hostname;
47 this.port = port;
48 }
49
50 @Override
51 public int hashCode()
52 {
53 return new HashCodeBuilder(37, 839). // two randomly chosen prime numbers
54 // if deriving: appendSuper(super.hashCode()).
55 append(hostname).append(port).toHashCode();
56 }
57
58 @Override
59 public boolean equals(Object o)
60 {
61 if (!(o instanceof HostKey))
62 return false; // wrong object.
63 HostKey realo = (HostKey) o;
64 return realo.hostname.equals(hostname) && (realo.port == port);
65 }
66
67 @Override
68 public String toString()
69 {
70 return hostname + ":" + port;
71 }
72 }
73
74 static final HashMap<HostKey, RetryInfo> deadHosts = new HashMap<HostKey, RetryInfo>();
75
76 public static class RetryInfo
77 {
78 public long nextTime;
79 public int delay;
80 public int downCount = 0;
81
82 public RetryInfo()
83 {
84 // We just failed, so base a delay on the overall timeout to delay our next attempt.
85 delay = initialDelay();
86 nextTime = System.currentTimeMillis() + delay;
87 }
88
89 int initialDelay()
90 {
91 return props.timeout / 2;
92 }
93
94 boolean isThisHostDead()
95 {
96 if (downCount < HOW_MANY_DOWNS_ALLOWED) {
97 return false;
98 }
99 if (System.currentTimeMillis() > nextTime) {
100 // this host is being allowed a retry.
101 nextTime = System.currentTimeMillis() + delay;
102 return false;
103 }
104 return true;
105 }
106
107 void recordDown()
108 {
109 downCount++;
110 }
111 }
112
116 public static boolean evaluateHostAlive(String host, int port)
117 {
118 HostKey key = new HostKey(host, port);
119
120 // Added July 14, 2015 by ASG to deal with dead hosts and not bother trying to talk to them. The timeouts kill us.
121 synchronized (deadHosts) {
122 if (deadHosts.containsKey(host)) {
123 RetryInfo inf = deadHosts.get(key);
124 if (inf == null) {
125 _logger.warn("logic error: dead hosts list said it had host " + key + " was listed but we got a null record for it.");
126 return true;
127 }
128 return !inf.isThisHostDead();
129 } else {
130 // up as far as we know; no record exists.
131 if (_logger.isTraceEnabled())
132 _logger.debug("host " + key + " is fine as far as we know.");
133 return true;
134 }
135 }
136 }
137
138 public static void addHostToDeadPool(String host, int port)
139 {
140 HostKey key = new HostKey(host, port);
141
142 synchronized (deadHosts) {
143 RetryInfo inf = deadHosts.get(key);
144 if (inf == null) {
145 // Not there, set it up and add it.
146 inf = new RetryInfo();
147 deadHosts.put(key, inf);
148 }
149
150 boolean alreadyDead = false;
151 if (inf.isThisHostDead()) {
152 // this one is already down so expand the timeout.
153 if (_logger.isDebugEnabled())
154 _logger.warn("host " + key + " is considered dead already; increasing delay.");
155 inf.delay *= 2;
156 inf.nextTime = System.currentTimeMillis() + inf.delay;
157
158 if (inf.delay > MAXIMUM_ALLOWABLE_CHECKING_DELAY) {
159 inf.delay = MAXIMUM_ALLOWABLE_CHECKING_DELAY;
160 }
161 // flag this so we don't say something again below.
162 alreadyDead = true;
163 }
164
165 // we definitely saw this host as down at least once, so record that now.
166 inf.recordDown();
167
168 if (!inf.isThisHostDead()) {
169 // still up, although we needed to record that failure.
170 if (_logger.isDebugEnabled())
171 _logger.debug("host " + key + " is not dead yet but suffered a connection problem.");
172 } else {
173 // this is dead now. say something about it if we didn't already.
174 if (!alreadyDead && _logger.isDebugEnabled())
175 _logger.warn("host " + key + " is newly considered dead due to communication problems.");
176 }
177 }
178 }
179
180 public static void removeHostFromDeadPool(String host, int port)
181 {
182 HostKey key = new HostKey(host, port);
183
184 // Well, the host was reported alive again, so remove if it is in deadHosts.
185 synchronized (deadHosts) {
186 if (deadHosts.containsKey(key)) {
187 if (_logger.isDebugEnabled()) {
188 // if it's not present, we don't say anything.
189 _logger.debug("host " + key + " is being removed from dead host pool.");
190 }
191 // drop it from the list.
192 deadHosts.remove(key);
193 }
194 }
195 }
196
197}
static void addHostToDeadPool(String host, int port)
static boolean evaluateHostAlive(String host, int port)
static void removeHostFromDeadPool(String host, int port)
bool append
Definition makedep.cpp:110