awesome assets from gffs code
[feisty_meow.git] / kona / src / org / gffs / network / DeadHostChecker.java
1 package org.gffs.network;
2
3 import java.util.HashMap;
4
5 import org.apache.commons.lang3.builder.HashCodeBuilder;
6 import org.apache.commons.logging.Log;
7 import org.apache.commons.logging.LogFactory;
8
9 //import edu.virginia.vcgr.genii.client.ClientProperties;
10
11 /**
12  * Manages a list of hosts that have proven to be down recently. Has support to not immediately fail the host, as this can lead to too quick
13  * an assumption that the host is down, but after N tries, the host is out. It will then be tested again periodically so we know when it comes
14  * back online.
15  */
16 public class DeadHostChecker
17 {
18         static private Log _logger = LogFactory.getLog(DeadHostChecker.class);
19         
20         //need better substitute for cli props.
21         public static class ClientProperties {
22                 int timeout = 2 * 60 * 1000;  // 2 minutes timeout by default, in ms.
23                 
24                 ClientProperties() {
25                 }
26         }
27         static public ClientProperties props;
28         
29
30         // this value specifies how many attempts can fail before the host is considered down.
31         static private final int HOW_MANY_DOWNS_ALLOWED = 1;
32
33         /*
34          * this is the longest amount of time between checking of dead hosts that we'll ever pause. exponential backoff will occur up until this
35          * delay time, and then stay at this delay time afterwards.
36          */
37         static private final int MAXIMUM_ALLOWABLE_CHECKING_DELAY = 60 * 1000 * 5; // current is 5 minutes max for exponential backoff on retries.
38
39         public static class HostKey
40         {
41                 public String hostname;
42                 public int port;
43
44                 HostKey(String hostname, int port)
45                 {
46                         this.hostname = hostname;
47                         this.port = port;
48                 }
49
50                 @Override
51                 public int hashCode()
52                 {
53                         return new HashCodeBuilder(37, 839). // two randomly chosen prime numbers
54                         // if deriving: appendSuper(super.hashCode()).
55                                 append(hostname).append(port).toHashCode();
56                 }
57
58                 @Override
59                 public boolean equals(Object o)
60                 {
61                         if (!(o instanceof HostKey))
62                                 return false; // wrong object.
63                         HostKey realo = (HostKey) o;
64                         return realo.hostname.equals(hostname) && (realo.port == port);
65                 }
66
67                 @Override
68                 public String toString()
69                 {
70                         return hostname + ":" + port;
71                 }
72         }
73
74         static final HashMap<HostKey, RetryInfo> deadHosts = new HashMap<HostKey, RetryInfo>();
75
76         public static class RetryInfo
77         {
78                 public long nextTime;
79                 public int delay;
80                 public int downCount = 0;
81
82                 public RetryInfo()
83                 {
84                         // We just failed, so base a delay on the overall timeout to delay our next attempt.
85                         delay = initialDelay();
86                         nextTime = System.currentTimeMillis() + delay;
87                 }
88
89                 int initialDelay()
90                 {
91                         return props.timeout / 2;
92                 }
93
94                 boolean isThisHostDead()
95                 {
96                         if (downCount < HOW_MANY_DOWNS_ALLOWED) {
97                                 return false;
98                         }
99                         if (System.currentTimeMillis() > nextTime) {
100                                 // this host is being allowed a retry.
101                                 nextTime = System.currentTimeMillis() + delay;
102                                 return false;
103                         }
104                         return true;
105                 }
106
107                 void recordDown()
108                 {
109                         downCount++;
110                 }
111         }
112
113         /**
114          * checks the host in our records and returns true if it is considered alive and false if it is considered dead.
115          */
116         public static boolean evaluateHostAlive(String host, int port)
117         {
118                 HostKey key = new HostKey(host, port);
119
120                 // Added July 14, 2015 by ASG to deal with dead hosts and not bother trying to talk to them. The timeouts kill us.
121                 synchronized (deadHosts) {
122                         if (deadHosts.containsKey(host)) {
123                                 RetryInfo inf = deadHosts.get(key);
124                                 if (inf == null) {
125                                         _logger.warn("logic error: dead hosts list said it had host " + key + " was listed but we got a null record for it.");
126                                         return true;
127                                 }
128                                 return !inf.isThisHostDead();
129                         } else {
130                                 // up as far as we know; no record exists.
131                                 if (_logger.isTraceEnabled())
132                                         _logger.debug("host " + key + " is fine as far as we know.");
133                                 return true;
134                         }
135                 }
136         }
137
138         public static void addHostToDeadPool(String host, int port)
139         {
140                 HostKey key = new HostKey(host, port);
141
142                 synchronized (deadHosts) {
143                         RetryInfo inf = deadHosts.get(key);
144                         if (inf == null) {
145                                 // Not there, set it up and add it.
146                                 inf = new RetryInfo();
147                                 deadHosts.put(key, inf);
148                         }
149
150                         boolean alreadyDead = false;
151                         if (inf.isThisHostDead()) {
152                                 // this one is already down so expand the timeout.
153                                 if (_logger.isDebugEnabled())
154                                         _logger.warn("host " + key + " is considered dead already; increasing delay.");
155                                 inf.delay *= 2;
156                                 inf.nextTime = System.currentTimeMillis() + inf.delay;
157
158                                 if (inf.delay > MAXIMUM_ALLOWABLE_CHECKING_DELAY) {
159                                         inf.delay = MAXIMUM_ALLOWABLE_CHECKING_DELAY;
160                                 }
161                                 // flag this so we don't say something again below.
162                                 alreadyDead = true;
163                         }
164
165                         // we definitely saw this host as down at least once, so record that now.
166                         inf.recordDown();
167
168                         if (!inf.isThisHostDead()) {
169                                 // still up, although we needed to record that failure.
170                                 if (_logger.isDebugEnabled())
171                                         _logger.debug("host " + key + " is not dead yet but suffered a connection problem.");
172                         } else {
173                                 // this is dead now. say something about it if we didn't already.
174                                 if (!alreadyDead && _logger.isDebugEnabled())
175                                         _logger.warn("host " + key + " is newly considered dead due to communication problems.");
176                         }
177                 }
178         }
179
180         public static void removeHostFromDeadPool(String host, int port)
181         {
182                 HostKey key = new HostKey(host, port);
183
184                 // Well, the host was reported alive again, so remove if it is in deadHosts.
185                 synchronized (deadHosts) {
186                         if (deadHosts.containsKey(key)) {
187                                 if (_logger.isDebugEnabled()) {
188                                         // if it's not present, we don't say anything.
189                                         _logger.debug("host " + key + " is being removed from dead host pool.");
190                                 }
191                                 // drop it from the list.
192                                 deadHosts.remove(key);
193                         }
194                 }
195         }
196
197 }