f452fd30e32bb93b923ee0749bfae72a116469b0
[feisty_meow.git] / nucleus / library / filesystem / huge_file.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : huge_file                                                         *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 2007-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "byte_filer.h"
16 #include "huge_file.h"
17
18 #include <basis/byte_array.h>
19 #include <basis/functions.h>
20 #include <basis/guards.h>
21
22 #include <stdio.h>
23 #include <sys/time.h>
24
25 #undef LOG
26 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
27
28 //#define DEBUG_HUGE_FILE
29   // uncomment for noisy version.
30
31 using namespace basis;
32
33 namespace filesystem {
34
35 huge_file::huge_file(const astring &filename, const astring &permissions)
36 : _real_file(new byte_filer(filename, permissions)),
37   _file_pointer(0)
38 {
39 }
40
41 huge_file::~huge_file()
42 {
43   WHACK(_real_file);
44 }
45
46 const astring &huge_file::name() const { return _real_file->name(); }
47
48 void huge_file::flush() { _real_file->flush(); }
49
50 bool huge_file::truncate() { return _real_file->truncate(); }
51
52 double huge_file::length()
53 {
54   FUNCDEF("length");
55
56 //trying to read to see if we're past endpoint.
57 //  if this approach works, length may want to close and reopen file for
58 //  reading, since we can't add any bytes to it for writing just to find
59 //  the length out.
60
61
62   double save_posn = _file_pointer;
63   // skip to the beginning of the file so we can try to find the end.
64   _file_pointer = 0;
65   _real_file->seek(0, byte_filer::FROM_START);
66   size_t naive_size = _real_file->length();
67   if (naive_size < _real_file->file_size_limit()) {
68     // lucked out; we are within normal file size limitations.
69     seek(save_posn, byte_filer::FROM_START);
70     return double(naive_size);
71   }
72   
73   double best_highest = 0.0;  // the maximum we've safely seeked to.
74
75   size_t big_jump = byte_filer::file_size_limit();
76     // try with the largest possible seek at first.
77
78   while (true) {
79 #ifdef DEBUG_HUGE_FILE
80     LOG(a_sprintf("best highest=%.0f", best_highest));
81 #endif
82     // iterate until we reach our exit condition, which seems like it must
83     // always occur eventually unless the file is being monkeyed with.
84     bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
85 #ifdef DEBUG_HUGE_FILE
86     LOG(a_sprintf("  seek ret=%d", int(seek_ret)));
87 #endif
88     byte_array temp_bytes;
89     int bytes_read = _real_file->read(temp_bytes, 1);
90     if (bytes_read < 1)
91       seek_ret = false;
92 #ifdef DEBUG_HUGE_FILE
93     LOG(a_sprintf("  read %d bytes", bytes_read));
94 #endif
95     bool at_eof = _real_file->eof();
96 #ifdef DEBUG_HUGE_FILE
97     LOG(a_sprintf("  at_eof=%d", int(at_eof)));
98 #endif
99     if (seek_ret && !at_eof) {
100 #ifdef DEBUG_HUGE_FILE
101       LOG("seek worked, incrementing best highest and trying same jump again");
102 #endif
103       // the seek worked, so we'll just jump forward again.
104       best_highest += double(big_jump);
105       _file_pointer += double(big_jump);
106       continue;
107     } else if (seek_ret && at_eof) {
108 #ifdef DEBUG_HUGE_FILE
109       LOG("seek worked but found eof exactly.");
110 #endif
111       // the seek did worked, but apparently we've also found the end point.
112       best_highest += double(big_jump);
113       _file_pointer += double(big_jump);
114       break;
115     } else {
116       // that seek was too large, so we need to back down and try a smaller
117       // seek size.
118 #ifdef DEBUG_HUGE_FILE
119       LOG("seek failed, going back to best highest and trying same jump again");
120 #endif
121       _file_pointer = 0;
122       _real_file->seek(0, byte_filer::FROM_START); 
123       outcome worked = seek(best_highest, byte_filer::FROM_START);
124         // this uses our version to position at large sizes.
125       if (worked != OKAY) {
126         // this is a bad failure; it says that the file size changed or
127         // something malfunctioned.  we should always be able to get back to
128         // the last good size we found if the file is static.
129         LOG(a_sprintf("failed to seek back to best highest %.0f on ",
130             best_highest) + _real_file->name());
131         // try to repair our ideas about the file by starting the process
132         // over.
133 //hmmm: count the number of times restarted and bail after N.
134         seek_ret = _real_file->seek(0, byte_filer::FROM_START);
135         _file_pointer = 0;
136         if (!seek_ret) {
137           // the heck with this.  we can't even go back to the start.  this
138           // file seems to be screwed up now.
139           LOG(astring("failed to seek back to start of file!  on ")
140               + _real_file->name());
141           return 0;
142         }
143         // reset the rest of the positions for our failed attempt to return
144         // to what we already thought was good.
145         _file_pointer = 0;
146         big_jump = byte_filer::file_size_limit();
147         best_highest = 0;
148         continue;
149       }
150       // okay, nothing bad happened when we went back to our last good point.
151       if (big_jump <= 0) {
152         // success in finding the smallest place that we can't seek between.
153 #ifdef DEBUG_HUGE_FILE
154         LOG("got down to smallest big jump, 0!");
155 #endif
156         break;
157       }
158       // formula expects that the maximum file size is a power of 2.
159       big_jump /= 2;
160 #ifdef DEBUG_HUGE_FILE
161       LOG(a_sprintf("restraining big jump down to %u.", big_jump));
162 #endif
163       continue;
164     }
165   }
166
167   // go back to where we started out.
168   seek(0, byte_filer::FROM_START);
169   seek(save_posn, byte_filer::FROM_CURRENT);
170 #ifdef DEBUG_HUGE_FILE
171   LOG(a_sprintf("saying file len is %.0f.", best_highest + 1.0));
172 #endif
173   return best_highest + 1.0;
174 }
175
176 bool huge_file::good() const { return _real_file->good(); }
177
178 bool huge_file::eof() const { return _real_file->eof(); }
179
180 outcome huge_file::move_to(double absolute_posn)
181 {
182 #ifdef DEBUG_HUGE_FILE
183   FUNCDEF("move_to");
184 #endif
185   double difference = absolute_posn - _file_pointer;
186     // calculate the size we want to offset.
187 #ifdef DEBUG_HUGE_FILE
188   LOG(a_sprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
189       absolute_posn, difference, _file_pointer));
190 #endif
191   // if we're at the same place, we don't have to do anything.
192   if (difference < 0.000001) {
193 #ifdef DEBUG_HUGE_FILE
194     LOG("difference was minimal, saying we're done.");
195 #endif
196     return OKAY;
197   }
198   while (absolute_value(difference) > 0.000001) {
199     double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
200         absolute_value(difference));
201     if (difference < 0)
202       seek_size *= -1.0;  // flip sign of seek.
203 #ifdef DEBUG_HUGE_FILE
204     LOG(a_sprintf("  seeksize=%d", int(seek_size)));
205 #endif
206     bool seek_ret = _real_file->seek(int(seek_size),
207         byte_filer::FROM_CURRENT);
208     if (!seek_ret) {
209 #ifdef DEBUG_HUGE_FILE
210       LOG(a_sprintf("failed to seek %d from current", int(seek_size)));
211 #endif
212       return FAILURE;  // seek failed somehow.
213     }
214     _file_pointer += seek_size;
215 #ifdef DEBUG_HUGE_FILE
216     LOG(a_sprintf("  now_filepoint=%.0f", _file_pointer));
217 #endif
218     difference = absolute_posn - _file_pointer;
219 #ifdef DEBUG_HUGE_FILE
220     LOG(a_sprintf("  now_difference=%.0f", difference));
221 #endif
222   }
223   return OKAY;
224 }
225
226 outcome huge_file::seek(double new_position, byte_filer::origins origin)
227 {
228 #ifdef DEBUG_HUGE_FILE
229   FUNCDEF("seek");
230 #endif
231   if (origin == byte_filer::FROM_CURRENT) {
232     return move_to(_file_pointer + new_position);
233   } else if (origin == byte_filer::FROM_START) {
234     _file_pointer = 0;
235     if (!_real_file->seek(0, byte_filer::FROM_START))
236       return FAILURE;
237     return move_to(new_position);
238   } else if (origin == byte_filer::FROM_END) {
239 #ifdef DEBUG_HUGE_FILE
240     LOG("into precarious FROM_END case.");
241 #endif
242     double file_len = length();  // could take a scary long time possibly.
243 #ifdef DEBUG_HUGE_FILE
244     LOG(a_sprintf("  FROM_END got len %.0f.", file_len));
245 #endif
246     _file_pointer = file_len;
247       // it's safe, although not efficient, for us to call the length()
248       // method here.  our current version of length() uses the byte_filer's
249       // seek method directly and only FROM_CURRENT and FROM_START from this
250       // class's seek method.
251     _real_file->seek(0, byte_filer::FROM_END);
252     return move_to(_file_pointer - new_position);
253   }
254   // unknown origin.
255   return BAD_INPUT;
256 }
257
258 outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
259 {
260   FUNCDEF("read");
261   size_read = 0;
262   int ret = _real_file->read(to_fill, desired_size);
263   if (ret < 0)
264     return FAILURE;  // couldn't read the bytes.
265   _file_pointer += double(size_read);
266   size_read = ret;
267   return OKAY; 
268 }
269
270 outcome huge_file::write(const byte_array &to_write, int &size_written)
271 {
272   FUNCDEF("write");
273   size_written = 0;
274   int ret = _real_file->write(to_write);
275   if (ret < 0)
276     return FAILURE;  // couldn't write the bytes.
277   _file_pointer += double(size_written);
278   size_written = ret;
279   return OKAY;
280 }
281
282 basis::outcome huge_file::touch()
283 {
284   if (filename(_real_file->name()).exists()) {
285     // file exists, so just update time.
286     int ret = utimes(_real_file->name().observe(), NIL);
287     if (ret != 0)
288       return FAILURE;
289   } else {
290     // file doesn't exist yet.
291     byte_array junk(1);
292     int written;
293     outcome ret = write(junk, written);
294     if (ret != OKAY) ret;
295     if (!truncate())
296       return FAILURE;
297   }
298   return OKAY;
299 }
300
301 } //namespace.
302