33b4672a2f23f50d3cbe81324e5719c02a5c32d1
[feisty_meow.git] / nucleus / library / filesystem / huge_file.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : huge_file                                                         *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 2007-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "byte_filer.h"
16 #include "huge_file.h"
17
18 #include <basis/byte_array.h>
19 #include <basis/functions.h>
20 #include <basis/guards.h>
21
22 #include <stdio.h>
23
24 #undef LOG
25 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
26
27 //#define DEBUG_HUGE_FILE
28   // uncomment for noisy version.
29
30 using namespace basis;
31
32 namespace filesystem {
33
34 huge_file::huge_file(const astring &filename, const astring &permissions)
35 : _real_file(new byte_filer(filename, permissions)),
36   _file_pointer(0)
37 {
38 }
39
40 huge_file::~huge_file()
41 {
42   WHACK(_real_file);
43 }
44
45 void huge_file::flush() { _real_file->flush(); }
46
47 bool huge_file::truncate() { return _real_file->truncate(); }
48
49 double huge_file::length()
50 {
51   FUNCDEF("length");
52
53 //trying to read to see if we're past endpoint.
54 //  if this approach works, length may want to close and reopen file for
55 //  reading, since we can't add any bytes to it for writing just to find
56 //  the length out.
57
58
59   double save_posn = _file_pointer;
60   // skip to the beginning of the file so we can try to find the end.
61   _file_pointer = 0;
62   _real_file->seek(0, byte_filer::FROM_START);
63   size_t naive_size = _real_file->length();
64   if (naive_size < _real_file->file_size_limit()) {
65     // lucked out; we are within normal file size limitations.
66     seek(save_posn, byte_filer::FROM_START);
67     return double(naive_size);
68   }
69   
70   double best_highest = 0.0;  // the maximum we've safely seeked to.
71
72   size_t big_jump = byte_filer::file_size_limit();
73     // try with the largest possible seek at first.
74
75   while (true) {
76 #ifdef DEBUG_HUGE_FILE
77     LOG(a_sprintf("best highest=%.0f", best_highest));
78 #endif
79     // iterate until we reach our exit condition, which seems like it must
80     // always occur eventually unless the file is being monkeyed with.
81     bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
82 #ifdef DEBUG_HUGE_FILE
83     LOG(a_sprintf("  seek ret=%d", int(seek_ret)));
84 #endif
85     byte_array temp_bytes;
86     int bytes_read = _real_file->read(temp_bytes, 1);
87     if (bytes_read < 1)
88       seek_ret = false;
89 #ifdef DEBUG_HUGE_FILE
90     LOG(a_sprintf("  read %d bytes", bytes_read));
91 #endif
92     bool at_eof = _real_file->eof();
93 #ifdef DEBUG_HUGE_FILE
94     LOG(a_sprintf("  at_eof=%d", int(at_eof)));
95 #endif
96     if (seek_ret && !at_eof) {
97 #ifdef DEBUG_HUGE_FILE
98       LOG("seek worked, incrementing best highest and trying same jump again");
99 #endif
100       // the seek worked, so we'll just jump forward again.
101       best_highest += double(big_jump);
102       _file_pointer += double(big_jump);
103       continue;
104     } else if (seek_ret && at_eof) {
105 #ifdef DEBUG_HUGE_FILE
106       LOG("seek worked but found eof exactly.");
107 #endif
108       // the seek did worked, but apparently we've also found the end point.
109       best_highest += double(big_jump);
110       _file_pointer += double(big_jump);
111       break;
112     } else {
113       // that seek was too large, so we need to back down and try a smaller
114       // seek size.
115 #ifdef DEBUG_HUGE_FILE
116       LOG("seek failed, going back to best highest and trying same jump again");
117 #endif
118       _file_pointer = 0;
119       _real_file->seek(0, byte_filer::FROM_START); 
120       outcome worked = seek(best_highest, byte_filer::FROM_START);
121         // this uses our version to position at large sizes.
122       if (worked != OKAY) {
123         // this is a bad failure; it says that the file size changed or
124         // something malfunctioned.  we should always be able to get back to
125         // the last good size we found if the file is static.
126         LOG(a_sprintf("failed to seek back to best highest %.0f on ",
127             best_highest) + _real_file->name());
128         // try to repair our ideas about the file by starting the process
129         // over.
130 //hmmm: count the number of times restarted and bail after N.
131         seek_ret = _real_file->seek(0, byte_filer::FROM_START);
132         _file_pointer = 0;
133         if (!seek_ret) {
134           // the heck with this.  we can't even go back to the start.  this
135           // file seems to be screwed up now.
136           LOG(astring("failed to seek back to start of file!  on ")
137               + _real_file->name());
138           return 0;
139         }
140         // reset the rest of the positions for our failed attempt to return
141         // to what we already thought was good.
142         _file_pointer = 0;
143         big_jump = byte_filer::file_size_limit();
144         best_highest = 0;
145         continue;
146       }
147       // okay, nothing bad happened when we went back to our last good point.
148       if (big_jump <= 0) {
149         // success in finding the smallest place that we can't seek between.
150 #ifdef DEBUG_HUGE_FILE
151         LOG("got down to smallest big jump, 0!");
152 #endif
153         break;
154       }
155       // formula expects that the maximum file size is a power of 2.
156       big_jump /= 2;
157 #ifdef DEBUG_HUGE_FILE
158       LOG(a_sprintf("restraining big jump down to %u.", big_jump));
159 #endif
160       continue;
161     }
162   }
163
164   // go back to where we started out.
165   seek(0, byte_filer::FROM_START);
166   seek(save_posn, byte_filer::FROM_CURRENT);
167 #ifdef DEBUG_HUGE_FILE
168   LOG(a_sprintf("saying file len is %.0f.", best_highest + 1.0));
169 #endif
170   return best_highest + 1.0;
171 }
172
173 bool huge_file::good() const { return _real_file->good(); }
174
175 bool huge_file::eof() const { return _real_file->eof(); }
176
177 outcome huge_file::move_to(double absolute_posn)
178 {
179 #ifdef DEBUG_HUGE_FILE
180   FUNCDEF("move_to");
181 #endif
182   double difference = absolute_posn - _file_pointer;
183     // calculate the size we want to offset.
184 #ifdef DEBUG_HUGE_FILE
185   LOG(a_sprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
186       absolute_posn, difference, _file_pointer));
187 #endif
188   // if we're at the same place, we don't have to do anything.
189   if (difference < 0.000001) {
190 #ifdef DEBUG_HUGE_FILE
191     LOG("difference was minimal, saying we're done.");
192 #endif
193     return OKAY;
194   }
195   while (absolute_value(difference) > 0.000001) {
196     double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
197         absolute_value(difference));
198     if (difference < 0)
199       seek_size *= -1.0;  // flip sign of seek.
200 #ifdef DEBUG_HUGE_FILE
201     LOG(a_sprintf("  seeksize=%d", int(seek_size)));
202 #endif
203     bool seek_ret = _real_file->seek(int(seek_size),
204         byte_filer::FROM_CURRENT);
205     if (!seek_ret) {
206 #ifdef DEBUG_HUGE_FILE
207       LOG(a_sprintf("failed to seek %d from current", int(seek_size)));
208 #endif
209       return FAILURE;  // seek failed somehow.
210     }
211     _file_pointer += seek_size;
212 #ifdef DEBUG_HUGE_FILE
213     LOG(a_sprintf("  now_filepoint=%.0f", _file_pointer));
214 #endif
215     difference = absolute_posn - _file_pointer;
216 #ifdef DEBUG_HUGE_FILE
217     LOG(a_sprintf("  now_difference=%.0f", difference));
218 #endif
219   }
220   return OKAY;
221 }
222
223 outcome huge_file::seek(double new_position, byte_filer::origins origin)
224 {
225 #ifdef DEBUG_HUGE_FILE
226   FUNCDEF("seek");
227 #endif
228   if (origin == byte_filer::FROM_CURRENT) {
229     return move_to(_file_pointer + new_position);
230   } else if (origin == byte_filer::FROM_START) {
231     _file_pointer = 0;
232     if (!_real_file->seek(0, byte_filer::FROM_START))
233       return FAILURE;
234     return move_to(new_position);
235   } else if (origin == byte_filer::FROM_END) {
236 #ifdef DEBUG_HUGE_FILE
237     LOG("into precarious FROM_END case.");
238 #endif
239     double file_len = length();  // could take a scary long time possibly.
240 #ifdef DEBUG_HUGE_FILE
241     LOG(a_sprintf("  FROM_END got len %.0f.", file_len));
242 #endif
243     _file_pointer = file_len;
244       // it's safe, although not efficient, for us to call the length()
245       // method here.  our current version of length() uses the byte_filer's
246       // seek method directly and only FROM_CURRENT and FROM_START from this
247       // class's seek method.
248     _real_file->seek(0, byte_filer::FROM_END);
249     return move_to(_file_pointer - new_position);
250   }
251   // unknown origin.
252   return BAD_INPUT;
253 }
254
255 outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
256 {
257   FUNCDEF("read");
258   size_read = 0;
259   int ret = _real_file->read(to_fill, desired_size);
260   if (ret < 0)
261     return FAILURE;  // couldn't read the bytes.
262   _file_pointer += double(size_read);
263   size_read = ret;
264   return OKAY; 
265 }
266
267 outcome huge_file::write(const byte_array &to_write, int &size_written)
268 {
269   FUNCDEF("write");
270   size_written = 0;
271   int ret = _real_file->write(to_write);
272   if (ret < 0)
273     return FAILURE;  // couldn't write the bytes.
274   _file_pointer += double(size_written);
275   size_written = ret;
276   return OKAY;
277 }
278
279 } //namespace.
280