Merge branch 'release-2.140.101'
[feisty_meow.git] / nucleus / library / filesystem / huge_file.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : huge_file                                                         *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 2007-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "byte_filer.h"
16 #include "huge_file.h"
17
18 #include <basis/byte_array.h>
19 #include <basis/functions.h>
20 #include <basis/guards.h>
21
22 #include <stdio.h>
23 #ifndef __WIN32__
24   #include <sys/time.h>
25 #else
26   #include <time.h>
27 #endif
28
29 #undef LOG
30 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
31
32 //#define DEBUG_HUGE_FILE
33   // uncomment for noisy version.
34
35 using namespace basis;
36
37 namespace filesystem {
38
39 huge_file::huge_file(const astring &filename, const astring &permissions)
40 : _real_file(new byte_filer(filename, permissions)),
41   _file_pointer(0)
42 {
43 }
44
45 huge_file::~huge_file()
46 {
47   WHACK(_real_file);
48 }
49
50 const astring &huge_file::name() const { return _real_file->name(); }
51
52 void huge_file::flush() { _real_file->flush(); }
53
54 bool huge_file::truncate() { return _real_file->truncate(); }
55
56 double huge_file::length()
57 {
58   FUNCDEF("length");
59
60 //trying to read to see if we're past endpoint.
61 //  if this approach works, length may want to close and reopen file for
62 //  reading, since we can't add any bytes to it for writing just to find
63 //  the length out.
64
65
66   double save_posn = _file_pointer;
67   // skip to the beginning of the file so we can try to find the end.
68   _file_pointer = 0;
69   _real_file->seek(0, byte_filer::FROM_START);
70   size_t naive_size = _real_file->length();
71   if (naive_size < _real_file->file_size_limit()) {
72     // lucked out; we are within normal file size limitations.
73     seek(save_posn, byte_filer::FROM_START);
74     return double(naive_size);
75   }
76   
77   double best_highest = 0.0;  // the maximum we've safely seeked to.
78
79   size_t big_jump = byte_filer::file_size_limit();
80     // try with the largest possible seek at first.
81
82   while (true) {
83 #ifdef DEBUG_HUGE_FILE
84     LOG(a_sprintf("best highest=%.0f", best_highest));
85 #endif
86     // iterate until we reach our exit condition, which seems like it must
87     // always occur eventually unless the file is being monkeyed with.
88     bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
89 #ifdef DEBUG_HUGE_FILE
90     LOG(a_sprintf("  seek ret=%d", int(seek_ret)));
91 #endif
92     byte_array temp_bytes;
93     int bytes_read = _real_file->read(temp_bytes, 1);
94     if (bytes_read < 1)
95       seek_ret = false;
96 #ifdef DEBUG_HUGE_FILE
97     LOG(a_sprintf("  read %d bytes", bytes_read));
98 #endif
99     bool at_eof = _real_file->eof();
100 #ifdef DEBUG_HUGE_FILE
101     LOG(a_sprintf("  at_eof=%d", int(at_eof)));
102 #endif
103     if (seek_ret && !at_eof) {
104 #ifdef DEBUG_HUGE_FILE
105       LOG("seek worked, incrementing best highest and trying same jump again");
106 #endif
107       // the seek worked, so we'll just jump forward again.
108       best_highest += double(big_jump);
109       _file_pointer += double(big_jump);
110       continue;
111     } else if (seek_ret && at_eof) {
112 #ifdef DEBUG_HUGE_FILE
113       LOG("seek worked but found eof exactly.");
114 #endif
115       // the seek did worked, but apparently we've also found the end point.
116       best_highest += double(big_jump);
117       _file_pointer += double(big_jump);
118       break;
119     } else {
120       // that seek was too large, so we need to back down and try a smaller
121       // seek size.
122 #ifdef DEBUG_HUGE_FILE
123       LOG("seek failed, going back to best highest and trying same jump again");
124 #endif
125       _file_pointer = 0;
126       _real_file->seek(0, byte_filer::FROM_START); 
127       outcome worked = seek(best_highest, byte_filer::FROM_START);
128         // this uses our version to position at large sizes.
129       if (worked != OKAY) {
130         // this is a bad failure; it says that the file size changed or
131         // something malfunctioned.  we should always be able to get back to
132         // the last good size we found if the file is static.
133         LOG(a_sprintf("failed to seek back to best highest %.0f on ",
134             best_highest) + _real_file->name());
135         // try to repair our ideas about the file by starting the process
136         // over.
137 //hmmm: count the number of times restarted and bail after N.
138         seek_ret = _real_file->seek(0, byte_filer::FROM_START);
139         _file_pointer = 0;
140         if (!seek_ret) {
141           // the heck with this.  we can't even go back to the start.  this
142           // file seems to be screwed up now.
143           LOG(astring("failed to seek back to start of file!  on ")
144               + _real_file->name());
145           return 0;
146         }
147         // reset the rest of the positions for our failed attempt to return
148         // to what we already thought was good.
149         _file_pointer = 0;
150         big_jump = byte_filer::file_size_limit();
151         best_highest = 0;
152         continue;
153       }
154       // okay, nothing bad happened when we went back to our last good point.
155       if (big_jump <= 0) {
156         // success in finding the smallest place that we can't seek between.
157 #ifdef DEBUG_HUGE_FILE
158         LOG("got down to smallest big jump, 0!");
159 #endif
160         break;
161       }
162       // formula expects that the maximum file size is a power of 2.
163       big_jump /= 2;
164 #ifdef DEBUG_HUGE_FILE
165       LOG(a_sprintf("restraining big jump down to %u.", big_jump));
166 #endif
167       continue;
168     }
169   }
170
171   // go back to where we started out.
172   seek(0, byte_filer::FROM_START);
173   seek(save_posn, byte_filer::FROM_CURRENT);
174 #ifdef DEBUG_HUGE_FILE
175   LOG(a_sprintf("saying file len is %.0f.", best_highest + 1.0));
176 #endif
177   return best_highest + 1.0;
178 }
179
180 bool huge_file::good() const { return _real_file->good(); }
181
182 bool huge_file::eof() const { return _real_file->eof(); }
183
184 outcome huge_file::move_to(double absolute_posn)
185 {
186 #ifdef DEBUG_HUGE_FILE
187   FUNCDEF("move_to");
188 #endif
189   double difference = absolute_posn - _file_pointer;
190     // calculate the size we want to offset.
191 #ifdef DEBUG_HUGE_FILE
192   LOG(a_sprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
193       absolute_posn, difference, _file_pointer));
194 #endif
195   // if we're at the same place, we don't have to do anything.
196   if (difference < 0.000001) {
197 #ifdef DEBUG_HUGE_FILE
198     LOG("difference was minimal, saying we're done.");
199 #endif
200     return OKAY;
201   }
202   while (absolute_value(difference) > 0.000001) {
203     double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
204         absolute_value(difference));
205     if (difference < 0)
206       seek_size *= -1.0;  // flip sign of seek.
207 #ifdef DEBUG_HUGE_FILE
208     LOG(a_sprintf("  seeksize=%d", int(seek_size)));
209 #endif
210     bool seek_ret = _real_file->seek(int(seek_size),
211         byte_filer::FROM_CURRENT);
212     if (!seek_ret) {
213 #ifdef DEBUG_HUGE_FILE
214       LOG(a_sprintf("failed to seek %d from current", int(seek_size)));
215 #endif
216       return FAILURE;  // seek failed somehow.
217     }
218     _file_pointer += seek_size;
219 #ifdef DEBUG_HUGE_FILE
220     LOG(a_sprintf("  now_filepoint=%.0f", _file_pointer));
221 #endif
222     difference = absolute_posn - _file_pointer;
223 #ifdef DEBUG_HUGE_FILE
224     LOG(a_sprintf("  now_difference=%.0f", difference));
225 #endif
226   }
227   return OKAY;
228 }
229
230 outcome huge_file::seek(double new_position, byte_filer::origins origin)
231 {
232 #ifdef DEBUG_HUGE_FILE
233   FUNCDEF("seek");
234 #endif
235   if (origin == byte_filer::FROM_CURRENT) {
236     return move_to(_file_pointer + new_position);
237   } else if (origin == byte_filer::FROM_START) {
238     _file_pointer = 0;
239     if (!_real_file->seek(0, byte_filer::FROM_START))
240       return FAILURE;
241     return move_to(new_position);
242   } else if (origin == byte_filer::FROM_END) {
243 #ifdef DEBUG_HUGE_FILE
244     LOG("into precarious FROM_END case.");
245 #endif
246     double file_len = length();  // could take a scary long time possibly.
247 #ifdef DEBUG_HUGE_FILE
248     LOG(a_sprintf("  FROM_END got len %.0f.", file_len));
249 #endif
250     _file_pointer = file_len;
251       // it's safe, although not efficient, for us to call the length()
252       // method here.  our current version of length() uses the byte_filer's
253       // seek method directly and only FROM_CURRENT and FROM_START from this
254       // class's seek method.
255     _real_file->seek(0, byte_filer::FROM_END);
256     return move_to(_file_pointer - new_position);
257   }
258   // unknown origin.
259   return BAD_INPUT;
260 }
261
262 outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
263 {
264   FUNCDEF("read");
265   size_read = 0;
266   int ret = _real_file->read(to_fill, desired_size);
267   if (ret < 0)
268     return FAILURE;  // couldn't read the bytes.
269   _file_pointer += double(size_read);
270   size_read = ret;
271   return OKAY; 
272 }
273
274 outcome huge_file::write(const byte_array &to_write, int &size_written)
275 {
276   FUNCDEF("write");
277   size_written = 0;
278   int ret = _real_file->write(to_write);
279   if (ret < 0)
280     return FAILURE;  // couldn't write the bytes.
281   _file_pointer += double(size_written);
282   size_written = ret;
283   return OKAY;
284 }
285
286 basis::outcome huge_file::touch()
287 {
288   FUNCDEF("touch")
289   if (filename(_real_file->name()).exists()) {
290     // file exists, so just update time.
291 #ifndef __WIN32__
292     int ret = utimes(_real_file->name().observe(), NULL_POINTER);
293     if (ret != 0)
294       return FAILURE;
295 #else
296     // open the file, although the function says create in its name...
297     HANDLE f = CreateFile(_real_file->name().observe(),
298         GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
299         NULL_POINTER, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL_POINTER);
300     if (!f) {
301       LOG(a_sprintf("failed to open file %s", _real_file->name().observe()));
302       return FAILURE;
303     }
304     // get current system time in UTC.
305     SYSTEMTIME *st = new SYSTEMTIME;
306     GetSystemTime(st);
307     // convert system time into file time.
308     FILETIME *t = new FILETIME;
309     SystemTimeToFileTime(st, t);
310     // set the file's time.
311     SetFileTime(f, NULL_POINTER, t, t);
312 #endif
313   } else {
314     // file doesn't exist yet.
315     byte_array junk(1);
316     int written;
317     outcome ret = write(junk, written);
318     if (ret != OKAY) ret;
319     if (!truncate())
320       return FAILURE;
321   }
322   return OKAY;
323 }
324
325 } //namespace.
326