updates from orpheus for windoze build
[feisty_meow.git] / nucleus / library / filesystem / huge_file.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : huge_file                                                         *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 2007-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "byte_filer.h"
16 #include "huge_file.h"
17
18 #include <basis/byte_array.h>
19 #include <basis/functions.h>
20 #include <basis/guards.h>
21 #include <application/windoze_helper.h>
22
23 #include <stdio.h>
24 //#ifndef __WIN32__
25 #include <sys/time.h>
26 //#else
27 //  #include <time.h>
28 //#endif
29
30 #undef LOG
31 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
32
33 //#define DEBUG_HUGE_FILE
34   // uncomment for noisy version.
35
36 using namespace basis;
37
38 namespace filesystem {
39
40 huge_file::huge_file(const astring &filename, const astring &permissions)
41 : _real_file(new byte_filer(filename, permissions)),
42   _file_pointer(0)
43 {
44 }
45
46 huge_file::~huge_file()
47 {
48   WHACK(_real_file);
49 }
50
51 const astring &huge_file::name() const { return _real_file->name(); }
52
53 void huge_file::flush() { _real_file->flush(); }
54
55 bool huge_file::truncate() { return _real_file->truncate(); }
56
57 double huge_file::length()
58 {
59   FUNCDEF("length");
60
61 //trying to read to see if we're past endpoint.
62 //  if this approach works, length may want to close and reopen file for
63 //  reading, since we can't add any bytes to it for writing just to find
64 //  the length out.
65
66
67   double save_posn = _file_pointer;
68   // skip to the beginning of the file so we can try to find the end.
69   _file_pointer = 0;
70   _real_file->seek(0, byte_filer::FROM_START);
71   size_t naive_size = _real_file->length();
72   if (naive_size < _real_file->file_size_limit()) {
73     // lucked out; we are within normal file size limitations.
74     seek(save_posn, byte_filer::FROM_START);
75     return double(naive_size);
76   }
77   
78   double best_highest = 0.0;  // the maximum we've safely seeked to.
79
80   size_t big_jump = byte_filer::file_size_limit();
81     // try with the largest possible seek at first.
82
83   while (true) {
84 #ifdef DEBUG_HUGE_FILE
85     LOG(a_sprintf("best highest=%.0f", best_highest));
86 #endif
87     // iterate until we reach our exit condition, which seems like it must
88     // always occur eventually unless the file is being monkeyed with.
89     bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
90 #ifdef DEBUG_HUGE_FILE
91     LOG(a_sprintf("  seek ret=%d", int(seek_ret)));
92 #endif
93     byte_array temp_bytes;
94     int bytes_read = _real_file->read(temp_bytes, 1);
95     if (bytes_read < 1)
96       seek_ret = false;
97 #ifdef DEBUG_HUGE_FILE
98     LOG(a_sprintf("  read %d bytes", bytes_read));
99 #endif
100     bool at_eof = _real_file->eof();
101 #ifdef DEBUG_HUGE_FILE
102     LOG(a_sprintf("  at_eof=%d", int(at_eof)));
103 #endif
104     if (seek_ret && !at_eof) {
105 #ifdef DEBUG_HUGE_FILE
106       LOG("seek worked, incrementing best highest and trying same jump again");
107 #endif
108       // the seek worked, so we'll just jump forward again.
109       best_highest += double(big_jump);
110       _file_pointer += double(big_jump);
111       continue;
112     } else if (seek_ret && at_eof) {
113 #ifdef DEBUG_HUGE_FILE
114       LOG("seek worked but found eof exactly.");
115 #endif
116       // the seek did worked, but apparently we've also found the end point.
117       best_highest += double(big_jump);
118       _file_pointer += double(big_jump);
119       break;
120     } else {
121       // that seek was too large, so we need to back down and try a smaller
122       // seek size.
123 #ifdef DEBUG_HUGE_FILE
124       LOG("seek failed, going back to best highest and trying same jump again");
125 #endif
126       _file_pointer = 0;
127       _real_file->seek(0, byte_filer::FROM_START); 
128       outcome worked = seek(best_highest, byte_filer::FROM_START);
129         // this uses our version to position at large sizes.
130       if (worked != OKAY) {
131         // this is a bad failure; it says that the file size changed or
132         // something malfunctioned.  we should always be able to get back to
133         // the last good size we found if the file is static.
134         LOG(a_sprintf("failed to seek back to best highest %.0f on ",
135             best_highest) + _real_file->name());
136         // try to repair our ideas about the file by starting the process
137         // over.
138 //hmmm: count the number of times restarted and bail after N.
139         seek_ret = _real_file->seek(0, byte_filer::FROM_START);
140         _file_pointer = 0;
141         if (!seek_ret) {
142           // the heck with this.  we can't even go back to the start.  this
143           // file seems to be screwed up now.
144           LOG(astring("failed to seek back to start of file!  on ")
145               + _real_file->name());
146           return 0;
147         }
148         // reset the rest of the positions for our failed attempt to return
149         // to what we already thought was good.
150         _file_pointer = 0;
151         big_jump = byte_filer::file_size_limit();
152         best_highest = 0;
153         continue;
154       }
155       // okay, nothing bad happened when we went back to our last good point.
156       if (big_jump <= 0) {
157         // success in finding the smallest place that we can't seek between.
158 #ifdef DEBUG_HUGE_FILE
159         LOG("got down to smallest big jump, 0!");
160 #endif
161         break;
162       }
163       // formula expects that the maximum file size is a power of 2.
164       big_jump /= 2;
165 #ifdef DEBUG_HUGE_FILE
166       LOG(a_sprintf("restraining big jump down to %u.", big_jump));
167 #endif
168       continue;
169     }
170   }
171
172   // go back to where we started out.
173   seek(0, byte_filer::FROM_START);
174   seek(save_posn, byte_filer::FROM_CURRENT);
175 #ifdef DEBUG_HUGE_FILE
176   LOG(a_sprintf("saying file len is %.0f.", best_highest + 1.0));
177 #endif
178   return best_highest + 1.0;
179 }
180
181 bool huge_file::good() const { return _real_file->good(); }
182
183 bool huge_file::eof() const { return _real_file->eof(); }
184
185 outcome huge_file::move_to(double absolute_posn)
186 {
187 #ifdef DEBUG_HUGE_FILE
188   FUNCDEF("move_to");
189 #endif
190   double difference = absolute_posn - _file_pointer;
191     // calculate the size we want to offset.
192 #ifdef DEBUG_HUGE_FILE
193   LOG(a_sprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
194       absolute_posn, difference, _file_pointer));
195 #endif
196   // if we're at the same place, we don't have to do anything.
197   if (difference < 0.000001) {
198 #ifdef DEBUG_HUGE_FILE
199     LOG("difference was minimal, saying we're done.");
200 #endif
201     return OKAY;
202   }
203   while (absolute_value(difference) > 0.000001) {
204     double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
205         absolute_value(difference));
206     if (difference < 0)
207       seek_size *= -1.0;  // flip sign of seek.
208 #ifdef DEBUG_HUGE_FILE
209     LOG(a_sprintf("  seeksize=%d", int(seek_size)));
210 #endif
211     bool seek_ret = _real_file->seek(int(seek_size),
212         byte_filer::FROM_CURRENT);
213     if (!seek_ret) {
214 #ifdef DEBUG_HUGE_FILE
215       LOG(a_sprintf("failed to seek %d from current", int(seek_size)));
216 #endif
217       return FAILURE;  // seek failed somehow.
218     }
219     _file_pointer += seek_size;
220 #ifdef DEBUG_HUGE_FILE
221     LOG(a_sprintf("  now_filepoint=%.0f", _file_pointer));
222 #endif
223     difference = absolute_posn - _file_pointer;
224 #ifdef DEBUG_HUGE_FILE
225     LOG(a_sprintf("  now_difference=%.0f", difference));
226 #endif
227   }
228   return OKAY;
229 }
230
231 outcome huge_file::seek(double new_position, byte_filer::origins origin)
232 {
233 #ifdef DEBUG_HUGE_FILE
234   FUNCDEF("seek");
235 #endif
236   if (origin == byte_filer::FROM_CURRENT) {
237     return move_to(_file_pointer + new_position);
238   } else if (origin == byte_filer::FROM_START) {
239     _file_pointer = 0;
240     if (!_real_file->seek(0, byte_filer::FROM_START))
241       return FAILURE;
242     return move_to(new_position);
243   } else if (origin == byte_filer::FROM_END) {
244 #ifdef DEBUG_HUGE_FILE
245     LOG("into precarious FROM_END case.");
246 #endif
247     double file_len = length();  // could take a scary long time possibly.
248 #ifdef DEBUG_HUGE_FILE
249     LOG(a_sprintf("  FROM_END got len %.0f.", file_len));
250 #endif
251     _file_pointer = file_len;
252       // it's safe, although not efficient, for us to call the length()
253       // method here.  our current version of length() uses the byte_filer's
254       // seek method directly and only FROM_CURRENT and FROM_START from this
255       // class's seek method.
256     _real_file->seek(0, byte_filer::FROM_END);
257     return move_to(_file_pointer - new_position);
258   }
259   // unknown origin.
260   return BAD_INPUT;
261 }
262
263 outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
264 {
265   FUNCDEF("read");
266   size_read = 0;
267   int ret = _real_file->read(to_fill, desired_size);
268   if (ret < 0)
269     return FAILURE;  // couldn't read the bytes.
270   _file_pointer += double(size_read);
271   size_read = ret;
272   return OKAY; 
273 }
274
275 outcome huge_file::write(const byte_array &to_write, int &size_written)
276 {
277   FUNCDEF("write");
278   size_written = 0;
279   int ret = _real_file->write(to_write);
280   if (ret < 0)
281     return FAILURE;  // couldn't write the bytes.
282   _file_pointer += double(size_written);
283   size_written = ret;
284   return OKAY;
285 }
286
287 basis::outcome huge_file::touch()
288 {
289   FUNCDEF("touch")
290   if (filename(_real_file->name()).exists()) {
291     // file exists, so just update time.
292 #ifndef __WIN32__
293     int ret = utimes(_real_file->name().observe(), NULL_POINTER);
294     if (ret != 0)
295       return FAILURE;
296 #else
297     // open the file, although the function says create in its name...
298     HANDLE f = CreateFile(_real_file->name().observe(),
299         GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
300         NULL_POINTER, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL_POINTER);
301     if (!f) {
302       LOG(a_sprintf("failed to open file %s", _real_file->name().observe()));
303       return FAILURE;
304     }
305     // get current system time in UTC.
306     SYSTEMTIME *st = new SYSTEMTIME;
307     GetSystemTime(st);
308     // convert system time into file time.
309     FILETIME *t = new FILETIME;
310     SystemTimeToFileTime(st, t);
311     // set the file's time.
312     SetFileTime(f, NULL_POINTER, t, t);
313 #endif
314   } else {
315     // file doesn't exist yet.
316     byte_array junk(1);
317     int written;
318     outcome ret = write(junk, written);
319     if (ret != OKAY) ret;
320     if (!truncate())
321       return FAILURE;
322   }
323   return OKAY;
324 }
325
326 } //namespace.
327