feisty meow concerns codebase  2.140
huge_file.cpp
Go to the documentation of this file.
1 /*****************************************************************************\
2 * *
3 * Name : huge_file *
4 * Author : Chris Koeritz *
5 * *
6 *******************************************************************************
7 * Copyright (c) 2007-$now By Author. This program is free software; you can *
8 * redistribute it and/or modify it under the terms of the GNU General Public *
9 * License as published by the Free Software Foundation; either version 2 of *
10 * the License or (at your option) any later version. This is online at: *
11 * http://www.fsf.org/copyleft/gpl.html *
12 * Please send any updates to: fred@gruntose.com *
13 \*****************************************************************************/
14 
15 #include "byte_filer.h"
16 #include "huge_file.h"
17 
18 #include <basis/byte_array.h>
19 #include <basis/functions.h>
20 #include <basis/guards.h>
22 
23 #include <stdio.h>
24 //#ifndef __WIN32__
25 #include <sys/time.h>
26 //#else
27 // #include <time.h>
28 //#endif
29 
30 #undef LOG
31 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
32 
33 //#define DEBUG_HUGE_FILE
34  // uncomment for noisy version.
35 
36 using namespace basis;
37 
38 namespace filesystem {
39 
40 huge_file::huge_file(const astring &filename, const astring &permissions)
41 : _real_file(new byte_filer(filename, permissions)),
42  _file_pointer(0)
43 {
44 }
45 
47 {
48  WHACK(_real_file);
49 }
50 
51 const astring &huge_file::name() const { return _real_file->name(); }
52 
53 void huge_file::flush() { _real_file->flush(); }
54 
55 bool huge_file::truncate() { return _real_file->truncate(); }
56 
58 {
59  FUNCDEF("length");
60 
61 //trying to read to see if we're past endpoint.
62 // if this approach works, length may want to close and reopen file for
63 // reading, since we can't add any bytes to it for writing just to find
64 // the length out.
65 
66 
67  double save_posn = _file_pointer;
68  // skip to the beginning of the file so we can try to find the end.
69  _file_pointer = 0;
70  _real_file->seek(0, byte_filer::FROM_START);
71  size_t naive_size = _real_file->length();
72  if (naive_size < _real_file->file_size_limit()) {
73  // lucked out; we are within normal file size limitations.
74  seek(save_posn, byte_filer::FROM_START);
75  return double(naive_size);
76  }
77 
78  double best_highest = 0.0; // the maximum we've safely seeked to.
79 
80  size_t big_jump = byte_filer::file_size_limit();
81  // try with the largest possible seek at first.
82 
83  while (true) {
84 #ifdef DEBUG_HUGE_FILE
85  LOG(a_sprintf("best highest=%.0f", best_highest));
86 #endif
87  // iterate until we reach our exit condition, which seems like it must
88  // always occur eventually unless the file is being monkeyed with.
89  bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
90 #ifdef DEBUG_HUGE_FILE
91  LOG(a_sprintf(" seek ret=%d", int(seek_ret)));
92 #endif
93  byte_array temp_bytes;
94  int bytes_read = _real_file->read(temp_bytes, 1);
95  if (bytes_read < 1)
96  seek_ret = false;
97 #ifdef DEBUG_HUGE_FILE
98  LOG(a_sprintf(" read %d bytes", bytes_read));
99 #endif
100  bool at_eof = _real_file->eof();
101 #ifdef DEBUG_HUGE_FILE
102  LOG(a_sprintf(" at_eof=%d", int(at_eof)));
103 #endif
104  if (seek_ret && !at_eof) {
105 #ifdef DEBUG_HUGE_FILE
106  LOG("seek worked, incrementing best highest and trying same jump again");
107 #endif
108  // the seek worked, so we'll just jump forward again.
109  best_highest += double(big_jump);
110  _file_pointer += double(big_jump);
111  continue;
112  } else if (seek_ret && at_eof) {
113 #ifdef DEBUG_HUGE_FILE
114  LOG("seek worked but found eof exactly.");
115 #endif
116  // the seek did worked, but apparently we've also found the end point.
117  best_highest += double(big_jump);
118  _file_pointer += double(big_jump);
119  break;
120  } else {
121  // that seek was too large, so we need to back down and try a smaller
122  // seek size.
123 #ifdef DEBUG_HUGE_FILE
124  LOG("seek failed, going back to best highest and trying same jump again");
125 #endif
126  _file_pointer = 0;
127  _real_file->seek(0, byte_filer::FROM_START);
128  outcome worked = seek(best_highest, byte_filer::FROM_START);
129  // this uses our version to position at large sizes.
130  if (worked != OKAY) {
131  // this is a bad failure; it says that the file size changed or
132  // something malfunctioned. we should always be able to get back to
133  // the last good size we found if the file is static.
134  LOG(a_sprintf("failed to seek back to best highest %.0f on ",
135  best_highest) + _real_file->name());
136  // try to repair our ideas about the file by starting the process
137  // over.
138 //hmmm: count the number of times restarted and bail after N.
139  seek_ret = _real_file->seek(0, byte_filer::FROM_START);
140  _file_pointer = 0;
141  if (!seek_ret) {
142  // the heck with this. we can't even go back to the start. this
143  // file seems to be screwed up now.
144  LOG(astring("failed to seek back to start of file! on ")
145  + _real_file->name());
146  return 0;
147  }
148  // reset the rest of the positions for our failed attempt to return
149  // to what we already thought was good.
150  _file_pointer = 0;
151  big_jump = byte_filer::file_size_limit();
152  best_highest = 0;
153  continue;
154  }
155  // okay, nothing bad happened when we went back to our last good point.
156  if (big_jump <= 0) {
157  // success in finding the smallest place that we can't seek between.
158 #ifdef DEBUG_HUGE_FILE
159  LOG("got down to smallest big jump, 0!");
160 #endif
161  break;
162  }
163  // formula expects that the maximum file size is a power of 2.
164  big_jump /= 2;
165 #ifdef DEBUG_HUGE_FILE
166  LOG(a_sprintf("restraining big jump down to %u.", big_jump));
167 #endif
168  continue;
169  }
170  }
171 
172  // go back to where we started out.
174  seek(save_posn, byte_filer::FROM_CURRENT);
175 #ifdef DEBUG_HUGE_FILE
176  LOG(a_sprintf("saying file len is %.0f.", best_highest + 1.0));
177 #endif
178  return best_highest + 1.0;
179 }
180 
181 bool huge_file::good() const { return _real_file->good(); }
182 
183 bool huge_file::eof() const { return _real_file->eof(); }
184 
185 outcome huge_file::move_to(double absolute_posn)
186 {
187 #ifdef DEBUG_HUGE_FILE
188  FUNCDEF("move_to");
189 #endif
190  double difference = absolute_posn - _file_pointer;
191  // calculate the size we want to offset.
192 #ifdef DEBUG_HUGE_FILE
193  LOG(a_sprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
194  absolute_posn, difference, _file_pointer));
195 #endif
196  // if we're at the same place, we don't have to do anything.
197  if (difference < 0.000001) {
198 #ifdef DEBUG_HUGE_FILE
199  LOG("difference was minimal, saying we're done.");
200 #endif
201  return OKAY;
202  }
203  while (absolute_value(difference) > 0.000001) {
204  double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
205  absolute_value(difference));
206  if (difference < 0)
207  seek_size *= -1.0; // flip sign of seek.
208 #ifdef DEBUG_HUGE_FILE
209  LOG(a_sprintf(" seeksize=%d", int(seek_size)));
210 #endif
211  bool seek_ret = _real_file->seek(int(seek_size),
213  if (!seek_ret) {
214 #ifdef DEBUG_HUGE_FILE
215  LOG(a_sprintf("failed to seek %d from current", int(seek_size)));
216 #endif
217  return FAILURE; // seek failed somehow.
218  }
219  _file_pointer += seek_size;
220 #ifdef DEBUG_HUGE_FILE
221  LOG(a_sprintf(" now_filepoint=%.0f", _file_pointer));
222 #endif
223  difference = absolute_posn - _file_pointer;
224 #ifdef DEBUG_HUGE_FILE
225  LOG(a_sprintf(" now_difference=%.0f", difference));
226 #endif
227  }
228  return OKAY;
229 }
230 
231 outcome huge_file::seek(double new_position, byte_filer::origins origin)
232 {
233 #ifdef DEBUG_HUGE_FILE
234  FUNCDEF("seek");
235 #endif
236  if (origin == byte_filer::FROM_CURRENT) {
237  return move_to(_file_pointer + new_position);
238  } else if (origin == byte_filer::FROM_START) {
239  _file_pointer = 0;
240  if (!_real_file->seek(0, byte_filer::FROM_START))
241  return FAILURE;
242  return move_to(new_position);
243  } else if (origin == byte_filer::FROM_END) {
244 #ifdef DEBUG_HUGE_FILE
245  LOG("into precarious FROM_END case.");
246 #endif
247  double file_len = length(); // could take a scary long time possibly.
248 #ifdef DEBUG_HUGE_FILE
249  LOG(a_sprintf(" FROM_END got len %.0f.", file_len));
250 #endif
251  _file_pointer = file_len;
252  // it's safe, although not efficient, for us to call the length()
253  // method here. our current version of length() uses the byte_filer's
254  // seek method directly and only FROM_CURRENT and FROM_START from this
255  // class's seek method.
256  _real_file->seek(0, byte_filer::FROM_END);
257  return move_to(_file_pointer - new_position);
258  }
259  // unknown origin.
260  return BAD_INPUT;
261 }
262 
263 outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
264 {
265  FUNCDEF("read");
266  size_read = 0;
267  int ret = _real_file->read(to_fill, desired_size);
268  if (ret < 0)
269  return FAILURE; // couldn't read the bytes.
270  _file_pointer += double(size_read);
271  size_read = ret;
272  return OKAY;
273 }
274 
275 outcome huge_file::write(const byte_array &to_write, int &size_written)
276 {
277  FUNCDEF("write");
278  size_written = 0;
279  int ret = _real_file->write(to_write);
280  if (ret < 0)
281  return FAILURE; // couldn't write the bytes.
282  _file_pointer += double(size_written);
283  size_written = ret;
284  return OKAY;
285 }
286 
288 {
289  FUNCDEF("touch")
290  if (filename(_real_file->name()).exists()) {
291  // file exists, so just update time.
292 #ifndef __WIN32__
293  int ret = utimes(_real_file->name().observe(), NULL_POINTER);
294  if (ret != 0)
295  return FAILURE;
296 #else
297  // open the file, although the function says create in its name...
298  HANDLE f = CreateFile(_real_file->name().observe(),
299  GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
300  NULL_POINTER, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL_POINTER);
301  if (!f) {
302  LOG(a_sprintf("failed to open file %s", _real_file->name().observe()));
303  return FAILURE;
304  }
305  // get current system time in UTC.
306  SYSTEMTIME *st = new SYSTEMTIME;
307  GetSystemTime(st);
308  // convert system time into file time.
309  FILETIME *t = new FILETIME;
310  SystemTimeToFileTime(st, t);
311  // set the file's time.
312  SetFileTime(f, NULL_POINTER, t, t);
313 #endif
314  } else {
315  // file doesn't exist yet.
316  byte_array junk(1);
317  int written;
318  outcome ret = write(junk, written);
319  if (ret != OKAY) ret;
320  if (!truncate())
321  return FAILURE;
322  }
323  return OKAY;
324 }
325 
326 } //namespace.
327 
a_sprintf is a specialization of astring that provides printf style support.
Definition: astring.h:440
Provides a dynamically resizable ASCII character string.
Definition: astring.h:35
virtual const char * observe() const
observes the underlying pointer to the zero-terminated string.
Definition: astring.cpp:140
A very common template for a dynamic array of bytes.
Definition: byte_array.h:36
Outcomes describe the state of completion for an operation.
Definition: outcome.h:31
Provides file management services using the standard I/O support.
Definition: byte_filer.h:32
static size_t file_size_limit()
returns the maximum size that seek and length can support.
Definition: byte_filer.cpp:84
int write(const basis::abyte *buffer, int buffer_size)
writes "buffer_size" bytes into the file from "buffer".
Definition: byte_filer.cpp:126
@ FROM_START
offset is from the beginning of the file.
Definition: byte_filer.h:94
@ FROM_CURRENT
offset is from current cursor position.
Definition: byte_filer.h:96
@ FROM_END
offset is from the end of the file.
Definition: byte_filer.h:95
bool seek(int where, origins origin=FROM_START)
places the cursor in the file at "where", based on the "origin".
Definition: byte_filer.cpp:187
const basis::astring & name() const
returns the file name that the object is operating on.
Definition: byte_filer.cpp:82
int read(basis::abyte *buffer, int buffer_size)
reads "buffer_size" bytes from the file into "buffer".
Definition: byte_filer.cpp:123
bool eof()
returns true if the cursor is at (or after) the end of the file.
Definition: byte_filer.cpp:121
void flush()
forces any pending writes to actually be saved to the file.
Definition: byte_filer.cpp:165
size_t length()
returns the file's total length, in bytes.
Definition: byte_filer.cpp:140
bool truncate()
truncates the file after the current position.
Definition: byte_filer.cpp:171
bool good()
returns true if the file seems to be in the appropriate desired state.
Definition: byte_filer.cpp:103
Provides operations commonly needed on file names.
Definition: filename.h:64
bool exists() const
returns true if the file exists.
Definition: filename.cpp:426
double length()
expensive operation accesses the file to find length.
Definition: huge_file.cpp:57
bool truncate()
truncates the file after the current position.
Definition: huge_file.cpp:55
basis::outcome move_to(double absolute_posn)
simpler seek just goes from current location to "absolute_posn".
Definition: huge_file.cpp:185
bool eof() const
reports when the file pointer has reached the end of the file.
Definition: huge_file.cpp:183
basis::outcome seek(double new_position, byte_filer::origins origin=byte_filer::FROM_CURRENT)
move the file pointer to "new_position" if possible.
Definition: huge_file.cpp:231
basis::outcome touch()
Definition: huge_file.cpp:287
const basis::astring & name() const
returns the name of the file this operates on.
Definition: huge_file.cpp:51
basis::outcome write(const basis::byte_array &to_write, int &size_written)
stores the array "to_write" into the file.
Definition: huge_file.cpp:275
bool good() const
reports if the file was opened successfully.
Definition: huge_file.cpp:181
void flush()
forces any pending writes to actually be saved to the file.
Definition: huge_file.cpp:53
basis::outcome read(basis::byte_array &to_fill, int desired_size, int &size_read)
reads "desired_size" into "to_fill" if possible.
Definition: huge_file.cpp:263
#define NULL_POINTER
The value representing a pointer to nothing.
Definition: definitions.h:32
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition: enhance_cpp.h:57
#define LOG(to_print)
Definition: huge_file.cpp:31
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
void WHACK(contents *&ptr)
deletion with clearing of the pointer.
Definition: functions.h:121
type minimum(type a, type b)
minimum returns the lesser of two values.
Definition: functions.h:29
type absolute_value(type a)
Returns a if a is non-negative, and returns -a otherwise.
Definition: functions.h:33
A platform independent way to obtain the timestamp of a file.
Definition: byte_filer.cpp:37
Aids in achievement of platform independence.
void * HANDLE