feisty meow concerns codebase 2.140
huge_file.cpp
Go to the documentation of this file.
1/*****************************************************************************\
2* *
3* Name : huge_file *
4* Author : Chris Koeritz *
5* *
6*******************************************************************************
7* Copyright (c) 2007-$now By Author. This program is free software; you can *
8* redistribute it and/or modify it under the terms of the GNU General Public *
9* License as published by the Free Software Foundation; either version 2 of *
10* the License or (at your option) any later version. This is online at: *
11* http://www.fsf.org/copyleft/gpl.html *
12* Please send any updates to: fred@gruntose.com *
13\*****************************************************************************/
14
15#include "byte_filer.h"
16#include "huge_file.h"
17
18#include <basis/byte_array.h>
19#include <basis/functions.h>
20#include <basis/guards.h>
22
23#include <stdio.h>
24//#ifndef __WIN32__
25#include <sys/time.h>
26//#else
27// #include <time.h>
28//#endif
29
// replace any previously defined LOG with a local version that prints to
// standard output, prefixed with the class name and the current function name.
// the expansion refers to "func", so a call site needs that name in scope
// (the functions in this file set it up through FUNCDEF before logging).
#undef LOG
#define LOG(to_print) \
  printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
32
33//#define DEBUG_HUGE_FILE
34 // uncomment for noisy version.
35
36using namespace basis;
37
38namespace filesystem {
39
40huge_file::huge_file(const astring &filename, const astring &permissions)
41: _real_file(new byte_filer(filename, permissions)),
42 _file_pointer(0)
43{
44}
45
47{
48 WHACK(_real_file);
49}
50
51const astring &huge_file::name() const { return _real_file->name(); }
52
53void huge_file::flush() { _real_file->flush(); }
54
55bool huge_file::truncate() { return _real_file->truncate(); }
56
58{
59 FUNCDEF("length");
60
61//trying to read to see if we're past endpoint.
62// if this approach works, length may want to close and reopen file for
63// reading, since we can't add any bytes to it for writing just to find
64// the length out.
65
66
67 double save_posn = _file_pointer;
68 // skip to the beginning of the file so we can try to find the end.
69 _file_pointer = 0;
70 _real_file->seek(0, byte_filer::FROM_START);
71 size_t naive_size = _real_file->length();
72 if (naive_size < _real_file->file_size_limit()) {
73 // lucked out; we are within normal file size limitations.
74 seek(save_posn, byte_filer::FROM_START);
75 return double(naive_size);
76 }
77
78 double best_highest = 0.0; // the maximum we've safely seeked to.
79
80 size_t big_jump = byte_filer::file_size_limit();
81 // try with the largest possible seek at first.
82
83 while (true) {
84#ifdef DEBUG_HUGE_FILE
85 LOG(a_sprintf("best highest=%.0f", best_highest));
86#endif
87 // iterate until we reach our exit condition, which seems like it must
88 // always occur eventually unless the file is being monkeyed with.
89 bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
90#ifdef DEBUG_HUGE_FILE
91 LOG(a_sprintf(" seek ret=%d", int(seek_ret)));
92#endif
93 byte_array temp_bytes;
94 int bytes_read = _real_file->read(temp_bytes, 1);
95 if (bytes_read < 1)
96 seek_ret = false;
97#ifdef DEBUG_HUGE_FILE
98 LOG(a_sprintf(" read %d bytes", bytes_read));
99#endif
100 bool at_eof = _real_file->eof();
101#ifdef DEBUG_HUGE_FILE
102 LOG(a_sprintf(" at_eof=%d", int(at_eof)));
103#endif
104 if (seek_ret && !at_eof) {
105#ifdef DEBUG_HUGE_FILE
106 LOG("seek worked, incrementing best highest and trying same jump again");
107#endif
108 // the seek worked, so we'll just jump forward again.
109 best_highest += double(big_jump);
110 _file_pointer += double(big_jump);
111 continue;
112 } else if (seek_ret && at_eof) {
113#ifdef DEBUG_HUGE_FILE
114 LOG("seek worked but found eof exactly.");
115#endif
116 // the seek did worked, but apparently we've also found the end point.
117 best_highest += double(big_jump);
118 _file_pointer += double(big_jump);
119 break;
120 } else {
121 // that seek was too large, so we need to back down and try a smaller
122 // seek size.
123#ifdef DEBUG_HUGE_FILE
124 LOG("seek failed, going back to best highest and trying same jump again");
125#endif
126 _file_pointer = 0;
127 _real_file->seek(0, byte_filer::FROM_START);
128 outcome worked = seek(best_highest, byte_filer::FROM_START);
129 // this uses our version to position at large sizes.
130 if (worked != OKAY) {
131 // this is a bad failure; it says that the file size changed or
132 // something malfunctioned. we should always be able to get back to
133 // the last good size we found if the file is static.
134 LOG(a_sprintf("failed to seek back to best highest %.0f on ",
135 best_highest) + _real_file->name());
136 // try to repair our ideas about the file by starting the process
137 // over.
138//hmmm: count the number of times restarted and bail after N.
139 seek_ret = _real_file->seek(0, byte_filer::FROM_START);
140 _file_pointer = 0;
141 if (!seek_ret) {
142 // the heck with this. we can't even go back to the start. this
143 // file seems to be screwed up now.
144 LOG(astring("failed to seek back to start of file! on ")
145 + _real_file->name());
146 return 0;
147 }
148 // reset the rest of the positions for our failed attempt to return
149 // to what we already thought was good.
150 _file_pointer = 0;
151 big_jump = byte_filer::file_size_limit();
152 best_highest = 0;
153 continue;
154 }
155 // okay, nothing bad happened when we went back to our last good point.
156 if (big_jump <= 0) {
157 // success in finding the smallest place that we can't seek between.
158#ifdef DEBUG_HUGE_FILE
159 LOG("got down to smallest big jump, 0!");
160#endif
161 break;
162 }
163 // formula expects that the maximum file size is a power of 2.
164 big_jump /= 2;
165#ifdef DEBUG_HUGE_FILE
166 LOG(a_sprintf("restraining big jump down to %u.", big_jump));
167#endif
168 continue;
169 }
170 }
171
172 // go back to where we started out.
174 seek(save_posn, byte_filer::FROM_CURRENT);
175#ifdef DEBUG_HUGE_FILE
176 LOG(a_sprintf("saying file len is %.0f.", best_highest + 1.0));
177#endif
178 return best_highest + 1.0;
179}
180
181bool huge_file::good() const { return _real_file->good(); }
182
183bool huge_file::eof() const { return _real_file->eof(); }
184
185outcome huge_file::move_to(double absolute_posn)
186{
187#ifdef DEBUG_HUGE_FILE
188 FUNCDEF("move_to");
189#endif
190 double difference = absolute_posn - _file_pointer;
191 // calculate the size we want to offset.
192#ifdef DEBUG_HUGE_FILE
193 LOG(a_sprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
194 absolute_posn, difference, _file_pointer));
195#endif
196 // if we're at the same place, we don't have to do anything.
197 if (difference < 0.000001) {
198#ifdef DEBUG_HUGE_FILE
199 LOG("difference was minimal, saying we're done.");
200#endif
201 return OKAY;
202 }
203 while (absolute_value(difference) > 0.000001) {
204 double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
205 absolute_value(difference));
206 if (difference < 0)
207 seek_size *= -1.0; // flip sign of seek.
208#ifdef DEBUG_HUGE_FILE
209 LOG(a_sprintf(" seeksize=%d", int(seek_size)));
210#endif
211 bool seek_ret = _real_file->seek(int(seek_size),
213 if (!seek_ret) {
214#ifdef DEBUG_HUGE_FILE
215 LOG(a_sprintf("failed to seek %d from current", int(seek_size)));
216#endif
217 return FAILURE; // seek failed somehow.
218 }
219 _file_pointer += seek_size;
220#ifdef DEBUG_HUGE_FILE
221 LOG(a_sprintf(" now_filepoint=%.0f", _file_pointer));
222#endif
223 difference = absolute_posn - _file_pointer;
224#ifdef DEBUG_HUGE_FILE
225 LOG(a_sprintf(" now_difference=%.0f", difference));
226#endif
227 }
228 return OKAY;
229}
230
231outcome huge_file::seek(double new_position, byte_filer::origins origin)
232{
233#ifdef DEBUG_HUGE_FILE
234 FUNCDEF("seek");
235#endif
236 if (origin == byte_filer::FROM_CURRENT) {
237 return move_to(_file_pointer + new_position);
238 } else if (origin == byte_filer::FROM_START) {
239 _file_pointer = 0;
240 if (!_real_file->seek(0, byte_filer::FROM_START))
241 return FAILURE;
242 return move_to(new_position);
243 } else if (origin == byte_filer::FROM_END) {
244#ifdef DEBUG_HUGE_FILE
245 LOG("into precarious FROM_END case.");
246#endif
247 double file_len = length(); // could take a scary long time possibly.
248#ifdef DEBUG_HUGE_FILE
249 LOG(a_sprintf(" FROM_END got len %.0f.", file_len));
250#endif
251 _file_pointer = file_len;
252 // it's safe, although not efficient, for us to call the length()
253 // method here. our current version of length() uses the byte_filer's
254 // seek method directly and only FROM_CURRENT and FROM_START from this
255 // class's seek method.
256 _real_file->seek(0, byte_filer::FROM_END);
257 return move_to(_file_pointer - new_position);
258 }
259 // unknown origin.
260 return BAD_INPUT;
261}
262
263outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
264{
265 FUNCDEF("read");
266 size_read = 0;
267 int ret = _real_file->read(to_fill, desired_size);
268 if (ret < 0)
269 return FAILURE; // couldn't read the bytes.
270 _file_pointer += double(size_read);
271 size_read = ret;
272 return OKAY;
273}
274
275outcome huge_file::write(const byte_array &to_write, int &size_written)
276{
277 FUNCDEF("write");
278 size_written = 0;
279 int ret = _real_file->write(to_write);
280 if (ret < 0)
281 return FAILURE; // couldn't write the bytes.
282 _file_pointer += double(size_written);
283 size_written = ret;
284 return OKAY;
285}
286
288{
289 FUNCDEF("touch")
290 if (filename(_real_file->name()).exists()) {
291 // file exists, so just update time.
292#ifndef __WIN32__
293 int ret = utimes(_real_file->name().observe(), NULL_POINTER);
294 if (ret != 0)
295 return FAILURE;
296#else
297 // open the file, although the function says create in its name...
298 HANDLE f = CreateFile(_real_file->name().observe(),
299 GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
300 NULL_POINTER, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL_POINTER);
301 if (!f) {
302 LOG(a_sprintf("failed to open file %s", _real_file->name().observe()));
303 return FAILURE;
304 }
305 // get current system time in UTC.
306 SYSTEMTIME *st = new SYSTEMTIME;
307 GetSystemTime(st);
308 // convert system time into file time.
309 FILETIME *t = new FILETIME;
310 SystemTimeToFileTime(st, t);
311 // set the file's time.
312 SetFileTime(f, NULL_POINTER, t, t);
313#endif
314 } else {
315 // file doesn't exist yet.
316 byte_array junk(1);
317 int written;
318 outcome ret = write(junk, written);
319 if (ret != OKAY) ret;
320 if (!truncate())
321 return FAILURE;
322 }
323 return OKAY;
324}
325
326} //namespace.
327
#define write
Definition Xos2defs.h:46
#define LOG(s)
a_sprintf is a specialization of astring that provides printf style support.
Definition astring.h:440
Provides a dynamically resizable ASCII character string.
Definition astring.h:35
virtual const char * observe() const
observes the underlying pointer to the zero-terminated string.
Definition astring.cpp:140
A very common template for a dynamic array of bytes.
Definition byte_array.h:36
Outcomes describe the state of completion for an operation.
Definition outcome.h:31
Provides file management services using the standard I/O support.
Definition byte_filer.h:32
static size_t file_size_limit()
returns the maximum size that seek and length can support.
int write(const basis::abyte *buffer, int buffer_size)
writes "buffer_size" bytes into the file from "buffer".
@ FROM_START
offset is from the beginning of the file.
Definition byte_filer.h:94
@ FROM_CURRENT
offset is from current cursor position.
Definition byte_filer.h:96
@ FROM_END
offset is from the end of the file.
Definition byte_filer.h:95
bool seek(int where, origins origin=FROM_START)
places the cursor in the file at "where", based on the "origin".
const basis::astring & name() const
returns the file name that the object is operating on.
int read(basis::abyte *buffer, int buffer_size)
reads "buffer_size" bytes from the file into "buffer".
bool eof()
returns true if the cursor is at (or after) the end of the file.
void flush()
forces any pending writes to actually be saved to the file.
size_t length()
returns the file's total length, in bytes.
bool truncate()
truncates the file after the current position.
bool good()
returns true if the file seems to be in the appropriate desired state.
Provides operations commonly needed on file names.
Definition filename.h:64
bool exists() const
returns true if the file exists.
Definition filename.cpp:426
double length()
expensive operation accesses the file to find length.
Definition huge_file.cpp:57
bool truncate()
truncates the file after the current position.
Definition huge_file.cpp:55
basis::outcome move_to(double absolute_posn)
simpler seek just goes from current location to "absolute_posn".
bool eof() const
reports when the file pointer has reached the end of the file.
huge_file(const basis::astring &filename, const basis::astring &permissions)
opens "filename" for access, where it presumably is a very large file.
Definition huge_file.cpp:40
basis::outcome seek(double new_position, byte_filer::origins origin=byte_filer::FROM_CURRENT)
move the file pointer to "new_position" if possible.
basis::outcome touch()
const basis::astring & name() const
returns the name of the file this operates on.
Definition huge_file.cpp:51
basis::outcome write(const basis::byte_array &to_write, int &size_written)
stores the array "to_write" into the file.
bool good() const
reports if the file was opened successfully.
void flush()
forces any pending writes to actually be saved to the file.
Definition huge_file.cpp:53
basis::outcome read(basis::byte_array &to_fill, int desired_size, int &size_read)
reads "desired_size" into "to_fill" if possible.
#define NULL_POINTER
The value representing a pointer to nothing.
Definition definitions.h:32
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition enhance_cpp.h:54
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
void WHACK(contents *&ptr)
deletion with clearing of the pointer.
Definition functions.h:121
type minimum(type a, type b)
minimum returns the lesser of two values.
Definition functions.h:29
type absolute_value(type a)
Returns a if a is non-negative, and returns -a otherwise.
Definition functions.h:33
A platform independent way to obtain the timestamp of a file.
Aids in achievement of platform independence.
void * HANDLE