feisty meow concerns codebase  2.140
heavy_file_ops.cpp
Go to the documentation of this file.
1 /*****************************************************************************\
2 * *
3 * Name : heavy file operations *
4 * Author : Chris Koeritz *
5 * *
6 *******************************************************************************
7 * Copyright (c) 2005-$now By Author. This program is free software; you can *
8 * redistribute it and/or modify it under the terms of the GNU General Public *
9 * License as published by the Free Software Foundation; either version 2 of *
10 * the License or (at your option) any later version. This is online at: *
11 * http://www.fsf.org/copyleft/gpl.html *
12 * Please send any updates to: fred@gruntose.com *
13 \*****************************************************************************/
14 
15 #include "directory.h"
16 #include "filename.h"
17 #include "filename_list.h"
18 #include "heavy_file_ops.h"
19 #include "huge_file.h"
20 
21 #include <basis/functions.h>
22 #include <basis/guards.h>
24 
25 using namespace basis;
26 using namespace structures;
27 
28 namespace filesystem {
29 
30 //#define DEBUG_HEAVY_FILE_OPS
31  // uncomment for noisier debugging.
32 
33 #undef LOG
34 #include <stdio.h>
35 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
36 
38 
39 // the smallest we let the packing area's available space get before we stop filling it.
40 const int MINIMUM_ARRAY_SIZE = 1024;
41 
43 
44 file_transfer_header::file_transfer_header(const file_time &time_stamp)
45 : _filename(),
46  _byte_start(0),
47  _length(0),
48  _time(time_stamp)
49 {
50 }
51 
53 {
54  astring time_text;
55  _time.text_form(time_text);
56  return astring("file=") + _filename
57  + a_sprintf(" start=%d len=%d stamp=", _byte_start, _length)
58  + time_text;
59 }
60 
62 {
63  astring time_text;
64  _time.readable_text_form(time_text);
65  return _filename
66  + a_sprintf(" [%d bytes, mod ", _length)
67  + time_text + "]";
68 }
69 
70 void file_transfer_header::pack(byte_array &packed_form) const
71 {
72  _filename.pack(packed_form);
73  attach(packed_form, _byte_start);
74  attach(packed_form, _length);
75  _time.pack(packed_form);
76 }
77 
79 {
80  if (!_filename.unpack(packed_form)) return false;
81  if (!detach(packed_form, _byte_start)) return false;
82  if (!detach(packed_form, _length)) return false;
83  if (!_time.unpack(packed_form)) return false;
84  return true;
85 }
86 
88 {
89 byte_array temp;
90 attach(temp, _byte_start);
91 //hmmm: really ugly above; we should get a more exact way to know the size of
92 // packed doubles.
93  return _filename.length() + 1
94  + temp.length()
95  + sizeof(int)
96  + _time.packed_size();
97 }
98 
100 
102 
104 { return COPY_CHUNK_FACTOR; }
105 
107  // we only need this due to our use of the root_object class_name support.
108 
109 const char *heavy_file_operations::outcome_name(const outcome &to_name)
110 {
111  switch (to_name.value()) {
112  case SOURCE_MISSING: return "SOURCE_MISSING";
113  case TARGET_ACCESS_ERROR: return "TARGET_ACCESS_ERROR";
114  case TARGET_DIR_ERROR: return "TARGET_DIR_ERROR";
115  default: return common::outcome_name(to_name);
116  }
117 }
118 
120  const astring &destination, int copy_chunk_factor)
121 {
122 #ifdef DEBUG_HEAVY_FILE_OPS
123  FUNCDEF("copy_file");
124 #endif
125  // check that the source exists...
126  filename source_path(source);
127  if (!source_path.exists()) return SOURCE_MISSING;
128  file_time source_time(source_path); // get the time on the source.
129 
130  // make sure the target directory exists...
131  filename target_path(destination);
132  filename targ_dir = target_path.dirname();
133  if (!directory::recursive_create(targ_dir.raw())) return TARGET_DIR_ERROR;
134 
135  // open the source for reading.
136  huge_file source_file(source, "rb");
137  if (!source_file.good()) return SOURCE_MISSING;
138 //hmmm: could be source is not accessible instead.
139 
140  // open target file for writing.
141  huge_file target_file(destination, "wb");
142  if (!target_file.good()) return TARGET_ACCESS_ERROR;
143 
144  byte_array chunk;
145  int bytes_read = 0;
146  outcome ret;
147  while ( (ret = source_file.read(chunk, copy_chunk_factor, bytes_read))
148  == huge_file::OKAY) {
149  int bytes_stored;
150  ret = target_file.write(chunk, bytes_stored);
151  if (bytes_stored != bytes_read) return TARGET_ACCESS_ERROR;
152  if (source_file.eof()) break; // time to escape.
153  }
154 
155  // set the time on the target file from the source's time.
156  source_time.set_time(target_path);
157 
158 #ifdef DEBUG_HEAVY_FILE_OPS
159  astring time;
160  source_time.text_form(time);
161  LOG(astring("setting file time for ") + source + " to " + time);
162 #endif
163 
164  return OKAY;
165 }
166 
168  double byte_start, const byte_array &chunk, bool truncate,
169  int copy_chunk_factor)
170 {
171  FUNCDEF("write_file_chunk");
172  if (byte_start < 0) return BAD_INPUT;
173 
174  filename targ_name(target);
175  astring targ_dir = targ_name.dirname().raw();
176 #ifdef DEBUG_HEAVY_FILE_OPS
177  LOG(astring("creating target's directory: ") + targ_name.dirname().raw());
178 #endif
179  if (!directory::recursive_create(targ_dir)) {
180  LOG(astring("failed to create directory: ") + targ_name.dirname().raw());
181  return TARGET_DIR_ERROR;
182  }
183 
184  if (!targ_name.exists()) {
185  huge_file target_file(target, "w");
186  }
187 
188  huge_file target_file(target, "r+b");
189  // open the file for updating (either read or write).
190  if (!target_file.good()) return TARGET_ACCESS_ERROR;
191  double curr_len = target_file.length();
192  target_file.touch();
193 
194  if (curr_len < byte_start) {
195  byte_array new_chunk;
196  while (curr_len < byte_start) {
197  target_file.seek(0, byte_filer::FROM_END); // go to the end of the file.
198  new_chunk.reset(minimum(copy_chunk_factor,
199  int(curr_len - byte_start + 1)));
200  int written;
201  outcome ret = target_file.write(new_chunk, written);
202  if (written < new_chunk.length()) return TARGET_ACCESS_ERROR;
203  curr_len = target_file.length();
204  }
205  }
206  target_file.seek(byte_start, byte_filer::FROM_START);
207  // jump to the proper location in the file.
208  int wrote;
209  outcome ret = target_file.write(chunk, wrote);
210  if (wrote != chunk.length()) return TARGET_ACCESS_ERROR;
211  if (truncate) {
212  target_file.truncate();
213  }
214  return OKAY;
215 }
216 
217 basis::outcome heavy_file_operations::advance(const filename_list &to_transfer,
218  file_transfer_header &last_action)
219 {
220  FUNCDEF("advance");
221  int indy = to_transfer.locate(last_action._filename);
222  if (negative(indy)) return BAD_INPUT; // error, file not found in list.
223  if (indy >= to_transfer.elements() - 1) return FINISHED; // done.
224  const file_info *currfile = to_transfer.get(indy + 1);
225  last_action._filename = currfile->raw();
226  last_action._time = currfile->_time;
227 
228 #ifdef DEBUG_HEAVY_FILE_OPS
229  if (currfile->_time == file_time(time_t(0)))
230  LOG(astring("failed for ") + currfile->raw() + " -- has zero file time");
231 #endif
232 
233  last_action._byte_start = 0;
234  last_action._length = 0;
235  return OKAY;
236 }
237 
239  const filename_list &to_transfer, file_transfer_header &last_action,
240  byte_array &storage, int maximum_bytes)
241 {
242  FUNCDEF("buffer_files");
243  storage.reset(); // clear out the current contents.
244 
245  if (!to_transfer.elements()) {
246  // we seem to be done.
247  return FINISHED;
248  }
249 
250  outcome to_return = OKAY;
251 
252  // this records if we're working on a new file.
253  bool fresh_file = false;
254 
255  // start filling the array with bytes from the files.
256  while (storage.length() < maximum_bytes) {
257  double remaining_in_array = maximum_bytes - storage.length()
258  - last_action.packed_size();
259  if (remaining_in_array < MINIMUM_ARRAY_SIZE) {
260  // ensure that we at least have a reasonable amount of space left
261  // for storing into the array.
262  break;
263  }
264 
265  // find the current file we're at, as provided in record.
266  if (!last_action._filename) {
267  // no filename yet. assume this is the first thing we've done.
268  const file_info *currfile = to_transfer.get(0);
269  last_action._filename = currfile->raw();
270  last_action._time = currfile->_time;
271  last_action._byte_start = 0;
272  last_action._length = 0;
273  fresh_file = true;
274  }
275 
276  const file_info *found = to_transfer.find(last_action._filename);
277  if (!found) {
278  // they have referenced a file that we don't have. that's bad news.
279  LOG(astring("unknown last file requested in transfer: ") + last_action._filename);
280  return BAD_INPUT;
281  }
282 
283  astring full_file = source_root + "/" + last_action._filename;
284  huge_file current(full_file, "rb");
285  if (!current.good()) {
286  // we need to skip this file.
287  LOG(astring("skipping bad file: ") + full_file);
288  fresh_file = true;
289  to_return = advance(to_transfer, last_action);
290  if (to_return != OKAY) break;
291  continue;
292  }
293 //LOG(astring("working on file: ") + current.name());
294 
295  // we don't try to check done if we just started this file.
296  if (!fresh_file && (last_action._byte_start + last_action._length >= current.length())) {
297  // this file is done now. go to the next one.
298 #ifdef DEBUG_HEAVY_FILE_OPS
299  LOG(astring("finished stuffing file: ") + full_file);
300 #endif
301  fresh_file = true;
302  to_return = advance(to_transfer, last_action);
303  if (to_return != OKAY) break;
304  continue;
305  }
306  // now that we tested if the file was fresh in our 'finished' check above, we
307  // consider the file not to be fresh until told otherwise.
308  fresh_file = false;
309 
310  // calculate the largest piece remaining of that file that will fit in the
311  // allotted space.
312  double new_start = last_action._byte_start + last_action._length;
313  double remaining_in_file = current.length() - new_start;
314  if (remaining_in_file < 0) remaining_in_file = 0;
315  double new_len = minimum(remaining_in_file, remaining_in_array);
316 
317  // pack this new piece of the file.
318  current.seek(new_start, byte_filer::FROM_START);
319  byte_array new_chunk;
320  int bytes_read = 0;
321  outcome ret = current.read(new_chunk, int(new_len), bytes_read);
322  if (bytes_read != new_len) {
323  if (!bytes_read) {
324  // some kind of problem reading the file.
325  fresh_file = true;
326  to_return = advance(to_transfer, last_action);
327  if (to_return != OKAY) break;
328  continue;
329  }
330 //why would this happen? just complain, i guess.
331  }
332 
333  // update the record since it seems we're successful here.
334  last_action._byte_start = new_start;
335  last_action._length = int(new_len);
336 
337  // add in this next new chunk of file.
338  last_action.pack(storage); // add the header.
339  storage += new_chunk; // add the new stuff.
340 
341  if (!current.length()) {
342  // ensure we don't get stuck redoing zero length files, which we allowed
343  // to go past their end above (since otherwise we'd never see them).
344  fresh_file = true;
345  to_return = advance(to_transfer, last_action);
346  if (to_return != OKAY) break;
347  continue;
348  }
349 
350  // just keep going, if there's space...
351  }
352 
353  return to_return;
354 }
355 
356 } //namespace.
357 
a_sprintf is a specialization of astring that provides printf style support.
Definition: astring.h:440
void reset(int number=0, const contents *initial_contents=NULL_POINTER)
Resizes this array and sets the contents from an array of contents.
Definition: array.h:349
int length() const
Returns the current reported length of the allocated C array.
Definition: array.h:115
Provides a dynamically resizable ASCII character string.
Definition: astring.h:35
void pack(byte_array &target) const
stores this string in the "target". it can later be unpacked again.
Definition: astring.cpp:961
int length() const
Returns the current length of the string.
Definition: astring.cpp:132
bool unpack(byte_array &source)
retrieves a string (packed with pack()) from "source" into this string.
Definition: astring.cpp:964
A very common template for a dynamic array of bytes.
Definition: byte_array.h:36
Outcomes describe the state of completion for an operation.
Definition: outcome.h:31
int value() const
Definition: outcome.h:51
@ FROM_START
offset is from the beginning of the file.
Definition: byte_filer.h:94
@ FROM_END
offset is from the end of the file.
Definition: byte_filer.h:95
static bool recursive_create(const basis::astring &directory_name)
returns true if the "directory_name" can be created or already exists.
Definition: directory.cpp:257
Encapsulates some measures and calculations based on a file's contents.
Definition: file_info.h:29
file_time _time
the file's access time.
Definition: file_info.h:41
bool set_time(const basis::astring &filename)
sets the time for the "filename" to the currently held time.
Definition: file_time.cpp:59
virtual int packed_size() const
Definition: file_time.cpp:132
virtual void pack(basis::byte_array &packed_form) const
Definition: file_time.cpp:134
virtual void text_form(basis::base_string &time_string) const
returns a definitive but sorta ugly version of the file's time.
Definition: file_time.cpp:116
virtual void readable_text_form(basis::base_string &time_string) const
sets "time_string" to a human readable form of the file's time.
Definition: file_time.cpp:121
virtual bool unpack(basis::byte_array &packed_form)
Definition: file_time.cpp:137
describes one portion of an ongoing file transfer.
double _byte_start
the starting location in the file being sent.
file_time _time
the timestamp on the file.
virtual int packed_size() const
Estimates the space needed for the packed structure.
int _length
the length of the transferred piece.
virtual bool unpack(basis::byte_array &packed_form)
Restores the packable from the "packed_form".
basis::astring text_form() const
basis::astring _filename
the name of the file being transferred.
basis::astring readable_text_form() const
a nicer formatting of the information.
virtual void pack(basis::byte_array &packed_form) const
Creates a packed form of the packable object in "packed_form".
const file_info * find(const filename &to_check) const
locates the record of information for the filename "to_check".
int locate(const filename &to_find) const
Provides operations commonly needed on file names.
Definition: filename.h:64
bool exists() const
returns true if the file exists.
Definition: filename.cpp:426
const basis::astring & raw() const
returns the astring that we're holding onto for the path.
Definition: filename.cpp:97
filename dirname() const
returns the directory for the filename.
Definition: filename.cpp:393
static size_t copy_chunk_factor()
method can be exported for use by shared libs.
static const size_t COPY_CHUNK_FACTOR
the default copy chunk size for the file copy method.
static basis::outcome copy_file(const basis::astring &source, const basis::astring &destination, int copy_chunk_factor=heavy_file_operations::copy_chunk_factor())
copies a file from the "source" location to the "destination".
static basis::outcome write_file_chunk(const basis::astring &target, double byte_start, const basis::byte_array &chunk, bool truncate=true, int copy_chunk_factor=heavy_file_operations::copy_chunk_factor())
stores a chunk of bytes into the "target" file.
static const char * outcome_name(const basis::outcome &to_name)
static basis::outcome buffer_files(const basis::astring &source_root, const filename_list &to_transfer, file_transfer_header &last_action, basis::byte_array &storage, int maximum_bytes)
reads files in "to_transfer" and packs them into a "storage" buffer.
Supports reading and writing to very large files, > 4 gigabytes.
Definition: huge_file.h:36
double length()
expensive operation accesses the file to find length.
Definition: huge_file.cpp:57
bool truncate()
truncates the file after the current position.
Definition: huge_file.cpp:55
bool eof() const
reports when the file pointer has reached the end of the file.
Definition: huge_file.cpp:183
basis::outcome seek(double new_position, byte_filer::origins origin=byte_filer::FROM_CURRENT)
move the file pointer to "new_position" if possible.
Definition: huge_file.cpp:231
basis::outcome touch()
Definition: huge_file.cpp:287
basis::outcome write(const basis::byte_array &to_write, int &size_written)
stores the array "to_write" into the file.
Definition: huge_file.cpp:275
bool good() const
reports if the file was opened successfully.
Definition: huge_file.cpp:181
basis::outcome read(basis::byte_array &to_fill, int desired_size, int &size_read)
reads "desired_size" into "to_fill" if possible.
Definition: huge_file.cpp:263
int elements() const
the maximum number of elements currently allowed in this amorph.
Definition: amorph.h:66
const contents * get(int field) const
Returns a constant pointer to the information at the index "field".
Definition: amorph.h:312
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition: enhance_cpp.h:57
#define LOG(to_print)
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
const int MEGABYTE
Number of bytes in a megabyte.
Definition: definitions.h:135
void attach(byte_array &packed_form, const char *to_attach)
Packs a character string "to_attach" into "packed_form".
Definition: astring.cpp:1015
bool detach(byte_array &packed_form, astring &to_detach)
Unpacks a character string "to_detach" from "packed_form".
Definition: astring.cpp:1023
type minimum(type a, type b)
minimum returns the lesser of two values.
Definition: functions.h:29
bool negative(const type &a)
negative returns true if "a" is less than zero.
Definition: functions.h:43
A platform independent way to obtain the timestamp of a file.
Definition: byte_filer.cpp:37
const int MINIMUM_ARRAY_SIZE
A dynamic container class that holds any kind of object via pointers.
Definition: amorph.h:55