6f5b19db1832feffb4e815c3df9c05069745a541
[feisty_meow.git] / nucleus / library / filesystem / heavy_file_ops.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : heavy file operations                                             *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 2005-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "directory.h"
16 #include "filename.h"
17 #include "filename_list.h"
18 #include "heavy_file_ops.h"
19 #include "huge_file.h"
20
21 #include <basis/functions.h>
22 #include <basis/guards.h>
23 #include <structures/object_packers.h>
24
25 using namespace basis;
26 using namespace structures;
27
28 namespace filesystem {
29
30 #define DEBUG_HEAVY_FILE_OPS
31   // uncomment for noisier debugging.
32
33 #undef LOG
34 #ifdef DEBUG_HEAVY_FILE_OPS
35   #include <stdio.h>
36   #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
37 #else
38   #define LOG(s) {if(!!s){}}
39 #endif
40
41 //////////////
42
43 file_transfer_header::file_transfer_header(const file_time &time_stamp)
44 : _filename(),
45   _byte_start(0),
46   _length(0),
47   _time(time_stamp)
48 {
49 }
50
51 astring file_transfer_header::text_form() const
52 {
53   astring time_text;
54   _time.text_form(time_text);
55   return astring("file=") + _filename
56       + a_sprintf(" start=%d len=%d stamp=", _byte_start, _length)
57       + time_text;
58 }
59
60 astring file_transfer_header::readable_text_form() const
61 {
62   astring time_text;
63   _time.readable_text_form(time_text);
64   return _filename
65       + a_sprintf(" [%d bytes, mod ", _length)
66       + time_text + "]";
67 }
68
// streams the header's fields onto the end of "packed_form", in exactly the
// order that unpack() extracts them: filename, starting byte, length, then
// the file time stamp.
void file_transfer_header::pack(byte_array &packed_form) const
{
  _filename.pack(packed_form);
  attach(packed_form, _byte_start);
  attach(packed_form, _length);
  _time.pack(packed_form);
}
76
77 bool file_transfer_header::unpack(byte_array &packed_form)
78 {
79   if (!_filename.unpack(packed_form)) return false;
80   if (!detach(packed_form, _byte_start)) return false;
81   if (!detach(packed_form, _length)) return false;
82   if (!_time.unpack(packed_form)) return false;
83   return true;
84 }
85
// reports how many bytes this header will occupy once pack()ed, so callers
// can budget space for it (see buffer_files).
int file_transfer_header::packed_size() const
{
// measure the packed double by actually packing it into a scratch array.
byte_array temp;
attach(temp, _byte_start);
//hmmm: really ugly above; we should get a more exact way to know the size of
//      packed doubles.
  return _filename.length() + 1  // the name plus (presumably) a terminator
                                 //   byte -- TODO confirm astring::pack adds
                                 //   exactly one extra byte.
      + temp.length()            // measured packed size of _byte_start.
      + sizeof(int)              // _length is attached as a plain int.
      + _time.packed_size();     // the time stamp reports its own size.
}
97
98 //////////////
99
// the default chunk size used when shuttling file contents around; one
// megabyte per read/write keeps memory use bounded.
const size_t heavy_file_operations::COPY_CHUNK_FACTOR = 1 * MEGABYTE;

// accessor for the chunking factor constant above.
size_t heavy_file_operations::copy_chunk_factor()
{ return COPY_CHUNK_FACTOR; }
104
105 heavy_file_operations::~heavy_file_operations() {}
106   // we only need this due to our use of the root_object class_name support.
107
108 const char *heavy_file_operations::outcome_name(const outcome &to_name)
109 {
110   switch (to_name.value()) {
111     case SOURCE_MISSING: return "SOURCE_MISSING";
112     case TARGET_ACCESS_ERROR: return "TARGET_ACCESS_ERROR";
113     case TARGET_DIR_ERROR: return "TARGET_DIR_ERROR";
114     default: return common::outcome_name(to_name);
115   }
116 }
117
118 outcome heavy_file_operations::copy_file(const astring &source,
119     const astring &destination, int copy_chunk_factor)
120 {
121 #ifdef DEBUG_HEAVY_FILE_OPS
122   FUNCDEF("copy_file");
123 #endif
124   // check that the source exists...
125   filename source_path(source);
126   if (!source_path.exists()) return SOURCE_MISSING;
127   file_time source_time(source_path);  // get the time on the source.
128
129   // make sure the target directory exists...
130   filename target_path(destination);
131   filename targ_dir = target_path.dirname();
132   if (!directory::recursive_create(targ_dir.raw())) return TARGET_DIR_ERROR;
133
134   // open the source for reading.
135   huge_file source_file(source, "rb");
136   if (!source_file.good()) return SOURCE_MISSING;
137 //hmmm: could be source is not accessible instead.
138
139   // open target file for writing.
140   huge_file target_file(destination, "wb");
141   if (!target_file.good()) return TARGET_ACCESS_ERROR;
142
143   byte_array chunk;
144   int bytes_read = 0;
145   outcome ret;
146   while ( (ret = source_file.read(chunk, copy_chunk_factor, bytes_read))
147       == huge_file::OKAY) {
148     int bytes_stored;
149     ret = target_file.write(chunk, bytes_stored);
150     if (bytes_stored != bytes_read) return TARGET_ACCESS_ERROR;
151     if (source_file.eof()) break;  // time to escape.
152   }
153
154   // set the time on the target file from the source's time.
155   source_time.set_time(target_path);
156
157 #ifdef DEBUG_HEAVY_FILE_OPS
158   astring time;
159   source_time.text_form(time);
160   LOG(astring("setting file time for ") + source + " to " + time);
161 #endif
162
163   return OKAY;
164 }
165
166 outcome heavy_file_operations::write_file_chunk(const astring &target,
167     double byte_start, const byte_array &chunk, bool truncate,
168     int copy_chunk_factor)
169 {
170 #ifdef DEBUG_HEAVY_FILE_OPS
171 //  FUNCDEF("write_file_chunk");
172 #endif
173   if (byte_start < 0) return BAD_INPUT;
174
175   filename targ_name(target);
176   if (!directory::recursive_create(targ_name.dirname().raw()))
177     return TARGET_DIR_ERROR;
178
179   if (!targ_name.exists()) {
180     huge_file target_file(target, "w");
181   }
182
183   huge_file target_file(target, "r+b");
184     // open the file for updating (either read or write).
185   if (!target_file.good()) return TARGET_ACCESS_ERROR;
186   double curr_len = target_file.length();
187
188   if (curr_len < byte_start) {
189     byte_array new_chunk;
190     while (curr_len < byte_start) {
191       target_file.seek(0, byte_filer::FROM_END);  // go to the end of the file.
192       new_chunk.reset(minimum(copy_chunk_factor,
193           int(curr_len - byte_start + 1)));
194       int written;
195       outcome ret = target_file.write(new_chunk, written);
196       if (written < new_chunk.length()) return TARGET_ACCESS_ERROR;
197       curr_len = target_file.length();
198     }
199   }
200   target_file.seek(byte_start, byte_filer::FROM_START);
201     // jump to the proper location in the file.
202   int wrote;
203   outcome ret = target_file.write(chunk, wrote);
204   if (wrote != chunk.length()) return TARGET_ACCESS_ERROR;
205   if (truncate) {
206     target_file.truncate();
207   }
208   return OKAY;
209 }
210
211 bool heavy_file_operations::advance(const filename_list &to_transfer,
212     file_transfer_header &last_action)
213 {
214 #ifdef DEBUG_HEAVY_FILE_OPS
215   FUNCDEF("advance");
216 #endif
217   int indy = to_transfer.locate(last_action._filename);
218   if (negative(indy)) return false;  // error.
219   if (indy == to_transfer.elements() - 1) return false;  // done.
220   const file_info *currfile = to_transfer.get(indy + 1);
221   last_action._filename = currfile->raw();
222   last_action._time = currfile->_time;
223
224 #ifdef DEBUG_HEAVY_FILE_OPS
225   if (currfile->_time == file_time(time_t(0)))
226     LOG(astring("failed for ") + currfile->raw() + " -- has zero file time");
227 #endif
228
229   last_action._byte_start = 0;
230   last_action._length = 0;
231   return true;
232 }
233
// fills "storage" with as many (header, data) pieces from the files listed
// in "to_transfer" as will fit in "maximum_bytes".  each piece is a packed
// file_transfer_header followed by raw file bytes.  "last_action" records
// where the previous call left off, and is updated here so repeated calls
// stream through the whole list.  returns FINISHED when the list is empty;
// otherwise OKAY, or BAD_INPUT if last_action names an unknown file.
outcome heavy_file_operations::buffer_files(const astring &source_root,
    const filename_list &to_transfer, file_transfer_header &last_action,
    byte_array &storage, int maximum_bytes)
{
  FUNCDEF("buffer_files");
  storage.reset();  // clear out the current contents.

  if (!to_transfer.elements()) {
    // we seem to be done.
    return FINISHED;
  }

  outcome to_return = OKAY;

  // start filling the array with bytes from the files.
  while (storage.length() < maximum_bytes) {
    // reserve room for the header that precedes each data piece.
    double remaining_in_array = maximum_bytes - storage.length()
        - last_action.packed_size();
    if (remaining_in_array < 128) {
      // ensure that we at least have a reasonable amount of space left
      // for storing into the array.
      break;
    }

    // find the current file we're at, as provided in record.
    if (!last_action._filename) {
      // no filename yet.  assume this is the first thing we've done.
      const file_info *currfile = to_transfer.get(0);
      last_action._filename = currfile->raw();
      last_action._time = currfile->_time;
      last_action._byte_start = 0;
      last_action._length = 0;
    }

    const file_info *found = to_transfer.find(last_action._filename);
    if (!found) {
      // they have referenced a file that we don't have.  that's bad news.
      return BAD_INPUT;
    }

    astring full_file = source_root + "/" + last_action._filename;
    huge_file current(full_file, "rb");
    if (!current.good()) {
      // we need to skip this file.
LOG(astring("skipping bad file: ") + full_file);
      if (!advance(to_transfer, last_action)) break;
      continue;
    }

    if (last_action._byte_start + last_action._length >= current.length()) {
LOG(astring("finished handling file: ") + full_file);
      // this file is done now.  go to the next one.
      if (!advance(to_transfer, last_action)) break;
      continue;
    }

    // calculate the largest piece remaining of that file that will fit in the
    // allotted space.
    double new_start = last_action._byte_start + last_action._length;
    double remaining_in_file = current.length() - new_start;
    if (remaining_in_file < 0) remaining_in_file = 0;
    double new_len = minimum(remaining_in_file, remaining_in_array);
    
    // pack this new piece of the file.
    current.seek(new_start, byte_filer::FROM_START);
    byte_array new_chunk;
    int bytes_read = 0;
    outcome ret = current.read(new_chunk, int(new_len), bytes_read);
    if (bytes_read != new_len) {
      // NOTE(review): a short-but-nonzero read falls through and is packed
      // below as if it were the full piece -- confirm that is intended.
      if (!bytes_read) {
        // some kind of problem reading the file.
        if (!advance(to_transfer, last_action)) break;
        continue;
      }
//why would this happen?  just complain, i guess.
    }

    // update the record since it seems we're successful here.
    last_action._byte_start = new_start;
    last_action._length = int(new_len);

    // add in this next new chunk of file.
    last_action.pack(storage);  // add the header.
    storage += new_chunk;  // add the new stuff.

    if (!current.length()) {
      // ensure we don't get stuck redoing zero length files, which we allowed
      // to go past their end above (since otherwise we'd never see them).
      if (!advance(to_transfer, last_action)) break;
      continue;
    }
    
    // just keep going, if there's space...
  }

  return to_return;
}
331
332 } //namespace.
333