Merge branch 'main' of feistymeow.org:feisty_meow
[feisty_meow.git] / filesystem / heavy_file_ops.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : heavy file operations                                             *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 2005-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "directory.h"
16 #include "filename.h"
17 #include "filename_list.h"
18 #include "heavy_file_ops.h"
19 #include "huge_file.h"
20
21 #include <basis/functions.h>
22 #include <basis/guards.h>
23 #include <structures/object_packers.h>
24
25 using namespace basis;
26 using namespace structures;
27
28 namespace filesystem {
29
30 //#define DEBUG_HEAVY_FILE_OPS
31   // uncomment for noisier debugging.
32
// replace any earlier LOG definition with one that prints to standard output.
// each message is prefixed with the class name and with 'func', which must be
// in scope wherever LOG is used (presumably supplied by FUNCDEF -- confirm).
#ifdef LOG
  #undef LOG
#endif
#include <stdio.h>
#define LOG(to_print) \
  printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
36
37 //////////////
38
// once the room left in the packing area (after allowing for a header) drops
// below this many bytes, buffer_files() stops adding file data to it.
const int MINIMUM_ARRAY_SIZE = 1024;
41
42 //////////////
43
44 file_transfer_header::file_transfer_header(const file_time &time_stamp)
45 : _filename(),
46   _byte_start(0),
47   _length(0),
48   _time(time_stamp)
49 {
50 }
51
52 astring file_transfer_header::text_form() const
53 {
54   astring time_text;
55   _time.text_form(time_text);
56   return astring("file=") + _filename
57       + a_sprintf(" start=%d len=%d stamp=", _byte_start, _length)
58       + time_text;
59 }
60
61 astring file_transfer_header::readable_text_form() const
62 {
63   astring time_text;
64   _time.readable_text_form(time_text);
65   return _filename
66       + a_sprintf(" [%d bytes, mod ", _length)
67       + time_text + "]";
68 }
69
70 void file_transfer_header::pack(byte_array &packed_form) const
71 {
72   _filename.pack(packed_form);
73   attach(packed_form, _byte_start);
74   attach(packed_form, _length);
75   _time.pack(packed_form);
76 }
77
78 bool file_transfer_header::unpack(byte_array &packed_form)
79 {
80   if (!_filename.unpack(packed_form)) return false;
81   if (!detach(packed_form, _byte_start)) return false;
82   if (!detach(packed_form, _length)) return false;
83   if (!_time.unpack(packed_form)) return false;
84   return true;
85 }
86
87 int file_transfer_header::packed_size() const
88 {
89 byte_array temp;
90 attach(temp, _byte_start);
91 //hmmm: really ugly above; we should get a more exact way to know the size of
92 //      packed doubles.
93   return _filename.length() + 1
94       + temp.length()
95       + sizeof(int)
96       + _time.packed_size();
97 }
98
99 //////////////
100
101 const size_t heavy_file_operations::COPY_CHUNK_FACTOR = 1 * MEGABYTE;
102
103 size_t heavy_file_operations::copy_chunk_factor()
104 { return COPY_CHUNK_FACTOR; }
105
106 heavy_file_operations::~heavy_file_operations() {}
107   // we only need this due to our use of the root_object class_name support.
108
109 const char *heavy_file_operations::outcome_name(const outcome &to_name)
110 {
111   switch (to_name.value()) {
112     case SOURCE_MISSING: return "SOURCE_MISSING";
113     case TARGET_ACCESS_ERROR: return "TARGET_ACCESS_ERROR";
114     case TARGET_DIR_ERROR: return "TARGET_DIR_ERROR";
115     default: return common::outcome_name(to_name);
116   }
117 }
118
119 outcome heavy_file_operations::copy_file(const astring &source,
120     const astring &destination, int copy_chunk_factor)
121 {
122 #ifdef DEBUG_HEAVY_FILE_OPS
123   FUNCDEF("copy_file");
124 #endif
125   // check that the source exists...
126   filename source_path(source);
127   if (!source_path.exists()) return SOURCE_MISSING;
128   file_time source_time(source_path);  // get the time on the source.
129
130   // make sure the target directory exists...
131   filename target_path(destination);
132   filename targ_dir = target_path.dirname();
133   if (!directory::recursive_create(targ_dir.raw())) return TARGET_DIR_ERROR;
134
135   // open the source for reading.
136   huge_file source_file(source, "rb");
137   if (!source_file.good()) return SOURCE_MISSING;
138 //hmmm: could be source is not accessible instead.
139
140   // open target file for writing.
141   huge_file target_file(destination, "wb");
142   if (!target_file.good()) return TARGET_ACCESS_ERROR;
143
144   byte_array chunk;
145   int bytes_read = 0;
146   outcome ret;
147   while ( (ret = source_file.read(chunk, copy_chunk_factor, bytes_read))
148       == huge_file::OKAY) {
149     int bytes_stored;
150     ret = target_file.write(chunk, bytes_stored);
151     if (bytes_stored != bytes_read) return TARGET_ACCESS_ERROR;
152     if (source_file.eof()) break;  // time to escape.
153   }
154
155   // set the time on the target file from the source's time.
156   source_time.set_time(target_path);
157
158 #ifdef DEBUG_HEAVY_FILE_OPS
159   astring time;
160   source_time.text_form(time);
161   LOG(astring("setting file time for ") + source + " to " + time);
162 #endif
163
164   return OKAY;
165 }
166
167 outcome heavy_file_operations::write_file_chunk(const astring &target,
168     double byte_start, const byte_array &chunk, bool truncate,
169     int copy_chunk_factor)
170 {
171   FUNCDEF("write_file_chunk");
172   if (byte_start < 0) return BAD_INPUT;
173
174   filename targ_name(target);
175   astring targ_dir = targ_name.dirname().raw();
176 #ifdef DEBUG_HEAVY_FILE_OPS
177   LOG(astring("creating target's directory: ") + targ_name.dirname().raw());
178 #endif
179   if (!directory::recursive_create(targ_dir)) {
180     LOG(astring("failed to create directory: ") + targ_name.dirname().raw());
181     return TARGET_DIR_ERROR;
182   }
183
184   if (!targ_name.exists()) {
185     huge_file target_file(target, "w");
186   }
187
188   huge_file target_file(target, "r+b");
189     // open the file for updating (either read or write).
190   if (!target_file.good()) return TARGET_ACCESS_ERROR;
191   double curr_len = target_file.length();
192   target_file.touch();
193
194   if (curr_len < byte_start) {
195     byte_array new_chunk;
196     while (curr_len < byte_start) {
197       target_file.seek(0, byte_filer::FROM_END);  // go to the end of the file.
198       new_chunk.reset(minimum(copy_chunk_factor,
199           int(curr_len - byte_start + 1)));
200       int written;
201       outcome ret = target_file.write(new_chunk, written);
202       if (written < new_chunk.length()) return TARGET_ACCESS_ERROR;
203       curr_len = target_file.length();
204     }
205   }
206   target_file.seek(byte_start, byte_filer::FROM_START);
207     // jump to the proper location in the file.
208   int wrote;
209   outcome ret = target_file.write(chunk, wrote);
210   if (wrote != chunk.length()) return TARGET_ACCESS_ERROR;
211   if (truncate) {
212     target_file.truncate();
213   }
214   return OKAY;
215 }
216
217 basis::outcome heavy_file_operations::advance(const filename_list &to_transfer,
218     file_transfer_header &last_action)
219 {
220   FUNCDEF("advance");
221   int indy = to_transfer.locate(last_action._filename);
222   if (negative(indy)) return BAD_INPUT;  // error, file not found in list.
223   if (indy >= to_transfer.elements() - 1) return FINISHED;  // done.
224   const file_info *currfile = to_transfer.get(indy + 1);
225   last_action._filename = currfile->raw();
226   last_action._time = currfile->_time;
227
228 #ifdef DEBUG_HEAVY_FILE_OPS
229   if (currfile->_time == file_time(time_t(0)))
230     LOG(astring("failed for ") + currfile->raw() + " -- has zero file time");
231 #endif
232
233   last_action._byte_start = 0;
234   last_action._length = 0;
235   return OKAY;
236 }
237
238 outcome heavy_file_operations::buffer_files(const astring &source_root,
239     const filename_list &to_transfer, file_transfer_header &last_action,
240     byte_array &storage, int maximum_bytes)
241 {
242   FUNCDEF("buffer_files");
243   storage.reset();  // clear out the current contents.
244
245   if (!to_transfer.elements()) {
246     // we seem to be done.
247     return FINISHED;
248   }
249
250   outcome to_return = OKAY;
251
252   // this records if we're working on a new file.
253   bool fresh_file = false;
254
255   // start filling the array with bytes from the files.
256   while (storage.length() < maximum_bytes) {
257     double remaining_in_array = maximum_bytes - storage.length()
258         - last_action.packed_size();
259     if (remaining_in_array < MINIMUM_ARRAY_SIZE) {
260       // ensure that we at least have a reasonable amount of space left
261       // for storing into the array.
262       break;
263     }
264
265     // find the current file we're at, as provided in record.
266     if (!last_action._filename) {
267       // no filename yet.  assume this is the first thing we've done.
268       const file_info *currfile = to_transfer.get(0);
269       last_action._filename = currfile->raw();
270       last_action._time = currfile->_time;
271       last_action._byte_start = 0;
272       last_action._length = 0;
273       fresh_file = true;
274     }
275
276     const file_info *found = to_transfer.find(last_action._filename);
277     if (!found) {
278       // they have referenced a file that we don't have.  that's bad news.
279       LOG(astring("unknown last file requested in transfer: ") + last_action._filename);
280       return BAD_INPUT;
281     }
282
283     astring full_file = source_root + "/" + last_action._filename;
284     huge_file current(full_file, "rb");
285     if (!current.good()) {
286       // we need to skip this file.
287       LOG(astring("skipping bad file: ") + full_file);
288       fresh_file = true;
289       to_return = advance(to_transfer, last_action);
290       if (to_return != OKAY) break;
291       continue;
292     }
293 //LOG(astring("working on file: ") + current.name());
294
295     // we don't try to check done if we just started this file.
296     if (!fresh_file && (last_action._byte_start + last_action._length >= current.length())) {
297       // this file is done now.  go to the next one.
298 #ifdef DEBUG_HEAVY_FILE_OPS
299       LOG(astring("finished stuffing file: ") + full_file);
300 #endif
301       fresh_file = true;
302       to_return = advance(to_transfer, last_action);
303       if (to_return != OKAY) break;
304       continue;
305     }
306     // now that we tested if the file was fresh in our 'finished' check above, we
307     // consider the file not to be fresh until told otherwise.
308     fresh_file = false;
309
310     // calculate the largest piece remaining of that file that will fit in the
311     // allotted space.
312     double new_start = last_action._byte_start + last_action._length;
313     double remaining_in_file = current.length() - new_start;
314     if (remaining_in_file < 0) remaining_in_file = 0;
315     double new_len = minimum(remaining_in_file, remaining_in_array);
316     
317     // pack this new piece of the file.
318     current.seek(new_start, byte_filer::FROM_START);
319     byte_array new_chunk;
320     int bytes_read = 0;
321     outcome ret = current.read(new_chunk, int(new_len), bytes_read);
322     if (bytes_read != new_len) {
323       if (!bytes_read) {
324         // some kind of problem reading the file.
325         fresh_file = true;
326         to_return = advance(to_transfer, last_action);
327         if (to_return != OKAY) break;
328         continue;
329       }
330 //why would this happen?  just complain, i guess.
331     }
332
333     // update the record since it seems we're successful here.
334     last_action._byte_start = new_start;
335     last_action._length = int(new_len);
336
337     // add in this next new chunk of file.
338     last_action.pack(storage);  // add the header.
339     storage += new_chunk;  // add the new stuff.
340
341     if (!current.length()) {
342       // ensure we don't get stuck redoing zero length files, which we allowed
343       // to go past their end above (since otherwise we'd never see them).
344       fresh_file = true;
345       to_return = advance(to_transfer, last_action);
346       if (to_return != OKAY) break;
347       continue;
348     }
349     
350     // just keep going, if there's space...
351   }
352
353   return to_return;
354 }
355
356 } //namespace.
357