seeking out problem file.
[feisty_meow.git] / nucleus / library / filesystem / file_info.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : file_info                                                         *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 1993-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "file_info.h"
16 #include "huge_file.h"
17
18 #include <basis/astring.h>
19 #include <basis/byte_array.h>
20 #include <basis/contracts.h>
21 #include <basis/functions.h>
22 #include <structures/checksums.h>
23 #include <structures/object_packers.h>
24
25 #include <stdio.h>
26
27 #define DEBUG_FILE_INFO
  // noisy debug logging is currently ENABLED by the define above; comment
  // that define out to get the quiet version.
29
30 #undef LOG
31 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
32
33 using namespace basis;
34 using namespace structures;
35
36 namespace filesystem {
37
38 file_info::file_info()
39 : filename(astring::empty_string()),
40   _file_size(0),
41   _time(),
42   _checksum(),
43   c_secondary(),
44   c_attachment()
45 {}
46
47 file_info::file_info(const filename &to_copy, double file_size,
48     const file_time &time, int checksum)
49 : filename(to_copy),
50   _file_size(file_size),
51   _time(time),
52   _checksum(checksum),
53   c_secondary(),
54   c_attachment()
55 {}
56
57 file_info::file_info(const file_info &to_copy)
58 : filename(to_copy),
59   _file_size(to_copy._file_size),
60   _time(to_copy._time),
61   _checksum(to_copy._checksum),
62   c_secondary(to_copy.c_secondary),
63   c_attachment(to_copy.c_attachment)
64 {
65 }
66
67 file_info::~file_info() {}
68
69 const byte_array &file_info::attachment() const { return c_attachment; }
70
71 void file_info::attachment(const byte_array &new_attachment)
72 { c_attachment = new_attachment; }
73
74 const astring &file_info::secondary() const { return c_secondary; }
75
76 void file_info::secondary(const astring &new_sec) { c_secondary = new_sec; }
77
78 astring file_info::text_form() const
79 {
80   astring to_return = raw()
81       + a_sprintf(", size=%0.f, chksum=%d", _file_size, _checksum);
82   if (c_secondary.t())
83     to_return += astring(", 2ndary=") + c_secondary;
84   return to_return;
85 }
86
87 bool file_info::calculate(const astring &prefix, bool just_size, int checksum_edge)
88 {
89   FUNCDEF("calculate");
90   filename full;
91   if (prefix.t()) full = prefix + "/" + *this;
92   else full = *this;
93   if (!full.exists()) {
94 #ifdef DEBUG_FILE_INFO
95     LOG(astring("failed to find file: ") + full.raw());
96 #endif
97     return false;
98   }
99   // get time again.
100   _time = file_time(full);
101
102 //#ifdef DEBUG_FILE_INFO
103 //  astring temptext;
104 //  _time.text_form(temptext);
105 //  LOG(astring("file calculate on ") + full.raw() + " time=" + temptext);
106 //#endif
107
108   // open the file for reading.
109   huge_file to_read(full.raw(), "rb");
110   if (!to_read.good()) {
111 #ifdef DEBUG_FILE_INFO
112     LOG(astring("file has non-good status: ") + full.raw());
113 #endif
114     return false;  // why did that happen?
115   }
116   // set the size appropriately.
117   _file_size = to_read.length();
118   if (just_size)
119     return true;  // done for that case.
120
121   // now read the file and compute a checksum.
122   uint16 curr_sum = 0;  // the current checksum being computed.
123   byte_array chunk;  // temporary chunk of data from file.
124
125 //hmmm: make this optimization (hack) optional!
126
127   // this algorithm takes a chunk on each end of the file for checksums.
128   // this saves us from reading a huge amount of data, although it will be
129   // fooled if a huge binary file is changed only in the middle and has the
130   // same size as before.  for most purposes, this is not a problem, although
131   // databases that are fixed size might fool us.  if records are written in
132   // the middle without updating the head or tail sections, then we're hosed.
133
134   bool skip_tail = false;  // true if we don't need the tail piece.
135   double head_start = 0, head_end = 0, tail_start = 0,
136       tail_end = _file_size - 1;
137   if (_file_size == 0) {
138     head_end = 0;
139     skip_tail = true;
140   } else if (_file_size <= double(2 * checksum_edge)) {
141     // we're applying a rule for when the file is too small compared to
142     // the chunk factor doubled; we'll just read the whole file.
143     head_end = _file_size - 1;
144     skip_tail = true;
145   } else {
146     // here we compute the ending of the head piece and the beginning of
147     // the tail piece.  each will be about checksum_edge in size.
148     head_end = minimum(_file_size / 2, double(checksum_edge)) - 1;
149     tail_start = _file_size - minimum(_file_size / 2, double(checksum_edge));
150   }
151
152   // read the head end of the file.
153   int size_read = 0;
154   outcome ret = to_read.read(chunk, int(head_end - head_start + 1), size_read);
155   if (ret != huge_file::OKAY) {
156 #ifdef DEBUG_FILE_INFO
157     LOG(astring("reading file failed: ") + full.raw());
158 #endif
159     return false;  // failed to read.
160   }
161   curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
162       chunk.length());
163
164   // read the tail end of the file.
165   if (!skip_tail) {
166     to_read.seek(tail_start, byte_filer::FROM_START);
167     ret = to_read.read(chunk, int(tail_end - tail_start + 1), size_read);
168     if (ret != huge_file::OKAY) {
169 #ifdef DEBUG_FILE_INFO
170       LOG(astring("reading tail of file failed: ") + full.raw());
171 #endif
172       return false;  // failed to read.
173     }
174     curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
175         chunk.length());
176   }
177
178   _checksum = curr_sum;
179   return true;
180 }
181
182 int file_info::packed_size() const
183 {
184   return filename::packed_size()
185       + structures::packed_size(_file_size)
186       + _time.packed_size()
187       + PACKED_SIZE_INT32
188       + c_secondary.packed_size()
189       + structures::packed_size(c_attachment);
190 }
191
192 void file_info::pack(byte_array &packed_form) const
193 {
194   FUNCDEF("pack");
195   filename::pack(packed_form);
196   attach(packed_form, _file_size);
197   _time.pack(packed_form);
198   attach(packed_form, _checksum);
199   c_secondary.pack(packed_form);
200   attach(packed_form, c_attachment);
201 }
202
203 bool file_info::unpack(byte_array &packed_form)
204 {
205   if (!filename::unpack(packed_form))
206     return false;
207   if (!detach(packed_form, _file_size))
208     return false;
209   if (!_time.unpack(packed_form))
210     return false;
211   if (!detach(packed_form, _checksum))
212     return false;
213   if (!c_secondary.unpack(packed_form))
214     return false;
215   if (!detach(packed_form, c_attachment))
216     return false;
217   return true;
218 }
219
220 file_info &file_info::operator = (const file_info &to_copy)
221 {
222   if (this == &to_copy)
223     return *this;
224   (filename &)(*this) = (filename &)to_copy;
225   c_attachment = to_copy.c_attachment;
226   _time = to_copy._time;
227   _file_size = to_copy._file_size;
228   c_secondary = to_copy.c_secondary;
229   _checksum = to_copy._checksum;
230   return *this;
231 }
232
233 } //namespace.
234