ed66d7b4de58ff28ab4c227990b515954461ca9b
[feisty_meow.git] / nucleus / library / filesystem / file_info.cpp
1 /*****************************************************************************\
2 *                                                                             *
3 *  Name   : file_info                                                         *
4 *  Author : Chris Koeritz                                                     *
5 *                                                                             *
6 *******************************************************************************
7 * Copyright (c) 1993-$now By Author.  This program is free software; you can  *
8 * redistribute it and/or modify it under the terms of the GNU General Public  *
9 * License as published by the Free Software Foundation; either version 2 of   *
10 * the License or (at your option) any later version.  This is online at:      *
11 *     http://www.fsf.org/copyleft/gpl.html                                    *
12 * Please send any updates to: fred@gruntose.com                               *
13 \*****************************************************************************/
14
15 #include "file_info.h"
16 #include "huge_file.h"
17
18 #include <basis/astring.h>
19 #include <basis/byte_array.h>
20 #include <basis/contracts.h>
21 #include <basis/functions.h>
22 #include <structures/checksums.h>
23 #include <structures/object_packers.h>
24
25 #include <stdio.h>
26
27 #define DEBUG_FILE_INFO
  // noisy version is currently enabled; comment out the define above to silence it.
29
30 #undef LOG
31 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
32
33 using namespace basis;
34 using namespace structures;
35
36 namespace filesystem {
37
38 file_info::file_info()
39 : filename(astring::empty_string()),
40   _file_size(0),
41   _time(),
42   _checksum(),
43   c_secondary(),
44   c_attachment()
45 {}
46
47 file_info::file_info(const filename &to_copy, double file_size,
48     const file_time &time, int checksum)
49 : filename(to_copy),
50   _file_size(file_size),
51   _time(time),
52   _checksum(checksum),
53   c_secondary(),
54   c_attachment()
55 {}
56
57 file_info::file_info(const file_info &to_copy)
58 : filename(to_copy),
59   _file_size(to_copy._file_size),
60   _time(to_copy._time),
61   _checksum(to_copy._checksum),
62   c_secondary(to_copy.c_secondary),
63   c_attachment(to_copy.c_attachment)
64 {
65 }
66
67 file_info::~file_info() {}
68
69 const byte_array &file_info::attachment() const { return c_attachment; }
70
71 void file_info::attachment(const byte_array &new_attachment)
72 { c_attachment = new_attachment; }
73
74 const astring &file_info::secondary() const { return c_secondary; }
75
76 void file_info::secondary(const astring &new_sec) { c_secondary = new_sec; }
77
78 astring file_info::text_form() const
79 {
80   astring to_return = raw()
81       + a_sprintf(", size=%0.f, chksum=%d", _file_size, _checksum);
82   if (c_secondary.t())
83     to_return += astring(", 2ndary=") + c_secondary;
84   return to_return;
85 }
86
87 bool file_info::calculate(const astring &prefix, bool just_size, int checksum_edge)
88 {
89   FUNCDEF("calculate");
90   filename full;
91   if (prefix.t()) full = prefix + "/" + *this;
92   else full = *this;
93   if (!full.exists()) {
94 #ifdef DEBUG_FILE_INFO
95     LOG(astring("failed to find file: ") + full.raw());
96 #endif
97     return false;
98   }
99   // get time again.
100   _time = file_time(full);
101
102 //#ifdef DEBUG_FILE_INFO
103 //  astring temptext;
104 //  _time.text_form(temptext);
105 //  LOG(astring("file calculate on ") + full.raw() + " time=" + temptext);
106 //#endif
107
108   // open the file for reading.
109   huge_file to_read(full.raw(), "rb");
110   if (!to_read.good()) {
111 #ifdef DEBUG_FILE_INFO
112     LOG(astring("file has non-good status: ") + full.raw());
113 #endif
114     return false;  // why did that happen?
115   }
116   // set the size appropriately.
117   _file_size = to_read.length();
118   if (just_size)
119     return true;  // done for that case.
120
121   // now read the file and compute a checksum.
122   uint16 curr_sum = 0;  // the current checksum being computed.
123   byte_array chunk;  // temporary chunk of data from file.
124
125 //hmmm: make this optimization (hack) optional!
126
127   // this algorithm takes a chunk on each end of the file for checksums.
128   // this saves us from reading a huge amount of data, although it will be
129   // fooled if a huge binary file is changed only in the middle and has the
130   // same size as before.  for most purposes, this is not a problem, although
131   // databases that are fixed size might fool us.  if records are written in
132   // the middle without updating the head or tail sections, then we're hosed.
133
134   bool skip_tail = false;  // true if we don't need the tail piece.
135   double head_start = 0, head_end = 0, tail_start = 0,
136       tail_end = _file_size - 1;
137   if (_file_size <= double(2 * checksum_edge)) {
138     // we're applying a rule for when the file is too small compared to
139     // the chunk factor doubled; we'll just read the whole file.
140     head_end = _file_size - 1;
141     skip_tail = true;
142   } else {
143     // here we compute the ending of the head piece and the beginning of
144     // the tail piece.  each will be about checksum_edge in size.
145     head_end = minimum(_file_size / 2, double(checksum_edge)) - 1;
146     tail_start = _file_size - minimum(_file_size / 2, double(checksum_edge));
147   }
148
149   // read the head end of the file.
150   int size_read = 0;
151   outcome ret = to_read.read(chunk, int(head_end - head_start + 1), size_read);
152   if (ret != huge_file::OKAY) {
153 #ifdef DEBUG_FILE_INFO
154     LOG(astring("reading file failed: ") + full.raw());
155 #endif
156     return false;  // failed to read.
157   }
158   curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
159       chunk.length());
160
161   // read the tail end of the file.
162   if (!skip_tail) {
163     to_read.seek(tail_start, byte_filer::FROM_START);
164     ret = to_read.read(chunk, int(tail_end - tail_start + 1), size_read);
165     if (ret != huge_file::OKAY) {
166 #ifdef DEBUG_FILE_INFO
167       LOG(astring("reading tail of file failed: ") + full.raw());
168 #endif
169       return false;  // failed to read.
170     }
171     curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
172         chunk.length());
173   }
174
175   _checksum = curr_sum;
176   return true;
177 }
178
179 int file_info::packed_size() const
180 {
181   return filename::packed_size()
182       + structures::packed_size(_file_size)
183       + _time.packed_size()
184       + PACKED_SIZE_INT32
185       + c_secondary.packed_size()
186       + structures::packed_size(c_attachment);
187 }
188
189 void file_info::pack(byte_array &packed_form) const
190 {
191   filename::pack(packed_form);
192   attach(packed_form, _file_size);
193   _time.pack(packed_form);
194   attach(packed_form, _checksum);
195   c_secondary.pack(packed_form);
196   attach(packed_form, c_attachment);
197 }
198
199 bool file_info::unpack(byte_array &packed_form)
200 {
201   if (!filename::unpack(packed_form))
202     return false;
203   if (!detach(packed_form, _file_size))
204     return false;
205   if (!_time.unpack(packed_form))
206     return false;
207   if (!detach(packed_form, _checksum))
208     return false;
209   if (!c_secondary.unpack(packed_form))
210     return false;
211   if (!detach(packed_form, c_attachment))
212     return false;
213   return true;
214 }
215
216 file_info &file_info::operator = (const file_info &to_copy)
217 {
218   if (this == &to_copy)
219     return *this;
220   (filename &)(*this) = (filename &)to_copy;
221   c_attachment = to_copy.c_attachment;
222   _time = to_copy._time;
223   _file_size = to_copy._file_size;
224   c_secondary = to_copy.c_secondary;
225   _checksum = to_copy._checksum;
226   return *this;
227 }
228
229 } //namespace.
230