feisty meow concerns codebase  2.140
file_info.cpp
Go to the documentation of this file.
1 /*****************************************************************************\
2 * *
3 * Name : file_info *
4 * Author : Chris Koeritz *
5 * *
6 *******************************************************************************
7 * Copyright (c) 1993-$now By Author. This program is free software; you can *
8 * redistribute it and/or modify it under the terms of the GNU General Public *
9 * License as published by the Free Software Foundation; either version 2 of *
10 * the License or (at your option) any later version. This is online at: *
11 * http://www.fsf.org/copyleft/gpl.html *
12 * Please send any updates to: fred@gruntose.com *
13 \*****************************************************************************/
14 
15 #include "file_info.h"
16 #include "huge_file.h"
17 
18 #include <basis/astring.h>
19 #include <basis/byte_array.h>
20 #include <basis/contracts.h>
21 #include <basis/functions.h>
22 #include <structures/checksums.h>
24 
25 #include <stdio.h>
26 
27 #define DEBUG_FILE_INFO
28  // enables the noisy (logging) version; comment out the define above for quiet operation.
29 
30 #undef LOG
31 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
32 
33 using namespace basis;
34 using namespace structures;
35 
36 namespace filesystem {
37 
38 file_info::file_info()
39 : filename(astring::empty_string()),
40  _file_size(0),
41  _time(),
42  _checksum(),
43  c_secondary(),
44  c_attachment()
45 {}
46 
47 file_info::file_info(const filename &to_copy, double file_size,
48  const file_time &time, int checksum)
49 : filename(to_copy),
50  _file_size(file_size),
51  _time(time),
52  _checksum(checksum),
53  c_secondary(),
54  c_attachment()
55 {}
56 
58 : filename(to_copy),
59  _file_size(to_copy._file_size),
60  _time(to_copy._time),
61  _checksum(to_copy._checksum),
62  c_secondary(to_copy.c_secondary),
63  c_attachment(to_copy.c_attachment)
64 {
65 }
66 
68 
69 const byte_array &file_info::attachment() const { return c_attachment; }
70 
71 void file_info::attachment(const byte_array &new_attachment)
72 { c_attachment = new_attachment; }
73 
74 const astring &file_info::secondary() const { return c_secondary; }
75 
76 void file_info::secondary(const astring &new_sec) { c_secondary = new_sec; }
77 
79 {
80  astring to_return = raw()
81  + a_sprintf(", size=%0.f, chksum=%d", _file_size, _checksum);
82  if (c_secondary.t())
83  to_return += astring(", 2ndary=") + c_secondary;
84  return to_return;
85 }
86 
87 bool file_info::calculate(const astring &prefix, bool just_size, int checksum_edge)
88 {
89  FUNCDEF("calculate");
90  filename full;
91  if (prefix.t()) full = prefix + "/" + *this;
92  else full = *this;
93  if (!full.exists()) {
94 #ifdef DEBUG_FILE_INFO
95  LOG(astring("failed to find file: ") + full.raw());
96 #endif
97  return false;
98  }
99  // get time again.
100  _time = file_time(full);
101 
102 //#ifdef DEBUG_FILE_INFO
103 // astring temptext;
104 // _time.text_form(temptext);
105 // LOG(astring("file calculate on ") + full.raw() + " time=" + temptext);
106 //#endif
107 
108  // open the file for reading.
109  huge_file to_read(full.raw(), "rb");
110  if (!to_read.good()) {
111 #ifdef DEBUG_FILE_INFO
112  LOG(astring("file has non-good status: ") + full.raw());
113 #endif
114  return false; // why did that happen?
115  }
116  // set the size appropriately.
117  _file_size = to_read.length();
118  if (just_size)
119  return true; // done for that case.
120 
121  // now read the file and compute a checksum.
122  uint16 curr_sum = 0; // the current checksum being computed.
123  byte_array chunk; // temporary chunk of data from file.
124 
125 //hmmm: make this optimization (hack) optional!
126 
127  // this algorithm takes a chunk on each end of the file for checksums.
128  // this saves us from reading a huge amount of data, although it will be
129  // fooled if a huge binary file is changed only in the middle and has the
130  // same size as before. for most purposes, this is not a problem, although
131  // databases that are fixed size might fool us. if records are written in
132  // the middle without updating the head or tail sections, then we're hosed.
133 
134  bool skip_tail = false; // true if we don't need the tail piece.
135  double head_start = 0, head_end = 0, tail_start = 0,
136  tail_end = _file_size - 1;
137  if (_file_size == 0) {
138  head_end = 0;
139  skip_tail = true;
140  } else if (_file_size <= double(2 * checksum_edge)) {
141  // we're applying a rule for when the file is too small compared to
142  // the chunk factor doubled; we'll just read the whole file.
143  head_end = _file_size - 1;
144  skip_tail = true;
145  } else {
146  // here we compute the ending of the head piece and the beginning of
147  // the tail piece. each will be about checksum_edge in size.
148  head_end = minimum(_file_size / 2, double(checksum_edge)) - 1;
149  tail_start = _file_size - minimum(_file_size / 2, double(checksum_edge));
150  }
151 
152  // read the head end of the file.
153  int size_read = 0;
154  outcome ret = to_read.read(chunk, int(head_end - head_start + 1), size_read);
155  if (ret != huge_file::OKAY) {
156 #ifdef DEBUG_FILE_INFO
157  LOG(astring("reading file failed: ") + full.raw());
158 #endif
159  return false; // failed to read.
160  }
161  curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
162  chunk.length());
163 
164  // read the tail end of the file.
165  if (!skip_tail) {
166  to_read.seek(tail_start, byte_filer::FROM_START);
167  ret = to_read.read(chunk, int(tail_end - tail_start + 1), size_read);
168  if (ret != huge_file::OKAY) {
169 #ifdef DEBUG_FILE_INFO
170  LOG(astring("reading tail of file failed: ") + full.raw());
171 #endif
172  return false; // failed to read.
173  }
174  curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
175  chunk.length());
176  }
177 
178  _checksum = curr_sum;
179  return true;
180 }
181 
183 {
184  return filename::packed_size()
186  + _time.packed_size()
188  + c_secondary.packed_size()
189  + structures::packed_size(c_attachment);
190 }
191 
192 void file_info::pack(byte_array &packed_form) const
193 {
194  FUNCDEF("pack");
195  filename::pack(packed_form);
196  attach(packed_form, _file_size);
197  _time.pack(packed_form);
198  attach(packed_form, _checksum);
199  c_secondary.pack(packed_form);
200  attach(packed_form, c_attachment);
201 }
202 
203 bool file_info::unpack(byte_array &packed_form)
204 {
205  if (!filename::unpack(packed_form))
206  return false;
207  if (!detach(packed_form, _file_size))
208  return false;
209  if (!_time.unpack(packed_form))
210  return false;
211  if (!detach(packed_form, _checksum))
212  return false;
213  if (!c_secondary.unpack(packed_form))
214  return false;
215  if (!detach(packed_form, c_attachment))
216  return false;
217  return true;
218 }
219 
221 {
222  if (this == &to_copy)
223  return *this;
224  (filename &)(*this) = (filename &)to_copy;
225  c_attachment = to_copy.c_attachment;
226  _time = to_copy._time;
227  _file_size = to_copy._file_size;
228  c_secondary = to_copy.c_secondary;
229  _checksum = to_copy._checksum;
230  return *this;
231 }
232 
233 } //namespace.
234 
a_sprintf is a specialization of astring that provides printf style support.
Definition: astring.h:440
const contents * observe() const
Returns a pointer to the underlying C array of data.
Definition: array.h:172
int length() const
Returns the current reported length of the allocated C array.
Definition: array.h:115
Provides a dynamically resizable ASCII character string.
Definition: astring.h:35
bool t() const
t() is a shortcut for the string being "true", as in non-empty.
Definition: astring.h:97
void pack(byte_array &target) const
stores this string in the "target". it can later be unpacked again.
Definition: astring.cpp:961
int packed_size() const
Reports the size required to pack this string into a byte array.
Definition: astring.cpp:959
astring()
constructs an empty string.
Definition: astring.cpp:59
bool unpack(byte_array &source)
retrieves a string (packed with pack()) from "source" into this string.
Definition: astring.cpp:964
A very common template for a dynamic array of bytes.
Definition: byte_array.h:36
Outcomes describe the state of completion for an operation.
Definition: outcome.h:31
@ FROM_START
offset is from the beginning of the file.
Definition: byte_filer.h:94
Encapsulates some measures and calculations based on a file's contents.
Definition: file_info.h:29
double _file_size
the size of the file.
Definition: file_info.h:40
file_info()
blank constructor.
Definition: file_info.cpp:38
virtual void pack(basis::byte_array &packed_form) const
Creates a packed form of the packable object in "packed_form".
Definition: file_info.cpp:192
const basis::astring & secondary() const
observes the alternate form of the name.
Definition: file_info.cpp:74
int _checksum
the checksum for the file.
Definition: file_info.h:42
file_time _time
the file's access time.
Definition: file_info.h:41
bool calculate(const basis::astring &prefix, bool just_size_n_time, int checksum_edge=1 *basis::KILOBYTE)
fills in the correct file size and checksum information for this file.
Definition: file_info.cpp:87
basis::astring text_form() const
Definition: file_info.cpp:78
virtual bool unpack(basis::byte_array &packed_form)
Restores the packable from the "packed_form".
Definition: file_info.cpp:203
virtual int packed_size() const
Estimates the space needed for the packed structure.
Definition: file_info.cpp:182
file_info & operator=(const file_info &to_copy)
Definition: file_info.cpp:220
const basis::byte_array & attachment() const
returns the chunk of data optionally attached to the file's info.
Definition: file_info.cpp:69
virtual int packed_size() const
Definition: file_time.cpp:132
virtual void pack(basis::byte_array &packed_form) const
Definition: file_time.cpp:134
virtual bool unpack(basis::byte_array &packed_form)
Definition: file_time.cpp:137
Provides operations commonly needed on file names.
Definition: filename.h:64
bool exists() const
returns true if the file exists.
Definition: filename.cpp:426
virtual bool unpack(basis::byte_array &packed_form)
Restores the packable from the "packed_form".
Definition: filename.cpp:471
virtual void pack(basis::byte_array &packed_form) const
Creates a packed form of the packable object in "packed_form".
Definition: filename.cpp:465
const basis::astring & raw() const
returns the astring that we're holding onto for the path.
Definition: filename.cpp:97
virtual int packed_size() const
Estimates the space needed for the packed structure.
Definition: filename.cpp:460
Supports reading and writing to very large files, > 4 gigabytes.
Definition: huge_file.h:36
double length()
expensive operation accesses the file to find length.
Definition: huge_file.cpp:57
basis::outcome seek(double new_position, byte_filer::origins origin=byte_filer::FROM_CURRENT)
move the file pointer to "new_position" if possible.
Definition: huge_file.cpp:231
bool good() const
reports if the file was opened successfully.
Definition: huge_file.cpp:181
basis::outcome read(basis::byte_array &to_fill, int desired_size, int &size_read)
reads "desired_size" into "to_fill" if possible.
Definition: huge_file.cpp:263
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition: enhance_cpp.h:57
#define LOG(to_print)
Definition: file_info.cpp:31
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
unsigned short uint16
Definition: definitions.h:111
void attach(byte_array &packed_form, const char *to_attach)
Packs a character string "to_attach" into "packed_form".
Definition: astring.cpp:1015
bool detach(byte_array &packed_form, astring &to_detach)
Unpacks a character string "to_detach" from "packed_form".
Definition: astring.cpp:1023
type minimum(type a, type b)
minimum returns the lesser of two values.
Definition: functions.h:29
A platform independent way to obtain the timestamp of a file.
Definition: byte_filer.cpp:37
A dynamic container class that holds any kind of object via pointers.
Definition: amorph.h:55
const int PACKED_SIZE_INT32
int packed_size(const byte_array &packed_form)
Reports the size required to pack a byte array into a byte array.