feisty meow concerns codebase 2.140
file_info.cpp
Go to the documentation of this file.
1/*****************************************************************************\
2* *
3* Name : file_info *
4* Author : Chris Koeritz *
5* *
6*******************************************************************************
7* Copyright (c) 1993-$now By Author. This program is free software; you can *
8* redistribute it and/or modify it under the terms of the GNU General Public *
9* License as published by the Free Software Foundation; either version 2 of *
10* the License or (at your option) any later version. This is online at: *
11* http://www.fsf.org/copyleft/gpl.html *
12* Please send any updates to: fred@gruntose.com *
13\*****************************************************************************/
14
15#include "file_info.h"
16#include "huge_file.h"
17
18#include <basis/astring.h>
19#include <basis/byte_array.h>
20#include <basis/contracts.h>
21#include <basis/functions.h>
24
25#include <stdio.h>
26
27#define DEBUG_FILE_INFO
28 // noisy logging is on while this is defined; comment out the define for the quiet version.
29
30#undef LOG
31#define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
32
33using namespace basis;
34using namespace structures;
35
36namespace filesystem {
37
39: filename(astring::empty_string()),
40 _file_size(0),
41 _time(),
42 _checksum(),
43 c_secondary(),
44 c_attachment()
45{}
46
47file_info::file_info(const filename &to_copy, double file_size,
48 const file_time &time, int checksum)
49: filename(to_copy),
50 _file_size(file_size),
51 _time(time),
52 _checksum(checksum),
53 c_secondary(),
54 c_attachment()
55{}
56
58: filename(to_copy),
59 _file_size(to_copy._file_size),
60 _time(to_copy._time),
61 _checksum(to_copy._checksum),
62 c_secondary(to_copy.c_secondary),
63 c_attachment(to_copy.c_attachment)
64{
65}
66
68
69const byte_array &file_info::attachment() const { return c_attachment; }
70
71void file_info::attachment(const byte_array &new_attachment)
72{ c_attachment = new_attachment; }
73
74const astring &file_info::secondary() const { return c_secondary; }
75
76void file_info::secondary(const astring &new_sec) { c_secondary = new_sec; }
77
79{
80 astring to_return = raw()
81 + a_sprintf(", size=%0.f, chksum=%d", _file_size, _checksum);
82 if (c_secondary.t())
83 to_return += astring(", 2ndary=") + c_secondary;
84 return to_return;
85}
86
87bool file_info::calculate(const astring &prefix, bool just_size, int checksum_edge)
88{
89 FUNCDEF("calculate");
90 filename full;
91 if (prefix.t()) full = prefix + "/" + *this;
92 else full = *this;
93 if (!full.exists()) {
94#ifdef DEBUG_FILE_INFO
95 LOG(astring("failed to find file: ") + full.raw());
96#endif
97 return false;
98 }
99 // get time again.
100 _time = file_time(full);
101
102//#ifdef DEBUG_FILE_INFO
103// astring temptext;
104// _time.text_form(temptext);
105// LOG(astring("file calculate on ") + full.raw() + " time=" + temptext);
106//#endif
107
108 // open the file for reading.
109 huge_file to_read(full.raw(), "rb");
110 if (!to_read.good()) {
111#ifdef DEBUG_FILE_INFO
112 LOG(astring("file has non-good status: ") + full.raw());
113#endif
114 return false; // why did that happen?
115 }
116 // set the size appropriately.
117 _file_size = to_read.length();
118 if (just_size)
119 return true; // done for that case.
120
121 // now read the file and compute a checksum.
122 uint16 curr_sum = 0; // the current checksum being computed.
123 byte_array chunk; // temporary chunk of data from file.
124
125//hmmm: make this optimization (hack) optional!
126
127 // this algorithm takes a chunk on each end of the file for checksums.
128 // this saves us from reading a huge amount of data, although it will be
129 // fooled if a huge binary file is changed only in the middle and has the
130 // same size as before. for most purposes, this is not a problem, although
131 // databases that are fixed size might fool us. if records are written in
132 // the middle without updating the head or tail sections, then we're hosed.
133
134 bool skip_tail = false; // true if we don't need the tail piece.
135 double head_start = 0, head_end = 0, tail_start = 0,
136 tail_end = _file_size - 1;
137 if (_file_size == 0) {
138 head_end = 0;
139 skip_tail = true;
140 } else if (_file_size <= double(2 * checksum_edge)) {
141 // we're applying a rule for when the file is too small compared to
142 // the chunk factor doubled; we'll just read the whole file.
143 head_end = _file_size - 1;
144 skip_tail = true;
145 } else {
146 // here we compute the ending of the head piece and the beginning of
147 // the tail piece. each will be about checksum_edge in size.
148 head_end = minimum(_file_size / 2, double(checksum_edge)) - 1;
149 tail_start = _file_size - minimum(_file_size / 2, double(checksum_edge));
150 }
151
152 // read the head end of the file.
153 int size_read = 0;
154 outcome ret = to_read.read(chunk, int(head_end - head_start + 1), size_read);
155 if (ret != huge_file::OKAY) {
156#ifdef DEBUG_FILE_INFO
157 LOG(astring("reading file failed: ") + full.raw());
158#endif
159 return false; // failed to read.
160 }
161 curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
162 chunk.length());
163
164 // read the tail end of the file.
165 if (!skip_tail) {
166 to_read.seek(tail_start, byte_filer::FROM_START);
167 ret = to_read.read(chunk, int(tail_end - tail_start + 1), size_read);
168 if (ret != huge_file::OKAY) {
169#ifdef DEBUG_FILE_INFO
170 LOG(astring("reading tail of file failed: ") + full.raw());
171#endif
172 return false; // failed to read.
173 }
174 curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
175 chunk.length());
176 }
177
178 _checksum = curr_sum;
179 return true;
180}
181
183{
184 return filename::packed_size()
188 + c_secondary.packed_size()
189 + structures::packed_size(c_attachment);
190}
191
192void file_info::pack(byte_array &packed_form) const
193{
194 FUNCDEF("pack");
195 filename::pack(packed_form);
196 attach(packed_form, _file_size);
197 _time.pack(packed_form);
198 attach(packed_form, _checksum);
199 c_secondary.pack(packed_form);
200 attach(packed_form, c_attachment);
201}
202
204{
205 if (!filename::unpack(packed_form))
206 return false;
207 if (!detach(packed_form, _file_size))
208 return false;
209 if (!_time.unpack(packed_form))
210 return false;
211 if (!detach(packed_form, _checksum))
212 return false;
213 if (!c_secondary.unpack(packed_form))
214 return false;
215 if (!detach(packed_form, c_attachment))
216 return false;
217 return true;
218}
219
221{
222 if (this == &to_copy)
223 return *this;
224 (filename &)(*this) = (filename &)to_copy;
225 c_attachment = to_copy.c_attachment;
226 _time = to_copy._time;
227 _file_size = to_copy._file_size;
228 c_secondary = to_copy.c_secondary;
229 _checksum = to_copy._checksum;
230 return *this;
231}
232
233} //namespace.
234
#define LOG(s)
a_sprintf is a specialization of astring that provides printf style support.
Definition astring.h:440
const contents * observe() const
Returns a pointer to the underlying C array of data.
Definition array.h:172
int length() const
Returns the current reported length of the allocated C array.
Definition array.h:115
Provides a dynamically resizable ASCII character string.
Definition astring.h:35
bool t() const
t() is a shortcut for the string being "true", as in non-empty.
Definition astring.h:97
void pack(byte_array &target) const
stores this string in the "target". it can later be unpacked again.
Definition astring.cpp:964
int packed_size() const
Reports the size required to pack this string into a byte array.
Definition astring.cpp:962
astring()
constructs an empty string.
Definition astring.cpp:59
bool unpack(byte_array &source)
retrieves a string (packed with pack()) from "source" into this string.
Definition astring.cpp:967
A very common template for a dynamic array of bytes.
Definition byte_array.h:36
Outcomes describe the state of completion for an operation.
Definition outcome.h:31
@ FROM_START
offset is from the beginning of the file.
Definition byte_filer.h:94
Encapsulates some measures and calculations based on a file's contents.
Definition file_info.h:29
double _file_size
the size of the file.
Definition file_info.h:40
file_info()
blank constructor.
Definition file_info.cpp:38
virtual void pack(basis::byte_array &packed_form) const
Creates a packed form of the packable object in "packed_form".
const basis::astring & secondary() const
observes the alternate form of the name.
Definition file_info.cpp:74
int _checksum
the checksum for the file.
Definition file_info.h:42
file_time _time
the file's access time.
Definition file_info.h:41
bool calculate(const basis::astring &prefix, bool just_size_n_time, int checksum_edge=1 *basis::KILOBYTE)
fills in the correct file size and checksum information for this file.
Definition file_info.cpp:87
basis::astring text_form() const
Definition file_info.cpp:78
virtual bool unpack(basis::byte_array &packed_form)
Restores the packable from the "packed_form".
virtual int packed_size() const
Estimates the space needed for the packed structure.
file_info & operator=(const file_info &to_copy)
const basis::byte_array & attachment() const
returns the chunk of data optionally attached to the file's info.
Definition file_info.cpp:69
virtual int packed_size() const
virtual void pack(basis::byte_array &packed_form) const
virtual bool unpack(basis::byte_array &packed_form)
Provides operations commonly needed on file names.
Definition filename.h:64
bool exists() const
returns true if the file exists.
Definition filename.cpp:426
virtual bool unpack(basis::byte_array &packed_form)
Restores the packable from the "packed_form".
Definition filename.cpp:471
virtual void pack(basis::byte_array &packed_form) const
Creates a packed form of the packable object in "packed_form".
Definition filename.cpp:465
const basis::astring & raw() const
returns the astring that we're holding onto for the path.
Definition filename.cpp:97
virtual int packed_size() const
Estimates the space needed for the packed structure.
Definition filename.cpp:460
Supports reading and writing to very large files, > 4 gigabytes.
Definition huge_file.h:36
double length()
expensive operation accesses the file to find length.
Definition huge_file.cpp:57
basis::outcome seek(double new_position, byte_filer::origins origin=byte_filer::FROM_CURRENT)
move the file pointer to "new_position" if possible.
bool good() const
reports if the file was opened successfully.
basis::outcome read(basis::byte_array &to_fill, int desired_size, int &size_read)
reads "desired_size" into "to_fill" if possible.
static basis::un_short rolling_fletcher_checksum(basis::un_short previous, const basis::abyte *data, int len)
Fletcher checksums applied to streaming data.
Definition checksums.cpp:56
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition enhance_cpp.h:54
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
unsigned short uint16
void attach(byte_array &packed_form, const char *to_attach)
Packs a character string "to_attach" into "packed_form".
Definition astring.cpp:1018
bool detach(byte_array &packed_form, astring &to_detach)
Unpacks a character string "to_detach" from "packed_form".
Definition astring.cpp:1026
type minimum(type a, type b)
minimum returns the lesser of two values.
Definition functions.h:29
A platform independent way to obtain the timestamp of a file.
A dynamic container class that holds any kind of object via pointers.
Definition amorph.h:55
const int PACKED_SIZE_INT32
int packed_size(const byte_array &packed_form)
Reports the size required to pack a byte array into a byte array.