nucleus/library/filesystem/file_info.cpp

   1 /*****************************************************************************\
   2 *                                                                             *
   3 *  Name   : file_info                                                         *
   4 *  Author : Chris Koeritz                                                     *
   5 *                                                                             *
   6 *******************************************************************************
   7 * Copyright (c) 1993-$now By Author.  This program is free software; you can  *
   8 * redistribute it and/or modify it under the terms of the GNU General Public  *
   9 * License as published by the Free Software Foundation; either version 2 of   *
  10 * the License or (at your option) any later version.  This is online at:      *
  11 *     http://www.fsf.org/copyleft/gpl.html                                    *
  12 * Please send any updates to: fred@gruntose.com                               *
  13 \*****************************************************************************/
  14
  15 #include "file_info.h"
  16 #include "huge_file.h"
  17
  18 #include <basis/astring.h>
  19 #include <basis/byte_array.h>
  20 #include <basis/contracts.h>
  21 #include <basis/functions.h>
  22 #include <structures/checksums.h>
  23 #include <structures/object_packers.h>
  24
  25 #include <stdio.h>
  26
  27 #define DEBUG_FILE_INFO
  // noisy debug logging is enabled by the define above; comment it out for a quiet version.
  29
  30 #undef LOG
  31 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
  32
  33 using namespace basis;
  34 using namespace structures;
  35
  36 namespace filesystem {
  37
  38 file_info::file_info()
  39 : filename(astring::empty_string()),
  40   _file_size(0),
  41   _time(),
  42   _checksum(),
  43   c_secondary(),
  44   c_attachment()
  45 {}
  46
  47 file_info::file_info(const filename &to_copy, double file_size,
  48     const file_time &time, int checksum)
  49 : filename(to_copy),
  50   _file_size(file_size),
  51   _time(time),
  52   _checksum(checksum),
  53   c_secondary(),
  54   c_attachment()
  55 {}
  56
  57 file_info::file_info(const file_info &to_copy)
  58 : filename(to_copy),
  59   _file_size(to_copy._file_size),
  60   _time(to_copy._time),
  61   _checksum(to_copy._checksum),
  62   c_secondary(to_copy.c_secondary),
  63   c_attachment(to_copy.c_attachment)
  64 {
  65 }
  66
  67 file_info::~file_info() {}
  68
  69 const byte_array &file_info::attachment() const { return c_attachment; }
  70
  71 void file_info::attachment(const byte_array &new_attachment)
  72 { c_attachment = new_attachment; }
  73
  74 const astring &file_info::secondary() const { return c_secondary; }
  75
  76 void file_info::secondary(const astring &new_sec) { c_secondary = new_sec; }
  77
  78 astring file_info::text_form() const
  79 {
  80   astring to_return = raw()
  81       + a_sprintf(", size=%0.f, chksum=%d", _file_size, _checksum);
  82   if (c_secondary.t())
  83     to_return += astring(", 2ndary=") + c_secondary;
  84   return to_return;
  85 }
  86
  87 bool file_info::calculate(const astring &prefix, bool just_size, int checksum_edge)
  88 {
  89   FUNCDEF("calculate");
  90   filename full;
  91   if (prefix.t()) full = prefix + "/" + *this;
  92   else full = *this;
  93   if (!full.exists()) {
  94 #ifdef DEBUG_FILE_INFO
  95     LOG(astring("failed to find file: ") + full.raw());
  96 #endif
  97     return false;
  98   }
  99   // get time again.
 100   _time = file_time(full);
 101
 102 //#ifdef DEBUG_FILE_INFO
 103 //  astring temptext;
 104 //  _time.text_form(temptext);
 105 //  LOG(astring("file calculate on ") + full.raw() + " time=" + temptext);
 106 //#endif
 107
 108   // open the file for reading.
 109   huge_file to_read(full.raw(), "rb");
 110   if (!to_read.good()) {
 111 #ifdef DEBUG_FILE_INFO
 112     LOG(astring("file has non-good status: ") + full.raw());
 113 #endif
 114     return false;  // why did that happen?
 115   }
 116   // set the size appropriately.
 117   _file_size = to_read.length();
 118   if (just_size)
 119     return true;  // done for that case.
 120
 121   // now read the file and compute a checksum.
 122   uint16 curr_sum = 0;  // the current checksum being computed.
 123   byte_array chunk;  // temporary chunk of data from file.
 124
 125 //hmmm: make this optimization (hack) optional!
 126
 127   // this algorithm takes a chunk on each end of the file for checksums.
 128   // this saves us from reading a huge amount of data, although it will be
 129   // fooled if a huge binary file is changed only in the middle and has the
 130   // same size as before.  for most purposes, this is not a problem, although
 131   // databases that are fixed size might fool us.  if records are written in
 132   // the middle without updating the head or tail sections, then we're hosed.
 133
 134   bool skip_tail = false;  // true if we don't need the tail piece.
 135   double head_start = 0, head_end = 0, tail_start = 0,
 136       tail_end = _file_size - 1;
 137   if (_file_size == 0) {
 138     head_end = 0;
 139     skip_tail = true;
 140   } else if (_file_size <= double(2 * checksum_edge)) {
 141     // we're applying a rule for when the file is too small compared to
 142     // the chunk factor doubled; we'll just read the whole file.
 143     head_end = _file_size - 1;
 144     skip_tail = true;
 145   } else {
 146     // here we compute the ending of the head piece and the beginning of
 147     // the tail piece.  each will be about checksum_edge in size.
 148     head_end = minimum(_file_size / 2, double(checksum_edge)) - 1;
 149     tail_start = _file_size - minimum(_file_size / 2, double(checksum_edge));
 150   }
 151
 152   // read the head end of the file.
 153   int size_read = 0;
 154   outcome ret = to_read.read(chunk, int(head_end - head_start + 1), size_read);
 155   if (ret != huge_file::OKAY) {
 156 #ifdef DEBUG_FILE_INFO
 157     LOG(astring("reading file failed: ") + full.raw());
 158 #endif
 159     return false;  // failed to read.
 160   }
 161   curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
 162       chunk.length());
 163
 164   // read the tail end of the file.
 165   if (!skip_tail) {
 166     to_read.seek(tail_start, byte_filer::FROM_START);
 167     ret = to_read.read(chunk, int(tail_end - tail_start + 1), size_read);
 168     if (ret != huge_file::OKAY) {
 169 #ifdef DEBUG_FILE_INFO
 170       LOG(astring("reading tail of file failed: ") + full.raw());
 171 #endif
 172       return false;  // failed to read.
 173     }
 174     curr_sum = checksums::rolling_fletcher_checksum(curr_sum, chunk.observe(),
 175         chunk.length());
 176   }
 177
 178   _checksum = curr_sum;
 179   return true;
 180 }
 181
 182 int file_info::packed_size() const
 183 {
 184   return filename::packed_size()
 185       + structures::packed_size(_file_size)
 186       + _time.packed_size()
 187       + PACKED_SIZE_INT32
 188       + c_secondary.packed_size()
 189       + structures::packed_size(c_attachment);
 190 }
 191
 192 void file_info::pack(byte_array &packed_form) const
 193 {
 194   FUNCDEF("pack");
 195   filename::pack(packed_form);
 196   attach(packed_form, _file_size);
 197   _time.pack(packed_form);
 198   attach(packed_form, _checksum);
 199   c_secondary.pack(packed_form);
 200   attach(packed_form, c_attachment);
 201 }
 202
 203 bool file_info::unpack(byte_array &packed_form)
 204 {
 205   if (!filename::unpack(packed_form))
 206     return false;
 207   if (!detach(packed_form, _file_size))
 208     return false;
 209   if (!_time.unpack(packed_form))
 210     return false;
 211   if (!detach(packed_form, _checksum))
 212     return false;
 213   if (!c_secondary.unpack(packed_form))
 214     return false;
 215   if (!detach(packed_form, c_attachment))
 216     return false;
 217   return true;
 218 }
 219
 220 file_info &file_info::operator = (const file_info &to_copy)
 221 {
 222   if (this == &to_copy)
 223     return *this;
 224   (filename &)(*this) = (filename &)to_copy;
 225   c_attachment = to_copy.c_attachment;
 226   _time = to_copy._time;
 227   _file_size = to_copy._file_size;
 228   c_secondary = to_copy.c_secondary;
 229   _checksum = to_copy._checksum;
 230   return *this;
 231 }
 232
 233 } //namespace.
 234