feisty meow concerns codebase  2.140
test_parse_csv.cpp
Go to the documentation of this file.
1 /*
2 * Name : test parsing of csv
3 * Author : Chris Koeritz
4 * Purpose: Checks that the CSV parsing function handles a few common scenarios.
5 **
6 * Copyright (c) 2005-$now By Author. This program is free software; you can *
7 * redistribute it and/or modify it under the terms of the GNU General Public *
8 * License as published by the Free Software Foundation; either version 2 of *
9 * the License or (at your option) any later version. This is online at: *
10 * http://www.fsf.org/copyleft/gpl.html *
11 */
12 
14 #include <basis/astring.h>
15 #include <basis/functions.h>
16 #include <basis/guards.h>
18 #include <filesystem/byte_filer.h>
19 #include <filesystem/filename.h>
22 #include <textual/list_parsing.h>
23 #include <timely/stopwatch.h>
24 #include <unit_test/unit_base.h>
25 
26 using namespace application;
27 using namespace basis;
28 using namespace configuration;
29 using namespace filesystem;
30 using namespace loggers;
31 using namespace mathematics;
32 using namespace structures;
33 using namespace textual;
34 using namespace timely;
35 using namespace unit_test;
36 
// shorthand that hands "s" to EMERGENCY_LOG along with the program-wide logger.
// NOTE(review): in the code visible here it is only used by a commented-out line.
37 #define LOG(s) EMERGENCY_LOG(program_wide_logger::get(), s)
38 
39 // the number of times every line of the data file is parsed during the performance test (test 8).
40 const int MAX_DATA_FILE_ITERS = 4000;
41 
// Unit test application that exercises list_parsing::parse_csv_line and
// list_parsing::create_csv_line.  It inherits virtually from both unit_base
// and application_shell; all of the checks are run from execute().
42 class test_parsing_csv : public virtual unit_base, public virtual application_shell
43 {
44 public:
45  test_parsing_csv() {}
46  DEFINE_CLASS_NAME("test_parsing_csv");
 // runs every csv check and returns the value produced by final_report().
47  int execute();
48 };
49 
50 //hmmm: too congratulatory?
// Asserts (via ASSERT_EQUAL) that element "index" of "list" equals astring(value).
// The "list" argument is stringized with #list, so the name of the variable
// passed in becomes part of the test description text; renaming a variable at
// a call site therefore changes that text.
51 #define COMPLAIN_FIELD(list, index, value) \
52  ASSERT_EQUAL(list[index], astring(value), \
53  a_sprintf("comparison test should have field %d correct in %s", index, #list))
54 
// Runs the csv checks in order: seven fixed-string parsing tests, a timed bulk
// parse of a data file (test 8), and a create-then-parse round trip (test 9).
// Returns whatever final_report() returns.
55 int test_parsing_csv::execute()
56 {
57  FUNCDEF("execute");
 // test 1: every field is wrapped in quotes.
58  astring line1 = "\"fupe\",\"snoorp\",\"lutem\",\"fipe\"";
59  string_array fields1;
60  bool works1 = list_parsing::parse_csv_line(line1, fields1);
61  ASSERT_TRUE(works1, "first test should not fail to parse");
62 //LOG(a_sprintf("fields len now %d", fields1.length()));
63  ASSERT_EQUAL(fields1.length(), 4, "first test should have right count of strings found");
64  COMPLAIN_FIELD(fields1, 0, "fupe");
65  COMPLAIN_FIELD(fields1, 1, "snoorp");
66  COMPLAIN_FIELD(fields1, 2, "lutem");
67  COMPLAIN_FIELD(fields1, 3, "fipe");
68 
 // test 2: quoted and unquoted fields alternate on the same line.
69  astring line2 = "fupe,\"snoorp\",lutem,\"fipe\"";
70  string_array fields2;
71  bool works2 = list_parsing::parse_csv_line(line2, fields2);
72  ASSERT_TRUE(works2, "second test should not fail to parse");
73  ASSERT_EQUAL(fields2.length(), 4, "second test should have right count of strings found");
74  COMPLAIN_FIELD(fields2, 0, "fupe");
75  COMPLAIN_FIELD(fields2, 1, "snoorp");
76  COMPLAIN_FIELD(fields2, 2, "lutem");
77  COMPLAIN_FIELD(fields2, 3, "fipe");
78 
 // test 3: the line ends with an unquoted field.
79  astring line3 = "\"lowenburger\",\"wazizzle\",morphel";
80  string_array fields3;
81  bool works3 = list_parsing::parse_csv_line(line3, fields3);
82  ASSERT_TRUE(works3, "third test should not fail to parse");
83  ASSERT_EQUAL(fields3.length(), 3, "third test should have right count of strings found");
84  COMPLAIN_FIELD(fields3, 0, "lowenburger");
85  COMPLAIN_FIELD(fields3, 1, "wazizzle");
86  COMPLAIN_FIELD(fields3, 2, "morphel");
87 
 // test 4: same as test 3 plus a trailing comma, which is expected to
 // produce one extra, empty field at the end.
88  astring line4 = "\"lowenburger\",\"wazizzle\",morphel,";
89  string_array fields4;
90  bool works4 = list_parsing::parse_csv_line(line4, fields4);
91  ASSERT_TRUE(works4, "fourth test should not fail to parse");
92  ASSERT_EQUAL(fields4.length(), 4, "fourth test should not have wrong count of strings found");
93  COMPLAIN_FIELD(fields4, 0, "lowenburger");
94  COMPLAIN_FIELD(fields4, 1, "wazizzle");
95  COMPLAIN_FIELD(fields4, 2, "morphel");
96  COMPLAIN_FIELD(fields4, 3, "");
97 
 // test 5: two trailing commas are expected to yield two empty fields.
98  astring line5 = "\"lowenburger\",,";
99  string_array fields5;
100  bool works5 = list_parsing::parse_csv_line(line5, fields5);
101  ASSERT_TRUE(works5, "fifth test should not fail to parse");
102  ASSERT_EQUAL(fields5.length(), 3, "fifth test should have right count of strings found");
103  COMPLAIN_FIELD(fields5, 0, "lowenburger");
104  COMPLAIN_FIELD(fields5, 1, "");
105  COMPLAIN_FIELD(fields5, 2, "");
106 
 // test 6: empty fields at the start of the line and between populated fields.
107  astring line6 = ",,,\"rasputy\",,\"spunk\",ralph";
108  string_array fields6;
109  bool works6 = list_parsing::parse_csv_line(line6, fields6);
110  ASSERT_TRUE(works6, "sixth test should not fail to parse");
111  ASSERT_EQUAL(fields6.length(), 7, "sixth test should have right count of strings found");
112  COMPLAIN_FIELD(fields6, 0, "");
113  COMPLAIN_FIELD(fields6, 1, "");
114  COMPLAIN_FIELD(fields6, 2, "");
115  COMPLAIN_FIELD(fields6, 3, "rasputy");
116  COMPLAIN_FIELD(fields6, 4, "");
117  COMPLAIN_FIELD(fields6, 5, "spunk");
118  COMPLAIN_FIELD(fields6, 6, "ralph");
119 
 // test 7: commas inside a quoted field must not split it, and quotes that are
 // escaped with a backslash inside a quoted field are expected to come back
 // as plain quote characters.
120  astring line7 = "\"SRV0001337CHN0000001DSP0000001SRV0001337LAY0003108,16,0,8,192\",\"\\\"row_3\\\" on 12.5.55.159\",3";
121  string_array fields7;
122  bool works7 = list_parsing::parse_csv_line(line7, fields7);
123  ASSERT_TRUE(works7, "seventh test should not fail to parse");
124  ASSERT_EQUAL(fields7.length(), 3, "seventh test should have right count of strings found");
125  COMPLAIN_FIELD(fields7, 0, "SRV0001337CHN0000001DSP0000001SRV0001337LAY0003108,16,0,8,192");
126  COMPLAIN_FIELD(fields7, 1, "\"row_3\" on 12.5.55.159");
127  COMPLAIN_FIELD(fields7, 2, "3");
128 
129  // test 8... use data file.
 // the file df_1.csv is looked up in the directory part of the application's
 // name.  this section only measures time: the return value of
 // parse_csv_line and the parsed fields are not checked, and nothing is
 // reported or asserted when no lines could be read from the file.
130  filename df_dir = filename(application_configuration::application_name()).dirname();
131  byte_filer test_data(df_dir.raw() + "/df_1.csv", "rb");
132  string_array parsed;
133  string_array lines;
134  astring curr_line;
135  int read_result;
 // keep collecting lines until getline reports zero or a negative value.
 // 1024 is the size handed to getline (presumably the most characters taken
 // per line -- TODO confirm against byte_filer).
136  while ( (read_result = test_data.getline(curr_line, 1024)) > 0 )
137  lines += curr_line;
138  if (lines.length()) {
139  // now we have the data file loaded.
140  stopwatch clicker;
141  clicker.start();
 // parse every loaded line, MAX_DATA_FILE_ITERS times over.
142  for (int iterations = 0; iterations < MAX_DATA_FILE_ITERS; iterations++) {
143  for (int line = 0; line < lines.length(); line++) {
144  const astring &current = lines[line];
145  list_parsing::parse_csv_line(current, parsed);
146  }
147  }
148  clicker.stop();
 // elapsed() is in milliseconds; the integer division by 1000 reports whole seconds.
149  log(a_sprintf("%d csv lines with %d iterations took %d ms (or %d s).",
150  lines.length(), MAX_DATA_FILE_ITERS, clicker.elapsed(),
151  clicker.elapsed() / 1000));
152  }
153 
154  // test 9: process results of create_csv_line.
 // the input fields contain quote characters.  the expected line wraps each
 // field in quotes and puts a backslash in front of every embedded quote.
 // the generated line is then parsed again and must match the original fields.
155  string_array fields9;
156  fields9 += "ACk\"boozort";
157  fields9 += "sme\"ra\"\"foop";
158  fields9 += "\"gumby\"";
159  astring line9 = "\"ACk\\\"boozort\",\"sme\\\"ra\\\"\\\"foop\",\"\\\"gumby\\\"\"";
160  astring gen_line_9;
161  list_parsing::create_csv_line(fields9, gen_line_9);
162 //log(astring(" got gen line: ") + gen_line_9);
163 //log(astring("expected line: ") + line9);
164  ASSERT_EQUAL(gen_line_9, line9, "ninth test should not fail to create expected text");
165  string_array fields9_b;
166  bool works9 = list_parsing::parse_csv_line(gen_line_9, fields9_b);
167  ASSERT_TRUE(works9, "ninth test should not fail to parse");
168  ASSERT_TRUE(fields9_b == fields9, "ninth test should match original fields");
169 
170  return final_report();
171 }
172 
174 
// presumably expands to the program's main() for the test_parsing_csv object;
// the second macro argument (obj_args) is intentionally left empty.
175 HOOPLE_MAIN(test_parsing_csv, )
176 
The application_shell is a base object for console programs.
a_sprintf is a specialization of astring that provides printf style support.
Definition: astring.h:440
int length() const
Returns the current reported length of the allocated C array.
Definition: array.h:115
Provides a dynamically resizable ASCII character string.
Definition: astring.h:35
Provides file management services using the standard I/O support.
Definition: byte_filer.h:32
int getline(basis::abyte *buffer, int desired_size)
reads a line of text (terminated by a return) into the "buffer".
Definition: byte_filer.cpp:201
Provides operations commonly needed on file names.
Definition: filename.h:64
const basis::astring & raw() const
returns the astring that we're holding onto for the path.
Definition: filename.cpp:97
filename dirname() const
returns the directory for the filename.
Definition: filename.cpp:393
An array of strings with some additional helpful methods.
Definition: string_array.h:32
A class for measuring event durations in real time.
Definition: stopwatch.h:29
void stop()
a synonym for halt().
Definition: stopwatch.h:46
int elapsed()
a synonym for milliseconds().
Definition: stopwatch.h:54
void start()
Begins the timing.
Definition: stopwatch.cpp:61
#define DEFINE_CLASS_NAME(objname)
Defines the name of a class by providing a couple standard methods.
Definition: enhance_cpp.h:45
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition: enhance_cpp.h:57
Provides macros that implement the 'main' program of an application.
#define HOOPLE_MAIN(obj_name, obj_args)
options that should work for most unix and linux apps.
Definition: hoople_main.h:61
Implements an application lock to ensure only one is running at once.
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
A platform independent way to obtain the timestamp of a file.
Definition: byte_filer.cpp:37
A logger that sends to the console screen using the standard output device.
An extension to floating point primitives providing approximate equality.
Definition: averager.h:21
A dynamic container class that holds any kind of object via pointers.
Definition: amorph.h:55
#include <time.h>
Definition: earth_time.cpp:37
Useful support functions for unit testing, especially within hoople.
Definition: unit_base.cpp:35
#define COMPLAIN_FIELD(list, index, value)
const int MAX_DATA_FILE_ITERS
#define ASSERT_EQUAL(a, b, test_name)
Definition: unit_base.h:38
#define ASSERT_TRUE(a, test_name)
Definition: unit_base.h:46