feisty meow concerns codebase 2.140
test_parse_csv.cpp
Go to the documentation of this file.
/*
*  Name   : test parsing of csv
*  Author : Chris Koeritz
*  Purpose: Checks that the CSV parsing function handles a few common scenarios.
**
* Copyright (c) 2005-$now By Author.  This program is free software; you can  *
* redistribute it and/or modify it under the terms of the GNU General Public  *
* License as published by the Free Software Foundation; either version 2 of   *
* the License or (at your option) any later version.  This is online at:      *
*     http://www.fsf.org/copyleft/gpl.html                                    *
*/
12
#include <application/hoople_main.h>
#include <basis/astring.h>
#include <basis/functions.h>
#include <basis/guards.h>
#include <configuration/application_configuration.h>
#include <filesystem/byte_filer.h>
#include <filesystem/filename.h>
#include <loggers/program_wide_logger.h>
#include <structures/string_array.h>
#include <textual/list_parsing.h>
#include <timely/stopwatch.h>
#include <unit_test/unit_base.h>

using namespace application;
using namespace basis;
using namespace configuration;
using namespace filesystem;
using namespace loggers;
using namespace mathematics;
using namespace structures;
using namespace textual;
using namespace timely;
using namespace unit_test;
36
// shorthand that hands "s" to EMERGENCY_LOG together with the logger returned
// by program_wide_logger::get().  within this file it is only referenced from
// a commented-out debugging line in execute().
#define LOG(s) EMERGENCY_LOG(program_wide_logger::get(), s)
38
// the number of times we scan our data file for performance test.
// used as the outer loop bound of the timing run in execute() and reported in
// the timing log message.
const int MAX_DATA_FILE_ITERS = 4000;
41
42class test_parsing_csv : public virtual unit_base, public virtual application_shell
43{
44public:
45 test_parsing_csv() {}
46 DEFINE_CLASS_NAME("test_parsing_csv");
47 int execute();
48};
49
//hmmm: too congratulatory?
// asserts that element "index" of the array "list" equals astring(value).  the
// assertion's name mentions the field index and the spelled-out name of the
// list (via #list) so it is clear which expectation is being checked.
// the list and index arguments are parenthesized so that expression arguments
// (e.g. "i + 1") keep their intended meaning after expansion.
#define COMPLAIN_FIELD(list, index, value) \
  ASSERT_EQUAL((list)[(index)], astring(value), \
      a_sprintf("comparison test should have field %d correct in %s", (index), #list))
54
55int test_parsing_csv::execute()
56{
57 FUNCDEF("execute");
58 astring line1 = "\"fupe\",\"snoorp\",\"lutem\",\"fipe\"";
59 string_array fields1;
60 bool works1 = list_parsing::parse_csv_line(line1, fields1);
61 ASSERT_TRUE(works1, "first test should not fail to parse");
62//LOG(a_sprintf("fields len now %d", fields1.length()));
63 ASSERT_EQUAL(fields1.length(), 4, "first test should have right count of strings found");
64 COMPLAIN_FIELD(fields1, 0, "fupe");
65 COMPLAIN_FIELD(fields1, 1, "snoorp");
66 COMPLAIN_FIELD(fields1, 2, "lutem");
67 COMPLAIN_FIELD(fields1, 3, "fipe");
68
69 astring line2 = "fupe,\"snoorp\",lutem,\"fipe\"";
70 string_array fields2;
71 bool works2 = list_parsing::parse_csv_line(line2, fields2);
72 ASSERT_TRUE(works2, "second test should not fail to parse");
73 ASSERT_EQUAL(fields2.length(), 4, "second test should have right count of strings found");
74 COMPLAIN_FIELD(fields2, 0, "fupe");
75 COMPLAIN_FIELD(fields2, 1, "snoorp");
76 COMPLAIN_FIELD(fields2, 2, "lutem");
77 COMPLAIN_FIELD(fields2, 3, "fipe");
78
79 astring line3 = "\"lowenburger\",\"wazizzle\",morphel";
80 string_array fields3;
81 bool works3 = list_parsing::parse_csv_line(line3, fields3);
82 ASSERT_TRUE(works3, "third test should not fail to parse");
83 ASSERT_EQUAL(fields3.length(), 3, "third test should have right count of strings found");
84 COMPLAIN_FIELD(fields3, 0, "lowenburger");
85 COMPLAIN_FIELD(fields3, 1, "wazizzle");
86 COMPLAIN_FIELD(fields3, 2, "morphel");
87
88 astring line4 = "\"lowenburger\",\"wazizzle\",morphel,";
89 string_array fields4;
90 bool works4 = list_parsing::parse_csv_line(line4, fields4);
91 ASSERT_TRUE(works4, "fourth test should not fail to parse");
92 ASSERT_EQUAL(fields4.length(), 4, "fourth test should not have wrong count of strings found");
93 COMPLAIN_FIELD(fields4, 0, "lowenburger");
94 COMPLAIN_FIELD(fields4, 1, "wazizzle");
95 COMPLAIN_FIELD(fields4, 2, "morphel");
96 COMPLAIN_FIELD(fields4, 3, "");
97
98 astring line5 = "\"lowenburger\",,";
99 string_array fields5;
100 bool works5 = list_parsing::parse_csv_line(line5, fields5);
101 ASSERT_TRUE(works5, "fifth test should not fail to parse");
102 ASSERT_EQUAL(fields5.length(), 3, "fifth test should have right count of strings found");
103 COMPLAIN_FIELD(fields5, 0, "lowenburger");
104 COMPLAIN_FIELD(fields5, 1, "");
105 COMPLAIN_FIELD(fields5, 2, "");
106
107 astring line6 = ",,,\"rasputy\",,\"spunk\",ralph";
108 string_array fields6;
109 bool works6 = list_parsing::parse_csv_line(line6, fields6);
110 ASSERT_TRUE(works6, "sixth test should not fail to parse");
111 ASSERT_EQUAL(fields6.length(), 7, "sixth test should have right count of strings found");
112 COMPLAIN_FIELD(fields6, 0, "");
113 COMPLAIN_FIELD(fields6, 1, "");
114 COMPLAIN_FIELD(fields6, 2, "");
115 COMPLAIN_FIELD(fields6, 3, "rasputy");
116 COMPLAIN_FIELD(fields6, 4, "");
117 COMPLAIN_FIELD(fields6, 5, "spunk");
118 COMPLAIN_FIELD(fields6, 6, "ralph");
119
120 astring line7 = "\"SRV0001337CHN0000001DSP0000001SRV0001337LAY0003108,16,0,8,192\",\"\\\"row_3\\\" on 12.5.55.159\",3";
121 string_array fields7;
122 bool works7 = list_parsing::parse_csv_line(line7, fields7);
123 ASSERT_TRUE(works7, "seventh test should not fail to parse");
124 ASSERT_EQUAL(fields7.length(), 3, "seventh test should have right count of strings found");
125 COMPLAIN_FIELD(fields7, 0, "SRV0001337CHN0000001DSP0000001SRV0001337LAY0003108,16,0,8,192");
126 COMPLAIN_FIELD(fields7, 1, "\"row_3\" on 12.5.55.159");
127 COMPLAIN_FIELD(fields7, 2, "3");
128
129 // test 8... use data file.
131 byte_filer test_data(df_dir.raw() + "/df_1.csv", "rb");
132 string_array parsed;
133 string_array lines;
134 astring curr_line;
135 int read_result;
136 while ( (read_result = test_data.getline(curr_line, 1024)) > 0 )
137 lines += curr_line;
138 if (lines.length()) {
139 // now we have the data file loaded.
140 stopwatch clicker;
141 clicker.start();
142 for (int iterations = 0; iterations < MAX_DATA_FILE_ITERS; iterations++) {
143 for (int line = 0; line < lines.length(); line++) {
144 const astring &current = lines[line];
145 list_parsing::parse_csv_line(current, parsed);
146 }
147 }
148 clicker.stop();
149 log(a_sprintf("%d csv lines with %d iterations took %d ms (or %d s).",
150 lines.length(), MAX_DATA_FILE_ITERS, clicker.elapsed(),
151 clicker.elapsed() / 1000));
152 }
153
154 // test 9: process results of create_csv_line.
155 string_array fields9;
156 fields9 += "ACk\"boozort";
157 fields9 += "sme\"ra\"\"foop";
158 fields9 += "\"gumby\"";
159 astring line9 = "\"ACk\\\"boozort\",\"sme\\\"ra\\\"\\\"foop\",\"\\\"gumby\\\"\"";
160 astring gen_line_9;
161 list_parsing::create_csv_line(fields9, gen_line_9);
162//log(astring(" got gen line: ") + gen_line_9);
163//log(astring("expected line: ") + line9);
164 ASSERT_EQUAL(gen_line_9, line9, "ninth test should not fail to create expected text");
165 string_array fields9_b;
166 bool works9 = list_parsing::parse_csv_line(gen_line_9, fields9_b);
167 ASSERT_TRUE(works9, "ninth test should not fail to parse");
168 ASSERT_TRUE(fields9_b == fields9, "ninth test should match original fields");
169
170 return final_report();
171}
172
174
175HOOPLE_MAIN(test_parsing_csv, )
176
The application_shell is a base object for console programs.
virtual int execute()=0
< retrieves the command line from the /proc hierarchy on linux.
a_sprintf is a specialization of astring that provides printf style support.
Definition astring.h:440
int length() const
Returns the current reported length of the allocated C array.
Definition array.h:115
Provides a dynamically resizable ASCII character string.
Definition astring.h:35
static basis::astring application_name()
returns the full name of the current application.
Provides file management services using the standard I/O support.
Definition byte_filer.h:32
int getline(basis::abyte *buffer, int desired_size)
reads a line of text (terminated by a return) into the "buffer".
Provides operations commonly needed on file names.
Definition filename.h:64
const basis::astring & raw() const
returns the astring that we're holding onto for the path.
Definition filename.cpp:97
filename dirname() const
returns the directory for the filename.
Definition filename.cpp:393
An array of strings with some additional helpful methods.
static bool parse_csv_line(const basis::astring &to_parse, structures::string_array &fields)
examines the string "to_parse" which should be in csv format.
static void create_csv_line(const structures::string_array &to_csv, basis::astring &target)
A class for measuring event durations in real time.
Definition stopwatch.h:29
void stop()
a synonym for halt().
Definition stopwatch.h:46
int elapsed()
a synonym for milliseconds().
Definition stopwatch.h:54
void start()
Begins the timing.
Definition stopwatch.cpp:61
#define DEFINE_CLASS_NAME(objname)
Defines the name of a class by providing a couple standard methods.
Definition enhance_cpp.h:42
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition enhance_cpp.h:54
Provides macros that implement the 'main' program of an application.
#define HOOPLE_MAIN(obj_name, obj_args)
options that should work for most unix and linux apps.
Definition hoople_main.h:61
Implements an application lock to ensure only one is running at once.
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
A platform independent way to obtain the timestamp of a file.
A logger that sends to the console screen using the standard output device.
An extension to floating point primitives providing approximate equality.
Definition averager.h:21
A dynamic container class that holds any kind of object via pointers.
Definition amorph.h:55
#include <time.h>
Useful support functions for unit testing, especially within hoople.
Definition unit_base.cpp:35
#define COMPLAIN_FIELD(list, index, value)
const int MAX_DATA_FILE_ITERS
#define ASSERT_EQUAL(a, b, test_name)
Definition unit_base.h:38
#define ASSERT_TRUE(a, test_name)
Definition unit_base.h:46