#include #include "exception.h" #include "csvreader.h" #include "parser.h" namespace usql { CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) { skip_header = skip_hdr; field_separator = field_sep; quote_character = quote_ch; line_separator = line_sep; line_separator2 = line_sep2; header_skiped = !skip_hdr; } int CsvReader::parseCSV(const std::string &filename, std::vector &cols_def, Table &table) { std::vector cdefs; cdefs.reserve(cols_def.size()); for (auto &cd : cols_def) { cdefs.emplace_back(table.get_column_def(cd.name)); } int row_cnt = 0; bool inQuote(false); std::string field; std::vector line; line.reserve(32); errno = 0; FILE* fp = fopen(filename.c_str(), "r"); if (fp == NULL) throw Exception("load from csv, cannot read from file(" + std::to_string(errno) + ")"); char* line_str = NULL; size_t len = 0; int read_chars; while ((read_chars = getline(&line_str, &len, fp)) != -1) { if (skip_header && !header_skiped) { header_skiped = true; continue; } if (read_chars > 0 && line_str[read_chars - 1] == '\n') { line_str[read_chars - 1] = '\0'; --read_chars; } std::string csvSource{line_str}; std::string::const_iterator aChar = csvSource.begin(); std::string::const_iterator strEnd = csvSource.end(); while (aChar != strEnd) { if (*aChar == quote_character) { inQuote = !inQuote; } else if (*aChar == field_separator) { if (inQuote == true) { field += *aChar; } else { line.push_back(field); field.clear(); } } else { field.push_back(*aChar); } ++aChar; } if (!field.empty()) line.push_back(field); table.create_row_from_vector(cols_def, line); row_cnt++; field.clear(); line.clear(); // DEBUG // if (row_cnt > 50000) break; // } fclose(fp); if (line_str) free(line_str); return row_cnt; } int CsvReader::parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table) { int row_cnt = 0; bool inQuote(false); bool newLine(false); std::string field; std::vector line; line.reserve(32); std::string::const_iterator aChar = csvSource.begin(); while (aChar != csvSource.end()) { if (*aChar == quote_character) { newLine = false; inQuote = !inQuote; } else if (*aChar == field_separator) { newLine = false; if (inQuote == true) { field += *aChar; } else { line.push_back(field); field.clear(); } } else if (*aChar == line_separator || *aChar == line_separator2) { if (inQuote == true) { field += *aChar; } else { if (newLine == false) { line.push_back(field); if (header_skiped) { table.create_row_from_vector(cols_def, line); row_cnt++; } header_skiped = true; field.clear(); line.clear(); newLine = true; } } } else { newLine = false; field.push_back(*aChar); } aChar++; } if (!field.empty()) line.push_back(field); if (header_skiped) { table.create_row_from_vector(cols_def, line); row_cnt++; header_skiped = true; } return row_cnt; } } // namespace