diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f56f26..59b79aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,16 +10,19 @@ set(CPACK_PROJECT_NAME ${PROJECT_NAME}) set(CPACK_PROJECT_VERSION ${PROJECT_VERSION}) -include_directories(${CMAKE_SOURCE_DIR}/stdlib ${CMAKE_SOURCE_DIR} ) +include_directories(/usr/local/opt/openssl/include ${CMAKE_SOURCE_DIR}/clib ${CMAKE_SOURCE_DIR} ) + +link_directories(/usr/local/lib /usr/local/opt/openssl/lib) + project(ml) - set(PROJECT_NAME ml) set(SOURCE ml.cpp - stdlib/csvparser.cpp) + clib/csvparser.cpp + clib/sslclient.cpp) add_executable(${PROJECT_NAME} ${SOURCE}) -target_link_libraries(${PROJECT_NAME} stdc++ m) +target_link_libraries(${PROJECT_NAME} stdc++ m ssl crypto) diff --git a/Readme.md b/Readme.md index 8c37d43..9bad78c 100644 --- a/Readme.md +++ b/Readme.md @@ -1,5 +1,7 @@ ### TODO +- update openssl libs + #### Functionality - readline diff --git a/stdlib/csvparser.cpp b/clib/csvparser.cpp similarity index 100% rename from stdlib/csvparser.cpp rename to clib/csvparser.cpp diff --git a/stdlib/csvparser.h b/clib/csvparser.h similarity index 100% rename from stdlib/csvparser.h rename to clib/csvparser.h diff --git a/clib/sslclient.cpp b/clib/sslclient.cpp new file mode 100644 index 0000000..585dfd8 --- /dev/null +++ b/clib/sslclient.cpp @@ -0,0 +1,236 @@ +//============================================================================ +// clang++ -o sslclient sslclient.cpp -lssl -lcrypto -L/usr/local/opt/openssl/lib -I/usr/local/opt/openssl/include +//============================================================================ +// + +#include "sslclient.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +HttpClient::HttpClient(){}; + +std::pair HttpClient::doGetRequest(const std::string &url, const std::unordered_map &headers) { + // ^(?:((?:https?|s?ftp):)\/\/)([^:\/\s]+)(?::(\d*))?(?:\/([^\s?#]+)?([?][^?#]*)?(#.*)?)? + // viz test + // https://api.iextrading.com:443/1.0/stock/market/batch?symbols=cah,khc,syf,jnj&types=quote#muhehe + // https://stackoverflow.com/questions/25896916/parse-http-headers-in-c + + std::regex rgx{R"(^(?:((?:https?|s?ftp):)//)([^:/\s]+)(?::(\d*))?(?:/([^\s?#]+)?([?][^?#]*)?(#.*)?)?)"}; + std::smatch matches; + + if (std::regex_search(url, matches, rgx)) { + for (size_t i = 0; i < matches.size(); ++i) { + switch (i) { + case 0: + full_url = matches[i].str(); + break; + case 1: + proto = matches[i].str(); + break; + case 2: + server = matches[i].str(); + break; + case 3: + port = matches[i].str(); + break; + case 4: + uri = matches[i].str(); + break; + case 5: + params = matches[i].str(); + break; + case 6: + href = matches[i].str(); + break; + } + // std::cout << i << ": '" << matches[i].str() << "'\n"; + } + } else { + std::cerr << "Match not found\n"; // TODO better message + } + + std::string headers_string = ""; + for (auto it = headers.begin(); it != headers.end(); ++it) { + headers_string.append("\r\n" + it->first + ": " + it->second); + } + + std::string request = "GET " + full_url + " HTTP/1.0\r\nHost: " + server + headers_string + "\r\n\r\n"; + + int bytes_read = sslRequest(server, request); + + std::string::size_type position = ssl_read_packet.find("\r\n\r\n"); + if (position == std::string::npos) { + std::cerr << "substring not found" << std::endl; // TODO invalid packet + } + + std::string hdr = ssl_read_packet.substr(0, position); + auto status_pos = hdr.find("\r\n"); + + std::string status_str = hdr.substr(0, status_pos); + hdr = hdr.substr(status_pos + 2, hdr.length() - 2 - status_pos); + + // TODO parse status code + std::regex status_rgx{"^HTTP/\\d\\.\\d (\\d{3}) .+$"}; + std::smatch status_matches; + if (std::regex_search(status_str, status_matches, status_rgx)) { + if (status_matches.size() > 1) { + auto sta = status_matches[1].str(); // string "200" + // std::cout << "status: " << sta << std::endl; + } + } + + std::string body = ssl_read_packet.substr(position + 4, ssl_read_packet.length() - 4 - position); + + + std::istringstream resp(hdr); + std::string header; + std::string::size_type index; + while (std::getline(resp, header) && header != "\r") { + index = header.find(": ", 0); + if (index != std::string::npos) { + headers_map.insert(std::make_pair(header.substr(0, index), header.substr(index + 1))); + } + } + + // for(auto& kv: headers_map) { + // std::cout << "KEY: `" << kv.first << "`, VALUE: `" << kv.second << '`' << std::endl; + // } + + // TODO if error return error desc in string + return std::make_pair(200, body); +}; + +std::string HttpClient::inetAddress(std::string hostname) { + hostent *record = gethostbyname(hostname.c_str()); + if (record == NULL) { + std::cerr << hostname << " is unavailable" << std::endl; + exit(1); + } + in_addr *address = (in_addr *)record->h_addr; + std::string ip_address = inet_ntoa(*address); + + return ip_address; +} + +int HttpClient::sslRecvPacket() { + ssl_read_packet.resize(4096); + ssl_read_packet.clear(); + + int len = 16384; + char buf[len + 1]; + do { + len = SSL_read(ssl, buf, len); + buf[len] = 0; + ssl_read_packet.append((const char *)buf); + } while (len > 0); + + if (len < 0) { + int err = SSL_get_error(ssl, len); + if (err == SSL_ERROR_WANT_READ) + return 0; + if (err == SSL_ERROR_WANT_WRITE) + return 0; + if (err == SSL_ERROR_ZERO_RETURN || err == SSL_ERROR_SYSCALL || err == SSL_ERROR_SSL) + return -1; + } + + return ssl_read_packet.length(); +} + +int HttpClient::sslSendPacket(std::string buf) { + int len = SSL_write(ssl, buf.c_str(), strlen(buf.c_str())); + if (len < 0) { + int err = SSL_get_error(ssl, len); + switch (err) { + case SSL_ERROR_WANT_WRITE: + return 0; + case SSL_ERROR_WANT_READ: + return 0; + case SSL_ERROR_ZERO_RETURN: + case SSL_ERROR_SYSCALL: + case SSL_ERROR_SSL: + default: + return -1; + } + } + + return buf.length(); +} + +int HttpClient::sslRequest(const std::string &server_name, const std::string &request) { + // create socket + int s; + s = socket(AF_INET, SOCK_STREAM, 0); + if (!s) { + printf("Error creating socket.\n"); + return -1; + } + + // socket address + std::string server_ip = inetAddress(server_name); + struct sockaddr_in sa; + memset(&sa, 0, sizeof(sa)); + sa.sin_family = AF_INET; + sa.sin_addr.s_addr = inet_addr(server_ip.c_str()); + sa.sin_port = htons(443); + socklen_t socklen = sizeof(sa); + + // connect to server + if (connect(s, (struct sockaddr *)&sa, socklen)) { + printf("Error connecting to server.\n"); + return -1; + } + + SSL_library_init(); + SSLeay_add_ssl_algorithms(); + SSL_load_error_strings(); + const SSL_METHOD *meth = TLSv1_2_client_method(); + SSL_CTX *ctx = SSL_CTX_new(meth); + ssl = SSL_new(ctx); + if (!ssl) { + printf("Error creating SSL.\n"); + log_ssl(); + return -1; + } + sock = SSL_get_fd(ssl); + SSL_set_fd(ssl, s); + + SSL_ctrl(ssl, SSL_CTRL_SET_TLSEXT_HOSTNAME, TLSEXT_NAMETYPE_host_name, (void*)server.c_str()); + + int err = SSL_connect(ssl); + if (err <= 0) { + printf("Error creating SSL connection. err=%x\n", err); + log_ssl(); + fflush(stdout); + return -1; + } + + // log cipher + // printf ("SSL connection using %s\n", SSL_get_cipher (ssl)); + + // send request + sslSendPacket(request); + + // read response and return its length + return sslRecvPacket(); +} + +void HttpClient::log_ssl() { + int err; + while ((err = ERR_get_error())) { + char *str = ERR_error_string(err, 0); + if (!str) + return; + std::cerr << str << std::endl; + } +} diff --git a/clib/sslclient.h b/clib/sslclient.h new file mode 100644 index 0000000..8294284 --- /dev/null +++ b/clib/sslclient.h @@ -0,0 +1,32 @@ + +#include +#include + +#include +#include + + +class HttpClient { + // TODO at this moment only https is implemented + +private: + SSL *ssl; + int sock; + + std::string full_url, proto, server, port, uri, params, href; + std::basic_string ssl_read_packet; + std::unordered_map headers_map; + +public: + HttpClient(); + + std::pair doGetRequest(const std::string &url, const std::unordered_map &headers); + +private: + std::string inetAddress(std::string hostname); + + int sslRecvPacket(); + int sslSendPacket(std::string buf); + int sslRequest(const std::string &server_name, const std::string &request); + void log_ssl(); +}; diff --git a/debug.lisp b/debug.lisp index d792663..6aec8d8 100644 --- a/debug.lisp +++ b/debug.lisp @@ -4,9 +4,15 @@ (print "sorted: " (qs '(10 9 8 7 6 5 4 3 2 1))) (define csv (read-file "tmp/data.csv")) -(print csv) +; (print csv) (define csv_list (parse-csv csv)) -(print csv_list) -(for x csv_list (print x)) +; (print csv_list) +(for x csv_list + (print x)) + +(define web_page (read-url "https://query1.finance.yahoo.com/v7/finance/download/FDX?period1=1581272585&period2=1612894985&interval=1d&events=history&includeAdjustedClose=true")) +(print web_page) +(define fdx_list (parse-csv (index web_page 1))) +(print fdx_list) (print "Debug ends") diff --git a/ml.cpp b/ml.cpp index d193c84..7245b3b 100644 --- a/ml.cpp +++ b/ml.cpp @@ -53,6 +53,7 @@ std::string read_file_contents(std::string filename) { #include #include "csvparser.h" +#include "sslclient.h" //////////////////////////////////////////////////////////////////////////////// /// ERROR MESSAGES ///////////////////////////////////////////////////////////// @@ -1273,6 +1274,33 @@ namespace builtin { return result; } + // Read URL to (code content) + Value read_url(std::vector args, Environment &env) { + // Is not a special form, so we can evaluate our args. + eval_args(args, env); + + // PERF optimize it for memory usage and performance + // TODO handle second parameter (headers) + if (args.size() != 1) + throw Error(Value("read_url", write_file), env, args.size() > 1? TOO_MANY_ARGS : TOO_FEW_ARGS); + + std::unordered_map headers = {}; + HttpClient client; + + if (args.size() == 2) { + // do magick here + // for (auto i = map.begin(); i != map.end(); ++i) { + // headers[i->first] = i->second.getString(); + // } + } + + std::pair result = client.doGetRequest(args[0].as_string(), headers); + std::vector lst; + lst.push_back(Value(result.first)); + lst.push_back(Value::string(result.second)); + return lst; + } + // Read a file and execute its code Value include(std::vector args, Environment &env) { // Import is technically not a special form, it's more of a macro. @@ -1794,6 +1822,7 @@ Value Environment::get(std::string name) const { if (name == "parse-csv") return Value("parse-csv", builtin::parse_csv); if (name == "read-file") return Value("read-file", builtin::read_file); if (name == "write-file") return Value("write-file", builtin::write_file); + if (name == "read-url") return Value("read-url", builtin::read_url); #endif // String operations