added read-url

This commit is contained in:
VaclavT 2021-02-09 19:36:05 +01:00
parent 22f9410d17
commit 982241acf6
8 changed files with 315 additions and 7 deletions

View File

@ -10,16 +10,19 @@ set(CPACK_PROJECT_NAME ${PROJECT_NAME})
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION}) set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
include_directories(${CMAKE_SOURCE_DIR}/stdlib ${CMAKE_SOURCE_DIR} ) include_directories(/usr/local/opt/openssl/include ${CMAKE_SOURCE_DIR}/clib ${CMAKE_SOURCE_DIR} )
link_directories(/usr/local/lib /usr/local/opt/openssl/lib)
project(ml) project(ml)
set(PROJECT_NAME ml) set(PROJECT_NAME ml)
set(SOURCE set(SOURCE
ml.cpp ml.cpp
stdlib/csvparser.cpp) clib/csvparser.cpp
clib/sslclient.cpp)
add_executable(${PROJECT_NAME} ${SOURCE}) add_executable(${PROJECT_NAME} ${SOURCE})
target_link_libraries(${PROJECT_NAME} stdc++ m) target_link_libraries(${PROJECT_NAME} stdc++ m ssl crypto)

View File

@ -1,5 +1,7 @@
### TODO ### TODO
- update openssl libs
#### Functionality #### Functionality
- readline - readline

236
clib/sslclient.cpp Normal file
View File

@ -0,0 +1,236 @@
//============================================================================
// clang++ -o sslclient sslclient.cpp -lssl -lcrypto -L/usr/local/opt/openssl/lib -I/usr/local/opt/openssl/include
//============================================================================
//
#include "sslclient.h"

#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
// Nothing is set up at construction time; the socket and TLS objects are
// created inside sslRequest() for each request.
HttpClient::HttpClient() = default;
// Performs an HTTPS GET for `url` and returns {status code, response body}.
//
// url     - absolute URL. It is split with a regex into scheme, host, port,
//           path, query string and fragment, which are stored in the members
//           proto, server, port, uri, params and href (full_url holds the
//           whole match).
// headers - extra request headers, each sent as "Name: value".
//
// The response headers are stored in headers_map, replacing those of any
// previous call. On failure the first element of the result is -1 and the
// second is a short description: the URL could not be parsed, no complete
// HTTP header block was received, or the status line was malformed.
std::pair<int, std::string> HttpClient::doGetRequest(const std::string &url, const std::unordered_map<std::string, std::string> &headers) {
    // Reference pattern, sample URL and background reading:
    // ^(?:((?:https?|s?ftp):)\/\/)([^:\/\s]+)(?::(\d*))?(?:\/([^\s?#]+)?([?][^?#]*)?(#.*)?)?
    // https://api.iextrading.com:443/1.0/stock/market/batch?symbols=cah,khc,syf,jnj&types=quote#muhehe
    // https://stackoverflow.com/questions/25896916/parse-http-headers-in-c
    static const std::regex url_rgx{R"(^(?:((?:https?|s?ftp):)//)([^:/\s]+)(?::(\d*))?(?:/([^\s?#]+)?([?][^?#]*)?(#.*)?)?)"};
    static const std::regex status_rgx{R"(^HTTP/\d\.\d (\d{3}))"};

    std::smatch url_match;
    if (!std::regex_search(url, url_match, url_rgx)) {
        std::cerr << "Match not found\n";
        return std::make_pair(-1, "invalid URL: " + url);
    }
    // Groups that did not participate in the match yield empty strings.
    full_url = url_match[0].str();
    proto = url_match[1].str();
    server = url_match[2].str();
    port = url_match[3].str();
    uri = url_match[4].str();
    params = url_match[5].str();
    href = url_match[6].str();

    std::string request = "GET " + full_url + " HTTP/1.0\r\nHost: " + server;
    for (const auto &header : headers) {
        request += "\r\n" + header.first + ": " + header.second;
    }
    request += "\r\n\r\n";

    // Forget the previous exchange so that a request which fails before any
    // data is read cannot be mistaken for a fresh response.
    ssl_read_packet.clear();
    headers_map.clear();
    const int bytes_read = sslRequest(server, request);

    // The transport may report an error even though a full response arrived
    // (e.g. the peer closed without a TLS close notification), so success is
    // judged by the presence of the header/body separator.
    const std::string::size_type header_end = ssl_read_packet.find("\r\n\r\n");
    if (header_end == std::string::npos) {
        std::cerr << "substring not found" << std::endl;
        return std::make_pair(-1, std::string(bytes_read < 0 ? "request failed" : "invalid HTTP response"));
    }
    const std::string head = ssl_read_packet.substr(0, header_end);
    const std::string body = ssl_read_packet.substr(header_end + 4);

    // First line is the status line; everything after it is header fields.
    const std::string::size_type status_end = head.find("\r\n");
    const std::string status_line = head.substr(0, status_end);
    const std::string header_lines =
        status_end == std::string::npos ? std::string() : head.substr(status_end + 2);

    std::smatch status_match;
    if (!std::regex_search(status_line, status_match, status_rgx)) {
        return std::make_pair(-1, "invalid HTTP status line: " + status_line);
    }
    const int status_code = std::stoi(status_match[1].str());

    std::istringstream header_stream(header_lines);
    std::string line;
    while (std::getline(header_stream, line)) {
        // getline splits on '\n' only, so strip the '\r' of the CRLF pair.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.empty()) {
            break;
        }
        const std::string::size_type colon = line.find(':');
        if (colon == std::string::npos) {
            continue;
        }
        const std::string::size_type value_start = line.find_first_not_of(" \t", colon + 1);
        const std::string value =
            value_start == std::string::npos ? std::string() : line.substr(value_start);
        // insert() keeps the first value when a header name repeats.
        headers_map.insert(std::make_pair(line.substr(0, colon), value));
    }

    return std::make_pair(status_code, body);
}
// Resolves `hostname` to the dotted-decimal text of its first IPv4 address.
//
// Resolution uses getaddrinfo()/inet_ntop() with caller-owned buffers instead
// of gethostbyname()/inet_ntoa(), which hand back pointers into static storage.
//
// If the name cannot be resolved, a message is written to stderr and the
// process exits with status 1.
std::string HttpClient::inetAddress(std::string hostname) {
    addrinfo hints{};
    hints.ai_family = AF_INET;  // sslRequest() fills a sockaddr_in, so IPv4 only
    hints.ai_socktype = SOCK_STREAM;

    addrinfo *results = nullptr;
    if (getaddrinfo(hostname.c_str(), nullptr, &hints, &results) != 0 || results == nullptr) {
        std::cerr << hostname << " is unavailable" << std::endl;
        exit(1);
    }

    char text[INET_ADDRSTRLEN] = {0};
    const sockaddr_in *first = reinterpret_cast<const sockaddr_in *>(results->ai_addr);
    const char *converted = inet_ntop(AF_INET, &first->sin_addr, text, sizeof(text));
    const std::string ip_address = converted != nullptr ? converted : "";

    freeaddrinfo(results);
    return ip_address;
}
// Reads from `ssl` until SSL_read() stops returning data and stores everything
// received in ssl_read_packet (previous contents are discarded).
//
// Returns the number of bytes stored. If the final SSL_read() returned a
// negative value, the result depends on SSL_get_error():
//   SSL_ERROR_WANT_READ / SSL_ERROR_WANT_WRITE                 -> 0
//   SSL_ERROR_ZERO_RETURN / SSL_ERROR_SYSCALL / SSL_ERROR_SSL  -> -1
// In those cases ssl_read_packet still holds whatever arrived beforehand.
int HttpClient::sslRecvPacket() {
    ssl_read_packet.clear();

    char chunk[16384];
    int received = 0;
    // Keep the buffer capacity and the read result in separate variables so
    // every call asks for a full chunk, and append by explicit length so
    // payloads containing NUL bytes are not truncated.
    while ((received = SSL_read(ssl, chunk, static_cast<int>(sizeof(chunk)))) > 0) {
        ssl_read_packet.append(chunk, static_cast<std::string::size_type>(received));
    }

    if (received < 0) {
        switch (SSL_get_error(ssl, received)) {
            case SSL_ERROR_WANT_READ:
            case SSL_ERROR_WANT_WRITE:
                return 0;
            case SSL_ERROR_ZERO_RETURN:
            case SSL_ERROR_SYSCALL:
            case SSL_ERROR_SSL:
                return -1;
            default:
                break;
        }
    }
    return static_cast<int>(ssl_read_packet.length());
}
// Writes `buf` to the TLS connection held in `ssl`.
//
// Returns the number of bytes written on success, 0 if `buf` is empty or
// OpenSSL asked for a retry (SSL_ERROR_WANT_READ / SSL_ERROR_WANT_WRITE), and
// -1 for every other failure.
int HttpClient::sslSendPacket(std::string buf) {
    // OpenSSL documents SSL_write() with a length of 0 as undefined behaviour.
    if (buf.empty()) {
        return 0;
    }

    // Use the string's own size: strlen() would stop at an embedded NUL.
    const int written = SSL_write(ssl, buf.data(), static_cast<int>(buf.size()));
    if (written <= 0) {
        switch (SSL_get_error(ssl, written)) {
            case SSL_ERROR_WANT_WRITE:
            case SSL_ERROR_WANT_READ:
                return 0;
            default:
                return -1;
        }
    }
    return written;
}
int HttpClient::sslRequest(const std::string &server_name, const std::string &request) {
// create socket
int s;
s = socket(AF_INET, SOCK_STREAM, 0);
if (!s) {
printf("Error creating socket.\n");
return -1;
}
// socket address
std::string server_ip = inetAddress(server_name);
struct sockaddr_in sa;
memset(&sa, 0, sizeof(sa));
sa.sin_family = AF_INET;
sa.sin_addr.s_addr = inet_addr(server_ip.c_str());
sa.sin_port = htons(443);
socklen_t socklen = sizeof(sa);
// connect to server
if (connect(s, (struct sockaddr *)&sa, socklen)) {
printf("Error connecting to server.\n");
return -1;
}
SSL_library_init();
SSLeay_add_ssl_algorithms();
SSL_load_error_strings();
const SSL_METHOD *meth = TLSv1_2_client_method();
SSL_CTX *ctx = SSL_CTX_new(meth);
ssl = SSL_new(ctx);
if (!ssl) {
printf("Error creating SSL.\n");
log_ssl();
return -1;
}
sock = SSL_get_fd(ssl);
SSL_set_fd(ssl, s);
SSL_ctrl(ssl, SSL_CTRL_SET_TLSEXT_HOSTNAME, TLSEXT_NAMETYPE_host_name, (void*)server.c_str());
int err = SSL_connect(ssl);
if (err <= 0) {
printf("Error creating SSL connection. err=%x\n", err);
log_ssl();
fflush(stdout);
return -1;
}
// log cipher
// printf ("SSL connection using %s\n", SSL_get_cipher (ssl));
// send request
sslSendPacket(request);
// read response and return its length
return sslRecvPacket();
}
void HttpClient::log_ssl() {
int err;
while ((err = ERR_get_error())) {
char *str = ERR_error_string(err, 0);
if (!str)
return;
std::cerr << str << std::endl;
}
}

32
clib/sslclient.h Normal file
View File

@ -0,0 +1,32 @@
#include <openssl/err.h>
#include <openssl/ssl.h>
#include <string>
#include <unordered_map>
// Small blocking HTTPS client built on OpenSSL. One call to doGetRequest()
// performs one complete request/response exchange.
class HttpClient {
    // TODO at this moment only https is implemented
private:
    // TLS connection handle and socket descriptor. Initialised here because
    // the constructor does not set them.
    SSL *ssl = nullptr;
    int sock = -1;

    // Pieces of the most recently parsed URL (see doGetRequest).
    std::string full_url, proto, server, port, uri, params, href;

    // Raw bytes of the last response (status line, headers and body).
    std::basic_string<char> ssl_read_packet;

    // Response headers parsed from the last response, name -> value.
    std::unordered_map<std::string, std::string> headers_map;

public:
    HttpClient();

    // Sends a GET request for `url` with the extra request `headers` and
    // returns a pair of (response code, response body).
    std::pair<int, std::string> doGetRequest(const std::string &url, const std::unordered_map<std::string, std::string> &headers);

private:
    // Resolves `hostname` to an IPv4 address in dotted-decimal text form.
    std::string inetAddress(std::string hostname);

    // Reads the response from `ssl` into ssl_read_packet.
    int sslRecvPacket();

    // Writes `buf` to `ssl`.
    int sslSendPacket(std::string buf);

    // Connects to `server_name`, sends `request` and reads the response.
    int sslRequest(const std::string &server_name, const std::string &request);

    // Prints queued OpenSSL errors to stderr.
    void log_ssl();
};

View File

@ -4,9 +4,15 @@
(print "sorted: " (qs '(10 9 8 7 6 5 4 3 2 1))) (print "sorted: " (qs '(10 9 8 7 6 5 4 3 2 1)))
(define csv (read-file "tmp/data.csv")) (define csv (read-file "tmp/data.csv"))
(print csv) ; (print csv)
(define csv_list (parse-csv csv)) (define csv_list (parse-csv csv))
(print csv_list) ; (print csv_list)
(for x csv_list (print x)) (for x csv_list
(print x))
(define web_page (read-url "https://query1.finance.yahoo.com/v7/finance/download/FDX?period1=1581272585&period2=1612894985&interval=1d&events=history&includeAdjustedClose=true"))
(print web_page)
(define fdx_list (parse-csv (index web_page 1)))
(print fdx_list)
(print "Debug ends") (print "Debug ends")

29
ml.cpp
View File

@ -53,6 +53,7 @@ std::string read_file_contents(std::string filename) {
#include <exception> #include <exception>
#include "csvparser.h" #include "csvparser.h"
#include "sslclient.h"
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// ERROR MESSAGES ///////////////////////////////////////////////////////////// /// ERROR MESSAGES /////////////////////////////////////////////////////////////
@ -1273,6 +1274,33 @@ namespace builtin {
return result; return result;
} }
// Read URL to (code content)
//
// Takes exactly one argument, the URL as a string, and returns a two-element
// list built from the pair returned by HttpClient::doGetRequest: the integer
// response code followed by the response content as a string.
Value read_url(std::vector<Value> args, Environment &env) {
    // Is not a special form, so we can evaluate our args.
    eval_args(args, env);

    // PERF optimize it for memory usage and performance
    // TODO handle second parameter (headers); until then only the URL is
    // accepted and no extra request headers are sent.
    if (args.size() != 1)
        // Report the error against this builtin, under its registered name.
        throw Error(Value("read-url", read_url), env, args.size() > 1 ? TOO_MANY_ARGS : TOO_FEW_ARGS);

    const std::unordered_map<std::string, std::string> headers;
    HttpClient client;
    const std::pair<int, std::string> result = client.doGetRequest(args[0].as_string(), headers);

    std::vector<Value> lst;
    lst.push_back(Value(result.first));
    lst.push_back(Value::string(result.second));
    return lst;
}
// Read a file and execute its code // Read a file and execute its code
Value include(std::vector<Value> args, Environment &env) { Value include(std::vector<Value> args, Environment &env) {
// Import is technically not a special form, it's more of a macro. // Import is technically not a special form, it's more of a macro.
@ -1794,6 +1822,7 @@ Value Environment::get(std::string name) const {
if (name == "parse-csv") return Value("parse-csv", builtin::parse_csv); if (name == "parse-csv") return Value("parse-csv", builtin::parse_csv);
if (name == "read-file") return Value("read-file", builtin::read_file); if (name == "read-file") return Value("read-file", builtin::read_file);
if (name == "write-file") return Value("write-file", builtin::write_file); if (name == "write-file") return Value("write-file", builtin::write_file);
if (name == "read-url") return Value("read-url", builtin::read_url);
#endif #endif
// String operations // String operations