added read-url
This commit is contained in:
parent
22f9410d17
commit
982241acf6
|
|
@ -10,16 +10,19 @@ set(CPACK_PROJECT_NAME ${PROJECT_NAME})
|
|||
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
|
||||
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR}/stdlib ${CMAKE_SOURCE_DIR} )
|
||||
include_directories(/usr/local/opt/openssl/include ${CMAKE_SOURCE_DIR}/clib ${CMAKE_SOURCE_DIR} )
|
||||
|
||||
link_directories(/usr/local/lib /usr/local/opt/openssl/lib)
|
||||
|
||||
|
||||
project(ml)
|
||||
|
||||
set(PROJECT_NAME ml)
|
||||
|
||||
set(SOURCE
|
||||
ml.cpp
|
||||
stdlib/csvparser.cpp)
|
||||
clib/csvparser.cpp
|
||||
clib/sslclient.cpp)
|
||||
|
||||
add_executable(${PROJECT_NAME} ${SOURCE})
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} stdc++ m)
|
||||
target_link_libraries(${PROJECT_NAME} stdc++ m ssl crypto)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,236 @@
|
|||
//============================================================================
|
||||
// clang++ -o sslclient sslclient.cpp -lssl -lcrypto -L/usr/local/opt/openssl/lib -I/usr/local/opt/openssl/include
|
||||
//============================================================================
|
||||
//
|
||||
|
||||
#include "sslclient.h"

#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
|
||||
|
||||
|
||||
// Creates an idle client: no TLS session and no socket are open until a
// request is made.
HttpClient::HttpClient() : ssl(nullptr), sock(-1) {}
|
||||
|
||||
std::pair<int, std::string> HttpClient::doGetRequest(const std::string &url, const std::unordered_map<std::string, std::string> &headers) {
|
||||
// ^(?:((?:https?|s?ftp):)\/\/)([^:\/\s]+)(?::(\d*))?(?:\/([^\s?#]+)?([?][^?#]*)?(#.*)?)?
|
||||
// viz test
|
||||
// https://api.iextrading.com:443/1.0/stock/market/batch?symbols=cah,khc,syf,jnj&types=quote#muhehe
|
||||
// https://stackoverflow.com/questions/25896916/parse-http-headers-in-c
|
||||
|
||||
std::regex rgx{R"(^(?:((?:https?|s?ftp):)//)([^:/\s]+)(?::(\d*))?(?:/([^\s?#]+)?([?][^?#]*)?(#.*)?)?)"};
|
||||
std::smatch matches;
|
||||
|
||||
if (std::regex_search(url, matches, rgx)) {
|
||||
for (size_t i = 0; i < matches.size(); ++i) {
|
||||
switch (i) {
|
||||
case 0:
|
||||
full_url = matches[i].str();
|
||||
break;
|
||||
case 1:
|
||||
proto = matches[i].str();
|
||||
break;
|
||||
case 2:
|
||||
server = matches[i].str();
|
||||
break;
|
||||
case 3:
|
||||
port = matches[i].str();
|
||||
break;
|
||||
case 4:
|
||||
uri = matches[i].str();
|
||||
break;
|
||||
case 5:
|
||||
params = matches[i].str();
|
||||
break;
|
||||
case 6:
|
||||
href = matches[i].str();
|
||||
break;
|
||||
}
|
||||
// std::cout << i << ": '" << matches[i].str() << "'\n";
|
||||
}
|
||||
} else {
|
||||
std::cerr << "Match not found\n"; // TODO better message
|
||||
}
|
||||
|
||||
std::string headers_string = "";
|
||||
for (auto it = headers.begin(); it != headers.end(); ++it) {
|
||||
headers_string.append("\r\n" + it->first + ": " + it->second);
|
||||
}
|
||||
|
||||
std::string request = "GET " + full_url + " HTTP/1.0\r\nHost: " + server + headers_string + "\r\n\r\n";
|
||||
|
||||
int bytes_read = sslRequest(server, request);
|
||||
|
||||
std::string::size_type position = ssl_read_packet.find("\r\n\r\n");
|
||||
if (position == std::string::npos) {
|
||||
std::cerr << "substring not found" << std::endl; // TODO invalid packet
|
||||
}
|
||||
|
||||
std::string hdr = ssl_read_packet.substr(0, position);
|
||||
auto status_pos = hdr.find("\r\n");
|
||||
|
||||
std::string status_str = hdr.substr(0, status_pos);
|
||||
hdr = hdr.substr(status_pos + 2, hdr.length() - 2 - status_pos);
|
||||
|
||||
// TODO parse status code
|
||||
std::regex status_rgx{"^HTTP/\\d\\.\\d (\\d{3}) .+$"};
|
||||
std::smatch status_matches;
|
||||
if (std::regex_search(status_str, status_matches, status_rgx)) {
|
||||
if (status_matches.size() > 1) {
|
||||
auto sta = status_matches[1].str(); // string "200"
|
||||
// std::cout << "status: " << sta << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::string body = ssl_read_packet.substr(position + 4, ssl_read_packet.length() - 4 - position);
|
||||
|
||||
|
||||
std::istringstream resp(hdr);
|
||||
std::string header;
|
||||
std::string::size_type index;
|
||||
while (std::getline(resp, header) && header != "\r") {
|
||||
index = header.find(": ", 0);
|
||||
if (index != std::string::npos) {
|
||||
headers_map.insert(std::make_pair(header.substr(0, index), header.substr(index + 1)));
|
||||
}
|
||||
}
|
||||
|
||||
// for(auto& kv: headers_map) {
|
||||
// std::cout << "KEY: `" << kv.first << "`, VALUE: `" << kv.second << '`' << std::endl;
|
||||
// }
|
||||
|
||||
// TODO if error return error desc in string
|
||||
return std::make_pair(200, body);
|
||||
};
|
||||
|
||||
// Resolves `hostname` to the dotted-decimal text form of its first IPv4
// address.
//
// Uses getaddrinfo/inet_ntop rather than gethostbyname/inet_ntoa, which are
// obsolete and hand back pointers into static storage.
// As before, an unresolvable host is reported on stderr and terminates the
// process with exit code 1.
std::string HttpClient::inetAddress(std::string hostname) {
    addrinfo hints{};
    hints.ai_family = AF_INET;  // the caller builds a sockaddr_in, so IPv4 only
    hints.ai_socktype = SOCK_STREAM;

    addrinfo *results = nullptr;
    const int rc = getaddrinfo(hostname.c_str(), nullptr, &hints, &results);
    if (rc != 0 || results == nullptr) {
        std::cerr << hostname << " is unavailable" << std::endl;
        exit(1);
    }

    char text[INET_ADDRSTRLEN] = {0};
    const sockaddr_in *ipv4 = reinterpret_cast<const sockaddr_in *>(results->ai_addr);
    const char *converted = inet_ntop(AF_INET, &ipv4->sin_addr, text, sizeof(text));
    const std::string ip_address = converted ? converted : "";

    freeaddrinfo(results);
    return ip_address;
}
|
||||
|
||||
int HttpClient::sslRecvPacket() {
|
||||
ssl_read_packet.resize(4096);
|
||||
ssl_read_packet.clear();
|
||||
|
||||
int len = 16384;
|
||||
char buf[len + 1];
|
||||
do {
|
||||
len = SSL_read(ssl, buf, len);
|
||||
buf[len] = 0;
|
||||
ssl_read_packet.append((const char *)buf);
|
||||
} while (len > 0);
|
||||
|
||||
if (len < 0) {
|
||||
int err = SSL_get_error(ssl, len);
|
||||
if (err == SSL_ERROR_WANT_READ)
|
||||
return 0;
|
||||
if (err == SSL_ERROR_WANT_WRITE)
|
||||
return 0;
|
||||
if (err == SSL_ERROR_ZERO_RETURN || err == SSL_ERROR_SYSCALL || err == SSL_ERROR_SSL)
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ssl_read_packet.length();
|
||||
}
|
||||
|
||||
int HttpClient::sslSendPacket(std::string buf) {
|
||||
int len = SSL_write(ssl, buf.c_str(), strlen(buf.c_str()));
|
||||
if (len < 0) {
|
||||
int err = SSL_get_error(ssl, len);
|
||||
switch (err) {
|
||||
case SSL_ERROR_WANT_WRITE:
|
||||
return 0;
|
||||
case SSL_ERROR_WANT_READ:
|
||||
return 0;
|
||||
case SSL_ERROR_ZERO_RETURN:
|
||||
case SSL_ERROR_SYSCALL:
|
||||
case SSL_ERROR_SSL:
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return buf.length();
|
||||
}
|
||||
|
||||
int HttpClient::sslRequest(const std::string &server_name, const std::string &request) {
|
||||
// create socket
|
||||
int s;
|
||||
s = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if (!s) {
|
||||
printf("Error creating socket.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// socket address
|
||||
std::string server_ip = inetAddress(server_name);
|
||||
struct sockaddr_in sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sin_family = AF_INET;
|
||||
sa.sin_addr.s_addr = inet_addr(server_ip.c_str());
|
||||
sa.sin_port = htons(443);
|
||||
socklen_t socklen = sizeof(sa);
|
||||
|
||||
// connect to server
|
||||
if (connect(s, (struct sockaddr *)&sa, socklen)) {
|
||||
printf("Error connecting to server.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
SSL_library_init();
|
||||
SSLeay_add_ssl_algorithms();
|
||||
SSL_load_error_strings();
|
||||
const SSL_METHOD *meth = TLSv1_2_client_method();
|
||||
SSL_CTX *ctx = SSL_CTX_new(meth);
|
||||
ssl = SSL_new(ctx);
|
||||
if (!ssl) {
|
||||
printf("Error creating SSL.\n");
|
||||
log_ssl();
|
||||
return -1;
|
||||
}
|
||||
sock = SSL_get_fd(ssl);
|
||||
SSL_set_fd(ssl, s);
|
||||
|
||||
SSL_ctrl(ssl, SSL_CTRL_SET_TLSEXT_HOSTNAME, TLSEXT_NAMETYPE_host_name, (void*)server.c_str());
|
||||
|
||||
int err = SSL_connect(ssl);
|
||||
if (err <= 0) {
|
||||
printf("Error creating SSL connection. err=%x\n", err);
|
||||
log_ssl();
|
||||
fflush(stdout);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// log cipher
|
||||
// printf ("SSL connection using %s\n", SSL_get_cipher (ssl));
|
||||
|
||||
// send request
|
||||
sslSendPacket(request);
|
||||
|
||||
// read response and return its length
|
||||
return sslRecvPacket();
|
||||
}
|
||||
|
||||
void HttpClient::log_ssl() {
|
||||
int err;
|
||||
while ((err = ERR_get_error())) {
|
||||
char *str = ERR_error_string(err, 0);
|
||||
if (!str)
|
||||
return;
|
||||
std::cerr << str << std::endl;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
|
||||
#include <openssl/err.h>
|
||||
#include <openssl/ssl.h>
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
class HttpClient {
|
||||
// TODO at this moment only https is implemented
|
||||
|
||||
private:
|
||||
SSL *ssl;
|
||||
int sock;
|
||||
|
||||
std::string full_url, proto, server, port, uri, params, href;
|
||||
std::basic_string<char> ssl_read_packet;
|
||||
std::unordered_map<std::string, std::string> headers_map;
|
||||
|
||||
public:
|
||||
HttpClient();
|
||||
|
||||
std::pair<int, std::string> doGetRequest(const std::string &url, const std::unordered_map<std::string, std::string> &headers);
|
||||
|
||||
private:
|
||||
std::string inetAddress(std::string hostname);
|
||||
|
||||
int sslRecvPacket();
|
||||
int sslSendPacket(std::string buf);
|
||||
int sslRequest(const std::string &server_name, const std::string &request);
|
||||
void log_ssl();
|
||||
};
|
||||
12
debug.lisp
12
debug.lisp
|
|
@ -4,9 +4,15 @@
|
|||
(print "sorted: " (qs '(10 9 8 7 6 5 4 3 2 1)))
|
||||
|
||||
(define csv (read-file "tmp/data.csv"))
|
||||
(print csv)
|
||||
; (print csv)
|
||||
(define csv_list (parse-csv csv))
|
||||
(print csv_list)
|
||||
(for x csv_list (print x))
|
||||
; (print csv_list)
|
||||
(for x csv_list
|
||||
(print x))
|
||||
|
||||
(define web_page (read-url "https://query1.finance.yahoo.com/v7/finance/download/FDX?period1=1581272585&period2=1612894985&interval=1d&events=history&includeAdjustedClose=true"))
|
||||
(print web_page)
|
||||
(define fdx_list (parse-csv (index web_page 1)))
|
||||
(print fdx_list)
|
||||
|
||||
(print "Debug ends")
|
||||
|
|
|
|||
29
ml.cpp
29
ml.cpp
|
|
@ -53,6 +53,7 @@ std::string read_file_contents(std::string filename) {
|
|||
#include <exception>
|
||||
|
||||
#include "csvparser.h"
|
||||
#include "sslclient.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// ERROR MESSAGES /////////////////////////////////////////////////////////////
|
||||
|
|
@ -1273,6 +1274,33 @@ namespace builtin {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Read URL to (code content)
|
||||
Value read_url(std::vector<Value> args, Environment &env) {
|
||||
// Is not a special form, so we can evaluate our args.
|
||||
eval_args(args, env);
|
||||
|
||||
// PERF optimize it for memory usage and performance
|
||||
// TODO handle second parameter (headers)
|
||||
if (args.size() != 1)
|
||||
throw Error(Value("read_url", write_file), env, args.size() > 1? TOO_MANY_ARGS : TOO_FEW_ARGS);
|
||||
|
||||
std::unordered_map<std::string, std::string> headers = {};
|
||||
HttpClient client;
|
||||
|
||||
if (args.size() == 2) {
|
||||
// do magick here
|
||||
// for (auto i = map.begin(); i != map.end(); ++i) {
|
||||
// headers[i->first] = i->second.getString();
|
||||
// }
|
||||
}
|
||||
|
||||
std::pair<int, std::string> result = client.doGetRequest(args[0].as_string(), headers);
|
||||
std::vector<Value> lst;
|
||||
lst.push_back(Value(result.first));
|
||||
lst.push_back(Value::string(result.second));
|
||||
return lst;
|
||||
}
|
||||
|
||||
// Read a file and execute its code
|
||||
Value include(std::vector<Value> args, Environment &env) {
|
||||
// Import is technically not a special form, it's more of a macro.
|
||||
|
|
@ -1794,6 +1822,7 @@ Value Environment::get(std::string name) const {
|
|||
if (name == "parse-csv") return Value("parse-csv", builtin::parse_csv);
|
||||
if (name == "read-file") return Value("read-file", builtin::read_file);
|
||||
if (name == "write-file") return Value("write-file", builtin::write_file);
|
||||
if (name == "read-url") return Value("read-url", builtin::read_url);
|
||||
#endif
|
||||
|
||||
// String operations
|
||||
|
|
|
|||
Loading…
Reference in New Issue