[bug] Address slow directory responses in S3 mounts for deeply nested directories #28
All checks were successful
BlockStorage/repertory/pipeline/head This commit looks good

[bug] S3 error responses are not being logged #29
This commit is contained in:
Scott E. Graves 2024-12-30 12:09:52 -06:00
parent ae573af89d
commit 62a303e856

View File

@ -43,8 +43,9 @@ namespace repertory {
s3_provider::s3_provider(app_config &config, i_http_comm &comm) s3_provider::s3_provider(app_config &config, i_http_comm &comm)
: base_provider(config, comm) {} : base_provider(config, comm) {}
auto s3_provider::add_if_not_found( auto s3_provider::add_if_not_found(api_file &file,
api_file &file, const std::string &object_name) const -> api_error { const std::string &object_name) const
-> api_error {
api_meta_map meta{}; api_meta_map meta{};
if (get_item_meta(file.api_path, meta) == api_error::item_not_found) { if (get_item_meta(file.api_path, meta) == api_error::item_not_found) {
auto err = create_path_directories( auto err = create_path_directories(
@ -70,7 +71,7 @@ auto s3_provider::convert_api_date(std::string_view date) -> std::uint64_t {
utils::string::split(date_parts.at(1U), 'Z', true).at(0U)) * utils::string::split(date_parts.at(1U), 'Z', true).at(0U)) *
1000000UL; 1000000UL;
struct tm tm1 {}; struct tm tm1{};
#if defined(_WIN32) #if defined(_WIN32)
utils::time::strptime(date_time.c_str(), "%Y-%m-%dT%T", &tm1); utils::time::strptime(date_time.c_str(), "%Y-%m-%dT%T", &tm1);
return nanos + utils::time::windows_time_t_to_unix_time(_mkgmtime(&tm1)); return nanos + utils::time::windows_time_t_to_unix_time(_mkgmtime(&tm1));
@ -112,10 +113,15 @@ auto s3_provider::create_directory_impl(const std::string &api_path,
auto object_name = auto object_name =
utils::path::create_api_path(is_encrypted ? meta[META_KEY] : api_path); utils::path::create_api_path(is_encrypted ? meta[META_KEY] : api_path);
std::string response_data;
curl::requests::http_put_file put_file{}; curl::requests::http_put_file put_file{};
put_file.allow_timeout = true; put_file.allow_timeout = true;
put_file.aws_service = "aws:amz:" + cfg.region + ":s3"; put_file.aws_service = "aws:amz:" + cfg.region + ":s3";
put_file.path = object_name + '/'; put_file.path = object_name + '/';
put_file.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
long response_code{}; long response_code{};
if (not get_comm().make_request(put_file, response_code, stop_requested)) { if (not get_comm().make_request(put_file, response_code, stop_requested)) {
@ -126,8 +132,9 @@ auto s3_provider::create_directory_impl(const std::string &api_path,
} }
if (response_code != http_error_codes::ok) { if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, api_path, response_code, utils::error::raise_api_path_error(
"failed to create directory"); function_name, api_path, response_code,
fmt::format("failed to create directory|response|{}", response_data));
return api_error::comm_error; return api_error::comm_error;
} }
@ -162,8 +169,9 @@ auto s3_provider::create_file_extra(const std::string &api_path,
return api_error::success; return api_error::success;
} }
auto s3_provider::create_path_directories( auto s3_provider::create_path_directories(const std::string &api_path,
const std::string &api_path, const std::string &key) const -> api_error { const std::string &key) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME(); REPERTORY_USES_FUNCTION_NAME();
if (api_path == "/") { if (api_path == "/") {
@ -191,44 +199,55 @@ auto s3_provider::create_path_directories(
cur_path = utils::path::create_api_path( cur_path = utils::path::create_api_path(
utils::path::combine(cur_path, {path_parts.at(idx)})); utils::path::combine(cur_path, {path_parts.at(idx)}));
std::string value;
auto res = get_item_meta(cur_path, META_DIRECTORY, value);
if (res == api_error::success) {
if (not utils::string::to_bool(value)) {
return api_error::item_exists;
}
continue;
}
if (res == api_error::item_not_found) {
auto exists{false}; auto exists{false};
auto res = is_directory(cur_path, exists); res = is_directory(cur_path, exists);
if (res != api_error::success) { if (res != api_error::success) {
return res; return res;
} }
if (not exists) { if (not exists) {
std::string response_data;
curl::requests::http_put_file put_file{}; curl::requests::http_put_file put_file{};
put_file.allow_timeout = true; put_file.allow_timeout = true;
put_file.aws_service = "aws:amz:" + cfg.region + ":s3"; put_file.aws_service = "aws:amz:" + cfg.region + ":s3";
put_file.path = (is_encrypted ? cur_key : cur_path) + '/'; put_file.path = (is_encrypted ? cur_key : cur_path) + '/';
put_file.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
stop_type stop_requested{false}; stop_type stop_requested{false};
long response_code{}; long response_code{};
if (not get_comm().make_request(put_file, response_code, if (not get_comm().make_request(put_file, response_code,
stop_requested)) { stop_requested)) {
utils::error::raise_api_path_error(function_name, cur_path, utils::error::raise_api_path_error(
api_error::comm_error, function_name, cur_path, api_error::comm_error,
"failed to create directory object"); "failed to create directory object");
return api_error::comm_error; return api_error::comm_error;
} }
if (response_code != http_error_codes::ok) { if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, cur_path, utils::error::raise_api_path_error(
response_code, function_name, api_path, response_code,
"failed to create directory object"); fmt::format("failed to create directory|response|{}",
response_data));
return api_error::comm_error; return api_error::comm_error;
} }
}
api_meta_map meta{};
res = get_item_meta(cur_path, meta);
if (res == api_error::item_not_found) {
auto dir = create_api_file(cur_path, cur_key, 0U, auto dir = create_api_file(cur_path, cur_key, 0U,
get_last_modified(true, cur_path)); get_last_modified(true, cur_path));
get_api_item_added()(true, dir); get_api_item_added()(true, dir);
}
continue;
} }
if (res != api_error::success) { if (res != api_error::success) {
@ -330,8 +349,9 @@ auto s3_provider::get_directory_item_count(const std::string &api_path) const
return 0U; return 0U;
} }
auto s3_provider::get_directory_items_impl( auto s3_provider::get_directory_items_impl(const std::string &api_path,
const std::string &api_path, directory_item_list &list) const -> api_error { directory_item_list &list) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME(); REPERTORY_USES_FUNCTION_NAME();
const auto &cfg = get_s3_config(); const auto &cfg = get_s3_config();
@ -389,60 +409,38 @@ auto s3_provider::get_directory_items_impl(
.as_string(); .as_string();
} }
const auto add_directory_item =
[&](bool directory, const std::string &name,
std::uint64_t last_modified,
std::function<std::uint64_t(const directory_item &)> get_size)
-> api_error {
auto child_api_path =
utils::path::create_api_path(utils::path::combine("/", {name}));
std::string child_object_name;
if (is_encrypted) {
child_object_name = child_api_path;
if (not utils::encryption::decrypt_file_path(cfg.encryption_token,
child_api_path)) {
return api_error::decryption_error;
}
}
directory_item dir_item{};
dir_item.api_path = child_api_path;
dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path);
dir_item.directory = directory;
dir_item.size = get_size(dir_item);
ret = get_item_meta(child_api_path, dir_item.meta);
if (ret == api_error::item_not_found) {
if (directory) {
ret = create_path_directories(child_api_path, child_object_name);
if (ret != api_error::success) {
return ret;
}
} else {
auto file = create_api_file(child_api_path, child_object_name,
dir_item.size, last_modified);
ret = add_if_not_found(file, child_object_name);
if (ret != api_error::success) {
return ret;
}
}
ret = get_item_meta(child_api_path, dir_item.meta);
}
if (ret != api_error::success) {
return ret;
}
list.push_back(std::move(dir_item));
return api_error::success;
};
auto node_list = auto node_list =
doc.select_nodes("/ListBucketResult/CommonPrefixes/Prefix"); doc.select_nodes("/ListBucketResult/CommonPrefixes/Prefix");
for (const auto &node : node_list) { for (const auto &node : node_list) {
add_directory_item( auto child_object_name = node.node().text().as_string();
true, node.node().text().as_string(), 0U, directory_item dir_item{};
[](const directory_item &) -> std::uint64_t { return 0U; }); dir_item.api_path = child_object_name;
if (is_encrypted) {
if (not utils::encryption::decrypt_file_path(cfg.encryption_token,
dir_item.api_path)) {
return api_error::decryption_error;
}
}
dir_item.api_path = utils::path::create_api_path(dir_item.api_path);
dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path);
dir_item.directory = true;
dir_item.size = 0U;
auto res = get_item_meta(dir_item.api_path, dir_item.meta);
if (res == api_error::item_not_found) {
res = create_path_directories(dir_item.api_path, child_object_name);
if (res != api_error::success) {
return res;
}
res = get_item_meta(dir_item.api_path, dir_item.meta);
}
if (res != api_error::success) {
return res;
}
list.push_back(std::move(dir_item));
} }
node_list = doc.select_nodes("/ListBucketResult/Contents"); node_list = doc.select_nodes("/ListBucketResult/Contents");
@ -453,31 +451,66 @@ auto s3_provider::get_directory_items_impl(
continue; continue;
} }
auto size = node.node().select_node("Size").node().text().as_ullong(); directory_item dir_item{};
auto last_modified = convert_api_date( dir_item.api_path = child_object_name;
node.node().select_node("LastModified").node().text().as_string()); if (is_encrypted) {
add_directory_item(false, child_object_name, last_modified, if (not utils::encryption::decrypt_file_path(cfg.encryption_token,
[this, &is_encrypted, &size]( dir_item.api_path)) {
const directory_item &dir_item) -> std::uint64_t { return api_error::decryption_error;
std::string size_str;
if (get_item_meta(dir_item.api_path, META_SIZE,
size_str) == api_error::success) {
return utils::string::to_uint64(size_str);
} }
}
dir_item.api_path = utils::path::create_api_path(dir_item.api_path);
dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path);
dir_item.directory = false;
return is_encrypted std::string size_str;
? utils::encryption::encrypting_reader:: if (get_item_meta(dir_item.api_path, META_SIZE, size_str) ==
api_error::success) {
dir_item.size = utils::string::to_uint64(size_str);
} else {
auto size = node.node().select_node("Size").node().text().as_ullong();
dir_item.size = is_encrypted ? utils::encryption::encrypting_reader::
calculate_decrypted_size(size) calculate_decrypted_size(size)
: size; : size;
}); }
auto res = get_item_meta(dir_item.api_path, dir_item.meta);
if (res == api_error::item_not_found) {
auto last_modified = convert_api_date(
node.node().select_node("LastModified").node().text().as_string());
api_file file{};
file.api_path = dir_item.api_path;
file.api_parent = dir_item.api_parent;
file.accessed_date = file.changed_date = file.creation_date =
file.modified_date = last_modified;
file.file_size = dir_item.size;
if (is_encrypted) {
file.key = child_object_name;
}
res = add_if_not_found(file, child_object_name);
if (res != api_error::success) {
return res;
}
res = get_item_meta(dir_item.api_path, dir_item.meta);
}
if (res != api_error::success) {
return res;
}
list.push_back(std::move(dir_item));
} }
} }
return ret; return ret;
} }
auto s3_provider::get_file(const std::string &api_path, auto s3_provider::get_file(const std::string &api_path, api_file &file) const
api_file &file) const -> api_error { -> api_error {
REPERTORY_USES_FUNCTION_NAME(); REPERTORY_USES_FUNCTION_NAME();
try { try {
@ -516,8 +549,8 @@ auto s3_provider::get_file(const std::string &api_path,
return api_error::error; return api_error::error;
} }
auto s3_provider::get_file_list(api_file_list &list, auto s3_provider::get_file_list(api_file_list &list, std::string &marker) const
std::string &marker) const -> api_error { -> api_error {
REPERTORY_USES_FUNCTION_NAME(); REPERTORY_USES_FUNCTION_NAME();
std::string response_data; std::string response_data;
@ -592,8 +625,9 @@ auto s3_provider::get_file_list(api_file_list &list,
return grab_more ? api_error::more_data : api_error::success; return grab_more ? api_error::more_data : api_error::success;
} }
auto s3_provider::get_last_modified( auto s3_provider::get_last_modified(bool directory,
bool directory, const std::string &api_path) const -> std::uint64_t { const std::string &api_path) const
-> std::uint64_t {
bool is_encrypted{}; bool is_encrypted{};
std::string object_name; std::string object_name;
head_object_result result{}; head_object_result result{};
@ -603,9 +637,10 @@ auto s3_provider::get_last_modified(
: utils::time::get_time_now(); : utils::time::get_time_now();
} }
auto s3_provider::get_object_info( auto s3_provider::get_object_info(bool directory, const std::string &api_path,
bool directory, const std::string &api_path, bool &is_encrypted, bool &is_encrypted, std::string &object_name,
std::string &object_name, head_object_result &result) const -> api_error { head_object_result &result) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME(); REPERTORY_USES_FUNCTION_NAME();
try { try {
@ -622,11 +657,16 @@ auto s3_provider::get_object_info(
object_name = utils::path::create_api_path(is_encrypted ? key : api_path); object_name = utils::path::create_api_path(is_encrypted ? key : api_path);
std::string response_data;
curl::requests::http_head head{}; curl::requests::http_head head{};
head.allow_timeout = true; head.allow_timeout = true;
head.aws_service = "aws:amz:" + cfg.region + ":s3"; head.aws_service = "aws:amz:" + cfg.region + ":s3";
head.path = directory ? object_name + '/' : object_name; head.path = directory ? object_name + '/' : object_name;
head.response_headers = http_headers{}; head.response_headers = http_headers{};
head.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
stop_type stop_requested{false}; stop_type stop_requested{false};
long response_code{}; long response_code{};
@ -639,8 +679,9 @@ auto s3_provider::get_object_info(
} }
if (response_code != http_error_codes::ok) { if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, api_path, response_code, utils::error::raise_api_path_error(
"failed to get object info"); function_name, api_path, response_code,
fmt::format("failed to get object info|response|{}", response_data));
return api_error::comm_error; return api_error::comm_error;
} }
@ -654,10 +695,12 @@ auto s3_provider::get_object_info(
return api_error::error; return api_error::error;
} }
auto s3_provider::get_object_list( auto s3_provider::get_object_list(std::string &response_data,
std::string &response_data, long &response_code, long &response_code,
std::optional<std::string> delimiter, std::optional<std::string> prefix, std::optional<std::string> delimiter,
std::optional<std::string> token) const -> bool { std::optional<std::string> prefix,
std::optional<std::string> token) const
-> bool {
curl::requests::http_get get{}; curl::requests::http_get get{};
get.allow_timeout = true; get.allow_timeout = true;
get.aws_service = "aws:amz:" + get_s3_config().region + ":s3"; get.aws_service = "aws:amz:" + get_s3_config().region + ":s3";
@ -673,8 +716,7 @@ auto s3_provider::get_object_list(
if (token.has_value() && not token.value().empty()) { if (token.has_value() && not token.value().empty()) {
get.query["continuation-token"] = token.value(); get.query["continuation-token"] = token.value();
} }
get.response_handler = [&response_data](const data_buffer &data, get.response_handler = [&response_data](auto &&data, long /*response_code*/) {
long /*response_code*/) {
response_data = std::string(data.begin(), data.end()); response_data = std::string(data.begin(), data.end());
}; };
@ -686,8 +728,8 @@ auto s3_provider::get_total_drive_space() const -> std::uint64_t {
return std::numeric_limits<std::int64_t>::max() / std::int64_t(2); return std::numeric_limits<std::int64_t>::max() / std::int64_t(2);
} }
auto s3_provider::is_directory(const std::string &api_path, auto s3_provider::is_directory(const std::string &api_path, bool &exists) const
bool &exists) const -> api_error { -> api_error {
REPERTORY_USES_FUNCTION_NAME(); REPERTORY_USES_FUNCTION_NAME();
exists = false; exists = false;
@ -714,8 +756,8 @@ auto s3_provider::is_directory(const std::string &api_path,
return api_error::error; return api_error::error;
} }
auto s3_provider::is_file(const std::string &api_path, auto s3_provider::is_file(const std::string &api_path, bool &exists) const
bool &exists) const -> api_error { -> api_error {
REPERTORY_USES_FUNCTION_NAME(); REPERTORY_USES_FUNCTION_NAME();
exists = false; exists = false;
@ -782,7 +824,7 @@ auto s3_provider::read_file_bytes(const std::string &api_path, std::size_t size,
read_offset, read_offset,
read_offset + read_size - 1U, read_offset + read_size - 1U,
}}; }};
get.response_handler = [&read_buffer](const data_buffer &response_data, get.response_handler = [&read_buffer](auto &&response_data,
long /*response_code*/) { long /*response_code*/) {
read_buffer = response_data; read_buffer = response_data;
}; };
@ -872,10 +914,14 @@ auto s3_provider::remove_directory_impl(const std::string &api_path)
auto object_name = auto object_name =
utils::path::create_api_path(is_encrypted ? key : api_path); utils::path::create_api_path(is_encrypted ? key : api_path);
std::string response_data;
curl::requests::http_delete del{}; curl::requests::http_delete del{};
del.allow_timeout = true; del.allow_timeout = true;
del.aws_service = "aws:amz:" + cfg.region + ":s3"; del.aws_service = "aws:amz:" + cfg.region + ":s3";
del.path = object_name + '/'; del.path = object_name + '/';
del.response_handler = [&response_data](auto &&data, long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
long response_code{}; long response_code{};
stop_type stop_requested{}; stop_type stop_requested{};
@ -889,8 +935,9 @@ auto s3_provider::remove_directory_impl(const std::string &api_path)
if ((response_code < http_error_codes::ok || if ((response_code < http_error_codes::ok ||
response_code >= http_error_codes::multiple_choices) && response_code >= http_error_codes::multiple_choices) &&
response_code != http_error_codes::not_found) { response_code != http_error_codes::not_found) {
utils::error::raise_api_path_error(function_name, api_path, response_code, utils::error::raise_api_path_error(
"failed to remove directory"); function_name, api_path, response_code,
fmt::format("failed to remove directory|response|{}", response_data));
return api_error::comm_error; return api_error::comm_error;
} }
@ -987,9 +1034,14 @@ auto s3_provider::upload_file_impl(const std::string &api_path,
auto object_name = auto object_name =
utils::path::create_api_path(is_encrypted ? key : api_path); utils::path::create_api_path(is_encrypted ? key : api_path);
std::string response_data;
curl::requests::http_put_file put_file{}; curl::requests::http_put_file put_file{};
put_file.aws_service = "aws:amz:" + cfg.region + ":s3"; put_file.aws_service = "aws:amz:" + cfg.region + ":s3";
put_file.path = object_name; put_file.path = object_name;
put_file.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
put_file.source_path = source_path; put_file.source_path = source_path;
if (is_encrypted && file_size > 0U) { if (is_encrypted && file_size > 0U) {
@ -1005,8 +1057,9 @@ auto s3_provider::upload_file_impl(const std::string &api_path,
} }
if (response_code != http_error_codes::ok) { if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, api_path, response_code, utils::error::raise_api_path_error(
"failed to get upload file"); function_name, api_path, response_code,
fmt::format("failed to upload file|response|{}", response_data));
return api_error::comm_error; return api_error::comm_error;
} }