From 62a303e8560e10cb9c1a3247009c75f257f5909b Mon Sep 17 00:00:00 2001 From: "Scott E. Graves" Date: Mon, 30 Dec 2024 12:09:52 -0600 Subject: [PATCH] [bug] Address slow directory responses in S3 mounts for deep nested directories #28 [bug] S3 error responses are not being logged #29 --- .../src/providers/s3/s3_provider.cpp | 329 ++++++++++-------- 1 file changed, 191 insertions(+), 138 deletions(-) diff --git a/repertory/librepertory/src/providers/s3/s3_provider.cpp b/repertory/librepertory/src/providers/s3/s3_provider.cpp index 9145a9d5..364e24bf 100644 --- a/repertory/librepertory/src/providers/s3/s3_provider.cpp +++ b/repertory/librepertory/src/providers/s3/s3_provider.cpp @@ -43,8 +43,9 @@ namespace repertory { s3_provider::s3_provider(app_config &config, i_http_comm &comm) : base_provider(config, comm) {} -auto s3_provider::add_if_not_found( - api_file &file, const std::string &object_name) const -> api_error { +auto s3_provider::add_if_not_found(api_file &file, + const std::string &object_name) const + -> api_error { api_meta_map meta{}; if (get_item_meta(file.api_path, meta) == api_error::item_not_found) { auto err = create_path_directories( @@ -70,7 +71,7 @@ auto s3_provider::convert_api_date(std::string_view date) -> std::uint64_t { utils::string::split(date_parts.at(1U), 'Z', true).at(0U)) * 1000000UL; - struct tm tm1 {}; + struct tm tm1{}; #if defined(_WIN32) utils::time::strptime(date_time.c_str(), "%Y-%m-%dT%T", &tm1); return nanos + utils::time::windows_time_t_to_unix_time(_mkgmtime(&tm1)); @@ -112,10 +113,15 @@ auto s3_provider::create_directory_impl(const std::string &api_path, auto object_name = utils::path::create_api_path(is_encrypted ? meta[META_KEY] : api_path); + std::string response_data; curl::requests::http_put_file put_file{}; put_file.allow_timeout = true; put_file.aws_service = "aws:amz:" + cfg.region + ":s3"; put_file.path = object_name + '/'; + put_file.response_handler = [&response_data](auto &&data, + long /*response_code*/) { + response_data = std::string(data.begin(), data.end()); + }; long response_code{}; if (not get_comm().make_request(put_file, response_code, stop_requested)) { @@ -126,8 +132,9 @@ auto s3_provider::create_directory_impl(const std::string &api_path, } if (response_code != http_error_codes::ok) { - utils::error::raise_api_path_error(function_name, api_path, response_code, - "failed to create directory"); + utils::error::raise_api_path_error( + function_name, api_path, response_code, + fmt::format("failed to create directory|response|{}", response_data)); return api_error::comm_error; } @@ -162,8 +169,9 @@ auto s3_provider::create_file_extra(const std::string &api_path, return api_error::success; } -auto s3_provider::create_path_directories( - const std::string &api_path, const std::string &key) const -> api_error { +auto s3_provider::create_path_directories(const std::string &api_path, + const std::string &key) const + -> api_error { REPERTORY_USES_FUNCTION_NAME(); if (api_path == "/") { @@ -191,46 +199,57 @@ auto s3_provider::create_path_directories( cur_path = utils::path::create_api_path( utils::path::combine(cur_path, {path_parts.at(idx)})); - auto exists{false}; - auto res = is_directory(cur_path, exists); - if (res != api_error::success) { - return res; - } - - if (not exists) { - curl::requests::http_put_file put_file{}; - put_file.allow_timeout = true; - put_file.aws_service = "aws:amz:" + cfg.region + ":s3"; - put_file.path = (is_encrypted ? cur_key : cur_path) + '/'; - - stop_type stop_requested{false}; - long response_code{}; - if (not get_comm().make_request(put_file, response_code, - stop_requested)) { - utils::error::raise_api_path_error(function_name, cur_path, - api_error::comm_error, - "failed to create directory object"); - return api_error::comm_error; + std::string value; + auto res = get_item_meta(cur_path, META_DIRECTORY, value); + if (res == api_error::success) { + if (not utils::string::to_bool(value)) { + return api_error::item_exists; } - - if (response_code != http_error_codes::ok) { - utils::error::raise_api_path_error(function_name, cur_path, - response_code, - "failed to create directory object"); - return api_error::comm_error; - } - } - - api_meta_map meta{}; - res = get_item_meta(cur_path, meta); - if (res == api_error::item_not_found) { - auto dir = create_api_file(cur_path, cur_key, 0U, - get_last_modified(true, cur_path)); - get_api_item_added()(true, dir); - continue; } + if (res == api_error::item_not_found) { + auto exists{false}; + res = is_directory(cur_path, exists); + if (res != api_error::success) { + return res; + } + + if (not exists) { + std::string response_data; + curl::requests::http_put_file put_file{}; + put_file.allow_timeout = true; + put_file.aws_service = "aws:amz:" + cfg.region + ":s3"; + put_file.path = (is_encrypted ? cur_key : cur_path) + '/'; + put_file.response_handler = [&response_data](auto &&data, + long /*response_code*/) { + response_data = std::string(data.begin(), data.end()); + }; + + stop_type stop_requested{false}; + long response_code{}; + if (not get_comm().make_request(put_file, response_code, + stop_requested)) { + utils::error::raise_api_path_error( + function_name, cur_path, api_error::comm_error, + "failed to create directory object"); + return api_error::comm_error; + } + + if (response_code != http_error_codes::ok) { + utils::error::raise_api_path_error( + function_name, api_path, response_code, + fmt::format("failed to create directory|response|{}", + response_data)); + return api_error::comm_error; + } + + auto dir = create_api_file(cur_path, cur_key, 0U, + get_last_modified(true, cur_path)); + get_api_item_added()(true, dir); + } + } + if (res != api_error::success) { return res; } @@ -330,8 +349,9 @@ auto s3_provider::get_directory_item_count(const std::string &api_path) const return 0U; } -auto s3_provider::get_directory_items_impl( - const std::string &api_path, directory_item_list &list) const -> api_error { +auto s3_provider::get_directory_items_impl(const std::string &api_path, + directory_item_list &list) const + -> api_error { REPERTORY_USES_FUNCTION_NAME(); const auto &cfg = get_s3_config(); @@ -389,60 +409,38 @@ auto s3_provider::get_directory_items_impl( .as_string(); } - const auto add_directory_item = - [&](bool directory, const std::string &name, - std::uint64_t last_modified, - std::function get_size) - -> api_error { - auto child_api_path = - utils::path::create_api_path(utils::path::combine("/", {name})); - std::string child_object_name; - if (is_encrypted) { - child_object_name = child_api_path; - if (not utils::encryption::decrypt_file_path(cfg.encryption_token, - child_api_path)) { - return api_error::decryption_error; - } - } - - directory_item dir_item{}; - dir_item.api_path = child_api_path; - dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path); - dir_item.directory = directory; - dir_item.size = get_size(dir_item); - ret = get_item_meta(child_api_path, dir_item.meta); - if (ret == api_error::item_not_found) { - if (directory) { - ret = create_path_directories(child_api_path, child_object_name); - if (ret != api_error::success) { - return ret; - } - } else { - auto file = create_api_file(child_api_path, child_object_name, - dir_item.size, last_modified); - ret = add_if_not_found(file, child_object_name); - if (ret != api_error::success) { - return ret; - } - } - - ret = get_item_meta(child_api_path, dir_item.meta); - } - - if (ret != api_error::success) { - return ret; - } - - list.push_back(std::move(dir_item)); - return api_error::success; - }; - auto node_list = doc.select_nodes("/ListBucketResult/CommonPrefixes/Prefix"); for (const auto &node : node_list) { - add_directory_item( - true, node.node().text().as_string(), 0U, - [](const directory_item &) -> std::uint64_t { return 0U; }); + auto child_object_name = node.node().text().as_string(); + directory_item dir_item{}; + dir_item.api_path = child_object_name; + if (is_encrypted) { + if (not utils::encryption::decrypt_file_path(cfg.encryption_token, + dir_item.api_path)) { + return api_error::decryption_error; + } + } + dir_item.api_path = utils::path::create_api_path(dir_item.api_path); + dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path); + dir_item.directory = true; + dir_item.size = 0U; + + auto res = get_item_meta(dir_item.api_path, dir_item.meta); + if (res == api_error::item_not_found) { + res = create_path_directories(dir_item.api_path, child_object_name); + if (res != api_error::success) { + return res; + } + + res = get_item_meta(dir_item.api_path, dir_item.meta); + } + + if (res != api_error::success) { + return res; + } + + list.push_back(std::move(dir_item)); } node_list = doc.select_nodes("/ListBucketResult/Contents"); @@ -453,31 +451,66 @@ auto s3_provider::get_directory_items_impl( continue; } - auto size = node.node().select_node("Size").node().text().as_ullong(); - auto last_modified = convert_api_date( - node.node().select_node("LastModified").node().text().as_string()); - add_directory_item(false, child_object_name, last_modified, - [this, &is_encrypted, &size]( - const directory_item &dir_item) -> std::uint64_t { - std::string size_str; - if (get_item_meta(dir_item.api_path, META_SIZE, - size_str) == api_error::success) { - return utils::string::to_uint64(size_str); - } + directory_item dir_item{}; + dir_item.api_path = child_object_name; + if (is_encrypted) { + if (not utils::encryption::decrypt_file_path(cfg.encryption_token, + dir_item.api_path)) { + return api_error::decryption_error; + } + } + dir_item.api_path = utils::path::create_api_path(dir_item.api_path); + dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path); + dir_item.directory = false; - return is_encrypted - ? utils::encryption::encrypting_reader:: - calculate_decrypted_size(size) - : size; - }); + std::string size_str; + if (get_item_meta(dir_item.api_path, META_SIZE, size_str) == + api_error::success) { + dir_item.size = utils::string::to_uint64(size_str); + } else { + auto size = node.node().select_node("Size").node().text().as_ullong(); + + dir_item.size = is_encrypted ? utils::encryption::encrypting_reader:: + calculate_decrypted_size(size) + : size; + } + + auto res = get_item_meta(dir_item.api_path, dir_item.meta); + if (res == api_error::item_not_found) { + auto last_modified = convert_api_date( + node.node().select_node("LastModified").node().text().as_string()); + + api_file file{}; + file.api_path = dir_item.api_path; + file.api_parent = dir_item.api_parent; + file.accessed_date = file.changed_date = file.creation_date = + file.modified_date = last_modified; + file.file_size = dir_item.size; + if (is_encrypted) { + file.key = child_object_name; + } + + res = add_if_not_found(file, child_object_name); + if (res != api_error::success) { + return res; + } + + res = get_item_meta(dir_item.api_path, dir_item.meta); + } + + if (res != api_error::success) { + return res; + } + + list.push_back(std::move(dir_item)); } } return ret; } -auto s3_provider::get_file(const std::string &api_path, - api_file &file) const -> api_error { +auto s3_provider::get_file(const std::string &api_path, api_file &file) const + -> api_error { REPERTORY_USES_FUNCTION_NAME(); try { @@ -516,8 +549,8 @@ auto s3_provider::get_file(const std::string &api_path, return api_error::error; } -auto s3_provider::get_file_list(api_file_list &list, - std::string &marker) const -> api_error { +auto s3_provider::get_file_list(api_file_list &list, std::string &marker) const + -> api_error { REPERTORY_USES_FUNCTION_NAME(); std::string response_data; @@ -592,8 +625,9 @@ auto s3_provider::get_file_list(api_file_list &list, return grab_more ? api_error::more_data : api_error::success; } -auto s3_provider::get_last_modified( - bool directory, const std::string &api_path) const -> std::uint64_t { +auto s3_provider::get_last_modified(bool directory, + const std::string &api_path) const + -> std::uint64_t { bool is_encrypted{}; std::string object_name; head_object_result result{}; @@ -603,9 +637,10 @@ auto s3_provider::get_last_modified( : utils::time::get_time_now(); } -auto s3_provider::get_object_info( - bool directory, const std::string &api_path, bool &is_encrypted, - std::string &object_name, head_object_result &result) const -> api_error { +auto s3_provider::get_object_info(bool directory, const std::string &api_path, + bool &is_encrypted, std::string &object_name, + head_object_result &result) const + -> api_error { REPERTORY_USES_FUNCTION_NAME(); try { @@ -622,11 +657,16 @@ auto s3_provider::get_object_info( object_name = utils::path::create_api_path(is_encrypted ? key : api_path); + std::string response_data; curl::requests::http_head head{}; head.allow_timeout = true; head.aws_service = "aws:amz:" + cfg.region + ":s3"; head.path = directory ? object_name + '/' : object_name; head.response_headers = http_headers{}; + head.response_handler = [&response_data](auto &&data, + long /*response_code*/) { + response_data = std::string(data.begin(), data.end()); + }; stop_type stop_requested{false}; long response_code{}; @@ -639,8 +679,9 @@ auto s3_provider::get_object_info( } if (response_code != http_error_codes::ok) { - utils::error::raise_api_path_error(function_name, api_path, response_code, - "failed to get object info"); + utils::error::raise_api_path_error( + function_name, api_path, response_code, + fmt::format("failed to get object info|response|{}", response_data)); return api_error::comm_error; } @@ -654,10 +695,12 @@ auto s3_provider::get_object_info( return api_error::error; } -auto s3_provider::get_object_list( - std::string &response_data, long &response_code, - std::optional delimiter, std::optional prefix, - std::optional token) const -> bool { +auto s3_provider::get_object_list(std::string &response_data, + long &response_code, + std::optional delimiter, + std::optional prefix, + std::optional token) const + -> bool { curl::requests::http_get get{}; get.allow_timeout = true; get.aws_service = "aws:amz:" + get_s3_config().region + ":s3"; @@ -673,8 +716,7 @@ auto s3_provider::get_object_list( if (token.has_value() && not token.value().empty()) { get.query["continuation-token"] = token.value(); } - get.response_handler = [&response_data](const data_buffer &data, - long /*response_code*/) { + get.response_handler = [&response_data](auto &&data, long /*response_code*/) { response_data = std::string(data.begin(), data.end()); }; @@ -686,8 +728,8 @@ auto s3_provider::get_total_drive_space() const -> std::uint64_t { return std::numeric_limits::max() / std::int64_t(2); } -auto s3_provider::is_directory(const std::string &api_path, - bool &exists) const -> api_error { +auto s3_provider::is_directory(const std::string &api_path, bool &exists) const + -> api_error { REPERTORY_USES_FUNCTION_NAME(); exists = false; @@ -714,8 +756,8 @@ auto s3_provider::is_directory(const std::string &api_path, return api_error::error; } -auto s3_provider::is_file(const std::string &api_path, - bool &exists) const -> api_error { +auto s3_provider::is_file(const std::string &api_path, bool &exists) const + -> api_error { REPERTORY_USES_FUNCTION_NAME(); exists = false; @@ -782,7 +824,7 @@ auto s3_provider::read_file_bytes(const std::string &api_path, std::size_t size, read_offset, read_offset + read_size - 1U, }}; - get.response_handler = [&read_buffer](const data_buffer &response_data, + get.response_handler = [&read_buffer](auto &&response_data, long /*response_code*/) { read_buffer = response_data; }; @@ -872,10 +914,14 @@ auto s3_provider::remove_directory_impl(const std::string &api_path) auto object_name = utils::path::create_api_path(is_encrypted ? key : api_path); + std::string response_data; curl::requests::http_delete del{}; del.allow_timeout = true; del.aws_service = "aws:amz:" + cfg.region + ":s3"; del.path = object_name + '/'; + del.response_handler = [&response_data](auto &&data, long /*response_code*/) { + response_data = std::string(data.begin(), data.end()); + }; long response_code{}; stop_type stop_requested{}; @@ -889,8 +935,9 @@ auto s3_provider::remove_directory_impl(const std::string &api_path) if ((response_code < http_error_codes::ok || response_code >= http_error_codes::multiple_choices) && response_code != http_error_codes::not_found) { - utils::error::raise_api_path_error(function_name, api_path, response_code, - "failed to remove directory"); + utils::error::raise_api_path_error( + function_name, api_path, response_code, + fmt::format("failed to remove directory|response|{}", response_data)); return api_error::comm_error; } @@ -987,9 +1034,14 @@ auto s3_provider::upload_file_impl(const std::string &api_path, auto object_name = utils::path::create_api_path(is_encrypted ? key : api_path); + std::string response_data; curl::requests::http_put_file put_file{}; put_file.aws_service = "aws:amz:" + cfg.region + ":s3"; put_file.path = object_name; + put_file.response_handler = [&response_data](auto &&data, + long /*response_code*/) { + response_data = std::string(data.begin(), data.end()); + }; put_file.source_path = source_path; if (is_encrypted && file_size > 0U) { @@ -1005,8 +1057,9 @@ auto s3_provider::upload_file_impl(const std::string &api_path, } if (response_code != http_error_codes::ok) { - utils::error::raise_api_path_error(function_name, api_path, response_code, - "failed to get upload file"); + utils::error::raise_api_path_error( + function_name, api_path, response_code, + fmt::format("failed to upload file|response|{}", response_data)); return api_error::comm_error; }