[bug] Address slow directory responses in S3 mounts for deeply nested directories #28
All checks were successful
BlockStorage/repertory/pipeline/head This commit looks good

[bug] S3 error responses are not being logged #29
This commit is contained in:
Scott E. Graves 2024-12-30 12:09:52 -06:00
parent ae573af89d
commit 62a303e856

View File

@ -43,8 +43,9 @@ namespace repertory {
// Constructs the S3 provider by forwarding `config` and `comm` unchanged to
// the base_provider constructor. The body is empty, so no additional
// initialization is performed here.
s3_provider::s3_provider(app_config &config, i_http_comm &comm)
: base_provider(config, comm) {}
auto s3_provider::add_if_not_found(
api_file &file, const std::string &object_name) const -> api_error {
auto s3_provider::add_if_not_found(api_file &file,
const std::string &object_name) const
-> api_error {
api_meta_map meta{};
if (get_item_meta(file.api_path, meta) == api_error::item_not_found) {
auto err = create_path_directories(
@ -70,7 +71,7 @@ auto s3_provider::convert_api_date(std::string_view date) -> std::uint64_t {
utils::string::split(date_parts.at(1U), 'Z', true).at(0U)) *
1000000UL;
struct tm tm1 {};
struct tm tm1{};
#if defined(_WIN32)
utils::time::strptime(date_time.c_str(), "%Y-%m-%dT%T", &tm1);
return nanos + utils::time::windows_time_t_to_unix_time(_mkgmtime(&tm1));
@ -112,10 +113,15 @@ auto s3_provider::create_directory_impl(const std::string &api_path,
auto object_name =
utils::path::create_api_path(is_encrypted ? meta[META_KEY] : api_path);
std::string response_data;
curl::requests::http_put_file put_file{};
put_file.allow_timeout = true;
put_file.aws_service = "aws:amz:" + cfg.region + ":s3";
put_file.path = object_name + '/';
put_file.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
long response_code{};
if (not get_comm().make_request(put_file, response_code, stop_requested)) {
@ -126,8 +132,9 @@ auto s3_provider::create_directory_impl(const std::string &api_path,
}
if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, api_path, response_code,
"failed to create directory");
utils::error::raise_api_path_error(
function_name, api_path, response_code,
fmt::format("failed to create directory|response|{}", response_data));
return api_error::comm_error;
}
@ -162,8 +169,9 @@ auto s3_provider::create_file_extra(const std::string &api_path,
return api_error::success;
}
auto s3_provider::create_path_directories(
const std::string &api_path, const std::string &key) const -> api_error {
auto s3_provider::create_path_directories(const std::string &api_path,
const std::string &key) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME();
if (api_path == "/") {
@ -191,46 +199,57 @@ auto s3_provider::create_path_directories(
cur_path = utils::path::create_api_path(
utils::path::combine(cur_path, {path_parts.at(idx)}));
auto exists{false};
auto res = is_directory(cur_path, exists);
if (res != api_error::success) {
return res;
}
if (not exists) {
curl::requests::http_put_file put_file{};
put_file.allow_timeout = true;
put_file.aws_service = "aws:amz:" + cfg.region + ":s3";
put_file.path = (is_encrypted ? cur_key : cur_path) + '/';
stop_type stop_requested{false};
long response_code{};
if (not get_comm().make_request(put_file, response_code,
stop_requested)) {
utils::error::raise_api_path_error(function_name, cur_path,
api_error::comm_error,
"failed to create directory object");
return api_error::comm_error;
std::string value;
auto res = get_item_meta(cur_path, META_DIRECTORY, value);
if (res == api_error::success) {
if (not utils::string::to_bool(value)) {
return api_error::item_exists;
}
if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, cur_path,
response_code,
"failed to create directory object");
return api_error::comm_error;
}
}
api_meta_map meta{};
res = get_item_meta(cur_path, meta);
if (res == api_error::item_not_found) {
auto dir = create_api_file(cur_path, cur_key, 0U,
get_last_modified(true, cur_path));
get_api_item_added()(true, dir);
continue;
}
if (res == api_error::item_not_found) {
auto exists{false};
res = is_directory(cur_path, exists);
if (res != api_error::success) {
return res;
}
if (not exists) {
std::string response_data;
curl::requests::http_put_file put_file{};
put_file.allow_timeout = true;
put_file.aws_service = "aws:amz:" + cfg.region + ":s3";
put_file.path = (is_encrypted ? cur_key : cur_path) + '/';
put_file.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
stop_type stop_requested{false};
long response_code{};
if (not get_comm().make_request(put_file, response_code,
stop_requested)) {
utils::error::raise_api_path_error(
function_name, cur_path, api_error::comm_error,
"failed to create directory object");
return api_error::comm_error;
}
if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(
function_name, api_path, response_code,
fmt::format("failed to create directory|response|{}",
response_data));
return api_error::comm_error;
}
auto dir = create_api_file(cur_path, cur_key, 0U,
get_last_modified(true, cur_path));
get_api_item_added()(true, dir);
}
}
if (res != api_error::success) {
return res;
}
@ -330,8 +349,9 @@ auto s3_provider::get_directory_item_count(const std::string &api_path) const
return 0U;
}
auto s3_provider::get_directory_items_impl(
const std::string &api_path, directory_item_list &list) const -> api_error {
auto s3_provider::get_directory_items_impl(const std::string &api_path,
directory_item_list &list) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME();
const auto &cfg = get_s3_config();
@ -389,60 +409,38 @@ auto s3_provider::get_directory_items_impl(
.as_string();
}
const auto add_directory_item =
[&](bool directory, const std::string &name,
std::uint64_t last_modified,
std::function<std::uint64_t(const directory_item &)> get_size)
-> api_error {
auto child_api_path =
utils::path::create_api_path(utils::path::combine("/", {name}));
std::string child_object_name;
if (is_encrypted) {
child_object_name = child_api_path;
if (not utils::encryption::decrypt_file_path(cfg.encryption_token,
child_api_path)) {
return api_error::decryption_error;
}
}
directory_item dir_item{};
dir_item.api_path = child_api_path;
dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path);
dir_item.directory = directory;
dir_item.size = get_size(dir_item);
ret = get_item_meta(child_api_path, dir_item.meta);
if (ret == api_error::item_not_found) {
if (directory) {
ret = create_path_directories(child_api_path, child_object_name);
if (ret != api_error::success) {
return ret;
}
} else {
auto file = create_api_file(child_api_path, child_object_name,
dir_item.size, last_modified);
ret = add_if_not_found(file, child_object_name);
if (ret != api_error::success) {
return ret;
}
}
ret = get_item_meta(child_api_path, dir_item.meta);
}
if (ret != api_error::success) {
return ret;
}
list.push_back(std::move(dir_item));
return api_error::success;
};
auto node_list =
doc.select_nodes("/ListBucketResult/CommonPrefixes/Prefix");
for (const auto &node : node_list) {
add_directory_item(
true, node.node().text().as_string(), 0U,
[](const directory_item &) -> std::uint64_t { return 0U; });
auto child_object_name = node.node().text().as_string();
directory_item dir_item{};
dir_item.api_path = child_object_name;
if (is_encrypted) {
if (not utils::encryption::decrypt_file_path(cfg.encryption_token,
dir_item.api_path)) {
return api_error::decryption_error;
}
}
dir_item.api_path = utils::path::create_api_path(dir_item.api_path);
dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path);
dir_item.directory = true;
dir_item.size = 0U;
auto res = get_item_meta(dir_item.api_path, dir_item.meta);
if (res == api_error::item_not_found) {
res = create_path_directories(dir_item.api_path, child_object_name);
if (res != api_error::success) {
return res;
}
res = get_item_meta(dir_item.api_path, dir_item.meta);
}
if (res != api_error::success) {
return res;
}
list.push_back(std::move(dir_item));
}
node_list = doc.select_nodes("/ListBucketResult/Contents");
@ -453,31 +451,66 @@ auto s3_provider::get_directory_items_impl(
continue;
}
auto size = node.node().select_node("Size").node().text().as_ullong();
auto last_modified = convert_api_date(
node.node().select_node("LastModified").node().text().as_string());
add_directory_item(false, child_object_name, last_modified,
[this, &is_encrypted, &size](
const directory_item &dir_item) -> std::uint64_t {
std::string size_str;
if (get_item_meta(dir_item.api_path, META_SIZE,
size_str) == api_error::success) {
return utils::string::to_uint64(size_str);
}
directory_item dir_item{};
dir_item.api_path = child_object_name;
if (is_encrypted) {
if (not utils::encryption::decrypt_file_path(cfg.encryption_token,
dir_item.api_path)) {
return api_error::decryption_error;
}
}
dir_item.api_path = utils::path::create_api_path(dir_item.api_path);
dir_item.api_parent = utils::path::get_parent_api_path(dir_item.api_path);
dir_item.directory = false;
return is_encrypted
? utils::encryption::encrypting_reader::
calculate_decrypted_size(size)
: size;
});
std::string size_str;
if (get_item_meta(dir_item.api_path, META_SIZE, size_str) ==
api_error::success) {
dir_item.size = utils::string::to_uint64(size_str);
} else {
auto size = node.node().select_node("Size").node().text().as_ullong();
dir_item.size = is_encrypted ? utils::encryption::encrypting_reader::
calculate_decrypted_size(size)
: size;
}
auto res = get_item_meta(dir_item.api_path, dir_item.meta);
if (res == api_error::item_not_found) {
auto last_modified = convert_api_date(
node.node().select_node("LastModified").node().text().as_string());
api_file file{};
file.api_path = dir_item.api_path;
file.api_parent = dir_item.api_parent;
file.accessed_date = file.changed_date = file.creation_date =
file.modified_date = last_modified;
file.file_size = dir_item.size;
if (is_encrypted) {
file.key = child_object_name;
}
res = add_if_not_found(file, child_object_name);
if (res != api_error::success) {
return res;
}
res = get_item_meta(dir_item.api_path, dir_item.meta);
}
if (res != api_error::success) {
return res;
}
list.push_back(std::move(dir_item));
}
}
return ret;
}
auto s3_provider::get_file(const std::string &api_path,
api_file &file) const -> api_error {
auto s3_provider::get_file(const std::string &api_path, api_file &file) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME();
try {
@ -516,8 +549,8 @@ auto s3_provider::get_file(const std::string &api_path,
return api_error::error;
}
auto s3_provider::get_file_list(api_file_list &list,
std::string &marker) const -> api_error {
auto s3_provider::get_file_list(api_file_list &list, std::string &marker) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME();
std::string response_data;
@ -592,8 +625,9 @@ auto s3_provider::get_file_list(api_file_list &list,
return grab_more ? api_error::more_data : api_error::success;
}
auto s3_provider::get_last_modified(
bool directory, const std::string &api_path) const -> std::uint64_t {
auto s3_provider::get_last_modified(bool directory,
const std::string &api_path) const
-> std::uint64_t {
bool is_encrypted{};
std::string object_name;
head_object_result result{};
@ -603,9 +637,10 @@ auto s3_provider::get_last_modified(
: utils::time::get_time_now();
}
auto s3_provider::get_object_info(
bool directory, const std::string &api_path, bool &is_encrypted,
std::string &object_name, head_object_result &result) const -> api_error {
auto s3_provider::get_object_info(bool directory, const std::string &api_path,
bool &is_encrypted, std::string &object_name,
head_object_result &result) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME();
try {
@ -622,11 +657,16 @@ auto s3_provider::get_object_info(
object_name = utils::path::create_api_path(is_encrypted ? key : api_path);
std::string response_data;
curl::requests::http_head head{};
head.allow_timeout = true;
head.aws_service = "aws:amz:" + cfg.region + ":s3";
head.path = directory ? object_name + '/' : object_name;
head.response_headers = http_headers{};
head.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
stop_type stop_requested{false};
long response_code{};
@ -639,8 +679,9 @@ auto s3_provider::get_object_info(
}
if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, api_path, response_code,
"failed to get object info");
utils::error::raise_api_path_error(
function_name, api_path, response_code,
fmt::format("failed to get object info|response|{}", response_data));
return api_error::comm_error;
}
@ -654,10 +695,12 @@ auto s3_provider::get_object_info(
return api_error::error;
}
auto s3_provider::get_object_list(
std::string &response_data, long &response_code,
std::optional<std::string> delimiter, std::optional<std::string> prefix,
std::optional<std::string> token) const -> bool {
auto s3_provider::get_object_list(std::string &response_data,
long &response_code,
std::optional<std::string> delimiter,
std::optional<std::string> prefix,
std::optional<std::string> token) const
-> bool {
curl::requests::http_get get{};
get.allow_timeout = true;
get.aws_service = "aws:amz:" + get_s3_config().region + ":s3";
@ -673,8 +716,7 @@ auto s3_provider::get_object_list(
if (token.has_value() && not token.value().empty()) {
get.query["continuation-token"] = token.value();
}
get.response_handler = [&response_data](const data_buffer &data,
long /*response_code*/) {
get.response_handler = [&response_data](auto &&data, long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
@ -686,8 +728,8 @@ auto s3_provider::get_total_drive_space() const -> std::uint64_t {
return std::numeric_limits<std::int64_t>::max() / std::int64_t(2);
}
auto s3_provider::is_directory(const std::string &api_path,
bool &exists) const -> api_error {
auto s3_provider::is_directory(const std::string &api_path, bool &exists) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME();
exists = false;
@ -714,8 +756,8 @@ auto s3_provider::is_directory(const std::string &api_path,
return api_error::error;
}
auto s3_provider::is_file(const std::string &api_path,
bool &exists) const -> api_error {
auto s3_provider::is_file(const std::string &api_path, bool &exists) const
-> api_error {
REPERTORY_USES_FUNCTION_NAME();
exists = false;
@ -782,7 +824,7 @@ auto s3_provider::read_file_bytes(const std::string &api_path, std::size_t size,
read_offset,
read_offset + read_size - 1U,
}};
get.response_handler = [&read_buffer](const data_buffer &response_data,
get.response_handler = [&read_buffer](auto &&response_data,
long /*response_code*/) {
read_buffer = response_data;
};
@ -872,10 +914,14 @@ auto s3_provider::remove_directory_impl(const std::string &api_path)
auto object_name =
utils::path::create_api_path(is_encrypted ? key : api_path);
std::string response_data;
curl::requests::http_delete del{};
del.allow_timeout = true;
del.aws_service = "aws:amz:" + cfg.region + ":s3";
del.path = object_name + '/';
del.response_handler = [&response_data](auto &&data, long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
long response_code{};
stop_type stop_requested{};
@ -889,8 +935,9 @@ auto s3_provider::remove_directory_impl(const std::string &api_path)
if ((response_code < http_error_codes::ok ||
response_code >= http_error_codes::multiple_choices) &&
response_code != http_error_codes::not_found) {
utils::error::raise_api_path_error(function_name, api_path, response_code,
"failed to remove directory");
utils::error::raise_api_path_error(
function_name, api_path, response_code,
fmt::format("failed to remove directory|response|{}", response_data));
return api_error::comm_error;
}
@ -987,9 +1034,14 @@ auto s3_provider::upload_file_impl(const std::string &api_path,
auto object_name =
utils::path::create_api_path(is_encrypted ? key : api_path);
std::string response_data;
curl::requests::http_put_file put_file{};
put_file.aws_service = "aws:amz:" + cfg.region + ":s3";
put_file.path = object_name;
put_file.response_handler = [&response_data](auto &&data,
long /*response_code*/) {
response_data = std::string(data.begin(), data.end());
};
put_file.source_path = source_path;
if (is_encrypted && file_size > 0U) {
@ -1005,8 +1057,9 @@ auto s3_provider::upload_file_impl(const std::string &api_path,
}
if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, api_path, response_code,
"failed to get upload file");
utils::error::raise_api_path_error(
function_name, api_path, response_code,
fmt::format("failed to upload file|response|{}", response_data));
return api_error::comm_error;
}