fixed directory item count bug on s3 provider

This commit is contained in:
2025-09-27 11:05:04 -05:00
parent cf68a7effe
commit 5a042c09e5
4 changed files with 152 additions and 141 deletions

View File

@@ -34,6 +34,11 @@ struct i_http_comm;
struct head_object_result;
class s3_provider final : public base_provider {
private:
// Callback type invoked by iterate_prefix() for each listing entry.
//  - prefix:     raw text of the entry as it appears in the listing response
//                (a common prefix for prefix_action, an object key for
//                key_action).
//  - node:       the XML node the entry was read from (the `Prefix` element
//                for common prefixes, the `Contents` element for keys).
//  - api_prefix: the iterated prefix after utils::path::create_api_path().
// Returning anything other than api_error::success stops the iteration and
// is propagated to the caller of iterate_prefix().
// NOTE(review): the name looks like a typo of "iterate_callback_t"; renaming
// would require updating every use, so it is left as-is here.
using interate_callback_t = std::function<api_error(
const std::string &prefix, const pugi::xml_node &node,
const std::string &api_prefix)>;
public:
static const constexpr auto type{provider_type::s3};
@@ -170,6 +175,11 @@ public:
return false;
};
// Walks the object listing for `prefix` (using "/" as the delimiter), page by
// page, and dispatches every distinct entry to a callback:
//  - prefix_action is called once per distinct common prefix;
//  - key_action is called once per distinct object key (keys ending in '/'
//    and keys that collide with an already reported common prefix are
//    skipped).
// Returns api_error::success after the last page, the first non-success value
// returned by a callback, or an error describing a failed/invalid listing
// request.
[[nodiscard]] auto iterate_prefix(const std::string &prefix,
interate_callback_t prefix_action,
interate_callback_t key_action) const
-> api_error;
[[nodiscard]] auto read_file_bytes(const std::string &api_path,
std::size_t size, std::uint64_t offset,
data_buffer &data,

View File

@@ -290,82 +290,19 @@ auto s3_provider::get_directory_item_count(const std::string &api_path) const
: utils::path::create_api_path(is_encrypted ? key : api_path);
auto prefix = object_name.empty() ? std::string{} : object_name + "/";
std::unordered_set<std::string> seen_prefixes;
std::unordered_set<std::string> seen_keys;
bool grab_more{true};
std::string token{};
std::uint64_t total_count{0};
while (grab_more) {
long response_code{};
std::string response_data{};
if (not get_object_list(response_data, response_code, "/", prefix,
token)) {
return total_count;
}
if (response_code == http_error_codes::not_found) {
return total_count;
}
if (response_code != http_error_codes::ok) {
return total_count;
}
pugi::xml_document doc;
auto parsed = doc.load_string(response_data.c_str());
if (parsed.status != pugi::xml_parse_status::status_ok) {
return total_count;
}
grab_more = doc.select_node("/ListBucketResult/IsTruncated")
.node()
.text()
.as_bool();
if (grab_more) {
token = doc.select_node("/ListBucketResult/NextContinuationToken")
.node()
.text()
.as_string();
}
for (auto const &node :
doc.select_nodes("/ListBucketResult/CommonPrefixes/Prefix")) {
std::string cur_prefix = node.node().text().as_string();
if (not cur_prefix.empty() && not seen_prefixes.contains(cur_prefix)) {
seen_prefixes.insert(cur_prefix);
auto res = iterate_prefix(
prefix,
[&total_count](auto &&, auto &&, auto &&) {
++total_count;
}
}
for (auto const &node : doc.select_nodes("/ListBucketResult/Contents")) {
std::string cur_key = node.node().child("Key").text().as_string();
if (cur_key.empty()) {
continue;
}
if (not prefix.empty() && // and
(cur_key == prefix || not cur_key.starts_with(prefix))) {
continue;
}
if (cur_key.back() == '/') {
continue;
}
if (seen_prefixes.contains(cur_key + "/")) {
continue;
}
if (seen_keys.contains(cur_key)) {
continue;
}
seen_keys.insert(cur_key);
++total_count;
}
return api_error::success;
},
[&total_count](auto &&, auto &&, auto &&) {
++total_count;
return api_error::success;
});
if (res != api_error::success) {
return 0U;
}
return total_count;
@@ -468,74 +405,25 @@ auto s3_provider::get_directory_items_impl(const std::string &api_path,
object_name.empty() ? object_name : object_name + "/",
};
auto grab_more{true};
std::string token{};
while (grab_more) {
std::string response_data{};
long response_code{};
if (not get_object_list(response_data, response_code, "/", prefix, token)) {
return api_error::comm_error;
}
return iterate_prefix(
prefix,
[&](auto && /* prefix */, auto &&node, auto &&) -> auto {
return add_directory_item(
utils::path::create_api_path(
utils::path::combine("/", {node.text().as_string()})),
true, node);
},
[&](auto && /* key */, auto &&node, auto &&api_prefix) -> auto {
auto child_api_path{
utils::path::create_api_path(
node.select_node("Key").node().text().as_string()),
};
if (child_api_path == api_prefix) {
return api_error::success;
}
if (response_code == http_error_codes::not_found) {
return api_error::directory_not_found;
}
if (response_code != http_error_codes::ok) {
utils::error::raise_api_path_error(function_name, api_path, response_code,
"failed to get directory items");
return api_error::comm_error;
}
pugi::xml_document doc;
auto parse_res{doc.load_string(response_data.c_str())};
if (parse_res.status != pugi::xml_parse_status::status_ok) {
return api_error::error;
}
grab_more = doc.select_node("/ListBucketResult/IsTruncated")
.node()
.text()
.as_bool();
if (grab_more) {
token = doc.select_node("/ListBucketResult/NextContinuationToken")
.node()
.text()
.as_string();
}
auto node_list{
doc.select_nodes("/ListBucketResult/CommonPrefixes/Prefix"),
};
for (const auto &node : node_list) {
auto child_object_name{
utils::path::create_api_path(
utils::path::combine("/", {node.node().text().as_string()})),
};
auto res{add_directory_item(child_object_name, true, node.node())};
if (res != api_error::success) {
return res;
}
}
node_list = doc.select_nodes("/ListBucketResult/Contents");
for (const auto &node : node_list) {
auto child_object_name{
utils::path::create_api_path(
node.node().select_node("Key").node().text().as_string()),
};
if (child_object_name == utils::path::create_api_path(prefix)) {
continue;
}
auto res{add_directory_item(child_object_name, false, node.node())};
if (res != api_error::success) {
return res;
}
}
}
return api_error::success;
return add_directory_item(child_api_path, false, node);
});
}
auto s3_provider::get_file(const std::string &api_path, api_file &file) const
@@ -946,6 +834,114 @@ auto s3_provider::is_online() const -> bool {
return false;
}
// Walks the object listing for `prefix` (delimiter "/"), following
// continuation tokens across pages, and dispatches every distinct entry to a
// callback.
//
// Parameters:
//  - prefix:        object-name prefix passed to get_object_list(); an empty
//                   prefix disables the "is this entry below the prefix"
//                   filter.
//  - prefix_action: called once per distinct
//                   /ListBucketResult/CommonPrefixes/Prefix entry.
//  - key_action:    called once per distinct /ListBucketResult/Contents key.
//
// Both callbacks receive (raw entry text, XML node, api_prefix) where
// api_prefix is utils::path::create_api_path(prefix).
//
// Returns:
//  - api_error::success        after the final page has been processed;
//  - api_error::comm_error     if the request fails or the response code is
//                              neither ok nor not_found;
//  - api_error::item_not_found if the response code is not_found;
//  - api_error::error          if the response is not valid XML, or if it is
//                              marked truncated without a continuation token;
//  - otherwise the first non-success value returned by a callback.
auto s3_provider::iterate_prefix(const std::string &prefix,
                                 interate_callback_t prefix_action,
                                 interate_callback_t key_action) const
    -> api_error {
  auto api_prefix = utils::path::create_api_path(prefix);

  // `api_prefix` with a trailing '/' (unless it already is "/"), so that a
  // starts_with() test only matches real children and not siblings sharing
  // the same leading characters.
  auto api_prefix_parent = api_prefix;
  if (api_prefix != "/") {
    api_prefix_parent += '/';
  }

  // True when `entry` is the iterated prefix itself or does not live below
  // it. Never filters when `prefix` is empty. The API path is only computed
  // when it is actually needed.
  const auto outside_prefix = [&](const std::string &entry) -> bool {
    if (prefix.empty()) {
      return false;
    }

    auto entry_api_path = utils::path::create_api_path(entry);
    return entry_api_path == api_prefix ||
           not entry_api_path.starts_with(api_prefix_parent);
  };

  // Entries already reported; pages may repeat entries, and a key may also
  // show up as a common prefix.
  std::unordered_set<std::string> seen_prefixes;
  std::unordered_set<std::string> seen_keys;

  bool grab_more{true};
  std::string token{};

  while (grab_more) {
    long response_code{};
    std::string response_data{};
    if (not get_object_list(response_data, response_code, "/", prefix, token)) {
      return api_error::comm_error;
    }

    if (response_code == http_error_codes::not_found) {
      return api_error::item_not_found;
    }

    if (response_code != http_error_codes::ok) {
      return api_error::comm_error;
    }

    pugi::xml_document doc;
    auto parsed = doc.load_string(response_data.c_str());
    if (parsed.status != pugi::xml_parse_status::status_ok) {
      return api_error::error;
    }

    grab_more = doc.select_node("/ListBucketResult/IsTruncated")
                    .node()
                    .text()
                    .as_bool();
    if (grab_more) {
      token = doc.select_node("/ListBucketResult/NextContinuationToken")
                  .node()
                  .text()
                  .as_string();
      if (token.empty()) {
        // A truncated page without a continuation token cannot be advanced:
        // requesting again with an empty token would return the same first
        // page and loop forever. Treat it like any other malformed response.
        return api_error::error;
      }
    }

    for (auto const &node :
         doc.select_nodes("/ListBucketResult/CommonPrefixes/Prefix")) {
      std::string cur_prefix = node.node().text().as_string();
      if (cur_prefix.empty() || outside_prefix(cur_prefix)) {
        continue;
      }

      // insert() reports whether the prefix is new, avoiding a second lookup.
      if (not seen_prefixes.insert(cur_prefix).second) {
        continue;
      }

      auto res = prefix_action(cur_prefix, node.node(), api_prefix);
      if (res != api_error::success) {
        return res;
      }
    }

    for (auto const &node : doc.select_nodes("/ListBucketResult/Contents")) {
      std::string cur_key = node.node().child("Key").text().as_string();
      if (cur_key.empty() || outside_prefix(cur_key)) {
        continue;
      }

      // Keys ending in '/' are not reported through key_action.
      if (cur_key.back() == '/') {
        continue;
      }

      // Skip keys whose name collides with an already reported common prefix.
      if (seen_prefixes.contains(cur_key + "/")) {
        continue;
      }

      if (not seen_keys.insert(cur_key).second) {
        continue;
      }

      auto res = key_action(cur_key, node.node(), api_prefix);
      if (res != api_error::success) {
        return res;
      }
    }
  }

  return api_error::success;
}
auto s3_provider::remove_directory_impl(const std::string &api_path)
-> api_error {
REPERTORY_USES_FUNCTION_NAME();

View File

@@ -123,6 +123,9 @@ TYPED_TEST(fuse_test, directory_rmdir_on_non_empty_directory_should_fail) {
std::string dir_name{"non_empty"};
auto dir = this->create_directory_and_test(dir_name);
std::string dir_name2{"non_empty_2"};
auto dir2 = this->create_directory_and_test(dir_name2);
std::string name{dir_name + "/child"};
auto file = this->create_file_and_test(name, 0644);
this->overwrite_text(file, "X");
@@ -133,6 +136,7 @@ TYPED_TEST(fuse_test, directory_rmdir_on_non_empty_directory_should_fail) {
this->unlink_file_and_test(file);
this->rmdir_and_test(dir);
this->rmdir_and_test(dir2);
}
} // namespace repertory

View File

@@ -544,6 +544,7 @@ TYPED_TEST(providers_test, get_directory_items_fails_if_item_is_file) {
EXPECT_EQ(api_error::success, this->provider->remove_file("/pt01.txt"));
}
TYPED_TEST(providers_test, get_directory_item_count) {
if (this->provider->get_provider_type() == provider_type::encrypt) {
EXPECT_EQ(std::size_t(2U), this->provider->get_directory_item_count("/"));