Skip to content

Commit

Permalink
Adapt to XML API changes (#332)
Browse files Browse the repository at this point in the history
**Issue**
The XML API was hard to use right, leading to bugs like this: #328

**Description of changes:**
- Adapt to API changes from: awslabs/aws-c-common#1043
- Break up node traversal functions, to ensure we're processing the correct XML elements.
    - Previously, the same callback would be used for all XML elements. This could cause error if an element with the same name occurred at different parts of the document tree.
- Improved error checking
    - Previously, many calls to `aws_xml_node_as_body()` weren't being checked for error.
- Replace ~aws_xml_get_top_level_tag()~ and ~aws_xml_get_top_level_tag_with_root_name()~ with `aws_xml_get_body_at_path()`
   - ~aws_xml_get_top_level_tag()~ didn't check the name of the root node
   - ~aws_xml_get_top_level_tag_with_root_name()~ was clunky to use (IMHO)
   - so replace with an API that can retrieve an element at any depth (not just 2), checking names the whole way, and with a nicer API (IMHO).
   - new function gives `aws_byte_cursor` instead of `aws_string`, the user was usually just deleting it afterwards, which made their error-handling more complicated.
- Trivial stuff:
    - Remove unused functions ~aws_s3_list_objects_operation_new()~ and ~aws_s3_initiate_list_parts()~
    - `aws_replace_quote_entities()` returns `aws_byte_buf` by value, instead of as out-param
    - Some functions take `aws_byte_cursor` by value, instead taking `aws_string *` or `aws_byte_buf *` or `aws_byte_cursor *` by pointer
  • Loading branch information
graebm authored Jul 18, 2023
1 parent bc9c8b2 commit 2311881
Show file tree
Hide file tree
Showing 19 changed files with 381 additions and 538 deletions.
6 changes: 1 addition & 5 deletions include/aws/s3/private/s3_list_objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ struct aws_s3_object_info {
* Invoked when an object or prefix is encountered during a ListObjectsV2 API call. Return false, to immediately
* terminate the list operation. Returning true will continue until at least the current page is iterated.
*/
typedef bool(aws_s3_on_object_fn)(const struct aws_s3_object_info *info, void *user_data);
typedef int(aws_s3_on_object_fn)(const struct aws_s3_object_info *info, void *user_data);

/**
* Invoked upon the complete fetch and parsing of a page. If error_code is AWS_OP_SUCCESS and
Expand Down Expand Up @@ -112,10 +112,6 @@ AWS_S3_API struct aws_s3_paginator *aws_s3_initiate_list_objects(
struct aws_allocator *allocator,
const struct aws_s3_list_objects_params *params);

AWS_S3_API struct aws_s3_paginated_operation *aws_s3_list_objects_operation_new(
struct aws_allocator *allocator,
const struct aws_s3_list_objects_params *params);

AWS_EXTERN_C_END

#endif /* AWS_S3_LIST_OBJECTS_H */
30 changes: 6 additions & 24 deletions include/aws/s3/private/s3_list_parts.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,15 @@ struct aws_s3_part_info {
};

/**
* Invoked when a part is encountered during ListParts call. Return false, to immediately
* terminate the list operation. Returning true will continue until at least the current page is iterated.
* Invoked when a part is encountered during ListParts call.
* Return AWS_OP_ERR (after an error has been raised) to fail the list operation.
* Return AWS_OP_SUCCESS to continue until at least the current page is iterated.
*/
typedef bool(aws_s3_on_part_fn)(const struct aws_s3_part_info *info, void *user_data);
typedef int(aws_s3_on_part_fn)(const struct aws_s3_part_info *info, void *user_data);

/**
* Parameters for calling aws_s3_initiate_list_parts(). All values are copied out or re-seated and reference counted.
* Parameters for calling aws_s3_list_parts_operation_new(). All values are copied out or re-seated and reference
* counted.
*/
struct aws_s3_list_parts_params {
/**
Expand All @@ -87,10 +89,6 @@ struct aws_s3_list_parts_params {
* Callback to invoke on each part that's listed.
*/
aws_s3_on_part_fn *on_part;
/**
* Callback to invoke when each page of the bucket listing completes.
*/
aws_s3_on_page_finished_fn *on_list_finished;
/**
* Associated user data.
*/
Expand All @@ -99,22 +97,6 @@ struct aws_s3_list_parts_params {

AWS_EXTERN_C_BEGIN

/**
* Initiates a list objects command (without executing it), and returns a paginator object to iterate the bucket with if
* successful.
*
* Returns NULL on failure. Check aws_last_error() for details on the error that occurred.
*
* this is a reference counted object. It is returned with a reference count of 1. You must call
* aws_s3_paginator_release() on this object when you are finished with it.
*
* This does not start the actual list operation. You need to call aws_s3_paginator_continue() to start
* the operation.
*/
AWS_S3_API struct aws_s3_paginator *aws_s3_initiate_list_parts(
struct aws_allocator *allocator,
const struct aws_s3_list_parts_params *params);

AWS_S3_API struct aws_s3_paginated_operation *aws_s3_list_parts_operation_new(
struct aws_allocator *allocator,
const struct aws_s3_list_parts_params *params);
Expand Down
11 changes: 5 additions & 6 deletions include/aws/s3/private/s3_paginator.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ typedef int(aws_s3_next_http_message_fn)(
void *user_data,
struct aws_http_message **out_message);

typedef bool(
aws_s3_on_result_node_encountered_fn)(struct aws_xml_parser *parser, struct aws_xml_node *node, void *user_data);
typedef int(aws_s3_on_result_node_encountered_fn)(struct aws_xml_node *node, void *user_data);

typedef void(aws_s3_on_page_finished_fn)(struct aws_s3_paginator *paginator, int error_code, void *user_data);

Expand Down Expand Up @@ -84,14 +83,14 @@ struct aws_s3_paginator_params {
*/
struct aws_s3_paginated_operation_params {
/**
* Name of the top level result node. Must not be NULL.
* Name of the top level result node. Must not be empty.
*/
const struct aws_byte_cursor *result_xml_node_name;
struct aws_byte_cursor result_xml_node_name;

/**
* Name of the continuation token node. Must not be NULL.
* Name of the continuation token node. Must not be empty.
*/
const struct aws_byte_cursor *continuation_token_node_name;
struct aws_byte_cursor continuation_token_node_name;

/**
* Function to generate next message.
Expand Down
46 changes: 26 additions & 20 deletions include/aws/s3/private/s3_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,6 @@ extern const struct aws_byte_cursor g_head_method;
AWS_S3_API
extern const struct aws_byte_cursor g_delete_method;

extern const struct aws_byte_cursor g_error_body_xml_name;

extern const struct aws_byte_cursor g_code_body_xml_name;

extern const struct aws_byte_cursor g_s3_internal_error_code;

AWS_S3_API
Expand All @@ -184,25 +180,35 @@ void aws_cached_signing_config_destroy(struct aws_cached_signing_config_aws *cac
AWS_S3_API
void copy_http_headers(const struct aws_http_headers *src, struct aws_http_headers *dest);

/* Get a top-level (exists directly under the root tag) tag value. */
AWS_S3_API
struct aws_string *aws_xml_get_top_level_tag(
struct aws_allocator *allocator,
const struct aws_byte_cursor *tag_name,
struct aws_byte_cursor *xml_body);

/* Get a top-level (exists directly under the root tag) tag value with expected root name. */
/**
* Get content of XML element at path.
*
* path_name_array must be a C array of char*, with a NULL as its final entry.
*
* For example:
* Given `xml_doc`: "<Error><Code>SlowDown</Code></Error>"
* And `path_name_array`: {"Error", "Code", NULL}
* `out_body` will get set to: "SlowDown"
*
* Returns AWS_OP_SUCCESS or AWS_OP_ERR.
* Raises AWS_ERROR_STRING_MATCH_NOT_FOUND if path not found in XML,
* or AWS_ERROR_INVALID_XML if the XML can't be parsed.
*
* DO NOT make this function public without a lot of thought.
* The whole thing of passing a C-array of C-strings with a NULL sentinel
* is unconventional for this codebase.
*/
AWS_S3_API
struct aws_string *aws_xml_get_top_level_tag_with_root_name(
int aws_xml_get_body_at_path(
struct aws_allocator *allocator,
const struct aws_byte_cursor *tag_name,
const struct aws_byte_cursor *expected_root_name,
bool *out_root_name_mismatch,
struct aws_byte_cursor *xml_body);
struct aws_byte_cursor xml_doc,
const char *path_name_array[],
struct aws_byte_cursor *out_body);

/* replace &quot; with escaped /" */
/* replace &quot; with escaped /"
* Returns initialized aws_byte_buf */
AWS_S3_API
void aws_replace_quote_entities(struct aws_allocator *allocator, struct aws_string *str, struct aws_byte_buf *out_buf);
struct aws_byte_buf aws_replace_quote_entities(struct aws_allocator *allocator, struct aws_byte_cursor src);

/* strip quotes if string is enclosed in quotes. does not remove quotes if they only appear on either side of the string
*/
Expand Down Expand Up @@ -267,7 +273,7 @@ void aws_s3_get_part_range(

/* Match the S3 error code to CRT error code, return AWS_ERROR_UNKNOWN when not matched */
AWS_S3_API
int aws_s3_crt_error_code_from_server_error_code_string(const struct aws_string *error_code_string);
int aws_s3_crt_error_code_from_server_error_code_string(struct aws_byte_cursor error_code_string);

AWS_EXTERN_C_END

Expand Down
13 changes: 6 additions & 7 deletions samples/s3/s3-cp.c
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ static int s_kickoff_get_object(
}

/* upon listing the objects in a bucket, this is invoked for each object encountered. */
static bool s_on_list_object(const struct aws_s3_object_info *info, void *user_data) {
static int s_on_list_object(const struct aws_s3_object_info *info, void *user_data) {
struct cp_app_ctx *cp_app_ctx = user_data;

/* size greater than zero means it's an actual object. */
Expand Down Expand Up @@ -784,7 +784,7 @@ static bool s_on_list_object(const struct aws_s3_object_info *info, void *user_d
int ret_val = s_kickoff_get_object(cp_app_ctx, &info->key, &destination_cur, info->size);
aws_byte_buf_clean_up(&dest_directory);

return ret_val == AWS_OP_SUCCESS;
return ret_val;
}

/* otherwise, we're copying between buckets. Set up the copy here. */
Expand All @@ -794,15 +794,14 @@ static bool s_on_list_object(const struct aws_s3_object_info *info, void *user_d
aws_byte_buf_append_dynamic(&destination_key, &trimmed_key);
struct aws_byte_cursor destination_key_cur = aws_byte_cursor_from_buf(&destination_key);

int return_code =
s_kick_off_copy_object_request(
cp_app_ctx, &cp_app_ctx->source_uri.host_name, &info->key, &destination_key_cur) == AWS_OP_SUCCESS;
int return_code = s_kick_off_copy_object_request(
cp_app_ctx, &cp_app_ctx->source_uri.host_name, &info->key, &destination_key_cur);

aws_byte_buf_clean_up(&destination_key);
return return_code == AWS_OP_SUCCESS;
return return_code;
}

return true;
return AWS_OP_SUCCESS;
}

static bool s_are_all_transfers_and_listings_done(void *arg) {
Expand Down
4 changes: 2 additions & 2 deletions samples/s3/s3-ls.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,14 @@ static bool s_app_completion_predicate(void *arg) {
/**
* Called once for each object returned in the ListObjectsV2 responses.
*/
bool s_on_object(const struct aws_s3_object_info *info, void *user_data) {
int s_on_object(const struct aws_s3_object_info *info, void *user_data) {
struct s3_ls_app_data *app_ctx = user_data;

if (app_ctx->long_format) {
printf("%-18" PRIu64 " ", info->size);
}
printf("%.*s\n", (int)info->key.len, info->key.ptr);
return true;
return AWS_OP_SUCCESS;
}

/**
Expand Down
2 changes: 1 addition & 1 deletion source/s3.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static struct aws_error_info s_errors[] = {
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_RESPONSE_CHECKSUM_MISMATCH, "response checksum header does not match calculated checksum"),
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_CHECKSUM_CALCULATION_FAILED, "failed to calculate a checksum for the provided stream"),
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_PAUSED, "Request successfully paused"),
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_LIST_PARTS_PARSE_FAILED, "Failed to parse result from list parts"),
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_LIST_PARTS_PARSE_FAILED, "Failed to parse response from ListParts"),
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_RESUMED_PART_CHECKSUM_MISMATCH, "Checksum does not match previously uploaded part"),
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_RESUME_FAILED, "Resuming request failed"),
AWS_DEFINE_ERROR_INFO_S3(AWS_ERROR_S3_OBJECT_MODIFIED, "The object modifed during download."),
Expand Down
16 changes: 8 additions & 8 deletions source/s3_auto_ranged_get.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

const uint32_t s_conservative_max_requests_in_flight = 8;
const struct aws_byte_cursor g_application_xml_value = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("application/xml");
const struct aws_byte_cursor g_object_size_value = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("ActualObjectSize");

static void s_s3_meta_request_auto_ranged_get_destroy(struct aws_s3_meta_request *meta_request);

Expand Down Expand Up @@ -426,7 +425,9 @@ finish:;
return future;
}

/* Check the finish result of meta request, in case of the request failed because of downloading an empty file */
/* Check the finish result of meta request.
* Return true if the request failed because it downloaded an empty file.
* Return false if the request failed for any other reason */
static bool s_check_empty_file_download_error(struct aws_s3_request *failed_request) {
struct aws_http_headers *failed_headers = failed_request->send_data.response_headers;
struct aws_byte_buf failed_body = failed_request->send_data.response_body;
Expand All @@ -437,12 +438,11 @@ static bool s_check_empty_file_download_error(struct aws_s3_request *failed_requ
/* Content type found */
if (aws_byte_cursor_eq_ignore_case(&content_type, &g_application_xml_value)) {
/* XML response */
struct aws_byte_cursor body_cursor = aws_byte_cursor_from_buf(&failed_body);
struct aws_string *size =
aws_xml_get_top_level_tag(failed_request->allocator, &g_object_size_value, &body_cursor);
bool check_size = aws_string_eq_c_str(size, "0");
aws_string_destroy(size);
if (check_size) {
struct aws_byte_cursor xml_doc = aws_byte_cursor_from_buf(&failed_body);
const char *path_to_size[] = {"Error", "ActualObjectSize", NULL};
struct aws_byte_cursor size = {0};
aws_xml_get_body_at_path(failed_request->allocator, xml_doc, path_to_size, &size);
if (aws_byte_cursor_eq_c_str(&size, "0")) {
return true;
}
}
Expand Down
39 changes: 21 additions & 18 deletions source/s3_auto_ranged_put.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

/* TODO: better logging of steps */

static const struct aws_byte_cursor s_upload_id = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("UploadId");
static const size_t s_complete_multipart_upload_init_body_size_bytes = 512;
static const size_t s_abort_multipart_upload_init_body_size_bytes = 512;
/* For unknown length body we no longer know the number of parts. to avoid
Expand Down Expand Up @@ -119,12 +118,18 @@ static int s_s3_auto_ranged_put_pause(
struct aws_s3_meta_request *meta_request,
struct aws_s3_meta_request_resume_token **resume_token);

static bool s_process_part_info_synced(const struct aws_s3_part_info *info, void *user_data) {
static int s_process_part_info_synced(const struct aws_s3_part_info *info, void *user_data) {
struct aws_s3_auto_ranged_put *auto_ranged_put = user_data;
struct aws_s3_meta_request *meta_request = &auto_ranged_put->base;

ASSERT_SYNCED_DATA_LOCK_HELD(&auto_ranged_put->base);

if (info->part_number == 0) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST, "id=%p: ListParts reported Part without valid PartNumber", (void *)meta_request);
return aws_raise_error(AWS_ERROR_S3_LIST_PARTS_PARSE_FAILED);
}

struct aws_s3_mpu_part_info *part = aws_mem_calloc(meta_request->allocator, 1, sizeof(struct aws_s3_mpu_part_info));
part->size = info->size;
part->etag = aws_strip_quotes(meta_request->allocator, info->e_tag);
Expand Down Expand Up @@ -166,7 +171,7 @@ static bool s_process_part_info_synced(const struct aws_s3_part_info *info, void
/* Add this part */
aws_array_list_set_at(&auto_ranged_put->synced_data.part_list, &part, info->part_number - 1);

return true;
return AWS_OP_SUCCESS;
}

/*
Expand Down Expand Up @@ -1666,13 +1671,14 @@ static void s_s3_auto_ranged_put_request_finished(
}
}

struct aws_byte_cursor buffer_byte_cursor = aws_byte_cursor_from_buf(&request->send_data.response_body);
struct aws_byte_cursor xml_doc = aws_byte_cursor_from_buf(&request->send_data.response_body);

/* Find the upload id for this multipart upload. */
struct aws_string *upload_id =
aws_xml_get_top_level_tag(meta_request->allocator, &s_upload_id, &buffer_byte_cursor);
struct aws_byte_cursor upload_id = {0};
const char *xml_path[] = {"InitiateMultipartUploadResult", "UploadId", NULL};
aws_xml_get_body_at_path(meta_request->allocator, xml_doc, xml_path, &upload_id);

if (upload_id == NULL) {
if (upload_id.len == 0) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
"id=%p Could not find upload-id in create-multipart-upload response",
Expand All @@ -1682,7 +1688,7 @@ static void s_s3_auto_ranged_put_request_finished(
error_code = AWS_ERROR_S3_MISSING_UPLOAD_ID;
} else {
/* Store the multipart upload id. */
auto_ranged_put->upload_id = upload_id;
auto_ranged_put->upload_id = aws_string_new_from_cursor(meta_request->allocator, &upload_id);
}
}

Expand Down Expand Up @@ -1814,8 +1820,7 @@ static void s_s3_auto_ranged_put_request_finished(
}
/* END CRITICAL SECTION */

struct aws_byte_cursor response_body_cursor =
aws_byte_cursor_from_buf(&request->send_data.response_body);
struct aws_byte_cursor xml_doc = aws_byte_cursor_from_buf(&request->send_data.response_body);

/**
* TODO: The body of the response can be ERROR, check Error specified in body part from
Expand All @@ -1825,21 +1830,19 @@ static void s_s3_auto_ranged_put_request_finished(
*/

/* Grab the ETag for the entire object, and set it as a header. */
struct aws_string *etag_header_value =
aws_xml_get_top_level_tag(meta_request->allocator, &g_etag_header_name, &response_body_cursor);

if (etag_header_value != NULL) {
struct aws_byte_buf etag_header_value_byte_buf;
AWS_ZERO_STRUCT(etag_header_value_byte_buf);
struct aws_byte_cursor etag_header_value = {0};
const char *xml_path[] = {"CompleteMultipartUploadResult", "ETag", NULL};
aws_xml_get_body_at_path(meta_request->allocator, xml_doc, xml_path, &etag_header_value);

aws_replace_quote_entities(meta_request->allocator, etag_header_value, &etag_header_value_byte_buf);
if (etag_header_value.len > 0) {
struct aws_byte_buf etag_header_value_byte_buf =
aws_replace_quote_entities(meta_request->allocator, etag_header_value);

aws_http_headers_set(
final_response_headers,
g_etag_header_name,
aws_byte_cursor_from_buf(&etag_header_value_byte_buf));

aws_string_destroy(etag_header_value);
aws_byte_buf_clean_up(&etag_header_value_byte_buf);
}

Expand Down
Loading

0 comments on commit 2311881

Please sign in to comment.