libcudf
23.12.00
|
Settings for write_parquet(). More...
#include <parquet.hpp>
Public Member Functions | |
parquet_writer_options ()=default | |
Default constructor. More... | |
sink_info const & | get_sink () const |
Returns sink info. More... | |
compression_type | get_compression () const |
Returns compression format used. More... | |
statistics_freq | get_stats_level () const |
Returns level of statistics requested in output file. More... | |
table_view | get_table () const |
Returns table_view. More... | |
std::vector< partition_info > const & | get_partitions () const |
Returns partitions. More... | |
auto const & | get_metadata () const |
Returns associated metadata. More... | |
std::vector< std::map< std::string, std::string > > const & | get_key_value_metadata () const |
Returns Key-Value footer metadata information. More... | |
bool | is_enabled_int96_timestamps () const |
Returns true if timestamps will be written as INT96. More... | |
auto | is_enabled_utc_timestamps () const |
Returns true if timestamps will be written as UTC. More... | |
std::vector< std::string > const & | get_column_chunks_file_paths () const |
Returns Column chunks file paths to be set in the raw output metadata. More... | |
auto | get_row_group_size_bytes () const |
Returns maximum row group size, in bytes. More... | |
auto | get_row_group_size_rows () const |
Returns maximum row group size, in rows. More... | |
auto | get_max_page_size_bytes () const |
Returns the maximum uncompressed page size, in bytes. More... | |
auto | get_max_page_size_rows () const |
Returns maximum page size, in rows. More... | |
auto | get_column_index_truncate_length () const |
Returns maximum length of min or max values in column index, in bytes. More... | |
dictionary_policy | get_dictionary_policy () const |
Returns policy for dictionary use. More... | |
auto | get_max_dictionary_size () const |
Returns maximum dictionary size, in bytes. More... | |
auto | get_max_page_fragment_size () const |
Returns maximum page fragment size, in rows. More... | |
std::shared_ptr< writer_compression_statistics > | get_compression_statistics () const |
Returns a shared pointer to the user-provided compression statistics. More... | |
auto | is_enabled_write_v2_headers () const |
Returns true if V2 page headers should be written. More... | |
void | set_partitions (std::vector< partition_info > partitions) |
Sets partitions. More... | |
void | set_metadata (table_input_metadata metadata) |
Sets metadata. More... | |
void | set_key_value_metadata (std::vector< std::map< std::string, std::string >> metadata) |
Sets Key-Value footer metadata. More... | |
void | set_stats_level (statistics_freq sf) |
Sets the level of statistics. More... | |
void | set_compression (compression_type compression) |
Sets compression type. More... | |
void | enable_int96_timestamps (bool req) |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false. More... | |
void | enable_utc_timestamps (bool val) |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true. More... | |
void | set_column_chunks_file_paths (std::vector< std::string > file_paths) |
Sets column chunks file path to be set in the raw output metadata. More... | |
void | set_row_group_size_bytes (size_t size_bytes) |
Sets the maximum row group size, in bytes. More... | |
void | set_row_group_size_rows (size_type size_rows) |
Sets the maximum row group size, in rows. More... | |
void | set_max_page_size_bytes (size_t size_bytes) |
Sets the maximum uncompressed page size, in bytes. More... | |
void | set_max_page_size_rows (size_type size_rows) |
Sets the maximum page size, in rows. More... | |
void | set_column_index_truncate_length (int32_t size_bytes) |
Sets the maximum length of min or max values in column index, in bytes. More... | |
void | set_dictionary_policy (dictionary_policy policy) |
Sets the policy for dictionary use. More... | |
void | set_max_dictionary_size (size_t size_bytes) |
Sets the maximum dictionary size, in bytes. More... | |
void | set_max_page_fragment_size (size_type size_rows) |
Sets the maximum page fragment size, in rows. More... | |
void | set_compression_statistics (std::shared_ptr< writer_compression_statistics > comp_stats) |
Sets the pointer to the output compression statistics. More... | |
void | enable_write_v2_headers (bool val) |
Sets preference for V2 page headers. Write V2 page headers if set to true. More... | |
Static Public Member Functions | |
static parquet_writer_options_builder | builder (sink_info const &sink, table_view const &table) |
Create builder to create parquet_writer_options . More... | |
static parquet_writer_options_builder | builder () |
Create builder to create parquet_writer_options . More... | |
Settings for write_parquet().
Definition at line 517 of file parquet.hpp.
|
default |
Default constructor.
This has been added because Cython requires a default constructor to create objects on the stack.
|
static |
Create builder to create parquet_writer_options.
|
static |
Create builder to create parquet_writer_options.
sink | The sink used for writer output |
table | Table to be written to output |
|
inline |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false.
req | Boolean value to enable/disable writing of INT96 timestamps |
Definition at line 800 of file parquet.hpp.
|
inline |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true.
val | Boolean value to enable/disable writing of timestamps as UTC. |
Definition at line 807 of file parquet.hpp.
|
inline |
Sets preference for V2 page headers. Write V2 page headers if set to true.
val | Boolean value to enable/disable writing of V2 page headers. |
Definition at line 888 of file parquet.hpp.
|
inline |
Returns Column chunks file paths to be set in the raw output metadata.
Definition at line 670 of file parquet.hpp.
|
inline |
Returns maximum length of min or max values in column index, in bytes.
Definition at line 718 of file parquet.hpp.
|
inline |
Returns compression format used.
Definition at line 611 of file parquet.hpp.
|
inline |
Returns a shared pointer to the user-provided compression statistics.
Definition at line 746 of file parquet.hpp.
|
inline |
Returns policy for dictionary use.
Definition at line 725 of file parquet.hpp.
|
inline |
Returns Key-Value footer metadata information.
Definition at line 646 of file parquet.hpp.
|
inline |
Returns maximum dictionary size, in bytes.
Definition at line 732 of file parquet.hpp.
|
inline |
Returns maximum page fragment size, in rows.
Definition at line 739 of file parquet.hpp.
|
inline |
Returns the maximum uncompressed page size, in bytes.
If set larger than the row group size, then this will return the row group size.
Definition at line 696 of file parquet.hpp.
|
inline |
Returns maximum page size, in rows.
If set larger than the row group size, then this will return the row group size.
Definition at line 708 of file parquet.hpp.
|
inline |
|
inline |
|
inline |
Returns maximum row group size, in bytes.
Definition at line 680 of file parquet.hpp.
|
inline |
Returns maximum row group size, in rows.
Definition at line 687 of file parquet.hpp.
|
inline |
|
inline |
Returns level of statistics requested in output file.
Definition at line 618 of file parquet.hpp.
|
inline |
|
inline |
Returns true if timestamps will be written as INT96.
Returns: true if timestamps will be written as INT96.
Definition at line 656 of file parquet.hpp.
|
inline |
Returns true if timestamps will be written as UTC.
Returns: true if timestamps will be written as UTC.
Definition at line 663 of file parquet.hpp.
|
inline |
Returns true if V2 page headers should be written.
Returns: true if V2 page headers should be written.
Definition at line 756 of file parquet.hpp.
void cudf::io::parquet_writer_options::set_column_chunks_file_paths | ( | std::vector< std::string > | file_paths | ) |
Sets column chunks file path to be set in the raw output metadata.
file_paths | Vector of strings indicating the file paths. Must be the same size as the number of data sinks in sink_info |
void cudf::io::parquet_writer_options::set_column_index_truncate_length | ( | int32_t | size_bytes | ) |
Sets the maximum length of min or max values in column index, in bytes.
size_bytes | Length, in bytes, that min/max values will be truncated to |
|
inline |
Sets compression type.
compression | The compression type to use |
Definition at line 792 of file parquet.hpp.
|
inline |
Sets the pointer to the output compression statistics.
comp_stats | Pointer to compression statistics to be updated after writing |
Definition at line 878 of file parquet.hpp.
void cudf::io::parquet_writer_options::set_dictionary_policy | ( | dictionary_policy | policy | ) |
Sets the policy for dictionary use.
policy | Policy for dictionary use |
void cudf::io::parquet_writer_options::set_key_value_metadata | ( | std::vector< std::map< std::string, std::string >> | metadata | ) |
Sets Key-Value footer metadata.
metadata | Key-Value footer metadata |
void cudf::io::parquet_writer_options::set_max_dictionary_size | ( | size_t | size_bytes | ) |
Sets the maximum dictionary size, in bytes.
size_bytes | Maximum dictionary size, in bytes |
void cudf::io::parquet_writer_options::set_max_page_fragment_size | ( | size_type | size_rows | ) |
Sets the maximum page fragment size, in rows.
size_rows | Maximum page fragment size, in rows. |
void cudf::io::parquet_writer_options::set_max_page_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum uncompressed page size, in bytes.
size_bytes | Maximum uncompressed page size, in bytes to set |
void cudf::io::parquet_writer_options::set_max_page_size_rows | ( | size_type | size_rows | ) |
Sets the maximum page size, in rows.
size_rows | Maximum page size, in rows to set |
|
inline |
void cudf::io::parquet_writer_options::set_partitions | ( | std::vector< partition_info > | partitions | ) |
Sets partitions.
partitions | Partitions of the input table in {start_row, num_rows} pairs. If specified, must be the same size as the number of sinks in sink_info |
void cudf::io::parquet_writer_options::set_row_group_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum row group size, in bytes.
size_bytes | Maximum row group size, in bytes to set |
void cudf::io::parquet_writer_options::set_row_group_size_rows | ( | size_type | size_rows | ) |
Sets the maximum row group size, in rows.
size_rows | Maximum row group size, in rows to set |
|
inline |
Sets the level of statistics.
sf | Level of statistics requested in the output file |
Definition at line 785 of file parquet.hpp.