std::shared_ptr<arrow::Table> generate_table()
{
std::shared_ptr<arrow::DataType> static_facts_type(new arrow::StructType(
{
arrow::field("str", arrow::utf8(), false)
}
));
arrow::StringBuilder strbuilder;
PARQUET_THROW_NOT_OK(strbuilder.Append("some"));
PARQUET_THROW_NOT_OK(strbuilder.Append("string"));
PARQUET_THROW_NOT_OK(strbuilder.Append("content"));
PARQUET_THROW_NOT_OK(strbuilder.Append("in"));
PARQUET_THROW_NOT_OK(strbuilder.Append("rows"));
std::shared_ptr<arrow::Array> strarray;
PARQUET_THROW_NOT_OK(strbuilder.Finish(&strarray));
std::shared_ptr<arrow::Array> struct_array(new arrow::StructArray(static_facts_type, 5, {strarray}));
std::shared_ptr<arrow::Schema> schema = arrow::schema(
{
arrow::field("MyStruct", static_facts_type)
});
return arrow::Table::Make(schema, {struct_array});
}
// #1 Write out the data as a Parquet file
//
// Writes `table` to "test.parquet" (a relative path). Any non-OK status from
// opening, writing or closing the file is reported via PARQUET_THROW_NOT_OK.
void write_parquet_file(const arrow::Table& table) {
  // Size of a RowGroup in the parquet file. Normally you would choose this to
  // be rather large, but for the example we use a small value so that the
  // file contains multiple RowGroups.
  static const int kRowGroupSize = 1;

  std::shared_ptr<arrow::io::FileOutputStream> outfile;
  PARQUET_THROW_NOT_OK(
      arrow::io::FileOutputStream::Open("test.parquet", &outfile));

  PARQUET_THROW_NOT_OK(parquet::arrow::WriteTable(
      table, arrow::default_memory_pool(), outfile, kRowGroupSize));

  // Close explicitly so that a failure while closing the file is checked
  // here instead of being left to the stream's destructor.
  PARQUET_THROW_NOT_OK(outfile->Close());
}
// Entry point: builds the sample table and writes it out as a Parquet file.
int main(int argc, char** argv) {
  // The command-line arguments are not used by this example.
  (void)argc;
  (void)argv;

  const std::shared_ptr<arrow::Table> sample = generate_table();
  write_parquet_file(*sample);
  return 0;
}