diff --git a/ci/scripts/populate_minio.sh b/ci/scripts/populate_minio.sh index b10d7d2..87c507f 100644 --- a/ci/scripts/populate_minio.sh +++ b/ci/scripts/populate_minio.sh @@ -3,14 +3,23 @@ set -eux SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" - -curl --fail-with-body https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o $HOME/minio-binaries/mc -chmod +x $HOME/minio-binaries/mc - TEST_DATA_DIR="${SCRIPT_DIR}/../../test_data" -$HOME/minio-binaries/mc alias set local http://127.0.0.1:9000 minioadmin minioadmin -$HOME/minio-binaries/mc mb local/test-data -$HOME/minio-binaries/mc cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data -$HOME/minio-binaries/mc cp "${TEST_DATA_DIR}"/blogs.parquet "local/test-data/blogs space.parquet" -$HOME/minio-binaries/mc cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/blogs/ +MCBIN=mc +if ! which "${MCBIN}" &>/dev/null; then + curl --fail-with-body https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o $HOME/minio-binaries/mc + chmod +x $HOME/minio-binaries/mc + MCBIN=$HOME/minio-binaries/mc +fi + +"${MCBIN}" alias set local http://127.0.0.1:9000 minioadmin minioadmin +"${MCBIN}" mb local/test-data + +"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data +"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet "local/test-data/blogs space.parquet" +"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/blogs/ +# populate partitioned table in S3 +"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/partitioned_blogs/year=2024/month=10/ +"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/partitioned_blogs/year=2023/month=2/ +# populate delta table +"${MCBIN}" cp --recursive "${TEST_DATA_DIR}"/blogs-delta local/test-data/ diff --git a/ci/scripts/setup_minio.sh b/ci/scripts/setup_minio.sh index 81b524f..171752e 100644 --- a/ci/scripts/setup_minio.sh +++ b/ci/scripts/setup_minio.sh @@ -2,4 +2,9 @@ set -eux -docker run -d -p 9000:9000 quay.io/minio/minio:RELEASE.2023-08-23T10-07-06Z server /data +CTLBIN="docker" +if which podman &>/dev/null; then + CTLBIN="podman" +fi + +${CTLBIN} run -d --name minio -p 9000:9000 quay.io/minio/minio:RELEASE.2023-08-23T10-07-06Z server /data diff --git a/columnq/Cargo.toml b/columnq/Cargo.toml index affad96..37d6d91 100644 --- a/columnq/Cargo.toml +++ b/columnq/Cargo.toml @@ -51,7 +51,7 @@ tokio-postgres = { version = "0.7.8", optional = true } version = "0.17" # git = "https://github.com/delta-io/delta-rs.git" # rev = "63c14b3716428ff65e01404c6f7e62f341c98f05" -features = ["datafusion"] +features = ["datafusion", "s3", "gcs", "azure"] default-features = false [dependencies.connectorx] diff --git a/columnq/src/columnq.rs b/columnq/src/columnq.rs index 7e484d3..1519564 100644 --- a/columnq/src/columnq.rs +++ b/columnq/src/columnq.rs @@ -1,6 +1,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::Arc; +use std::sync::Once; use datafusion::arrow; use datafusion::arrow::array::as_string_array; @@ -24,6 +25,8 @@ use crate::query; use crate::table::TableIoSource; use crate::table::{self, KeyValueSource, TableSource}; +static START: Once = Once::new(); + pub struct ColumnQ { pub dfctx: SessionContext, schema_map: HashMap, @@ -40,6 +43,12 @@ impl ColumnQ { } pub fn new_with_config(config: SessionConfig) -> Self { + START.call_once(|| { + deltalake::aws::register_handlers(None); + deltalake::azure::register_handlers(None); + deltalake::gcp::register_handlers(None); + }); + let config = config .with_default_catalog_and_schema("roapi", "public") // TODO: fix bug in datafusion to support partitioned table when