mirror of
https://github.com/roapi/roapi.git
synced 2026-06-05 21:04:02 +08:00
fix remote storage support for deltalake (#331)
This commit is contained in:
parent
8168d93859
commit
1938fe6c2b
@@ -3,14 +3,23 @@
|
||||
set -eux
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
|
||||
curl --fail-with-body https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o $HOME/minio-binaries/mc
|
||||
chmod +x $HOME/minio-binaries/mc
|
||||
|
||||
TEST_DATA_DIR="${SCRIPT_DIR}/../../test_data"
|
||||
|
||||
$HOME/minio-binaries/mc alias set local http://127.0.0.1:9000 minioadmin minioadmin
|
||||
$HOME/minio-binaries/mc mb local/test-data
|
||||
$HOME/minio-binaries/mc cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data
|
||||
$HOME/minio-binaries/mc cp "${TEST_DATA_DIR}"/blogs.parquet "local/test-data/blogs space.parquet"
|
||||
$HOME/minio-binaries/mc cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/blogs/
|
||||
# Resolve the MinIO client: use an `mc` already on PATH when `which` finds one;
# otherwise download the linux-amd64 build into $HOME/minio-binaries and point
# MCBIN at that copy.
MCBIN=mc
|
||||
if ! which "${MCBIN}" &>/dev/null; then
|
||||
curl --fail-with-body https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o $HOME/minio-binaries/mc
|
||||
chmod +x $HOME/minio-binaries/mc
|
||||
MCBIN=$HOME/minio-binaries/mc
|
||||
fi
|
||||
|
||||
# Register the server at 127.0.0.1:9000 under the alias `local` (credentials
# minioadmin/minioadmin), then create the `test-data` bucket.
"${MCBIN}" alias set local http://127.0.0.1:9000 minioadmin minioadmin
|
||||
"${MCBIN}" mb local/test-data
|
||||
|
||||
# Upload blogs.parquet three times: at the bucket root, under a key that
# contains a space, and under the blogs/ prefix.
"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data
|
||||
"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet "local/test-data/blogs space.parquet"
|
||||
"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/blogs/
|
||||
# populate partitioned table in S3: the same parquet file is copied under two
# year=<y>/month=<m> prefixes of partitioned_blogs/
|
||||
"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/partitioned_blogs/year=2024/month=10/
|
||||
"${MCBIN}" cp "${TEST_DATA_DIR}"/blogs.parquet local/test-data/partitioned_blogs/year=2023/month=2/
|
||||
# populate delta table: recursively copy the blogs-delta directory into the bucket
|
||||
"${MCBIN}" cp --recursive "${TEST_DATA_DIR}"/blogs-delta local/test-data/
|
||||
|
||||
@@ -2,4 +2,9 @@
|
||||
|
||||
set -eux
|
||||
|
||||
docker run -d -p 9000:9000 quay.io/minio/minio:RELEASE.2023-08-23T10-07-06Z server /data
|
||||
# Container CLI to use: docker by default, podman when `which` finds it on PATH.
CTLBIN="docker"
|
||||
if which podman &>/dev/null; then
|
||||
CTLBIN="podman"
|
||||
fi
|
||||
|
||||
# Start the pinned MinIO release detached, as a container named `minio`,
# publishing port 9000 and serving from /data.
${CTLBIN} run -d --name minio -p 9000:9000 quay.io/minio/minio:RELEASE.2023-08-23T10-07-06Z server /data
|
||||
|
||||
@@ -51,7 +51,7 @@ tokio-postgres = { version = "0.7.8", optional = true }
|
||||
version = "0.17"
|
||||
# git = "https://github.com/delta-io/delta-rs.git"
|
||||
# rev = "63c14b3716428ff65e01404c6f7e62f341c98f05"
|
||||
features = ["datafusion"]
|
||||
features = ["datafusion", "s3", "gcs", "azure"]
|
||||
default-features = false
|
||||
|
||||
[dependencies.connectorx]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Once;
|
||||
|
||||
use datafusion::arrow;
|
||||
use datafusion::arrow::array::as_string_array;
|
||||
@@ -24,6 +25,8 @@ use crate::query;
|
||||
use crate::table::TableIoSource;
|
||||
use crate::table::{self, KeyValueSource, TableSource};
|
||||
|
||||
/// One-time guard used by `ColumnQ::new_with_config`: the deltalake
/// `register_handlers` calls for aws, azure and gcp run inside
/// `START.call_once`, so they execute at most once per process.
static START: Once = Once::new();
|
||||
|
||||
pub struct ColumnQ {
|
||||
pub dfctx: SessionContext,
|
||||
schema_map: HashMap<String, arrow::datatypes::SchemaRef>,
|
||||
@@ -40,6 +43,12 @@ impl ColumnQ {
|
||||
}
|
||||
|
||||
pub fn new_with_config(config: SessionConfig) -> Self {
|
||||
START.call_once(|| {
|
||||
deltalake::aws::register_handlers(None);
|
||||
deltalake::azure::register_handlers(None);
|
||||
deltalake::gcp::register_handlers(None);
|
||||
});
|
||||
|
||||
let config = config
|
||||
.with_default_catalog_and_schema("roapi", "public")
|
||||
// TODO: fix bug in datafusion to support partitioned table when
|
||||
|
||||
Loading…
Reference in New Issue
Block a user