support empty cells in google spreadsheet (#141)

This commit is contained in:
QP Hou 2022-02-14 22:23:17 -08:00 committed by GitHub
parent 47a17f0696
commit cd271c65ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 104 additions and 57 deletions

34
Cargo.lock generated
View File

@ -212,7 +212,7 @@ dependencies = [
"tokio",
"tokio-util",
"tower",
"tower-http 0.1.2",
"tower-http",
"tower-layer",
"tower-service",
]
@ -421,7 +421,7 @@ dependencies = [
[[package]]
name = "columnq"
version = "0.4.3"
version = "0.4.5"
dependencies = [
"anyhow",
"bytes",
@ -454,7 +454,7 @@ dependencies = [
[[package]]
name = "columnq-cli"
version = "0.2.1"
version = "0.2.3"
dependencies = [
"anyhow",
"clap",
@ -1166,12 +1166,6 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "http-range-header"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29"
[[package]]
name = "httparse"
version = "1.5.1"
@ -2126,7 +2120,7 @@ dependencies = [
[[package]]
name = "roapi-http"
version = "0.5.2"
version = "0.5.4"
dependencies = [
"anyhow",
"async-process",
@ -2145,7 +2139,7 @@ dependencies = [
"snmalloc-rs",
"thiserror",
"tokio",
"tower-http 0.2.0",
"tower-http",
"tower-layer",
"tracing",
]
@ -3107,24 +3101,6 @@ dependencies = [
"tower-service",
]
[[package]]
name = "tower-http"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ee603d6e665ecc7e0f8d479eedb4626bd4726f0ee6119cee5b3a6bf184cac0"
dependencies = [
"bitflags",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-range-header",
"pin-project-lite",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower-layer"
version = "0.3.1"

View File

@ -1,6 +1,6 @@
[package]
name = "columnq-cli"
version = "0.2.2"
version = "0.2.3"
homepage = "https://github.com/roapi/roapi"
license = "MIT"
readme = "README.md"

View File

@ -1,6 +1,6 @@
[package]
name = "columnq"
version = "0.4.4"
version = "0.4.5"
homepage = "https://github.com/roapi/roapi"
license = "MIT"
authors = ["QP Hou <dave2008713@gmail.com>"]

View File

@ -141,7 +141,7 @@ fn infer_schema(rows: &[Vec<String>]) -> Schema {
let dt = dt_iter.fold(dt_init, coerce_type);
// normalize column name by replacing spaces with underscores
Field::new(&col_name.replace(" ", "_"), dt, false)
Field::new(&col_name.replace(" ", "_"), dt, true)
})
.collect();
Schema::new(fields)
@ -165,36 +165,43 @@ fn sheet_values_to_record_batch(values: &[Vec<String>]) -> Result<RecordBatch, C
Ok(match field.data_type() {
DataType::Boolean => Arc::new(
rows_iter
.map(|row| Some(parse_boolean(&row[i])))
.map(|row| row.get(i).map(|v| parse_boolean(v)))
.collect::<BooleanArray>(),
) as ArrayRef,
DataType::Int64 => Arc::new(
rows_iter
.map(|row| {
Ok(Some(row[i].parse::<i64>().map_err(|_| {
ColumnQError::GoogleSpreadsheets(format!(
"Expect int64 value, got: {}",
row[i]
))
})?))
row.get(i)
.map(|v| {
v.parse::<i64>().map_err(|_| {
ColumnQError::GoogleSpreadsheets(format!(
"Expect int64 value, got: {}",
row[i]
))
})
})
.transpose()
})
.collect::<Result<PrimitiveArray<Int64Type>, ColumnQError>>()?,
) as ArrayRef,
DataType::Float64 => Arc::new(
rows_iter
.map(|row| {
Ok(Some(row[i].parse::<f64>().map_err(|_| {
ColumnQError::GoogleSpreadsheets(format!(
"Expect float64 value, got: {}",
row[i]
))
})?))
row.get(i)
.map(|v| {
v.parse::<f64>().map_err(|_| {
ColumnQError::GoogleSpreadsheets(format!(
"Expect float64 value, got: {}",
row[i]
))
})
})
.transpose()
})
.collect::<Result<PrimitiveArray<Float64Type>, ColumnQError>>()?,
) as ArrayRef,
_ => Arc::new(rows_iter.map(|row| Some(&row[i])).collect::<StringArray>())
as ArrayRef,
_ => Arc::new(rows_iter.map(|row| row.get(i)).collect::<StringArray>()) as ArrayRef,
})
})
.collect::<Result<Vec<ArrayRef>, ColumnQError>>()?;
@ -424,15 +431,15 @@ mod tests {
assert_eq!(
schema,
Schema::new(vec![
Field::new("Address", DataType::Utf8, false),
Field::new("Image", DataType::Utf8, false),
Field::new("Landlord", DataType::Utf8, false),
Field::new("Bed", DataType::Int64, false),
Field::new("Bath", DataType::Int64, false),
Field::new("Occupied", DataType::Boolean, false),
Field::new("Monthly_Rent", DataType::Utf8, false),
Field::new("Lease_Expiration_Date", DataType::Utf8, false),
Field::new("Days_Until_Expiration", DataType::Utf8, false),
Field::new("Address", DataType::Utf8, true),
Field::new("Image", DataType::Utf8, true),
Field::new("Landlord", DataType::Utf8, true),
Field::new("Bed", DataType::Int64, true),
Field::new("Bath", DataType::Int64, true),
Field::new("Occupied", DataType::Boolean, true),
Field::new("Monthly_Rent", DataType::Utf8, true),
Field::new("Lease_Expiration_Date", DataType::Utf8, true),
Field::new("Days_Until_Expiration", DataType::Utf8, true),
])
);
}
@ -458,4 +465,68 @@ mod tests {
Ok(())
}
#[test]
fn unaligned_sheetvalue_to_record_batch() -> anyhow::Result<()> {
    // The server does not return empty cells at the end of a row, so a data
    // row may be shorter than the header row. The last row below stops after
    // the "Monthly Rent" cell.
    let header_cells = row(&[
        "Address",
        "Image",
        "Landlord",
        "Bed",
        "Bath",
        "Occupied",
        "Monthly Rent",
        "Lease Expiration Date",
        "Days Until Expiration",
    ]);
    let complete_cells = row(&[
        "Bothell, WA",
        "https://a.com/1.jpeg",
        "Roger",
        "3",
        "2",
        "FALSE",
        "$2,000",
        "10/23/2020",
        "Expired",
    ]);
    let truncated_cells = row(&[
        "Shoreline, WA",
        "https://a.com/3.jpeg",
        "Roger",
        "1",
        "1",
        "TRUE",
        "$1,200",
    ]);

    let spreadsheet = SpreadsheetValues {
        range: String::from("Properties!A1:AB1000"),
        major_dimension: String::from("ROWS"),
        values: vec![header_cells, complete_cells, truncated_cells],
    };

    let record_batch = sheet_values_to_record_batch(&spreadsheet.values)?;

    // Every header cell yields a column, even though the last row is short.
    assert_eq!(record_batch.num_columns(), 9);

    // Columns present in both data rows keep their parsed values.
    assert_eq!(
        record_batch.column(3).as_ref(),
        Arc::new(Int64Array::from(vec![3, 1])).as_ref(),
    );
    assert_eq!(
        record_batch.column(5).as_ref(),
        Arc::new(BooleanArray::from(vec![false, true])).as_ref(),
    );
    assert_eq!(
        record_batch.column(2).as_ref(),
        Arc::new(StringArray::from(vec!["Roger", "Roger"])).as_ref(),
    );

    // The cell missing from the short row is expected to come back as null.
    assert_eq!(
        record_batch.column(8).as_ref(),
        Arc::new(StringArray::from(vec![Some("Expired"), None])).as_ref(),
    );

    Ok(())
}
}

View File

@ -1,6 +1,6 @@
[package]
name = "roapi-http"
version = "0.5.3"
version = "0.5.4"
authors = ["QP Hou <dave2008713@gmail.com>"]
homepage = "https://github.com/roapi/roapi"
license = "MIT"