implementation des features

refactor
Nicolas Sanchez 4 weeks ago
parent 6d446b4767
commit a11ca56849

@ -5,11 +5,14 @@ edition = "2024"
[dependencies] [dependencies]
clap = { version = "4.5.48", features = ["derive"] } clap = { version = "4.5.48", features = ["derive"] }
polars = {version = "0.53.0", optional = true} polars = {version = "0.53.0", optional = true, features=["lazy"]}
rhai = "1.23.6" rhai = "1.23.6"
umya-spreadsheet = "2.3.3" umya-spreadsheet = "2.3.3"
[features] [features]
default = ["csv"] default = ["csv", "lazyframe"]
csv = [] csv = []
lazyframe = ["dep:polars"] lazyframe = ["dep:polars"]
[package.metadata.docs.rs]
all-features = true

@ -88,7 +88,7 @@ impl fmt::Display for NumberRows {
} }
#[derive(Parser, Debug, Clone)] #[derive(Parser, Debug, Clone)]
pub struct RawArguments { pub(crate) struct RawArguments {
/// Path to the xlsx file /// Path to the xlsx file
#[arg()] #[arg()]
pub file: String, pub file: String,
@ -128,45 +128,47 @@ pub struct RawArguments {
/// replacement for end of line character inside cells /// replacement for end of line character inside cells
#[arg(short = 'R', long)] #[arg(short = 'R', long)]
pub replace_end_of_line_by: Option<String>, pub replace_end_of_line_by: Option<String>,
/// a rhai function to filter lines
#[arg(long)]
pub filter: Option<String>,
} }
/// Arguments structure is used to parse shell arguments (using clap)
///
/// Argument is also implicitly used by the XlsxReader .with_...() methods.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Arguments { pub struct Arguments {
/// Path to the xlsx file /// Path to the xlsx file
pub file: String, pub(crate) file: String,
/// List all worksheets /// List all worksheets in the xlsx file
pub list_worksheets: bool, pub(crate) list_worksheets: bool,
/// Separator /// Choose worksheet as name or number
pub separator: char, pub(crate) worksheet: String,
/// Replace separator char in cells by
pub replace_separator_by: Option<String>,
/// include hidden lines to output
// pub include_hidden: IncludeHidden,
pub include_hidden_rows: bool,
pub include_hidden_columns: bool,
/// If merged cells, fill horizontally, vertically, both, or none
// pub fill_merged_cells: FillMergedCells,
pub fill_merged_cells_horizontal: bool,
pub fill_merged_cells_vertical: bool,
/// Chosse worksheet
pub worksheet: String,
/// Use the worksheet that was active when the file was last saved /// Use the worksheet that was active when the file was last saved
pub active_worksheet: bool, pub(crate) active_worksheet: bool,
/// Trim white spaces at end of cells /// Trim white spaces at end of cells
pub trim: TrimSpaces, /// Include hidden lines to output
// pub(crate) include_hidden: IncludeHidden,
pub(crate) include_hidden_rows: bool,
/// Include hidden columns to output
pub(crate) include_hidden_columns: bool,
/// If merged cells, fill horizontally
// pub(crate) fill_merged_cells: FillMergedCells,
pub(crate) fill_merged_cells_horizontal: bool,
/// If merged cells, fill vertically
pub(crate) fill_merged_cells_vertical: bool,
/// Trim white spaces at end of cells
pub(crate) trim: TrimSpaces,
/// number the rows in first cell of each line /// number the rows in first cell of each line
pub number_rows: NumberRows, pub(crate) number_rows: NumberRows,
/// avoid nth first rows of xlsx file /// Avoid nth first rows of xlsx file
pub skip_rows: u32, pub(crate) skip_rows: u32,
/// change end of line character ///# csv output specific options
pub end_of_line: String, /// Separator for output
/// replacement for end of line character inside cells pub(crate) separator: char,
pub replace_end_of_line_by: Option<String>, /// Replace separator char in cells by another
/// a rhai function to filter lines pub(crate) replace_separator_by: Option<String>,
pub filter: Option<String>, /// Set the end of line string, defaults to "\n"
pub(crate) end_of_line: String,
/// replacement for end of line string inside cells
pub(crate) replace_end_of_line_by: Option<String>,
} }
impl Arguments { impl Arguments {
@ -194,7 +196,6 @@ impl Default for Arguments {
skip_rows: Default::default(), skip_rows: Default::default(),
end_of_line: String::from("\n"), end_of_line: String::from("\n"),
replace_end_of_line_by: Default::default(), replace_end_of_line_by: Default::default(),
filter: Default::default(),
} }
} }
} }
@ -232,7 +233,6 @@ impl From<RawArguments> for Arguments {
skip_rows: raw.skip_rows, skip_rows: raw.skip_rows,
end_of_line: raw.end_of_line, end_of_line: raw.end_of_line,
replace_end_of_line_by: raw.replace_end_of_line_by, replace_end_of_line_by: raw.replace_end_of_line_by,
filter: raw.filter,
} }
} }
} }

@ -5,6 +5,7 @@ pub enum Error {
Msg(String), Msg(String),
XlsxError(String), XlsxError(String),
IoError(String), IoError(String),
PolarsError(String),
} }
impl fmt::Display for Error { impl fmt::Display for Error {
@ -13,6 +14,7 @@ impl fmt::Display for Error {
Error::Msg(msg) => write!(f, "XlsxToCsvError: {msg}"), Error::Msg(msg) => write!(f, "XlsxToCsvError: {msg}"),
Error::XlsxError(msg) => write!(f, "XlsxError: {msg}"), Error::XlsxError(msg) => write!(f, "XlsxError: {msg}"),
Error::IoError(msg) => write!(f, "IoError: {msg}"), Error::IoError(msg) => write!(f, "IoError: {msg}"),
Error::PolarsError(msg) => write!(f, "PolarsError: {msg}"),
} }
} }
} }

@ -3,7 +3,7 @@ pub mod error;
pub mod xlsx; pub mod xlsx;
pub mod xlsx_builder; pub mod xlsx_builder;
//#[cfg(feature = "csv")] #[cfg(feature = "csv")]
pub mod xlsx_to_csv; pub mod xlsx_to_csv;
#[cfg(feature = "lazyframe")] #[cfg(feature = "lazyframe")]

@ -1,9 +1,13 @@
use xlsxtocsv::{error::Error, xlsx::XlsxReader, xlsx_to_csv::Output}; pub use xlsxtocsv::{error::Error, xlsx::XlsxReader};
fn main() -> Result<(), Error> { fn main() -> Result<(), Error> {
XlsxReader::new("noms.xlsx") let lf = XlsxReader::new("noms.xlsx")
.with_active_worksheet() .with_active_worksheet()
.to_csv(Output::Stdout)?; .to_lazyframe()?;
let df = lf.collect()?;
println!("{df}");
Ok(()) Ok(())
} }

@ -6,12 +6,13 @@ use umya_spreadsheet::{Cell, Range, Spreadsheet, Worksheet, reader};
use crate::arguments::{Arguments, IntoArgs, NumberRows, TrimSpaces}; use crate::arguments::{Arguments, IntoArgs, NumberRows, TrimSpaces};
use crate::error::Error; use crate::error::Error;
/// A reader for .xlsx files with extended options.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct XlsxReader { pub struct XlsxReader {
pub(crate) args: Arguments, pub(crate) args: Arguments,
pub(crate) book: Option<Spreadsheet>, pub(crate) book: Option<Spreadsheet>,
pub(crate) sheet_index: Option<usize>, pub(crate) sheet_index: Option<usize>,
pub worksheet_dimensions: RefCell<Option<(u32, u32)>>, pub(crate) worksheet_dimensions: RefCell<Option<(u32, u32)>>,
} }
impl XlsxReader { impl XlsxReader {

@ -0,0 +1,51 @@
use crate::{error::Error, xlsx::XlsxReader};
use polars::prelude::*;
impl From<PolarsError> for Error {
fn from(value: PolarsError) -> Self {
Error::PolarsError(value.to_string())
}
}
impl XlsxReader {
    /// Consumes the reader and returns the worksheet content as a polars
    /// [`LazyFrame`] in which every cell is a string.
    ///
    /// Columns are named `column_0`, `column_1`, … up to the column count
    /// reported by `get_worksheet_dimensions()`. Rows are fetched one by one
    /// with `get_row` and flushed into the columns in batches of
    /// `CHUNK_SIZE` rows.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `finish()`, and returns
    /// `Error::PolarsError` if polars rejects the assembled columns (for
    /// example when their length does not match the reported row count).
    pub fn to_lazyframe(mut self) -> Result<LazyFrame, Error> {
        self.finish()?;
        let (num_cols, num_rows) = self.get_worksheet_dimensions();

        // One initially empty string column per worksheet column.
        let mut columns: Vec<Column> = (0..num_cols)
            .map(|i| Column::new(format!("column_{i}").into(), Vec::<String>::new()))
            .collect();

        // Rows are buffered and appended in batches rather than one at a time.
        const CHUNK_SIZE: usize = 1000;
        let mut chunk: Vec<Vec<String>> = Vec::with_capacity(CHUNK_SIZE);
        for current_row in 0..num_rows {
            chunk.push(self.get_row(current_row));
            if chunk.len() >= CHUNK_SIZE {
                append_chunk(&mut columns, &chunk);
                chunk.clear();
            }
        }
        // Flush the final, partially filled batch.
        if !chunk.is_empty() {
            append_chunk(&mut columns, &chunk);
        }

        // `?` goes through `From<PolarsError> for Error`, so the failure is
        // reported as `Error::PolarsError` instead of a generic message.
        let df = DataFrame::new(num_rows as usize, columns)?;
        Ok(df.lazy())
    }
}
/// Appends a batch of rows to the per-column accumulators.
///
/// `chunk` is row-major: for every column index `col_idx`, the cell at that
/// index of each row is copied, in row order, onto the end of
/// `columns[col_idx]`. A row that has fewer cells than there are columns
/// contributes an empty string for each missing cell, so all columns keep
/// growing by exactly `chunk.len()` values. Cells beyond `columns.len()` are
/// ignored.
///
/// # Panics
///
/// Panics if polars refuses the append. This assumes `columns` hold string
/// data, as the ones built in `to_lazyframe` do, so the data types match.
fn append_chunk(columns: &mut [Column], chunk: &[Vec<String>]) {
    for (col_idx, column) in columns.iter_mut().enumerate() {
        // Checked access: a short row must not abort the whole conversion.
        let chunk_data: Vec<String> = chunk
            .iter()
            .map(|row| row.get(col_idx).cloned().unwrap_or_default())
            .collect();
        let chunk_column = Column::new("temp".into(), chunk_data);
        column
            .append(&chunk_column)
            .expect("appending a string column to a string column should not fail");
    }
}
Loading…
Cancel
Save