[go: nahoru, domu]

Skip to content

Commit

Permalink
Echo: add --trim-columns, and tests for --contiguous and --trim-colum…
Browse files Browse the repository at this point in the history
…ns (#167)

* echo: add --trim-columns, tests for --contiguous and --trim-columns
  • Loading branch information
liquidaty committed Apr 15, 2024
1 parent ac32f13 commit 75f2adf
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 7 deletions.
90 changes: 84 additions & 6 deletions app/echo.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <zsv/utils/compiler.h>
#include <zsv/utils/writer.h>
#include <zsv/utils/file.h>
#include <zsv/utils/string.h>
#include <zsv/utils/mem.h>

Expand Down Expand Up @@ -52,9 +53,13 @@ struct zsv_echo_data {

unsigned char *skip_until_prefix;
size_t skip_until_prefix_len;

char *tmp_fn;
unsigned max_nonempty_cols;
unsigned char trim_white:1;
unsigned char trim_columns:1;
unsigned char contiguous:1;
unsigned char _:6;
unsigned char _:5;
};

/**
Expand Down Expand Up @@ -87,10 +92,28 @@ void zsv_echo_get_next_overwrite(struct zsv_echo_data *data) {
}
}

/**
 * Row handler that tracks how many leading columns are needed to hold
 * every non-empty cell seen so far.
 *
 * For the current row, finds the 1-based position of the right-most cell
 * whose length is non-zero (after whitespace trimming, when `trim_white`
 * is set) and raises `data->max_nonempty_cols` to that position if it is
 * larger than the value already recorded.
 *
 * @param hook pointer to the `struct zsv_echo_data` for this run
 */
static void zsv_echo_get_max_nonempty_cols(void *hook) {
  struct zsv_echo_data *data = hook;
  const size_t cell_count = zsv_cell_count(data->parser);
  unsigned last_nonempty_pos = 0; // 0 means "no non-empty cell in this row"

  size_t col = 0;
  while(col < cell_count) {
    struct zsv_cell cell = zsv_get_cell(data->parser, col);
    // when trimming is on, emptiness is judged on the trimmed length
    if(UNLIKELY(data->trim_white))
      cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len);
    if(cell.len != 0)
      last_nonempty_pos = col + 1;
    col++;
  }

  if(last_nonempty_pos > data->max_nonempty_cols)
    data->max_nonempty_cols = last_nonempty_pos;
}

static void zsv_echo_row(void *hook) {
struct zsv_echo_data *data = hook;
size_t j = zsv_cell_count(data->parser);
if(UNLIKELY(data->trim_columns && j > data->max_nonempty_cols))
j = data->max_nonempty_cols;

if(VERY_UNLIKELY(data->row_ix == 0)) { // header
for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) {
for(size_t i = 0; i < j; i++) {
struct zsv_cell cell = zsv_get_cell(data->parser, i);
if(UNLIKELY(data->trim_white))
cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len);
Expand All @@ -99,7 +122,7 @@ static void zsv_echo_row(void *hook) {
} else if(VERY_UNLIKELY(data->contiguous && zsv_row_is_blank(data->parser))) {
zsv_abort(data->parser);
} else {
for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) {
for(size_t i = 0; i < j; i++) {
if(VERY_UNLIKELY(data->overwrite.row_ix == data->row_ix && data->overwrite.col_ix == i)) {
zsv_writer_cell(data->csv_writer, i == 0, data->overwrite.str, data->overwrite.len, 1);
zsv_echo_get_next_overwrite(data);
Expand Down Expand Up @@ -135,6 +158,7 @@ const char *zsv_echo_usage_msg[] = {
"Options:",
" -b : output with BOM",
" --trim : trim whitespace",
" --trim-columns : trim blank columns",
" --contiguous : stop output upon scanning an entire row of blank values",
" --skip-until <value>: ignore all leading rows until the first row whose first column starts with the given value ",
" --overwrite <source>: overwrite cells using given source. Source may be:",
Expand All @@ -161,6 +185,11 @@ static void zsv_echo_cleanup(struct zsv_echo_data *data) {
fclose(data->in);
if(data->o.sqlite3.db)
sqlite3_close(data->o.sqlite3.db);

if(data->tmp_fn) {
remove(data->tmp_fn);
free(data->tmp_fn);
}
}

#define zsv_echo_sqlite3_prefix "sqlite3://"
Expand Down Expand Up @@ -235,6 +264,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
writer_opts.with_bom = 1;
else if(!strcmp(arg, "--contiguous"))
data.contiguous = 1;
else if(!strcmp(arg, "--trim-columns"))
data.trim_columns = 1;
else if(!strcmp(arg, "--trim"))
data.trim_white = 1;
else if(!strcmp(arg, "--skip-until")) {
Expand Down Expand Up @@ -294,10 +325,57 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
return 1;
}

unsigned char buff[4096];
if(data.skip_until_prefix)
opts->row_handler = zsv_echo_row_skip_until;
else
else {
if(data.trim_columns) {
// first, save the file if it is stdin
if(data.in == stdin) {
if(!(data.tmp_fn = zsv_get_temp_filename("zsv_echo_XXXXXXXX"))) {
zsv_echo_cleanup(&data);
return 1;
}

FILE *f = fopen(data.tmp_fn, "wb");
if(!f) {
perror(data.tmp_fn);
zsv_echo_cleanup(&data);
return 1;
} else {
size_t bytes_read;
while((bytes_read = fread(buff, 1, sizeof(buff), data.in)) > 0)
fwrite(buff, 1, bytes_read, f);
fclose(f);
if(!(data.in = fopen(data.tmp_fn, "rb"))) {
perror(data.tmp_fn);
zsv_echo_cleanup(&data);
return 1;
}
}
}
// next, determine the max number of columns from the left that contains data
struct zsv_opts tmp_opts = *opts;
tmp_opts.row_handler = zsv_echo_get_max_nonempty_cols;
tmp_opts.stream = data.in;
tmp_opts.ctx = &data;
if(zsv_new_with_properties(&tmp_opts, custom_prop_handler, data.input_path, opts_used, &data.parser) != zsv_status_ok) {
zsv_echo_cleanup(&data);
return 1;
} else {
// find the max nonempty col count
enum zsv_status status;
while(!zsv_signal_interrupted && (status = zsv_parse_more(data.parser)) == zsv_status_ok) ;
zsv_finish(data.parser);
zsv_delete(data.parser);
data.parser = NULL;

// re-open the input again
data.in = fopen(data.tmp_fn ? data.tmp_fn : data.input_path, "rb");
}
}
opts->row_handler = zsv_echo_row;
}
opts->stream = data.in;
opts->ctx = &data;
data.csv_writer = zsv_writer_new(&writer_opts);
Expand All @@ -320,8 +398,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
}

// create a local csv writer buff for faster performance
unsigned char writer_buff[64];
zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
// unsigned char writer_buff[64];
zsv_writer_set_temp_buff(data.csv_writer, buff, sizeof(buff));

// process the input data.
zsv_handle_ctrl_c_signal();
Expand Down
17 changes: 16 additions & 1 deletion app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ test: ${TESTS}
test-prop:
EXE=${BUILD_DIR}/bin/zsv_prop${EXE} make -C prop test

test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until
test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2

test-echo1: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
Expand All @@ -124,6 +124,21 @@ test-echo-skip-until: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${PREFIX} $< --skip-until ASF ${TEST_DATA_DIR}/test/echo-skip-until.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

# Runs the echo binary with --contiguous on echo-contiguous.csv, writes the
# output to ${TMP_DIR}/$@.out and compares it with expected/$@.out.
# NOTE(review): the input path goes through `test/../../data/test`, whereas the
# sibling echo targets read directly from ${TEST_DATA_DIR}/test -- confirm
# whether this detour is intentional or can be simplified.
test-echo-contiguous: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} $< --contiguous ${TEST_DATA_DIR}/test/../../data/test/echo-contiguous.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

# Runs the echo binary with --trim-columns (without --trim) on
# echo-trim-columns.csv, writes the output to ${TMP_DIR}/$@.out and compares
# it with expected/$@.out.
test-echo-trim-columns: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} $< --trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

# Same input file as test-echo-trim-columns, but passes --trim together with
# --trim-columns; output goes to ${TMP_DIR}/$@.out and is compared with
# expected/$@.out.
test-echo-trim-columns-2: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} $< --trim --trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-echo-chars: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} echo '東京都' | $< -u '?' ${REDIRECT} ${TMP_DIR}/$@.out
Expand Down
2 changes: 2 additions & 0 deletions app/test/expected/test-echo-contiguous.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
abc,def,,,,
1,2,3,,,,
4 changes: 4 additions & 0 deletions app/test/expected/test-echo-trim-columns-2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,
1,2,3,
4,,,
5,6,7,8
4 changes: 4 additions & 0 deletions app/test/expected/test-echo-trim-columns.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,,
1,2,3,,
4,,,,
5,6,7,8,
4 changes: 4 additions & 0 deletions data/test/echo-contiguous.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,,,
1,2,3,,,,
,,,,,,,,
5,6,7,8,,,,
4 changes: 4 additions & 0 deletions data/test/echo-trim-columns.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,,,
1,2,3,, ,,
4,,,,,,,
5,6,7,8,,,,

0 comments on commit 75f2adf

Please sign in to comment.