From 09254c935228836d35bab43d487a22ae9c47c2ca Mon Sep 17 00:00:00 2001 From: 0xff-dev Date: Mon, 29 Jan 2024 14:47:27 +0800 Subject: [PATCH] feat: arctl support merge files --- Review note: the merge file is created with os.Create, matching the per-file branch, so an existing merge file is truncated instead of keeping stale trailing rows from a previous run. pkg/arctl/eval.go | 51 +++++++++++++++++++++++++++--------- pkg/evaluation/evaluation.go | 25 ++++++++++++------ 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/pkg/arctl/eval.go b/pkg/arctl/eval.go index ac12c3719..55b02fcfb 100644 --- a/pkg/arctl/eval.go +++ b/pkg/arctl/eval.go @@ -113,11 +113,15 @@ arctl -narcadia eval --rag= } func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra.Command { - var inputDir string - var questionColumn string - var groundTruthsColumn string - var outputMethod string - var outputDir string + var ( + inputDir string + questionColumn string + groundTruthsColumn string + outputMethod string + outputDir string + mergeFileName string + merge bool + ) cmd := &cobra.Command{ Use: "gen_test_dataset", @@ -151,6 +155,18 @@ func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra return err } + var ( + csvWriter *csv.Writer + writeHeader = true + ) + if merge { + mergeFile, err := os.Create(mergeFileName) + if err != nil { + return err + } + defer mergeFile.Close() + csvWriter = csv.NewWriter(mergeFile) + } // read files from input directory err = filepath.WalkDir(inputDir, func(path string, d fs.DirEntry, err error) error { if err != nil { @@ -162,13 +178,16 @@ func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra var output evaluation.Output switch outputMethod { case "csv": - outputCSVFile, err := os.Create(strings.Replace(path, d.Name(), fmt.Sprintf("ragas-%s", d.Name()), 1)) - if err != nil { - return err - } - defer outputCSVFile.Close() csvOutput := &evaluation.CSVOutput{ - W: csv.NewWriter(outputCSVFile), + W: csvWriter, + } + if !merge { + outputCSVFile, err := os.Create(strings.Replace(path, d.Name(), fmt.Sprintf("ragas-%s", d.Name()), 
1)) + if err != nil { + return err + } + defer outputCSVFile.Close() + csvOutput.W = csv.NewWriter(outputCSVFile) } defer csvOutput.W.Flush() output = csvOutput @@ -176,12 +195,18 @@ func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra output = &evaluation.PrintOutput{} } // read file from dataset - return GenDatasetOnSingleFile(ctx, kubeClient, app, + err = GenDatasetOnSingleFile(ctx, kubeClient, app, path, evaluation.WithQuestionColumn(questionColumn), evaluation.WithGroundTruthsColumn(groundTruthsColumn), evaluation.WithOutput(output), + evaluation.WithWriteHeader(!merge || writeHeader), ) + if err != nil { + return err + } + writeHeader = false + return nil }) return err }, @@ -195,6 +220,8 @@ func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra cmd.Flags().StringVar(&questionColumn, "question-column", "q", "The column name which provides questions") cmd.Flags().StringVar(&groundTruthsColumn, "ground-truths-column", "a", "The column name which provides the answers") cmd.Flags().StringVar(&outputMethod, "output", "", "The way to output the generated dataset rows.We support two ways: \n - stdout: print row \n - csv: save row to csv file") + cmd.Flags().BoolVar(&merge, "merge", false, "Whether to merge all generated test data into a single file") + cmd.Flags().StringVar(&mergeFileName, "merge-file", "ragas.csv", "name of the merged document") return cmd } diff --git a/pkg/evaluation/evaluation.go b/pkg/evaluation/evaluation.go index f526dbf04..25de7c9eb 100644 --- a/pkg/evaluation/evaluation.go +++ b/pkg/evaluation/evaluation.go @@ -57,14 +57,16 @@ func NewRagasDatasetGenerator(ctx context.Context, cli dynamic.Interface, app *v } // output header - err := genOpts.output.Output(RagasDataRow{ - Question: "question", - GroundTruths: []string{"ground_truths"}, - Contexts: []string{"contexts"}, - Answer: "answer", - }) - if err != nil { - return nil, err + if genOpts.writeHeader { + err := 
genOpts.output.Output(RagasDataRow{ + Question: "question", + GroundTruths: []string{"ground_truths"}, + Contexts: []string{"contexts"}, + Answer: "answer", + }) + if err != nil { + return nil, err + } } runapp, err := appruntime.NewAppOrGetFromCache(ctx, cli, app) @@ -81,6 +83,8 @@ type genOptions struct { groundTruthsColumn string output Output + + writeHeader bool } func defaultGenOptions() *genOptions { @@ -91,6 +95,11 @@ func defaultGenOptions() *genOptions { } } +func WithWriteHeader(writeHeader bool) GenOptions { + return func(genOpts *genOptions) { + genOpts.writeHeader = writeHeader + } +} func WithQuestionColumn(questionColumn string) GenOptions { return func(genOpts *genOptions) { genOpts.questionColumn = questionColumn