From 82211b91a2621c634ddec46f2d1b92081a99a62e Mon Sep 17 00:00:00 2001 From: lintool Date: Sun, 22 Sep 2024 09:29:21 -0400 Subject: [PATCH 1/2] Tweak parquet yaml configs. --- .../index/generator/ParquetDenseVectorDocumentGenerator.java | 5 ----- ...-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...r-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...upstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...upstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...dupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...ack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...upstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...ack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...adupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...qadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...tack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...stack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...ir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...eir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- .../beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...ir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ....0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- ...ebis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml | 2 +- 30 files changed, 29 insertions(+), 34 deletions(-) diff --git a/src/main/java/io/anserini/index/generator/ParquetDenseVectorDocumentGenerator.java b/src/main/java/io/anserini/index/generator/ParquetDenseVectorDocumentGenerator.java index 03d1182e8..206e0e9ef 100644 --- a/src/main/java/io/anserini/index/generator/ParquetDenseVectorDocumentGenerator.java +++ b/src/main/java/io/anserini/index/generator/ParquetDenseVectorDocumentGenerator.java @@ -49,8 +49,6 @@ public class ParquetDenseVectorDocumentGenerator imple public Document createDocument(T src) throws InvalidDocumentException { try { - LOG.info("Processing document ID: " + src.id() + " with thread: " + Thread.currentThread().getName()); - // Parse vector data from document contents float[] contents = parseVectorFromString(src.contents()); if (contents == null || contents.length == 0) { @@ -58,15 +56,12 @@ public Document createDocument(T src) throws InvalidDocumentException { throw new InvalidDocumentException(); } - LOG.info("Vector length: " + contents.length + " for document ID: " + src.id()); - // Create and populate the Lucene document final Document document = new Document(); document.add(new StringField(Constants.ID, src.id(), Field.Store.YES)); document.add(new BinaryDocValuesField(Constants.ID, new BytesRef(src.id()))); document.add(new KnnFloatVectorField(Constants.VECTOR, contents, VectorSimilarityFunction.DOT_PRODUCT)); - LOG.info("Document created for ID: " + src.id()); return document; } catch (Exception e) { diff --git a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml index 54905ab44..dc5832bfd 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-arguana.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/arguana.parquet -index_path: indexes/parquet/arguana +index_path: indexes/lucene-flat.beir-v1.0.0-arguana.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml index e58ac778a..1d969a082 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-bioasq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/bioasq.parquet -index_path: indexes/parquet/bioasq +index_path: indexes/lucene-flat.beir-v1.0.0-bioasq.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml index 4ffa90bf9..267024222 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-climate-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/climate-fever.parquet -index_path: indexes/parquet/climate-fever +index_path: indexes/lucene-flat.beir-v1.0.0-climate-fever.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml index 42e6582a7..913fe0c8b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-android.parquet -index_path: indexes/parquet/cqadupstack-android +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml index 868d8c696..b026e0364 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-english.parquet -index_path: indexes/parquet/cqadupstack-english +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml index 94c97dd95..89cca2971 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gaming.parquet -index_path: indexes/parquet/cqadupstack-gaming +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml index ab56e7b69..a503c1787 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-gis.parquet -index_path: indexes/parquet/cqadupstack-gis +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml index c4fe8bf38..a65ad90c0 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-mathematica.parquet -index_path: indexes/parquet/cqadupstack-mathematica +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml index 433d2d6eb..0dc97cad2 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-physics.parquet -index_path: indexes/parquet/cqadupstack-physics +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml index e8eef58f3..77477da46 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-programmers.parquet -index_path: indexes/parquet/cqadupstack-programmers +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml index 9a83f02c8..501f90a88 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-stats.parquet -index_path: indexes/parquet/cqadupstack-stats +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml index 3d6709246..ef840a196 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-tex.parquet -index_path: indexes/parquet/cqadupstack-tex +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml index cde499810..e9ee1f42d 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-unix.parquet -index_path: indexes/parquet/cqadupstack-unix +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml index fa0d2f7f0..69af15587 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-webmasters.parquet -index_path: indexes/parquet/cqadupstack-webmasters +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml index 9018e53cd..696407196 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/cqadupstack-wordpress.parquet -index_path: indexes/parquet/cqadupstack-wordpress +index_path: indexes/lucene-flat.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml index b0ddd8eab..99983fbd6 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/dbpedia-entity.parquet -index_path: indexes/parquet/dbpedia-entity +index_path: indexes/lucene-flat.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml index fdf25e2b9..62cf09c15 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-fever.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fever.parquet -index_path: indexes/parquet/fever +index_path: indexes/lucene-flat.beir-v1.0.0-fever.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml index 33060e19a..37e53a4c0 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-fiqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/fiqa.parquet -index_path: indexes/parquet/fiqa +index_path: indexes/lucene-flat.beir-v1.0.0-fiqa.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml index 7ea4a5336..75a7e9be0 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-hotpotqa.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/hotpotqa.parquet -index_path: indexes/parquet/hotpotqa +index_path: indexes/lucene-flat.beir-v1.0.0-hotpotqa.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml index 246e9fb66..e57ec5912 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-nfcorpus.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nfcorpus.parquet -index_path: indexes/parquet/nfcorpus +index_path: indexes/lucene-flat.beir-v1.0.0-nfcorpus.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml index 52e4df12a..ce7cb88b6 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-nq.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/nq.parquet -index_path: indexes/parquet/nq +index_path: indexes/lucene-flat.beir-v1.0.0-nq.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml index cfd995c55..c47b4a652 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-quora.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/quora.parquet -index_path: indexes/parquet/quora +index_path: indexes/lucene-flat.beir-v1.0.0-quora.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml index f25d23e3a..7ba7a0584 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-robust04.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/robust04.parquet -index_path: indexes/parquet/robust04 +index_path: indexes/lucene-flat.beir-v1.0.0-robust04.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml index fbbd7cf60..0f03cfbf2 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-scidocs.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scidocs.parquet -index_path: indexes/parquet/scidocs +index_path: indexes/lucene-flat.beir-v1.0.0-scidocs.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml index 2b62dbaec..667f7ed98 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-scifact.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/scifact.parquet -index_path: indexes/parquet/scifact +index_path: indexes/lucene-flat.beir-v1.0.0-scifact.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml index 492abafd1..6def8c01f 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-signal1m.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/signal1m.parquet -index_path: indexes/parquet/signal1m +index_path: indexes/lucene-flat.beir-v1.0.0-signal1m.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml index 0a52b0e81..0579a55d5 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-trec-covid.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-covid.parquet -index_path: indexes/parquet/trec-covid +index_path: indexes/lucene-flat.beir-v1.0.0-trec-covid.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml index 66622226b..4568c23a8 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-trec-news.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/trec-news.parquet -index_path: indexes/parquet/trec-news +index_path: indexes/lucene-flat.beir-v1.0.0-trec-news.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml index 34aefbaad..0081a05b0 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.parquet.flat.cached.yaml @@ -1,7 +1,7 @@ corpus: beir-v1.0.0-webis-touche2020.bge-base-en-v1.5 corpus_path: collections/beir-v1.0.0/bge-base-en-v1.5/webis-touche2020.parquet -index_path: indexes/parquet/webis-touche2020 +index_path: indexes/lucene-flat.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5/ index_type: flat collection_class: ParquetDenseVectorCollection generator_class: ParquetDenseVectorDocumentGenerator From 0a41a5031a835b75516b44d93d2a33689fb10ca4 Mon Sep 17 00:00:00 2001 From: lintool Date: Sun, 22 Sep 2024 16:03:40 -0400 Subject: [PATCH 2/2] tweak jvm settings. --- bin/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run.sh b/bin/run.sh index 43bb461cc..d0d07d61b 100755 --- a/bin/run.sh +++ b/bin/run.sh @@ -1,3 +1,3 @@ #!/bin/sh -java -cp `ls target/*-fatjar.jar` -Xms512M -Xmx64G --add-modules jdk.incubator.vector $@ \ No newline at end of file +java -cp `ls target/*-fatjar.jar` -Xms512M -Xmx128G --add-modules jdk.incubator.vector $@