From e449a35447cd8bc0307899048ac1efc236d59271 Mon Sep 17 00:00:00 2001
From: Vincent <vincentwarmerdam@gmail.com>
Date: Wed, 9 Aug 2023 15:59:28 +0200
Subject: [PATCH 1/3] Rename Word2VecEncoder to GensimEncoder and document it

---
 docs/API/text.md           | 20 ++++++++++++++------
 embetter/text/__init__.py  |  7 ++++---
 embetter/text/_word2vec.py |  2 +-
 tests/test_text.py         |  6 +++---
 4 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/docs/API/text.md b/docs/API/text.md
index 3efd451..98f9847 100644
--- a/docs/API/text.md
+++ b/docs/API/text.md
@@ -4,12 +4,6 @@
     options:
         members: false
 
-## BytePairEncoder
-
-::: embetter.text.BytePairEncoder
-    options:
-        members: false
-
 ## KerasNLPEncoder
 
 ::: embetter.text.KerasNLPEncoder
@@ -27,3 +21,17 @@
 ::: embetter.text.Sense2VecEncoder
     options:
         members: false
+
+## BytePairEncoder
+
+::: embetter.text.BytePairEncoder
+    options:
+        members: false
+
+
+## GensimEncoder
+
+::: embetter.text.GensimEncoder
+    options:
+        members: false
+
diff --git a/embetter/text/__init__.py b/embetter/text/__init__.py
index 0c69a13..9a4fce3 100644
--- a/embetter/text/__init__.py
+++ b/embetter/text/__init__.py
@@ -21,10 +21,11 @@
     spaCyEncoder = NotInstalled("spaCyEncoder", "spacy")
 
 try:
-    from embetter.text._word2vec import Word2VecEncoder
+    from embetter.text._word2vec import GensimEncoder
 except ModuleNotFoundError:
-    Word2VecEncoder = NotInstalled("Word2VecEncoder", "gensim")
+    GensimEncoder = NotInstalled("GensimEncoder", "gensim")
 
+try:
     from embetter.text._keras import KerasNLPEncoder
 except ModuleNotFoundError:
     KerasNLPEncoder = NotInstalled("KerasNLPEncoder", "keras_nlp")
@@ -35,6 +36,6 @@
     "Sense2VecEncoder",
     "BytePairEncoder",
     "spaCyEncoder",
-    "Word2VecEncoder",
+    "GensimEncoder",
     "KerasNLPEncoder",
 ]
diff --git a/embetter/text/_word2vec.py b/embetter/text/_word2vec.py
index 08c1e20..75df154 100644
--- a/embetter/text/_word2vec.py
+++ b/embetter/text/_word2vec.py
@@ -8,7 +8,7 @@
 from embetter.base import EmbetterBase
 
 
-class Word2VecEncoder(EmbetterBase):
+class GensimEncoder(EmbetterBase):
     """
     Encodes text using a static word embedding model. The component uses gensim's default tokenizer.
 
diff --git a/tests/test_text.py b/tests/test_text.py
index e436a55..675a8bd 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -10,7 +10,7 @@
 from embetter.text import (
     BytePairEncoder,
     SentenceEncoder,
-    Word2VecEncoder,
+    GensimEncoder,
     spaCyEncoder,
 )
 from embetter.utils import cached
@@ -30,7 +30,7 @@ def test_word2vec(setting):
     model = Word2Vec(
         sentences=sentences, vector_size=vector_size, window=3, min_count=1
     )
-    encoder = Word2VecEncoder(model, agg=setting)
+    encoder = GensimEncoder(model, agg=setting)
     output = encoder.fit_transform(test_sentences)
     assert isinstance(output, np.ndarray)
     out_dim = vector_size if setting != "both" else vector_size * 2
@@ -38,7 +38,7 @@ def test_word2vec(setting):
     # This tests whether it can load the model from disk
     with tempfile.NamedTemporaryFile() as fp:
         model.save(fp)
-        encoder = Word2VecEncoder(fp.name, agg=setting)
+        encoder = GensimEncoder(fp.name, agg=setting)
         encoder.transform(test_sentences)
     assert repr(encoder)
 

From 4ac67c3fca45f0434b530d462958b7fee0491e9b Mon Sep 17 00:00:00 2001
From: Vincent <vincentwarmerdam@gmail.com>
Date: Wed, 9 Aug 2023 16:00:03 +0200
Subject: [PATCH 2/3] update landing

---
 docs/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/index.md b/docs/index.md
index d0b7f5d..178321b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,7 +6,7 @@
 
 <br> 
 
-Embetter implements scikit-learn compatible embeddings for computer vision and text. It should make it very easy to quickly build proof of concepts using scikit-learn pipelines and, in particular, should help with [bulk labelling](https://www.youtube.com/watch?v=gDk7_f3ovIk). It's a also meant to play nice with [bulk](https://github.com/koaning/bulk) and [scikit-partial](https://github.com/koaning/scikit-partial).
+Embetter implements scikit-learn compatible embeddings for computer vision and text. It should make it very easy to quickly build proofs of concept using scikit-learn pipelines and, in particular, should help with [bulk labelling](https://www.youtube.com/watch?v=gDk7_f3ovIk). It's also meant to play nice with [bulk](https://github.com/koaning/bulk) and [scikit-partial](https://github.com/koaning/scikit-partial), but it can also be used together with your favorite ANN solution like [weaviate](https://weaviate.io/), [chromadb](https://www.trychroma.com/) and [hnswlib](https://github.com/nmslib/hnswlib).
 
 ## Install 
 

From ec11107175df4b76550bbd7ea7c1bf4f91a386ad Mon Sep 17 00:00:00 2001
From: Vincent <vincentwarmerdam@gmail.com>
Date: Wed, 9 Aug 2023 16:12:21 +0200
Subject: [PATCH 3/3] more lightweight

---
 .github/workflows/style.yml | 4 +---
 README.md                   | 2 +-
 docs/index.md               | 3 ++-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
index 41325ad..58aa07d 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -22,10 +22,8 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
         cache: 'pip'
-    - name: Install Base Dependencies
-      run: python -m pip install -e .
     - name: Install Testing Dependencies
-      run: make install
+      run: python -m pip install black interrogate
     - name: Interrogate
       if: always()
       run: make interrogate
diff --git a/README.md b/README.md
index 3887af5..3dbccf1 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ from embetter.grab import ColumnGrabber
 from embetter.vision import ImageLoader, TimmEncoder, ColorHistogramEncoder
 
 # Representations for text
-from embetter.text import SentenceEncoder, Sense2VecEncoder, BytePairEncoder, spaCyEncoder, Word2VecEncoder
+from embetter.text import SentenceEncoder, Sense2VecEncoder, BytePairEncoder, spaCyEncoder, GensimEncoder
 
 # Representations from multi-modal models
 from embetter.multi import ClipEncoder
diff --git a/docs/index.md b/docs/index.md
index 178321b..151fcfc 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -25,6 +25,7 @@ python -m pip install "embetter[sentence-tfm]"
 python -m pip install "embetter[spacy]"
 python -m pip install "embetter[sense2vec]"
 python -m pip install "embetter[bpemb]"
+python -m pip install "embetter[gensim]"
 python -m pip install "embetter[vision]"
 python -m pip install "embetter[all]"
 ```
@@ -41,7 +42,7 @@ from embetter.grab import ColumnGrabber
 from embetter.vision import ImageLoader, TimmEncoder, ColorHistogramEncoder
 
 # Representations for text
-from embetter.text import SentenceEncoder, Sense2VecEncoder, BytePairEncoder, spaCyEncoder
+from embetter.text import SentenceEncoder, Sense2VecEncoder, BytePairEncoder, spaCyEncoder, GensimEncoder
 
 # Representations from multi-modal models
 from embetter.multi import ClipEncoder