feat: upgrade ragas_once to match langchain 0.1 #783

Closed · wants to merge 1 commit
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "basic"
}
8 changes: 4 additions & 4 deletions pypi/ragas_once/pyproject.toml
@@ -2,8 +2,8 @@
 requires = [
     "setuptools>=61.0",
     "setuptools-scm",
-    "ragas==0.0.22",
-    "langchain==0.0.354"
+    "ragas",
+    "langchain"
 ]
 build-backend = "setuptools.build_meta"

@@ -22,8 +22,8 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "langchain==0.0.354",
-    "ragas==0.0.22",
+    "langchain",
+    "ragas",
 ]

 [project.scripts]
13 changes: 11 additions & 2 deletions pypi/ragas_once/src/ragas_once/cli.py
@@ -35,7 +35,14 @@ def main():
         help="Specifies the base URL for the API. Defaults to OpenAI.",
     )
     parser.add_argument(
-        "--apikey", type=str, help="Specifies the API key to authenticate requests."
+        "--embeddingbase",
+        type=str,
+        help="Specifies the base URL for the embedding API. Will use the same base as the LLM API if not set.",
+    )
+    parser.add_argument(
+        "--apikey",
+        type=str,
+        help="Specifies the API key to authenticate requests."
     )
     parser.add_argument(
         "--llm",
@@ -58,14 +65,16 @@
     parser.add_argument(
         "--dataset",
         type=str,
-        help="Specifies the path to the dataset for evaluation. Will use fiqa dataset if not set.",
+        help="Specifies the path to the dataset for evaluation.",
+        required=True,
     )

     args = parser.parse_args()

     # Initialize ragas_once with provided arguments
     once = RagasEval(
         api_base=args.apibase,
+        embedding_base=args.embeddingbase,
         api_key=args.apikey,
         llm_model=args.llm,
         embedding_model=args.embedding,
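
For orientation, a minimal sketch of how the new flag set could be exercised by driving main() directly; every value below (endpoints, key, dataset path) is a placeholder, and --embeddingbase simply falls back to --apibase when omitted.

import sys

from ragas_once.cli import main

# Illustrative only: all argument values are placeholders.
sys.argv = [
    "ragas-once",
    "--apibase", "http://localhost:8000/v1",
    "--embeddingbase", "http://localhost:8001/v1",  # new: separate embedding endpoint
    "--apikey", "sk-placeholder",
    "--dataset", "testset.csv",                     # now required
]
main()
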
13 changes: 7 additions & 6 deletions pypi/ragas_once/src/ragas_once/embeddings/openai.py
@@ -14,13 +14,14 @@

 import os

-from langchain.embeddings import OpenAIEmbeddings as BaseOpenAIEmbeddings
-from ragas.embeddings import RagasEmbeddings
-from ragas.exceptions import OpenAIKeyNotFound
-from ragas.utils import NO_KEY
+from langchain_openai import OpenAIEmbeddings as BaseOpenAIEmbeddings
+from ragas.embeddings import BaseRagasEmbeddings

+NO_KEY = "NO_KEY"

-class OpenAIEmbeddings(BaseOpenAIEmbeddings, RagasEmbeddings):
+# DEPRECATED: using ragas.embeddings.LangchainEmbeddingsWrapper instead
+
+class OpenAIEmbeddings(BaseOpenAIEmbeddings, BaseRagasEmbeddings):
     api_key: str = NO_KEY

     def __init__(
@@ -43,4 +44,4 @@ def validate_api_key(self):
         if os_env_key != NO_KEY:
             self.api_key = os_env_key
         else:
-            raise OpenAIKeyNotFound
+            raise ValueError("openai api key must be provided")
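
The new deprecation note points at ragas' own wrapper; a minimal sketch of that replacement path (the same pattern eval.py adopts below), with the key, base URL, and model as placeholders:

from langchain_openai import OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper

# Wrap the langchain embeddings so ragas metrics can consume them; values are placeholders.
embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings(
        api_key="sk-placeholder",
        base_url="http://localhost:8001/v1",
        model="text-embedding-ada-002",
    )
)
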
39 changes: 22 additions & 17 deletions pypi/ragas_once/src/ragas_once/eval.py
@@ -16,17 +16,16 @@

 import pandas as pd
 from datasets import Dataset
-from langchain.chat_models import ChatOpenAI
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from ragas import evaluate
-from ragas.embeddings import RagasEmbeddings
-from ragas.llms import LangchainLLM, RagasLLM
+from ragas.llms import BaseRagasLLM
+from ragas.embeddings import BaseRagasEmbeddings, LangchainEmbeddingsWrapper
 from ragas.metrics import (AnswerCorrectness, AnswerRelevancy,
                            AnswerSimilarity, ContextPrecision, ContextRecall,
                            ContextRelevancy, Faithfulness)
 from ragas.metrics.base import Metric
-from ragas.utils import NO_KEY
-from ragas_once.embeddings.openai import OpenAIEmbeddings

+NO_KEY = "NO_KEY"

 class RagasEval:
     """
@@ -42,16 +41,18 @@ class RagasEval:

     # use openai llm&embedding by default
     api_base: str = "https://api.openai.com/v1"
+    embedding_base: str
     api_key: str = "fake"
     llm_model: str = "gpt-3.5-turbo"
     embedding_model: str = "text-embedding-ada-002"

-    llm: RagasLLM
-    embeddings: RagasEmbeddings
+    llm: BaseRagasLLM
+    embeddings: BaseRagasEmbeddings

     def __init__(
         self,
         api_base: str = NO_KEY,
+        embedding_base: str = NO_KEY,
         api_key: str = NO_KEY,
         llm_model: str = NO_KEY,
         embedding_model: str = NO_KEY,
@@ -63,21 +64,25 @@ def __init__(
         self.embedding_model = (
             embedding_model if embedding_model != NO_KEY else self.embedding_model
         )
+        # Using the same base as the LLM API if not set
+        self.embedding_base = (
+            embedding_base if embedding_base != NO_KEY else self.api_base
+        )

         # Initialize judge llm
-        self.llm = LangchainLLM(
-            llm=ChatOpenAI(
-                model_name=self.llm_model,
-                openai_api_key=self.api_key,
-                openai_api_base=self.api_base,
+        self.llm = ChatOpenAI(
+            model=self.llm_model,
+            api_key=self.api_key,
+            base_url=self.api_base,
             )
-        )

         # Initialize judge embedding
-        self.embeddings = OpenAIEmbeddings(
-            api_key=self.api_key,
-            api_base=self.api_base,
-            model_name=self.embedding_model,
+        self.embeddings = LangchainEmbeddingsWrapper(
+            OpenAIEmbeddings(
+                api_key=self.api_key,
+                base_url=self.embedding_base,
+                model=self.embedding_model,
+            )
         )

     def prepare_dataset(self, dataset: str = NO_KEY) -> Dataset:
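
A short usage sketch of the updated class, assuming the constructor keeps the keyword arguments shown above; endpoints and key are placeholders, and embedding_base can be omitted to reuse api_base for embeddings.

from ragas_once.eval import RagasEval

# Placeholders throughout; drop embedding_base to point embeddings at api_base.
evaluator = RagasEval(
    api_base="http://localhost:8000/v1",
    embedding_base="http://localhost:8001/v1",
    api_key="sk-placeholder",
    llm_model="gpt-3.5-turbo",
    embedding_model="text-embedding-ada-002",
)
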
112 changes: 112 additions & 0 deletions pypi/ragas_once/src/ragas_once/wrapper.py
@@ -0,0 +1,112 @@
import os


from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.embeddings import BaseRagasEmbeddings, HuggingfaceEmbeddings
from ragas.llms import BaseRagasLLM, LangchainLLMWrapper
from ragas.metrics import (answer_correctness, answer_relevancy,
answer_similarity, context_precision,
context_recall, context_relevancy, faithfulness)
from ragas.metrics.base import Metric

DEFAULT_METRICS = [
"answer_relevancy",
"context_precision",
"faithfulness",
"context_recall",
"context_relevancy",
]


def wrap_langchain_llm(
model: str, api_base: str | None, api_key: str | None
) -> LangchainLLMWrapper:
"""
Initializes and returns an instance of the LangchainLLM class.

Args:
model (str): The name of the language model to use.
api_base (str | None): The base URL for the OpenAI API. If None, the default URL is assumed.
api_key (str | None): The API key for the OpenAI API.

Returns:
LangchainLLMWrapper: An instance of the LangchainLLMWrapper class.

Raises:
ValueError: If api_key is not provided.

Notes:
- If api_base is not provided, the default URL 'https://api.openai.com/v1' is assumed.
- The environment variables OPENAI_API_KEY and OPENAI_API_BASE are set to the provided api_key and api_base.
"""
if api_base is None:
print("api_base not provided, assuming OpenAI default")
if api_key is None:
raise ValueError("api_key must be provided")
os.environ["OPENAI_API_KEY"] = api_key
base = ChatOpenAI(model_name=model)
else:
os.environ["OPENAI_API_BASE"] = api_base
if api_key:
os.environ["OPENAI_API_KEY"] = api_key
base = ChatOpenAI(
model_name=model, openai_api_key=api_key, openai_api_base=api_base
)
return LangchainLLMWrapper(base)


def set_metrics(
metrics: list[str], llm: BaseRagasLLM | None, embeddings: BaseRagasEmbeddings | None
) -> list[Metric]:
"""
Sets the metrics for evaluation.

Parameters:
metrics (list[str]): A list of metric names to be set.
llm (RagasLLM | None): An instance of RagasLLM or None. If not set, the code will use OpenAI ChatGPT as default.
embeddings (RagasEmbeddings | None): An instance of RagasEmbeddings or None. If not set, the code will use OpenAI Embeddings as default.

Returns:
list[Metric]: A list of Metric objects representing the set metrics.
"""
ms = []
if llm:
context_precision.llm = llm
context_recall.llm = llm
context_relevancy.llm = llm
answer_correctness.llm = llm
answer_similarity.llm = llm
faithfulness.llm = llm
if embeddings:
answer_relevancy.embeddings = embeddings
answer_correctness.embeddings = embeddings
answer_similarity.embeddings = embeddings
if not metrics:
metrics = DEFAULT_METRICS
for m in metrics:
if m == "context_precision":
ms.append(context_precision)
elif m == "context_recall":
ms.append(context_recall)
elif m == "context_relevancy":
ms.append(context_relevancy)
elif m == "answer_relevancy":
ms.append(answer_relevancy)
elif m == "answer_correctness":
ms.append(answer_correctness)
elif m == "answer_similarity":
ms.append(answer_similarity)
elif m == "faithfulness":
ms.append(faithfulness)
return ms

# DEPRECATED: using ragas.embeddings.LangchainEmbeddingsWrapper instead
def wrap_embeddings(
model_type: str, model_name: str | None, api_key: str | None
) -> BaseRagasEmbeddings:
if model_type == "openai":
return OpenAIEmbeddings(api_key=api_key)
elif model_type == "huggingface":
return HuggingfaceEmbeddings(model_name=model_name)
else:
raise ValueError(f"Invalid model type: {model_type}")