feat: upgrade ragas_once to match langchain 0.1 #783

Closed · wants to merge 1 commit
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "basic"
}
8 changes: 4 additions & 4 deletions pypi/ragas_once/pyproject.toml
@@ -2,8 +2,8 @@
 requires = [
     "setuptools>=61.0",
     "setuptools-scm",
-    "ragas==0.0.22",
-    "langchain==0.0.354"
+    "ragas",
+    "langchain"
 ]
 build-backend = "setuptools.build_meta"

@@ -22,8 +22,8 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "langchain==0.0.354",
-    "ragas==0.0.22",
+    "langchain",
+    "ragas",
 ]

 [project.scripts]
13 changes: 11 additions & 2 deletions pypi/ragas_once/src/ragas_once/cli.py
@@ -35,7 +35,14 @@ def main():
         help="Specifies the base URL for the API. Defaults to OpenAI.",
     )
     parser.add_argument(
-        "--apikey", type=str, help="Specifies the API key to authenticate requests."
+        "--embeddingbase",
+        type=str,
+        help="Specifies the base URL for the embedding API. Will use the same base as the LLM API if not set.",
+    )
+    parser.add_argument(
+        "--apikey",
+        type=str,
+        help="Specifies the API key to authenticate requests."
     )
     parser.add_argument(
         "--llm",
@@ -58,14 +65,16 @@
     parser.add_argument(
         "--dataset",
         type=str,
-        help="Specifies the path to the dataset for evaluation. Will use fiqa dataset if not set.",
+        help="Specifies the path to the dataset for evaluation.",
+        required=True,
     )

     args = parser.parse_args()

     # Initialize ragas_once with provided arguments
     once = RagasEval(
         api_base=args.apibase,
+        embedding_base=args.embeddingbase,
         api_key=args.apikey,
         llm_model=args.llm,
         embedding_model=args.embedding,
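
For orientation, a minimal sketch of how the new flag set could be exercised by driving main() directly; every value below (endpoints, key, dataset path) is a placeholder, and --embeddingbase simply falls back to --apibase when omitted.

import sys

from ragas_once.cli import main

# Illustrative only: all argument values are placeholders.
sys.argv = [
    "ragas-once",
    "--apibase", "http://localhost:8000/v1",
    "--embeddingbase", "http://localhost:8001/v1",  # new: separate embedding endpoint
    "--apikey", "sk-placeholder",
    "--dataset", "testset.csv",                     # now required
]
main()
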
13 changes: 7 additions & 6 deletions pypi/ragas_once/src/ragas_once/embeddings/openai.py
@@ -14,13 +14,14 @@

 import os

-from langchain.embeddings import OpenAIEmbeddings as BaseOpenAIEmbeddings
-from ragas.embeddings import RagasEmbeddings
-from ragas.exceptions import OpenAIKeyNotFound
-from ragas.utils import NO_KEY
+from langchain_openai import OpenAIEmbeddings as BaseOpenAIEmbeddings
+from ragas.embeddings import BaseRagasEmbeddings

+NO_KEY = "NO_KEY"

-class OpenAIEmbeddings(BaseOpenAIEmbeddings, RagasEmbeddings):
+# DEPRECATED: using ragas.embeddings.LangchainEmbeddingsWrapper instead
+
+class OpenAIEmbeddings(BaseOpenAIEmbeddings, BaseRagasEmbeddings):
     api_key: str = NO_KEY

     def __init__(
@@ -43,4 +44,4 @@ def validate_api_key(self):
         if os_env_key != NO_KEY:
             self.api_key = os_env_key
         else:
-            raise OpenAIKeyNotFound
+            raise ValueError("openai api key must be provided")
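
The new deprecation note points at ragas' own wrapper; a minimal sketch of that replacement path (the same pattern eval.py adopts below), with the key, base URL, and model as placeholders:

from langchain_openai import OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper

# Wrap the langchain embeddings so ragas metrics can consume them; values are placeholders.
embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings(
        api_key="sk-placeholder",
        base_url="http://localhost:8001/v1",
        model="text-embedding-ada-002",
    )
)
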
39 changes: 22 additions & 17 deletions pypi/ragas_once/src/ragas_once/eval.py
@@ -16,17 +16,16 @@

 import pandas as pd
 from datasets import Dataset
-from langchain.chat_models import ChatOpenAI
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from ragas import evaluate
-from ragas.embeddings import RagasEmbeddings
-from ragas.llms import LangchainLLM, RagasLLM
+from ragas.llms import BaseRagasLLM
+from ragas.embeddings import BaseRagasEmbeddings, LangchainEmbeddingsWrapper
 from ragas.metrics import (AnswerCorrectness, AnswerRelevancy,
                            AnswerSimilarity, ContextPrecision, ContextRecall,
                            ContextRelevancy, Faithfulness)
 from ragas.metrics.base import Metric
-from ragas.utils import NO_KEY
-from ragas_once.embeddings.openai import OpenAIEmbeddings

+NO_KEY = "NO_KEY"

 class RagasEval:
     """
@@ -42,16 +41,18 @@ class RagasEval:

     # use openai llm&embedding by default
     api_base: str = "https://api.openai.com/v1"
+    embedding_base: str
     api_key: str = "fake"
     llm_model: str = "gpt-3.5-turbo"
     embedding_model: str = "text-embedding-ada-002"

-    llm: RagasLLM
-    embeddings: RagasEmbeddings
+    llm: BaseRagasLLM
+    embeddings: BaseRagasEmbeddings

     def __init__(
         self,
         api_base: str = NO_KEY,
+        embedding_base: str = NO_KEY,
         api_key: str = NO_KEY,
         llm_model: str = NO_KEY,
         embedding_model: str = NO_KEY,
@@ -63,21 +64,25 @@ def __init__(
         self.embedding_model = (
             embedding_model if embedding_model != NO_KEY else self.embedding_model
         )
+        # Using the same base as the LLM API if not set
+        self.embedding_base = (
+            embedding_base if embedding_base != NO_KEY else self.api_base
+        )

         # Initialize judge llm
-        self.llm = LangchainLLM(
-            llm=ChatOpenAI(
-                model_name=self.llm_model,
-                openai_api_key=self.api_key,
-                openai_api_base=self.api_base,
+        self.llm = ChatOpenAI(
+            model=self.llm_model,
+            api_key=self.api_key,
+            base_url=self.api_base,
             )
-        )

         # Initialize judge embedding
-        self.embeddings = OpenAIEmbeddings(
-            api_key=self.api_key,
-            api_base=self.api_base,
-            model_name=self.embedding_model,
+        self.embeddings = LangchainEmbeddingsWrapper(
+            OpenAIEmbeddings(
+                api_key=self.api_key,
+                base_url=self.embedding_base,
+                model=self.embedding_model,
+            )
         )

     def prepare_dataset(self, dataset: str = NO_KEY) -> Dataset:
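
A short usage sketch of the updated class, assuming the constructor keeps the keyword arguments shown above; endpoints and key are placeholders, and embedding_base can be omitted to reuse api_base for embeddings.

from ragas_once.eval import RagasEval

# Placeholders throughout; drop embedding_base to point embeddings at api_base.
evaluator = RagasEval(
    api_base="http://localhost:8000/v1",
    embedding_base="http://localhost:8001/v1",
    api_key="sk-placeholder",
    llm_model="gpt-3.5-turbo",
    embedding_model="text-embedding-ada-002",
)
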
112 changes: 112 additions & 0 deletions pypi/ragas_once/src/ragas_once/wrapper.py
@@ -0,0 +1,112 @@
import os


from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.embeddings import BaseRagasEmbeddings, HuggingfaceEmbeddings
from ragas.llms import BaseRagasLLM, LangchainLLMWrapper
from ragas.metrics import (answer_correctness, answer_relevancy,
answer_similarity, context_precision,
context_recall, context_relevancy, faithfulness)
from ragas.metrics.base import Metric

DEFAULT_METRICS = [
"answer_relevancy",
"context_precision",
"faithfulness",
"context_recall",
"context_relevancy",
]


def wrap_langchain_llm(
model: str, api_base: str | None, api_key: str | None
) -> LangchainLLMWrapper:
"""
Initializes and returns an instance of the LangchainLLM class.

Args:
model (str): The name of the language model to use.
api_base (str | None): The base URL for the OpenAI API. If None, the default URL is assumed.
api_key (str | None): The API key for the OpenAI API.

Returns:
LangchainLLMWrapper: An instance of the LangchainLLMWrapper class.

Raises:
ValueError: If api_key is not provided.

Notes:
- If api_base is not provided, the default URL 'https://api.openai.com/v1' is assumed.
- The environment variables OPENAI_API_KEY and OPENAI_API_BASE are set to the provided api_key and api_base.
"""
if api_base is None:
print("api_base not provided, assuming OpenAI default")
if api_key is None:
raise ValueError("api_key must be provided")
os.environ["OPENAI_API_KEY"] = api_key
base = ChatOpenAI(model_name=model)
else:
os.environ["OPENAI_API_BASE"] = api_base
if api_key:
os.environ["OPENAI_API_KEY"] = api_key
base = ChatOpenAI(
model_name=model, openai_api_key=api_key, openai_api_base=api_base
)
return LangchainLLMWrapper(base)


def set_metrics(
metrics: list[str], llm: BaseRagasLLM | None, embeddings: BaseRagasEmbeddings | None
) -> list[Metric]:
"""
Sets the metrics for evaluation.

Parameters:
metrics (list[str]): A list of metric names to be set.
llm (RagasLLM | None): An instance of RagasLLM or None. If not set, the code will use OpenAI ChatGPT as default.
embeddings (RagasEmbeddings | None): An instance of RagasEmbeddings or None. If not set, the code will use OpenAI Embeddings as default.

Returns:
list[Metric]: A list of Metric objects representing the set metrics.
"""
ms = []
if llm:
context_precision.llm = llm
context_recall.llm = llm
context_relevancy.llm = llm
answer_correctness.llm = llm
answer_similarity.llm = llm
faithfulness.llm = llm
if embeddings:
answer_relevancy.embeddings = embeddings
answer_correctness.embeddings = embeddings
answer_similarity.embeddings = embeddings
if not metrics:
metrics = DEFAULT_METRICS
for m in metrics:
if m == "context_precision":
ms.append(context_precision)
elif m == "context_recall":
ms.append(context_recall)
elif m == "context_relevancy":
ms.append(context_relevancy)
elif m == "answer_relevancy":
ms.append(answer_relevancy)
elif m == "answer_correctness":
ms.append(answer_correctness)
elif m == "answer_similarity":
ms.append(answer_similarity)
elif m == "faithfulness":
ms.append(faithfulness)
return ms

# DEPRECATED: using ragas.embeddings.LangchainEmbeddingsWrapper instead
def wrap_embeddings(
model_type: str, model_name: str | None, api_key: str | None
) -> BaseRagasEmbeddings:
if model_type == "openai":
return OpenAIEmbeddings(api_key=api_key)
elif model_type == "huggingface":
return HuggingfaceEmbeddings(model_name=model_name)
else:
raise ValueError(f"Invalid model type: {model_type}")