challenge-12.py
# fullstack gpt code challenge 12
import os

import streamlit as st
from langchain.chat_models import ChatOllama
from langchain.callbacks.base import BaseCallbackHandler
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import CacheBackedEmbeddings, OllamaEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.storage import LocalFileStore
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
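
# NOTE: these are the legacy `langchain` import paths; newer releases moved
# most of these classes into the `langchain_community` package.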

st.set_page_config(
    page_title="::: Private GPT :::",
    page_icon="📃",
)
st.title("Private GPT")
st.markdown(
    """
Welcome to Private GPT!

Use this chatbot to ask questions to an AI about your files.

Upload your files on the sidebar.
"""
)
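

# Streams the model's tokens into the chat window as they are generated,
# then saves the finished message to the session history when the run ends.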
class ChatCallbackHandler(BaseCallbackHandler):
    message = ""

    def on_llm_start(self, *args, **kwargs):
        self.message_box = st.empty()

    def on_llm_end(self, *args, **kwargs):
        save_message(self.message, "ai")

    def on_llm_new_token(self, token, *args, **kwargs):
        self.message += token
        self.message_box.markdown(self.message)


llm = ChatOllama(
    model="mistral:latest",
    temperature=0.1,
    streaming=True,
    callbacks=[
        ChatCallbackHandler(),
    ],
)
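

# Cache the retriever per uploaded file so Streamlit re-runs don't re-read,
# re-split, or re-embed the same document.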
@st.cache_resource(show_spinner="Embedding file...")
def embed_file(file):
    # The .cache directories are assumed to exist in the original repo;
    # create the upload directory here so a clean checkout also works.
    os.makedirs("./.cache/private_files", exist_ok=True)
    file_content = file.read()
    file_path = f"./.cache/private_files/{file.name}"
    with open(file_path, "wb") as f:
        f.write(file_content)
    cache_dir = LocalFileStore(f"./.cache/private_embeddings/{file.name}")
    splitter = CharacterTextSplitter.from_tiktoken_encoder(
        separator="\n",
        chunk_size=600,
        chunk_overlap=100,
    )
    loader = UnstructuredFileLoader(file_path)
    docs = loader.load_and_split(text_splitter=splitter)
    embeddings = OllamaEmbeddings(model="mistral:latest")
    cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)
    vectorstore = FAISS.from_documents(docs, cached_embeddings)
    retriever = vectorstore.as_retriever()
    return retriever
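

# Chat-history helpers: messages live in session state as
# {"message", "role"} dicts and are re-painted on every Streamlit re-run.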
def save_message(message, role):
    st.session_state["messages"].append({"message": message, "role": role})


def send_message(message, role, save=True):
    with st.chat_message(role):
        st.markdown(message)
    if save:
        save_message(message, role)


def paint_history():
    for message in st.session_state["messages"]:
        send_message(
            message["message"],
            message["role"],
            save=False,
        )
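

# Join the retrieved chunks into one context string for the prompt.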
def format_docs(docs):
    return "\n\n".join(document.page_content for document in docs)


prompt = ChatPromptTemplate.from_template(
    """
Answer the question using ONLY the following context and not your training data.
If you don't know the answer, just say you don't know. DON'T make anything up.

Context: {context}
Question: {question}
"""
)
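
# The upload widget lives in the sidebar; the chat flow below only activates
# once a file has been embedded.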
with st.sidebar:
    file = st.file_uploader(
        "Upload a .txt, .pdf, or .docx file",
        type=["pdf", "txt", "docx"],
    )

if file:
    retriever = embed_file(file)
    send_message("I'm ready! Ask away!", "ai", save=False)
    paint_history()
    message = st.chat_input("Ask anything about your file...")
    if message:
        send_message(message, "human")
        chain = (
            {
                "question": RunnablePassthrough(),
                "context": retriever | RunnableLambda(format_docs),
            }
            | prompt
            | llm
        )
        with st.chat_message("ai"):
            chain.invoke(message)
else:
    st.session_state["messages"] = []
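
# To try this locally (assumes an Ollama server is running with the mistral
# model pulled, e.g. `ollama pull mistral`):
#   streamlit run challenge-12.py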