utils.py
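"""Utility helpers for a website-chatbot RAG pipeline: crawl a site,
chunk and embed its content into a Chroma vector store, and answer
questions with a Cohere-backed ConversationalRetrievalChain."""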
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_cohere import ChatCohere, CohereEmbeddings

from text_to_doc import get_doc_chunks
from web_crawler import get_data_from_website
from prompt import get_prompt

def get_chroma_client():
    """
    Returns a Chroma vector store instance.

    Returns:
        langchain_community.vectorstores.Chroma: ChromaDB vector store instance.
    """
    embedding_function = CohereEmbeddings(model="embed-english-light-v3.0")
    return Chroma(
        collection_name="website_data",
        embedding_function=embedding_function,
        persist_directory="data/chroma",
    )

def store_docs(url):
    """
    Retrieves data from a website, processes it into document chunks,
    and stores them in a vector store.

    Args:
        url (str): The URL of the website to retrieve data from.

    Returns:
        None
    """
    text, metadata = get_data_from_website(url)
    docs = get_doc_chunks(text, metadata)
    vector_store = get_chroma_client()

    # Clear previously stored embeddings before indexing the new site,
    # so documents from an earlier crawl don't leak into answers.
    for collection in vector_store._client.list_collections():
        ids = collection.get()["ids"]
        print(f"REMOVE {len(ids)} document(s) from {collection.name} collection")
        if ids:
            collection.delete(ids)

    vector_store.add_documents(docs)
    vector_store.persist()

def make_chain():
    """
    Creates a chain of langchain components.

    Returns:
        langchain.chains.ConversationalRetrievalChain: ConversationalRetrievalChain instance.
    """
    # An OpenAI chat model could be used here instead, e.g.:
    # model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.0, verbose=True)
    model = ChatCohere(model="command")
    vector_store = get_chroma_client()
    prompt = get_prompt()
    retriever = vector_store.as_retriever(search_type="mmr", verbose=True)

    chain = ConversationalRetrievalChain.from_llm(
        model,
        retriever=retriever,
        return_source_documents=True,
        combine_docs_chain_kwargs=dict(prompt=prompt),
        verbose=True,
        rephrase_question=False,
    )
    return chain

def get_response(question, organization_name, organization_info, contact_info):
    """
    Generates a response based on the input question.

    Args:
        question (str): The input question to generate a response for.
        organization_name (str): The name of the organization.
        organization_info (str): Information about the organization.
        contact_info (str): Contact information for the organization.

    Returns:
        str: The response generated by the chain model.
    """
    chat_history = []  # no prior turns; the chain expects a list, not a string
    chain = make_chain()
    response = chain({
        "question": question,
        "chat_history": chat_history,
        "organization_name": organization_name,
        "contact_info": contact_info,
        "organization_info": organization_info,
    })
    return response["answer"]
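

# Minimal usage sketch (not part of the original module): the URL,
# organization details, and question below are placeholder assumptions
# for illustration only.
if __name__ == "__main__":
    store_docs("https://example.com")
    answer = get_response(
        question="What services do you offer?",
        organization_name="Example Corp",
        organization_info="Example Corp is a placeholder organization.",
        contact_info="contact@example.com",
    )
    print(answer)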