#!/usr/bin/env python
# coding: utf-8

# # GPT4All Langchain Demo
# 
# Example of locally running [`GPT4All`](https://github.com/nomic-ai/gpt4all), a 4GB, *llama.cpp* based large language model (LLM), under [`langchain`](https://github.com/hwchase17/langchain), in a Jupyter notebook running a Python 3.10 kernel.
# 
# *Tested on a mid-2015 16GB MacBook Pro, concurrently running Docker (a single container running a separate Jupyter server) and Chrome with approx. 40 open tabs.*

# ## Model preparation
# 
# - download the `gpt4all` model:

# In[ ]:


#https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin


# - download the `llama.cpp` 7B model:

# In[2]:


#%pip install pyllama
#!python3.10 -m llama.download --model_size 7B --folder llama/


# - convert the `gpt4all` model to the `llama.cpp` format:

# In[ ]:


#%pip install pyllamacpp
#!pyllamacpp-convert-gpt4all ./gpt4all-main/chat/gpt4all-lora-quantized.bin llama/tokenizer.model ./gpt4all-main/chat/gpt4all-lora-q-converted.bin


# In[25]:


GPT4ALL_MODEL_PATH = "./gpt4all-main/chat/gpt4all-lora-q-converted.bin"


# ## `langchain` Demo
# 
# Example of running a prompt using `langchain`.

# In[1]:


#https://python.langchain.com/en/latest/ecosystem/llamacpp.html
#%pip uninstall -y langchain
#%pip install --upgrade git+https://github.com/hwchase17/langchain.git

from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain


# - set up the prompt template:

# In[2]:


template = """
Question: {question}

Answer: Let's think step by step.
"""

prompt = PromptTemplate(template=template, input_variables=["question"])


# - load the model:

# In[3]:


get_ipython().run_cell_magic('time', '', '\nllm = LlamaCpp(model_path=GPT4ALL_MODEL_PATH)\n')


# - create a language chain using the prompt template and the loaded model:

# In[5]:


llm_chain = LLMChain(prompt=prompt, llm=llm)


# - run the prompt:

# In[6]:


get_ipython().run_cell_magic('time', '', 'question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"\n\nllm_chain.run(question)\n')


# Another example...

# In[7]:


template = """
Question: {question}

Answer:
"""

prompt = PromptTemplate(template=template, input_variables=["question"])


# In[8]:


llm_chain = LLMChain(prompt=prompt, llm=llm)


# In[9]:


get_ipython().run_cell_magic('time', '', 'question = "What is a relational database and what is ACID in that context?"\n\nllm_chain.run(question)\n')


# ## Generating Embeddings
# 
# We can also use the model to generate embeddings.

# In[28]:


get_ipython().run_cell_magic('time', '', '#https://abetlen.github.io/llama-cpp-python/\n#%pip uninstall -y llama-cpp-python\n#%pip install --upgrade llama-cpp-python\n\nfrom langchain.embeddings import LlamaCppEmbeddings\n\nllama_embeddings = LlamaCppEmbeddings(model_path=GPT4ALL_MODEL_PATH)\n')


# In[30]:


get_ipython().run_cell_magic('time', '', 'text = "This is a test document."\n\nquery_result = llama_embeddings.embed_query(text)\n')


# In[12]:


get_ipython().run_cell_magic('time', '', 'doc_result = llama_embeddings.embed_documents([text])\n')


# ## Example Query Supported by a Document Based Knowledge Source
# 
# Example document query using the example from the [`langchain` docs](https://python.langchain.com/en/latest/use_cases/question_answering.html).
# 
# The idea is to run the query against a document source to retrieve some relevant context, and use that as part of the prompt context.
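# A rough, hand-rolled sketch of that pattern (illustrative only: `retrieved_context` is a hard-coded stand-in for whatever a document search would return, and `rag_prompt` / `rag_chain` are just example names; the actual retrieval pipeline is built below): once some relevant text has been retrieved, it is simply interpolated into the prompt alongside the question.

# In[ ]:


# Sketch only - a stand-in for text returned by a document search:
retrieved_context = "The president nominated Ketanji Brown Jackson to the Supreme Court."

context_template = """
Use the following context to answer the question.

Context: {context}

Question: {question}

Answer:
"""

rag_prompt = PromptTemplate(template=context_template,
                            input_variables=["context", "question"])
rag_chain = LLMChain(prompt=rag_prompt, llm=llm)

# Both the retrieved context and the question are passed to the chain
rag_chain.run(context=retrieved_context,
              question="What did the president say about Ketanji Brown Jackson?")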
# In[19]:


#https://python.langchain.com/en/latest/use_cases/question_answering.html

template = """
Question: {question}

Answer:
"""

prompt = PromptTemplate(template=template, input_variables=["question"])

llm_chain = LLMChain(prompt=prompt, llm=llm)


# A naive prompt gives an irrelevant answer:

# In[20]:


get_ipython().run_cell_magic('time', '', 'query = "What did the president say about Ketanji Brown Jackson"\nllm_chain.run(query)\n')


# Now let's try with a source document.

# In[21]:


#!wget https://raw.githubusercontent.com/hwchase17/langchainjs/main/examples/state_of_the_union.txt

from langchain.document_loaders import TextLoader

# Ideally....
loader = TextLoader('./state_of_the_union.txt')


# However, creating the embeddings is quite slow, so I'm going to use a fragment of the text:

# In[108]:


#ish via chatgpt...
def search_context(src, phrase, buffer=100):
    # Return a window of `buffer` words either side of the first occurrence
    # of `phrase` (matched as a standalone word) in the file `src`
    with open(src, 'r') as f:
        txt = f.read()

    words = txt.split()
    index = words.index(phrase)
    start_index = max(0, index - buffer)
    end_index = min(len(words), index + buffer + 1)

    return ' '.join(words[start_index:end_index])

fragment = './fragment.txt'

with open(fragment, 'w') as fo:
    _txt = search_context('./state_of_the_union.txt', "Ketanji")
    fo.write(_txt)

get_ipython().system('cat $fragment')


# In[101]:


loader = TextLoader('./fragment.txt')


# In[102]:


#%pip install chromadb

from langchain.indexes import VectorstoreIndexCreator


# Generate an index from the knowledge source text:

# In[103]:


get_ipython().run_line_magic('time', '')

# Time: ~0.5s per token

# NOTE: "You must specify a persist_directory on creation to persist the collection."
# TO DO: How do we load in an already generated and persisted index?
#        (A possible approach is sketched at the end of this notebook.)

index = VectorstoreIndexCreator(embedding=llama_embeddings,
                                vectorstore_kwargs={"persist_directory": "db"}
                                ).from_loaders([loader])


# In[106]:


get_ipython().run_line_magic('time', '')

pass

# The following errors...
#index.query(query, llm=llm)

# With the full SOTU text, I got:
# Error: llama_tokenize: too many tokens;
# Also getting:
# ValueError: Requested tokens exceed context window of 512

# If we do get past that:
# NotEnoughElementsException
# For the latter, we somehow need to set something like search_kwargs={"k": 1}


# It seems the retriever expects four result documents by default. I can't see how to pass in a lower limit (a single response document is acceptable in this case), so we need to roll our own chain...

# In[121]:


get_ipython().run_cell_magic('time', '', '\n# Roll our own....\n\n#https://github.com/hwchase17/langchain/issues/2255\nfrom langchain.vectorstores import Chroma\nfrom langchain.text_splitter import RecursiveCharacterTextSplitter\nfrom langchain.chains import RetrievalQA\n\ndocuments = loader.load()\n\ntext_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\ntexts = text_splitter.split_documents(documents)\n\n# Again, we should persist the db and figure out how to reuse it\ndocsearch = Chroma.from_documents(texts, llama_embeddings)\n')


# In[122]:


get_ipython().run_cell_magic('time', '', '\n# Just getting a single result document from the knowledge lookup is fine...\nMIN_DOCS = 1\n\nqa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",\n                                 retriever=docsearch.as_retriever(search_kwargs={"k": MIN_DOCS}))\n')


# What do we get in response to our original query now?
# In[127]:


get_ipython().run_cell_magic('time', '', '\nprint(query)\n\nqa.run(query)\n')


# In[128]:


get_ipython().run_cell_magic('time', '', '\nquery = "Identify three things the president said about Ketanji Brown Jackson"\n\nqa.run(query)\n')


# In[129]:


get_ipython().run_cell_magic('time', '', '\nquery = """\nIdentify three things the president said about Ketanji Brown Jackson. Provide the answer in the form: \n\n- ITEM 1\n- ITEM 2\n- ITEM 3\n"""\n\nqa.run(query)\n')
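# ## Persisting and Reloading the Index
# 
# A possible answer to the earlier TO DO, untested here, so treat it as a sketch rather than a recipe: `Chroma` takes a `persist_directory` argument and has a `persist()` method, and a persisted collection can apparently be reopened by constructing `Chroma` with the same directory and embedding function. The names `persisted_search`, `reloaded_search` and `qa2` below are illustrative only.

# In[ ]:


# Sketch: persist the vector store to disk...
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

persisted_search = Chroma.from_documents(texts, llama_embeddings,
                                         persist_directory="db")
persisted_search.persist()

# ...then, later (or in a fresh session), reload it without re-embedding the documents
reloaded_search = Chroma(persist_directory="db",
                         embedding_function=llama_embeddings)

qa2 = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                  retriever=reloaded_search.as_retriever(search_kwargs={"k": MIN_DOCS}))

# The reloaded index should then answer queries as before, e.g.
#qa2.run(query)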