Document Trained ChatGPT

Custom-train a GPT-3 model to answer questions about a set of documents.

Document Trained ChatGPT

This is a simple Flask app that lets you upload a set of documents and then query them using a custom-trained GPT-3 model. The app builds a GPT-3 index of the documents and then uses that index to answer questions about them.

Demo:

Folder Structure:

├───food
├───static
│   └───style.css
├───docGPT.py
├───index.html
├───index.json
├───requirements.txt
└───result.html

Files:

index.html
result.html
style.css
requirements.txt
docGPT.py


docGPT.py

from flask import Flask, render_template, request
from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
from flask import Flask, render_template, request, redirect
from langchain.chat_models import ChatOpenAI
import os

# NOTE: replace the placeholder with a real key. This assignment overwrites any
# OPENAI_API_KEY value that is already present in the process environment.
os.environ["OPENAI_API_KEY"] = '<KEY>'

# template_folder is the directory Flask searches for index.html and
# result.html; replace the placeholder with the real location.
app = Flask(__name__, template_folder='C:\\Users\\<PATH_TO_FILE>')
app.config['UPLOAD_FOLDER'] = 'food'

# The routes list and save into the upload folder, so make sure it exists
# before the first request instead of failing on a fresh checkout.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Initialize the index
index = None

def construct_index(directory_path):
    """Build a vector index from the documents in *directory_path*.

    The documents are read with ``SimpleDirectoryReader``, indexed with
    ``GPTSimpleVectorIndex`` and the result is written to ``index.json``
    in the current working directory.

    Args:
        directory_path: Folder whose documents should be indexed.

    Returns:
        The newly built ``GPTSimpleVectorIndex``.
    """
    # Sizing limits handed to PromptHelper and the chat model.
    input_limit = 4096
    output_tokens = 1024
    chunk_overlap = 20
    chunk_limit = 600

    prompt_helper = PromptHelper(
        input_limit,
        output_tokens,
        chunk_overlap,
        chunk_size_limit=chunk_limit,
    )

    chat_model = ChatOpenAI(
        temperature=0.7,
        model_name="gpt-3.5-turbo",
        max_tokens=output_tokens,
    )
    llm_predictor = LLMPredictor(llm=chat_model)

    loaded_docs = SimpleDirectoryReader(directory_path).load_data()

    built_index = GPTSimpleVectorIndex(
        loaded_docs,
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )
    # Persist the index so chatbot() can load it from disk later.
    built_index.save_to_disk('index.json')
    return built_index

def chatbot(input_text):
    """Answer *input_text* using the index stored in ``index.json``.

    The index is loaded from disk on every call, queried with
    ``response_mode="compact"``, and the ``response`` attribute of the
    query result is returned.

    Args:
        input_text: The question to ask.

    Returns:
        The ``response`` attribute of the query result.
    """
    return (
        GPTSimpleVectorIndex.load_from_disk('index.json')
        .query(input_text, response_mode="compact")
        .response
    )

@app.route('/', methods=['GET', 'POST'])
def home():
    """Show the upload page and, on POST, store uploads and rebuild the index.

    On POST, every file in the ``documents`` form field is saved into the
    configured upload folder and the module-level ``index`` is rebuilt from
    that folder. On both GET and POST the page is rendered with the list of
    files currently in the upload folder.

    Returns:
        The rendered ``index.html`` template.
    """
    global index
    upload_dir = app.config['UPLOAD_FOLDER']

    if request.method == 'POST':
        # file upload
        for file in request.files.getlist('documents'):
            # The filename comes from the client. Keep only its final path
            # component (treating backslashes as separators too) so a name
            # such as "../../x" cannot be written outside the upload folder.
            raw_name = (file.filename or '').replace('\\', '/')
            safe_name = os.path.basename(raw_name)
            # An empty name is what a form submitted without a selected file
            # produces; saving it would target the folder itself.
            if not safe_name or safe_name in ('.', '..'):
                continue
            file.save(os.path.join(upload_dir, safe_name))

        # construct index (skipped while the folder is still empty, since
        # there is nothing to index)
        if os.listdir(upload_dir):
            index = construct_index(upload_dir)

    # list uploaded files
    files = os.listdir(upload_dir)

    return render_template('index.html', files=files)


# main function
@app.route('/run', methods=['POST'])
def run_program():
    """Answer the submitted ``prompt`` form field against the uploaded documents.

    Returns:
        A plain-text message when the upload folder is empty or when no
        prompt was submitted; otherwise the rendered ``result.html``
        template with the chatbot's answer passed as ``response``.
    """
    upload_dir = app.config['UPLOAD_FOLDER']
    if len(os.listdir(upload_dir)) == 0:
        return "Please upload documents first."

    question = request.form.get('prompt')
    if question:
        answer = chatbot(question)
        return render_template('result.html', response=answer)

    return "Please enter a prompt question."


@app.route('/clear', methods=['POST'])
def clear_documents():
    """Remove every entry in the upload folder and reset the in-memory index.

    Returns:
        A redirect to the document upload page (``/``).
    """
    global index

    # Delete each entry found in the upload folder.
    upload_dir = app.config['UPLOAD_FOLDER']
    for entry in os.listdir(upload_dir):
        os.remove(os.path.join(upload_dir, entry))

    # Drop the in-memory index.
    index = None

    # Send the user back to the document upload page.
    return redirect('/')


@app.route('/result', methods=['GET'])
def show_results():
    """Render ``result.html`` without passing a ``response`` value."""
    page = render_template('result.html')
    return page

# Start the Flask app only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    app.run()


References:

Google Colab - armrrs

******
Written by Shain Lakin on 08 May 2023