first commit

This commit is contained in:
tahsin-protik
2023-02-22 03:04:53 +06:00
parent 40442a9dab
commit 1247af9b8b
2 changed files with 87 additions and 82 deletions
+79 -79
View File
@@ -1,18 +1,4 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
@@ -27,18 +13,23 @@
},
{
"cell_type": "code",
"source": [
"import os\n",
"os.environ['OPENAI_API_KEY'] = \"\""
],
"execution_count": null,
"metadata": {
"id": "BYUc4Z7vY2bb"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"import os\n",
"os.environ['OPENAI_API_KEY'] = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MZAy8TaKY6pI"
},
"outputs": [],
"source": [
"#This is for embedding. Here, one language model from Hugging Face is used.\n",
"\n",
@@ -48,15 +39,15 @@
"\n",
"text ='Abc'\n",
"model.encode(text).tolist() #Example of how to do the encoding."
],
"metadata": {
"id": "MZAy8TaKY6pI"
},
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gu04tON0cZvT"
},
"outputs": [],
"source": [
"#Function to split long documents into smaller parts\n",
"def split_text_into_chunks(plain_text, max_chars=2000):\n",
@@ -71,28 +62,28 @@
" if current_chunk:\n",
" text_chunks.append(current_chunk.strip())\n",
" return text_chunks"
],
"metadata": {
"id": "gu04tON0cZvT"
},
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"source": [
"import pinecone\n",
"pinecone.init(api_key=\"\", environment=\"\") #Initialization of vector database module\n",
"index = pinecone.Index(\"\") #Fill out with index name."
],
"execution_count": null,
"metadata": {
"id": "EqCYELlQZN0m"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"import pinecone\n",
"pinecone.init(api_key=\"\", environment=\"\") #Todo: Initialization of vector database module\n",
"index = pinecone.Index(\"\") #Todo: Fill out with index name."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "CAqSjLcQZjjJ"
},
"outputs": [],
"source": [
"def addData(corpusData):\n",
" id = index.describe_index_stats()['total_vector_count']\n",
@@ -102,15 +93,15 @@
" model.encode(chunk).tolist(), #We are using the model to encode the original chunk of text.\n",
" {'context': chunk}) #In metadata we are storing the original text here as context. \n",
" index.upsert(vectors=[chunkInfo])"
],
"metadata": {
"id": "CAqSjLcQZjjJ"
},
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8VIZ5_ufbRQ5"
},
"outputs": [],
"source": [
"#This function is responsible for matching the input string with already existing data in the vector database.\n",
"\n",
@@ -119,62 +110,71 @@
" result = index.query(query_em, top_k=k, includeMetadata=True)\n",
" \n",
" return [result['matches'][i]['metadata']['context'] for i in range(k)]"
],
"metadata": {
"id": "8VIZ5_ufbRQ5"
},
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"source": [
"def create_prompt(context,query):\n",
" #Unfinished, Should be generated with the context/contexts we find by doing semantaic search\n",
" pass"
],
"execution_count": null,
"metadata": {
"id": "AoRDzK85aF9E"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"def create_prompt(context,query):\n",
" #Todo: Should be generated with the context/contexts we find by doing semantic search\n",
" pass"
]
},
{
"cell_type": "code",
"source": [
"def generate_answer(prompt):\n",
" #Unfinished, Pass the generated prompt and pass it to gpt-3 to get answers.\n",
" pass"
],
"execution_count": null,
"metadata": {
"id": "IyPNrKW3aeoD"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"def generate_answer(prompt):\n",
" #Todo: Pass the generated prompt to GPT-3 to get answers.\n",
" pass"
]
},
{
"cell_type": "code",
"source": [
"def user_query(query):\n",
" \n",
" pass\n"
],
"execution_count": null,
"metadata": {
"id": "uWM2IcOKarWz"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"def user_query(query):\n",
" #Todo: Put all the pieces together.\n",
" pass\n"
]
},
{
"cell_type": "code",
"source": [
"user_query(\"How can I do this?\")"
],
"execution_count": null,
"metadata": {
"id": "jBds94_gbJ_G"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"user_query(\"How can I do this?\")"
]
}
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}