From ac98999507740d5bf53a0334c017766ad75c321b Mon Sep 17 00:00:00 2001
From: Iyeoluwa Akinrinola <iyeoluwa@connou.app>
Date: Fri, 9 May 2025 15:41:16 +0100
Subject: [PATCH] Initial commit for deployment

---
 .env.example                               |  17 +
 .gitignore                                 | 109 ++++++
 DEPLOYMENT.md                              | 135 ++++++++
 OLLAMA_INTEGRATION.md                      |  87 +++++
 PRODUCTION_READINESS.md                    | 136 ++++++++
 README.md                                  |  61 ++++
 ai_service/.env.example                    |  18 +
 ai_service/.env.production                 |  33 ++
 ai_service/README.md                       |  94 +++++
 ai_service/__init__.py                     |   6 +
 ai_service/ai_service/data/chats.json      |  28 ++
 ai_service/api.py                          | 377 +++++++++++++++++++++
 ai_service/config.py                       |  37 ++
 ai_service/data/chats.json                 |  28 ++
 ai_service/data/document_metadata.json     |   0
 ai_service/deploy.sh                       |  20 ++
 ai_service/embeddings/document_service.py  | 261 ++++++++++++++
 ai_service/embeddings/embedding_service.py | 214 ++++++++++++
 ai_service/models/chat_service.py          | 309 +++++++++++++++++
 ai_service/models/model_parameters.py      | 170 ++++++++++
 ai_service/models/model_service.py         | 243 +++++++++++++
 ai_service/requirements.production.txt     |  28 ++
 ai_service/requirements.txt                |  19 ++
 ai_service/run.py                          |  21 ++
 ai_service_workflow.md                     | 173 ++++++++++
 app/__init__.py                            |  34 ++
 app/api/__init__.py                        |   0
 app/api/api.py                             | 110 ++++++
 app/api/routes.py                          | 100 ++++++
 app/config/__init__.py                     |   0
 app/config/config.py                       |  79 +++++
 app/database/__init__.py                   |   0
 app/database/db.py                         |  36 ++
 app/models/__init__.py                     |   0
 app/models/chat.py                         |  67 ++++
 app/models/document.py                     |  59 ++++
 app/models/user.py                         |  24 ++
 app/services/__init__.py                   |   0
 app/services/chat_service.py               | 227 +++++++++++++
 app/services/chatbot_service.py            | 105 ++++++
 app/services/document_service.py           | 165 +++++++++
 app/services/model_service.py              |  95 ++++++
 app/utils/__init__.py                      |   0
 deploy.sh                                  |  15 +
 deploy_ai_service.sh                       | 102 ++++++
 deploy_production.sh                       |  82 +++++
 remote_deploy.sh                           |  65 ++++
 requirements-deploy.txt                    |  10 +
 requirements.txt                           |  22 ++
 run.py                                     |  14 +
 run_ai_service.py                          |  22 ++
 simple_api.py                              | 144 ++++++++
 test_chat_with_params.py                   |  69 ++++
 test_ollama.py                             |  73 ++++
 54 files changed, 4343 insertions(+)
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 DEPLOYMENT.md
 create mode 100644 OLLAMA_INTEGRATION.md
 create mode 100644 PRODUCTION_READINESS.md
 create mode 100644 README.md
 create mode 100644 ai_service/.env.example
 create mode 100644 ai_service/.env.production
 create mode 100644 ai_service/README.md
 create mode 100644 ai_service/__init__.py
 create mode 100644 ai_service/ai_service/data/chats.json
 create mode 100644 ai_service/api.py
 create mode 100644 ai_service/config.py
 create mode 100644 ai_service/data/chats.json
 create mode 100644 ai_service/data/document_metadata.json
 create mode 100755 ai_service/deploy.sh
 create mode 100644 ai_service/embeddings/document_service.py
 create mode 100644 ai_service/embeddings/embedding_service.py
 create mode 100644 ai_service/models/chat_service.py
 create mode 100644 ai_service/models/model_parameters.py
 create mode 100644 ai_service/models/model_service.py
 create mode 100644 ai_service/requirements.production.txt
 create mode 100644 ai_service/requirements.txt
 create mode 100644 ai_service/run.py
 create mode 100644 ai_service_workflow.md
 create mode 100644 app/__init__.py
 create mode 100644 app/api/__init__.py
 create mode 100644 app/api/api.py
 create mode 100644 app/api/routes.py
 create mode 100644 app/config/__init__.py
 create mode 100644 app/config/config.py
 create mode 100644 app/database/__init__.py
 create mode 100644 app/database/db.py
 create mode 100644 app/models/__init__.py
 create mode 100644 app/models/chat.py
 create mode 100644 app/models/document.py
 create mode 100644 app/models/user.py
 create mode 100644 app/services/__init__.py
 create mode 100644 app/services/chat_service.py
 create mode 100644 app/services/chatbot_service.py
 create mode 100644 app/services/document_service.py
 create mode 100644 app/services/model_service.py
 create mode 100644 app/utils/__init__.py
 create mode 100755 deploy.sh
 create mode 100755 deploy_ai_service.sh
 create mode 100644 deploy_production.sh
 create mode 100755 remote_deploy.sh
 create mode 100644 requirements-deploy.txt
 create mode 100644 requirements.txt
 create mode 100644 run.py
 create mode 100644 run_ai_service.py
 create mode 100644 simple_api.py
 create mode 100644 test_chat_with_params.py
 create mode 100644 test_ollama.py

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..7f119a6
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,17 @@
+# Flask configuration
+FLASK_APP=run.py
+FLASK_ENV=development
+FLASK_CONFIG=development
+SECRET_KEY=your-secret-key-here
+
+# Database configuration
+DATABASE_URL=mysql+pymysql://username:password@localhost/chatbot
+
+# Pinecone configuration
+PINECONE_API_KEY=your-pinecone-api-key
+PINECONE_ENVIRONMENT=your-pinecone-environment
+PINECONE_INDEX_NAME=chatbot-index
+
+# Model configuration
+DEFAULT_MODEL=gpt-3.5-turbo
+OPENAI_API_KEY=your-openai-api-key
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4e06a0b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,109 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# IDE files
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Project specific
+uploads/
+*.db
diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md
new file mode 100644
index 0000000..3f5e300
--- /dev/null
+++ b/DEPLOYMENT.md
@@ -0,0 +1,135 @@
+# Deployment Instructions
+
+This document provides instructions for deploying the chatbot application with Ollama and OpenWebUI integration.
+
+## Prerequisites
+
+- Python 3.8 or higher
+- pip
+- virtualenv or venv
+- Access to OpenWebUI at http://104.225.217.215:8080
+
+## Deployment Steps
+
+1. **Clone the repository**
+
+   ```bash
+   git clone <repository-url>
+   cd <repository-directory>
+   ```
+
+2. **Create and activate a virtual environment**
+
+   ```bash
+   python -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   ```
+
+3. **Install dependencies**
+
+   ```bash
+   pip install -r requirements.txt
+   pip install python-dotenv langchain-text-splitters
+   ```
+
+4. **Create a .env file**
+
+   Copy `ai_service/.env.example` to `ai_service/.env` and update the values:
+
+   ```bash
+   cp ai_service/.env.example ai_service/.env
+   # Edit the .env file with appropriate values
+   ```
+
+   Make sure to include the OpenWebUI configuration:
+
+   ```
+   # OpenWebUI configuration
+   OPENWEBUI_URL=http://104.225.217.215:8080
+   OPENWEBUI_API_KEY=your-openwebui-api-key
+
+   # Ollama configuration
+   OLLAMA_API_URL=http://104.225.217.215:8080/ollama
+   DEFAULT_MODEL=llama3.1
+   ```
+
+5. **Run the deployment script**
+
+   For local deployment:
+   ```bash
+   python -m ai_service.run
+   ```
+
+   For server deployment:
+   ```bash
+   ./ai_service/deploy.sh
+   ```
+
+   This will start the application on port 5251 using uvicorn with nohup.
+
+   For remote deployment from your local machine:
+   ```bash
+   ./remote_deploy.sh 157.157.221.29 user 22 /home/user/ds_zagres_ai
+   ```
+
+6. **Verify the application is running**
+
+   ```bash
+   curl http://localhost:5251/api/health
+   ```
+
+   You should see a response like:
+   ```json
+   {
+     "status": "healthy"
+   }
+   ```
+
+## Managing the Deployed Application
+
+- **View logs**
+
+  ```bash
+  tail -f app.log
+  ```
+
+- **Stop the application**
+
+  ```bash
+  ps aux | grep uvicorn  # Find the process ID (PID)
+  kill <PID>             # Replace <PID> with the actual process ID
+  ```
+
+- **Restart the application**
+
+  ```bash
+  ./deploy.sh
+  ```
+
+## API Endpoints
+
+- `GET /health` - Health check endpoint
+- `POST /chats` - Create a new chat
+- `POST /chats/{chat_id}/messages` - Send a message to the chatbot
+- `GET /chats/{chat_id}` - Get chat history
+
+## Ollama and OpenWebUI Integration
+
+The chatbot now uses Ollama models via OpenWebUI. The following models are available:
+
+- **gemma3**: Google Gemma 3 model
+- **llama3.3**: Meta Llama 3.3 70B model
+- **llama3.1**: Meta Llama 3.1 8B model
+- **mistral**: Mistral AI model
+- **deepseek**: DeepSeek model
+
+### Document Training
+
+To use RAG with your documents:
+
+1. Go to the OpenWebUI interface at http://104.225.217.215:8080/
+2. Navigate to the Knowledge section
+3. Upload your documents
+4. OpenWebUI will automatically process them for RAG
+
+When using the chatbot API, set `use_rag=True` in your chat requests to enable RAG.
diff --git a/OLLAMA_INTEGRATION.md b/OLLAMA_INTEGRATION.md
new file mode 100644
index 0000000..13df107
--- /dev/null
+++ b/OLLAMA_INTEGRATION.md
@@ -0,0 +1,87 @@
+# Ollama and OpenWebUI Integration
+
+This document explains how to use the chatbot with Ollama and OpenWebUI.
+
+## Overview
+
+The chatbot has been updated to use Ollama models via OpenWebUI. The following models are now available:
+
+- **gemma3**: Google Gemma 3 model
+- **llama3.3**: Meta Llama 3.3 70B model
+- **llama3.1**: Meta Llama 3.1 8B model
+- **mistral**: Mistral AI model
+- **deepseek**: DeepSeek model
+
+## OpenWebUI
+
+OpenWebUI is running at: http://104.225.217.215:8080/
+
+### Features
+
+1. **Document Training**: OpenWebUI provides built-in RAG capabilities. When you upload a document through OpenWebUI, it automatically processes it for RAG.
+
+2. **Knowledge Database**: OpenWebUI stores documents in its knowledge database, which can be accessed during chat sessions.
+
+## Using the Integration
+
+### Configuration
+
+1. Update your `.env` file with the following settings:
+
+```
+# OpenWebUI configuration
+OPENWEBUI_URL=http://104.225.217.215:8080
+OPENWEBUI_API_KEY=your-openwebui-api-key
+
+# Ollama configuration
+OLLAMA_API_URL=http://localhost:11434
+DEFAULT_MODEL=llama3.1
+```
+
+### Document Processing
+
+To use RAG with your documents:
+
+1. Go to the OpenWebUI interface at http://104.225.217.215:8080/
+2. Navigate to the Knowledge section
+3. Upload your documents
+4. OpenWebUI will automatically process them for RAG
+
+### Chat with RAG
+
+When using the chatbot API:
+
+1. Set `use_rag=True` in your chat requests to enable RAG
+2. The system will use OpenWebUI's knowledge database to enhance responses
+
+## API Usage
+
+The API endpoints remain the same, but now they use Ollama models via OpenWebUI:
+
+```python
+# Example: Get a response with RAG
+response = chat_service.get_chat_response(
+    chat_id="your-chat-id",
+    message="Tell me about the documents I uploaded",
+    user_id="user123",
+    use_rag=True,
+    model_id="llama3.1"  # Use one of the Ollama models
+)
+```
+
+## Troubleshooting
+
+If you encounter issues:
+
+1. Make sure OpenWebUI is accessible at http://104.225.217.215:8080/
+2. Check that you have the correct API key if authentication is enabled
+3. Verify that the documents are properly uploaded to OpenWebUI's knowledge database
+
+## Direct Usage
+
+For direct usage without the API, you can:
+
+1. Go to http://104.225.217.215:8080/
+2. Select the model you want to use
+3. Upload documents in the Knowledge section
+4. Chat with the model and it will use the knowledge database automatically
diff --git a/PRODUCTION_READINESS.md b/PRODUCTION_READINESS.md
new file mode 100644
index 0000000..5577b93
--- /dev/null
+++ b/PRODUCTION_READINESS.md
@@ -0,0 +1,136 @@
+# Production Readiness Checklist
+
+This document outlines what's currently implemented, what's missing, and what improvements can be made to make the AI service production-ready.
+
+## Current Implementation
+
+### Core Functionality
+- ✅ Document processing and chunking
+- ✅ Embedding generation and storage
+- ✅ Document search
+- ✅ Chat functionality
+- ✅ Model switching
+- ✅ Team chat support
+- ✅ RAG (Retrieval-Augmented Generation)
+- ✅ Customizable model parameters
+
+### API Endpoints
+- ✅ Health check endpoint
+- ✅ Document management endpoints
+- ✅ Model information endpoints
+- ✅ Chat management endpoints
+- ✅ Message sending endpoint
+
+## Missing Components
+
+### Authentication and Authorization
+- ❌ User authentication
+- ❌ API key validation
+- ❌ Role-based access control
+- ❌ Document access permissions
+
+### Database Integration
+- ❌ MySQL database integration (currently using JSON files and SQLite)
+- ❌ Database migration scripts
+- ❌ Connection pooling
+
+### Monitoring and Observability
+- ❌ Structured logging
+- ❌ Metrics collection
+- ❌ Alerting system
+- ❌ Detailed health checks
+
+### Testing
+- ❌ Unit tests
+- ❌ Integration tests
+- ❌ Load tests
+- ❌ CI/CD pipeline
+
+### Documentation
+- ❌ API documentation
+- ❌ Developer guide
+- ❌ Deployment guide
+- ❌ User guide
+
+## Improvement Opportunities
+
+### Performance
+- Implement caching for frequently accessed data
+- Add connection pooling for database connections
+- Optimize embedding generation and search
+- Implement batch processing for document ingestion
+
+### Scalability
+- Add support for distributed deployment
+- Implement horizontal scaling
+- Add load balancing
+- Implement message queues for asynchronous processing
+
+### Security
+- Add input validation and sanitization
+- Implement rate limiting
+- Add CORS configuration
+- Implement secure storage for API keys
+
+### User Experience
+- Add progress tracking for document processing
+- Implement streaming responses for chat
+- Add support for file attachments
+- Implement chat history export
+
+### AI Capabilities
+- Add support for more AI models
+- Implement fine-tuning capabilities
+- Add support for function calling
+- Implement conversation summarization
+
+## Document Storage
+
+Currently, documents are stored in two places:
+
+1. **Document Metadata**: Stored in a JSON file at `ai_service/data/document_metadata.json`
+2. **Document Embeddings**: Stored in Pinecone vector database
+
+For production, you should:
+- Replace the JSON file storage with MySQL database
+- Implement proper document versioning
+- Add document access controls
+- Implement backup and recovery procedures
+
+## API Keys and Configuration
+
+The system is designed to use environment variables for configuration, including API keys. The following keys need to be set:
+
+1. **Pinecone API Key**: For vector storage
+   - Sign up at https://www.pinecone.io/
+   - Set `PINECONE_API_KEY` and `PINECONE_ENVIRONMENT` in `.env`
+
+2. **OpenAI API Key**: For AI model access
+   - Sign up at https://platform.openai.com/
+   - Set `OPENAI_API_KEY` in `.env`
+
+A template file, `ai_service/.env.production`, has been created with placeholders for these keys.
+
+## Deployment
+
+A production deployment script (`deploy_production.sh`) has been created to:
+- Set up the virtual environment
+- Install dependencies
+- Check for API keys
+- Start the service with proper logging
+- Verify the service is running
+
+To deploy:
+1. Copy `.env.production` to `.env` and add your API keys
+2. Run `./deploy_production.sh`
+3. Monitor the service with `tail -f ai_service.log`
+
+## Next Steps
+
+1. Implement authentication and authorization
+2. Set up MySQL database integration
+3. Add comprehensive testing
+4. Implement monitoring and observability
+5. Create detailed documentation
+6. Address security concerns
+7. Optimize performance and scalability
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f634351
--- /dev/null
+++ b/README.md
@@ -0,0 +1,61 @@
+# Chatbot Application
+
+A chatbot application with document training, private/team chat options, and model switching capability.
+
+## Features
+
+- Document training through library page
+- Private chat functionality
+- Team chat functionality (multiple users can see each other's interactions)
+- Model switching capability
+
+## Technology Stack
+
+- **Backend**: Flask with FastAPI
+- **Database**: MySQL
+- **Vector Database**: Pinecone
+- **Embeddings**: Sentence Transformers / OpenAI Embeddings
+- **Chat Models**: Various LLMs (configurable)
+
+## Project Structure
+
+```
+app/
+├── api/            # API endpoints (Flask and FastAPI)
+├── config/         # Configuration settings
+├── database/       # Database connection and utilities
+├── models/         # Database models
+├── services/       # Business logic services
+└── utils/          # Utility functions
+tests/              # Test cases
+```
+
+## Setup Instructions
+
+1. Clone the repository
+2. Create a virtual environment:
+   ```
+   python -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   ```
+3. Install dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+4. Copy `.env.example` to `.env` and update the values
+5. Initialize the database:
+   ```
+   flask db init
+   flask db migrate
+   flask db upgrade
+   ```
+6. Run the application:
+   ```
+   python run.py
+   ```
+
+## API Documentation
+
+Once the application is running, you can access the API documentation at:
+- FastAPI Swagger UI: http://localhost:5000/docs
+- FastAPI ReDoc: http://localhost:5000/redoc
diff --git a/ai_service/.env.example b/ai_service/.env.example
new file mode 100644
index 0000000..3803587
--- /dev/null
+++ b/ai_service/.env.example
@@ -0,0 +1,18 @@
+# API configuration
+API_HOST=0.0.0.0
+API_PORT=5251
+
+# OpenWebUI configuration (replace the API key placeholder with the actual key in your .env file)
+OPENWEBUI_URL=http://104.225.217.215:8080
+OPENWEBUI_API_KEY=your-openwebui-api-key
+
+# Ollama configuration
+OLLAMA_API_URL=http://localhost:11434
+DEFAULT_MODEL=llama3.1
+
+# Local storage
+SQLITE_DB_PATH=ai_service/data/chatbot.db
+
+# Document processing
+CHUNK_SIZE=1000
+CHUNK_OVERLAP=200
diff --git a/ai_service/.env.production b/ai_service/.env.production
new file mode 100644
index 0000000..5d2cafd
--- /dev/null
+++ b/ai_service/.env.production
@@ -0,0 +1,33 @@
+# API configuration
+API_HOST=0.0.0.0
+API_PORT=5251
+
+# Pinecone configuration
+# Sign up at https://www.pinecone.io/ to get your API key
+PINECONE_API_KEY=your-pinecone-api-key-here
+PINECONE_ENVIRONMENT=your-pinecone-environment-here
+PINECONE_INDEX_NAME=chatbot-index
+
+# Model configuration
+# Sign up at https://platform.openai.com/ to get your API key
+DEFAULT_MODEL=gpt-3.5-turbo
+OPENAI_API_KEY=your-openai-api-key-here
+
+# Local storage
+# Path to SQLite database (will be replaced with MySQL in production)
+SQLITE_DB_PATH=ai_service/data/chatbot.db
+
+# Document processing
+# Adjust these values based on your needs
+CHUNK_SIZE=1000
+CHUNK_OVERLAP=200
+
+# Embedding model
+# Options: all-MiniLM-L6-v2 (default), paraphrase-MiniLM-L3-v2 (smaller/faster)
+EMBEDDING_MODEL=all-MiniLM-L6-v2
+
+# Production settings
+# Set to 'production' in production environment
+ENVIRONMENT=production
+LOG_LEVEL=INFO
+ENABLE_MOCK=false
diff --git a/ai_service/README.md b/ai_service/README.md
new file mode 100644
index 0000000..eafe1e5
--- /dev/null
+++ b/ai_service/README.md
@@ -0,0 +1,94 @@
+# AI Service for Chatbot Application
+
+This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality.
+
+## Features
+
+- Document processing and embedding
+- Retrieval-augmented generation (RAG)
+- Chat functionality with model switching
+- Team chat support
+
+## Project Structure
+
+```
+ai_service/
+├── embeddings/       # Embedding and document processing services
+├── models/           # Model and chat services
+├── utils/            # Utility functions
+├── data/             # Data storage
+├── config.py         # Configuration settings
+├── api.py            # FastAPI application
+└── run.py            # Script to run the service
+```
+
+## Setup Instructions
+
+1. Create a virtual environment:
+   ```
+   python -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   ```
+
+2. Install dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+3. Copy `.env.example` to `.env` and update the values:
+   ```
+   cp .env.example .env
+   # Edit the .env file with appropriate values
+   ```
+
+4. Run the service:
+   ```
+   python run.py
+   ```
+
+## API Documentation
+
+Once the service is running, you can access the API documentation at:
+- Swagger UI: http://localhost:5251/docs
+- ReDoc: http://localhost:5251/redoc
+
+## Deployment
+
+To deploy the service:
+
+1. Make the deployment script executable:
+   ```
+   chmod +x deploy.sh
+   ```
+
+2. Run the deployment script:
+   ```
+   ./deploy.sh
+   ```
+
+This will start the service on port 5251 using uvicorn with nohup.
+
+## API Endpoints
+
+### Document Endpoints
+
+- `POST /documents` - Process a document for embedding
+- `GET /documents` - Get all documents
+- `GET /documents/{doc_id}` - Get a document by ID
+- `DELETE /documents/{doc_id}` - Delete a document
+- `POST /documents/search` - Search for documents
+
+### Model Endpoints
+
+- `GET /models` - Get available models
+- `GET /models/{model_id}` - Get information about a model
+
+### Chat Endpoints
+
+- `POST /chats` - Create a new chat
+- `GET /chats/user/{user_id}` - Get all chats for a user
+- `GET /chats/{chat_id}` - Get a chat by ID
+- `POST /chats/{chat_id}/messages` - Send a message to a chat
+- `POST /chats/{chat_id}/members/{user_id}` - Add a user to a team chat
+- `DELETE /chats/{chat_id}/members/{user_id}` - Remove a user from a team chat
+- `DELETE /chats/{chat_id}` - Delete a chat
diff --git a/ai_service/__init__.py b/ai_service/__init__.py
new file mode 100644
index 0000000..2fbe56a
--- /dev/null
+++ b/ai_service/__init__.py
@@ -0,0 +1,6 @@
+"""
+AI service package.
+"""
+
+# Import for easier access
+from ai_service.api import app
diff --git a/ai_service/ai_service/data/chats.json b/ai_service/ai_service/data/chats.json
new file mode 100644
index 0000000..6e17500
--- /dev/null
+++ b/ai_service/ai_service/data/chats.json
@@ -0,0 +1,28 @@
+{
+  "e2b1bdc2-a384-4775-9c14-42b221e5554f": {
+    "id": "e2b1bdc2-a384-4775-9c14-42b221e5554f",
+    "title": "Test Chat",
+    "user_id": "test_user",
+    "model_id": "gpt-3.5-turbo",
+    "is_team_chat": false,
+    "created_at": "2025-05-06T11:50:43.558931",
+    "updated_at": "2025-05-06T11:51:20.982846",
+    "messages": [
+      {
+        "id": "865a1e57-c71f-4bab-a4ba-56d630a38631",
+        "content": "Hello, AI!",
+        "user_id": "test_user",
+        "is_user_message": true,
+        "timestamp": "2025-05-06T11:51:20.051537"
+      },
+      {
+        "id": "eb755b4f-fe55-4bec-b77f-ed20941df360",
+        "content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
+        "user_id": null,
+        "is_user_message": false,
+        "timestamp": "2025-05-06T11:51:20.982829"
+      }
+    ],
+    "team_members": []
+  }
+}
\ No newline at end of file
diff --git a/ai_service/api.py b/ai_service/api.py
new file mode 100644
index 0000000..dd7d589
--- /dev/null
+++ b/ai_service/api.py
@@ -0,0 +1,377 @@
+"""
+FastAPI application for the AI service.
+"""
+
+from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.embeddings.document_service import document_service
+from ai_service.models.model_service import model_service
+from ai_service.models.chat_service import chat_service
+from ai_service.models.model_parameters import ModelParameters
+
+# Create FastAPI app
+app = FastAPI(
+    title="AI Service API",
+    description="API for the AI service",
+    version="1.0.0"
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins
+    allow_credentials=True,  # NOTE(review): wildcard origins together with credentials is very permissive; list explicit origins for production
+    allow_methods=["*"],  # Allow all methods
+    allow_headers=["*"],  # Allow all headers
+)
+
+# Define API models
+class DocumentRequest(BaseModel):
+    """Request model for document processing."""
+    content: str = Field(..., description="Document content")
+    title: str = Field(..., description="Document title")
+    description: Optional[str] = Field(None, description="Document description")
+    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
+
+class DocumentResponse(BaseModel):
+    """Response model for a document; chunk_count defaults to 0 because stored records may omit it."""
+    id: str = Field(..., description="Document ID")
+    title: str = Field(..., description="Document title")
+    description: str = Field(..., description="Document description")
+    chunk_count: int = Field(0, description="Number of chunks")
+    metadata: Dict[str, Any] = Field(..., description="Additional metadata")
+
+class SearchRequest(BaseModel):
+    """Request model for document search."""
+    query: str = Field(..., description="Search query")
+    top_k: int = Field(5, description="Number of results to return")
+
+class SearchResult(BaseModel):
+    """Model for a search result."""
+    id: str = Field(..., description="Result ID")
+    score: float = Field(..., description="Similarity score")
+    metadata: Dict[str, Any] = Field(..., description="Result metadata")
+
+class ModelInfo(BaseModel):
+    """Model for model information."""
+    id: str = Field(..., description="Model ID")
+    name: str = Field(..., description="Model name")
+    description: str = Field(..., description="Model description")
+    provider: str = Field(..., description="Model provider")
+    max_tokens: int = Field(..., description="Maximum tokens")
+    is_default: bool = Field(..., description="Whether this is the default model")
+
+class ChatRequest(BaseModel):
+    """Request model for creating a chat."""
+    user_id: str = Field(..., description="User ID")
+    title: Optional[str] = Field(None, description="Chat title")
+    model_id: Optional[str] = Field(None, description="Model ID")
+    is_team_chat: bool = Field(False, description="Whether this is a team chat")
+
+class MessageRequest(BaseModel):
+    """Request model for sending a message."""
+    message: str = Field(..., description="Message content")
+    user_id: str = Field(..., description="User ID")
+    use_rag: bool = Field(False, description="Whether to use RAG")
+
+    # Model parameters
+    temperature: Optional[float] = Field(None, description="Controls randomness: higher values mean more random completions")
+    max_tokens: Optional[int] = Field(None, description="Maximum number of tokens to generate")
+    top_p: Optional[float] = Field(None, description="Nucleus sampling parameter")
+    frequency_penalty: Optional[float] = Field(None, description="Penalizes repeated tokens")
+    presence_penalty: Optional[float] = Field(None, description="Penalizes repeated topics")
+    stop_sequences: Optional[List[str]] = Field(None, description="Sequences where the API will stop generating")
+    system_prompt: Optional[str] = Field(None, description="System prompt to guide the model's behavior")
+
+    # Additional advanced parameters
+    min_p: Optional[float] = Field(None, description="Minimum probability threshold for token selection")
+    top_k: Optional[int] = Field(None, description="Only sample from the top k tokens")
+    repeat_penalty: Optional[float] = Field(None, description="Penalty for repeating tokens")
+    function_calling: Optional[bool] = Field(None, description="Whether to enable function calling")
+
+class Message(BaseModel):
+    """Model for a message."""
+    id: str = Field(..., description="Message ID")
+    content: str = Field(..., description="Message content")
+    user_id: Optional[str] = Field(None, description="User ID")
+    is_user_message: bool = Field(..., description="Whether this is a user message")
+    timestamp: str = Field(..., description="Message timestamp")
+
+class Chat(BaseModel):
+    """Model for a chat."""
+    id: str = Field(..., description="Chat ID")
+    title: str = Field(..., description="Chat title")
+    user_id: str = Field(..., description="User ID")
+    model_id: str = Field(..., description="Model ID")
+    is_team_chat: bool = Field(..., description="Whether this is a team chat")
+    created_at: str = Field(..., description="Creation timestamp")
+    updated_at: str = Field(..., description="Update timestamp")
+    messages: List[Message] = Field(..., description="Chat messages")
+    team_members: List[str] = Field(..., description="Team members")
+
+# Define API endpoints
+@app.get("/health")
+async def health_check():
+    """
+    Health check endpoint.
+
+    Returns:
+        Health status.
+    """
+    return {"status": "healthy"}
+
+# Document endpoints
+@app.post("/documents", response_model=DocumentResponse)
+async def process_document(request: DocumentRequest):
+    """
+    Process a document for embedding.
+
+    Args:
+        request: Document processing request.
+
+    Returns:
+        Processed document information.
+    """
+    doc_id = document_service.process_document(
+        content=request.content,
+        title=request.title,
+        description=request.description,
+        metadata=request.metadata
+    )
+
+    return document_service.get_document(doc_id)
+
+@app.get("/documents", response_model=List[DocumentResponse])
+async def get_all_documents():
+    """
+    Get all documents.
+
+    Returns:
+        List of document information.
+    """
+    return document_service.get_all_documents()
+
+@app.get("/documents/{doc_id}", response_model=DocumentResponse)
+async def get_document(doc_id: str):
+    """
+    Get a document by ID.
+
+    Args:
+        doc_id: Document ID.
+
+    Returns:
+        Document information.
+    """
+    doc = document_service.get_document(doc_id)
+    if not doc:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+    return doc
+
+@app.delete("/documents/{doc_id}")
+async def delete_document(doc_id: str):
+    """
+    Delete a document.
+
+    Args:
+        doc_id: Document ID.
+
+    Returns:
+        Deletion status.
+    """
+    success = document_service.delete_document(doc_id)
+    if not success:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+    return {"status": "success", "message": "Document deleted"}
+
+@app.post("/documents/search", response_model=List[SearchResult])
+async def search_documents(request: SearchRequest):
+    """
+    Search for documents.
+
+    Args:
+        request: Search request.
+
+    Returns:
+        Search results.
+    """
+    results = document_service.search_documents(
+        query=request.query,
+        top_k=request.top_k
+    )
+
+    return results
+
+# Model endpoints
+@app.get("/models", response_model=List[ModelInfo])
+async def get_available_models():
+    """
+    Get available models.
+
+    Returns:
+        List of model information.
+    """
+    return model_service.get_available_models()
+
+@app.get("/models/{model_id}", response_model=ModelInfo)
+async def get_model_info(model_id: str):
+    """
+    Get information about a model.
+
+    Args:
+        model_id: Model ID.
+
+    Returns:
+        Model information.
+    """
+    model_info = model_service.get_model_info(model_id)
+    if not model_info:
+        raise HTTPException(status_code=404, detail="Model not found")
+
+    return model_info
+
+# Chat endpoints
+@app.post("/chats", response_model=Chat)
+async def create_chat(request: ChatRequest):
+    """
+    Create a new chat.
+
+    Args:
+        request: Chat creation request.
+
+    Returns:
+        Created chat.
+    """
+    chat_id = chat_service.create_chat(
+        user_id=request.user_id,
+        title=request.title,
+        model_id=request.model_id,
+        is_team_chat=request.is_team_chat
+    )
+
+    return chat_service.get_chat(chat_id)
+
+@app.get("/chats/user/{user_id}", response_model=List[Chat])
+async def get_user_chats(user_id: str):
+    """
+    Get all chats for a user.
+
+    Args:
+        user_id: User ID.
+
+    Returns:
+        List of chats.
+    """
+    return chat_service.get_user_chats(user_id)
+
+@app.get("/chats/{chat_id}", response_model=Chat)
+async def get_chat(chat_id: str):
+    """
+    Get a chat by ID.
+
+    Args:
+        chat_id: Chat ID.
+
+    Returns:
+        Chat information.
+    """
+    chat = chat_service.get_chat(chat_id)
+    if not chat:
+        raise HTTPException(status_code=404, detail="Chat not found")
+
+    return chat
+
+@app.post("/chats/{chat_id}/messages", response_model=Message)
+async def send_message(chat_id: str, request: MessageRequest):
+    """
+    Send a message to a chat.
+
+    Args:
+        chat_id: Chat ID.
+        request: Message request with optional model parameters.
+
+    Returns:
+        Bot response message.
+    """
+    try:
+        # Extract model parameters from the request
+        response = chat_service.get_chat_response(
+            chat_id=chat_id,
+            message=request.message,
+            user_id=request.user_id,
+            use_rag=request.use_rag,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens,
+            top_p=request.top_p,
+            frequency_penalty=request.frequency_penalty,
+            presence_penalty=request.presence_penalty,
+            stop_sequences=request.stop_sequences,
+            system_prompt=request.system_prompt,
+            min_p=request.min_p,
+            top_k=request.top_k,
+            repeat_penalty=request.repeat_penalty,
+            function_calling=request.function_calling
+        )
+
+        return response
+
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+@app.post("/chats/{chat_id}/members/{user_id}")
+async def add_team_member(chat_id: str, user_id: str):
+    """
+    Add a user to a team chat.
+
+    Args:
+        chat_id: Chat ID.
+        user_id: User ID.
+
+    Returns:
+        Addition status.
+    """
+    success = chat_service.add_team_member(chat_id, user_id)
+    if not success:
+        raise HTTPException(status_code=400, detail="Failed to add team member")
+
+    return {"status": "success", "message": "Team member added"}
+
+@app.delete("/chats/{chat_id}/members/{user_id}")
+async def remove_team_member(chat_id: str, user_id: str):
+    """
+    Remove a user from a team chat.
+
+    Args:
+        chat_id: Chat ID.
+        user_id: User ID.
+
+    Returns:
+        Removal status.
+    """
+    success = chat_service.remove_team_member(chat_id, user_id)
+    if not success:
+        raise HTTPException(status_code=400, detail="Failed to remove team member")
+
+    return {"status": "success", "message": "Team member removed"}
+
+@app.delete("/chats/{chat_id}")
+async def delete_chat(chat_id: str):
+    """
+    Delete a chat.
+
+    Args:
+        chat_id: Chat ID.
+
+    Returns:
+        Deletion status.
+    """
+    success = chat_service.delete_chat(chat_id)
+    if not success:
+        raise HTTPException(status_code=404, detail="Chat not found")
+
+    return {"status": "success", "message": "Chat deleted"}
diff --git a/ai_service/config.py b/ai_service/config.py
new file mode 100644
index 0000000..f373100
--- /dev/null
+++ b/ai_service/config.py
@@ -0,0 +1,37 @@
+"""
+Configuration settings for the AI service.
+"""
+
+import os
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+class Config:
+    """Base configuration; each setting is read from the environment with a fallback default."""
+
+    # API configuration
+    API_HOST = os.environ.get('API_HOST', '0.0.0.0')
+    API_PORT = int(os.environ.get('API_PORT', 5251))
+
+    # OpenWebUI configuration. NOTE(review): the fallback URL is a hard-coded public IP over plain HTTP -- confirm this is intended.
+    OPENWEBUI_URL = os.environ.get('OPENWEBUI_URL', 'http://104.225.217.215:8080')
+    OPENWEBUI_API_KEY = os.environ.get('OPENWEBUI_API_KEY', '')
+
+    # Ollama configuration
+    OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
+    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1')
+
+    # Local storage (document_service and chat_service keep their JSON files in this path's directory)
+    SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'ai_service/data/chatbot.db')
+
+    # Document processing
+    CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000))
+    CHUNK_OVERLAP = int(os.environ.get('CHUNK_OVERLAP', 200))
+
+    # Embedding model. NOTE(review): embedding_service also looks up PINECONE_API_KEY / PINECONE_ENVIRONMENT / PINECONE_INDEX_NAME, which are not defined here.
+    EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')
+
+
+config = Config()
diff --git a/ai_service/data/chats.json b/ai_service/data/chats.json
new file mode 100644
index 0000000..b50e610
--- /dev/null
+++ b/ai_service/data/chats.json
@@ -0,0 +1,28 @@
+{
+  "48b04d66-3ef4-440b-8508-ced930aa42a9": {
+    "id": "48b04d66-3ef4-440b-8508-ced930aa42a9",
+    "title": "Test Chat",
+    "user_id": "test_user",
+    "model_id": "gpt-3.5-turbo",
+    "is_team_chat": false,
+    "created_at": "2025-05-05T20:29:54.936964",
+    "updated_at": "2025-05-05T20:29:55.394906",
+    "messages": [
+      {
+        "id": "9daafee5-f00c-4493-96ea-96492f97482e",
+        "content": "Tell me about artificial intelligence",
+        "user_id": "test_user",
+        "is_user_message": true,
+        "timestamp": "2025-05-05T20:29:54.971667"
+      },
+      {
+        "id": "0337e0b1-1e18-4d09-8b8b-c5ee295870a0",
+        "content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
+        "user_id": null,
+        "is_user_message": false,
+        "timestamp": "2025-05-05T20:29:55.394891"
+      }
+    ],
+    "team_members": []
+  }
+}
\ No newline at end of file
diff --git a/ai_service/data/document_metadata.json b/ai_service/data/document_metadata.json
new file mode 100644
index 0000000..e69de29
diff --git a/ai_service/deploy.sh b/ai_service/deploy.sh
new file mode 100755
index 0000000..4cc0e7f
--- /dev/null
+++ b/ai_service/deploy.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Create a directory for the AI service logs
+mkdir -p logs
+
+# Activate virtual environment
+source venv/bin/activate
+
+# Export environment variables
+export API_HOST=0.0.0.0
+export API_PORT=5251
+
+# Make sure the Python path includes the current directory (quoted so paths with spaces survive)
+export PYTHONPATH="$PYTHONPATH:$(pwd)"
+
+# Run the application with uvicorn and nohup; stdout and stderr both go to logs/ai_service.log
+nohup uvicorn ai_service.run:app --host "$API_HOST" --port "$API_PORT" > logs/ai_service.log 2>&1 &
+
+echo "AI Service started on port $API_PORT. Check logs/ai_service.log for output."
+echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
diff --git a/ai_service/embeddings/document_service.py b/ai_service/embeddings/document_service.py
new file mode 100644
index 0000000..17b87c4
--- /dev/null
+++ b/ai_service/embeddings/document_service.py
@@ -0,0 +1,261 @@
+"""
+Service for document processing and chunking.
+"""
+
+import os
+import json
+import uuid
+import requests
+import base64
+from typing import List, Dict, Any, Optional
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+from ai_service.config import config
+
+class DocumentService:
+    """Service for document processing and chunking."""
+
+    def __init__(self):
+        """Initialize the text splitter, the OpenWebUI settings and the JSON metadata store."""
+        self.chunk_size = config.CHUNK_SIZE
+        self.chunk_overlap = config.CHUNK_OVERLAP
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=self.chunk_size,
+            chunk_overlap=self.chunk_overlap,
+            length_function=len
+        )
+
+        # OpenWebUI configuration
+        self.openwebui_url = config.OPENWEBUI_URL
+        self.openwebui_api_key = config.OPENWEBUI_API_KEY
+
+        # Ensure data directory exists; a bare filename has an empty dirname, which os.makedirs rejects
+        data_dir = os.path.dirname(config.SQLITE_DB_PATH) or '.'
+        os.makedirs(data_dir, exist_ok=True)
+        # For now, we'll store document metadata in a simple JSON file
+        self.metadata_file = os.path.join(data_dir, 'document_metadata.json')
+        self._load_metadata()
+
+    def _load_metadata(self):
+        """Load document metadata from file; a missing or empty file means no documents."""
+        self.documents = {}
+        # A zero-byte placeholder file is not valid JSON, so treat it like a missing
+        # file instead of reporting a load error on every startup.
+        if os.path.exists(self.metadata_file) and os.path.getsize(self.metadata_file) > 0:
+            try:
+                with open(self.metadata_file, 'r', encoding='utf-8') as f:
+                    self.documents = json.load(f)
+            except Exception as e:
+                print(f"Error loading document metadata: {str(e)}")
+
+    def _save_metadata(self):
+        """Save document metadata to file."""
+        try:
+            with open(self.metadata_file, 'w') as f:
+                json.dump(self.documents, f, indent=2)
+        except Exception as e:
+            print(f"Error saving document metadata: {str(e)}")
+
+    def process_document(self, content: str, title: str,
+                         description: Optional[str] = None,
+                         metadata: Optional[Dict[str, Any]] = None) -> str:
+        """
+        Process a document for embedding.
+
+        Args:
+            content: Document content.
+            title: Document title.
+            description: Optional document description.
+            metadata: Optional additional metadata.
+
+        Returns:
+            Document ID.
+        """
+        # Generate a unique ID for the document
+        doc_id = str(uuid.uuid4())
+
+        # Upload the document to OpenWebUI for RAG processing
+        try:
+            # Prepare headers
+            headers = {"Content-Type": "application/json"}
+            if self.openwebui_api_key:
+                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+            # Prepare the document data
+            document_data = {
+                "filename": f"{title}.txt",
+                "content": base64.b64encode(content.encode('utf-8')).decode('utf-8'),
+                "description": description or title
+            }
+
+            # Upload to OpenWebUI
+            response = requests.post(
+                f"{self.openwebui_url}/api/knowledge/upload",
+                headers=headers,
+                json=document_data,
+                timeout=60
+            )
+
+            response.raise_for_status()
+            result = response.json()
+
+            # Get the OpenWebUI document ID
+            openwebui_doc_id = result.get('id', '')
+
+            # Store document metadata
+            self.documents[doc_id] = {
+                'id': doc_id,
+                'title': title,
+                'description': description or '',
+                'openwebui_id': openwebui_doc_id,
+                'metadata': metadata or {}
+            }
+
+            # Save metadata to file
+            self._save_metadata()
+
+            return doc_id
+
+        except Exception as e:
+            print(f"Error uploading document to OpenWebUI: {str(e)}")
+
+            # Fall back to local processing if OpenWebUI upload fails
+            print("Falling back to local document processing")
+
+            # Split the document into chunks for local reference
+            chunks = self.text_splitter.split_text(content)
+
+            # Store document metadata
+            self.documents[doc_id] = {
+                'id': doc_id,
+                'title': title,
+                'description': description or '',
+                'chunk_count': len(chunks),
+                'openwebui_upload_failed': True,
+                'metadata': metadata or {}
+            }
+
+            # Save metadata to file
+            self._save_metadata()
+
+            return doc_id
+
+    def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get document metadata.
+
+        Args:
+            doc_id: Document ID.
+
+        Returns:
+            Document metadata if found, None otherwise.
+        """
+        return self.documents.get(doc_id)
+
+    def get_all_documents(self) -> List[Dict[str, Any]]:
+        """
+        Get all document metadata, annotated with OpenWebUI details when they can be fetched.
+
+        Returns:
+            List of copies of the stored document metadata.
+        """
+        # Copy each record so the OpenWebUI annotations added below are not written back
+        # into self.documents (and then persisted by the next _save_metadata call).
+        local_documents = [dict(doc) for doc in self.documents.values()]
+
+        # Try to get documents from OpenWebUI as well
+        try:
+            headers = {"Content-Type": "application/json"}
+            if self.openwebui_api_key:
+                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+            # Get documents from OpenWebUI
+            response = requests.get(
+                f"{self.openwebui_url}/api/knowledge",
+                headers=headers,
+                timeout=30
+            )
+
+            if response.status_code == 200:
+                openwebui_docs = response.json()
+
+                # Update the returned copies with OpenWebUI information
+                for doc in local_documents:
+                    if 'openwebui_id' in doc:
+                        for openwebui_doc in openwebui_docs:
+                            if openwebui_doc.get('id') == doc['openwebui_id']:
+                                doc['openwebui_status'] = 'active'
+                                doc['openwebui_info'] = openwebui_doc
+                                break
+
+        except Exception as e:
+            print(f"Error getting documents from OpenWebUI: {str(e)}")
+
+        return local_documents
+
+    def delete_document(self, doc_id: str) -> bool:
+        """
+        Delete a document and its chunks.
+
+        Args:
+            doc_id: Document ID.
+
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        if doc_id not in self.documents:
+            return False
+
+        # Check if document was uploaded to OpenWebUI
+        doc = self.documents[doc_id]
+        openwebui_id = doc.get('openwebui_id')
+
+        if openwebui_id:
+            try:
+                # Prepare headers
+                headers = {"Content-Type": "application/json"}
+                if self.openwebui_api_key:
+                    headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+                # Delete from OpenWebUI
+                response = requests.delete(
+                    f"{self.openwebui_url}/api/knowledge/{openwebui_id}",
+                    headers=headers,
+                    timeout=30
+                )
+
+                if response.status_code != 200:
+                    print(f"Warning: Failed to delete document from OpenWebUI: {response.text}")
+
+            except Exception as e:
+                print(f"Error deleting document from OpenWebUI: {str(e)}")
+
+        # Delete document metadata
+        del self.documents[doc_id]
+
+        # Save metadata to file
+        self._save_metadata()
+
+        return True
+
+    def search_documents(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
+        """
+        Search for documents similar to a query.
+
+        Args:
+            query: Search query.
+            top_k: Number of results to return.
+
+        Returns:
+            List of similar document chunks with their metadata.
+        """
+        # Note: We don't need to implement this method anymore since
+        # RAG is handled directly by OpenWebUI when use_rag=True in the model service
+
+        # Return empty results - this is just a placeholder
+        # The actual RAG functionality is in the model_service.generate_response method
+        return []
+
+
+# Create a singleton instance
+document_service = DocumentService()
diff --git a/ai_service/embeddings/embedding_service.py b/ai_service/embeddings/embedding_service.py
new file mode 100644
index 0000000..a7212b7
--- /dev/null
+++ b/ai_service/embeddings/embedding_service.py
@@ -0,0 +1,214 @@
+"""
+Service for generating and managing embeddings.
+"""
+
+import os
+import random
+import pinecone
+import numpy as np
+from typing import List, Dict, Any, Optional, Union
+from sentence_transformers import SentenceTransformer
+
+from ai_service.config import config
+
+class EmbeddingService:
+    """Service for generating and managing embeddings."""
+
+    def __init__(self, use_mock=True):  # Default to mock implementation
+        """Initialize the embedding service."""
+        self.use_mock = use_mock
+
+        if not self.use_mock:
+            # Use a smaller model for testing
+            self.model_name = "paraphrase-MiniLM-L3-v2"  # Smaller model than the default
+            try:
+                self.model = SentenceTransformer(self.model_name)
+                print(f"Loaded embedding model: {self.model_name}")
+            except Exception as e:
+                print(f"Error loading embedding model: {str(e)}")
+                self.use_mock = True
+                print("Falling back to mock implementation")
+        else:
+            print("Using mock embedding implementation")
+            self.model_name = "mock-model"
+            self.model = None
+
+        self._initialize_pinecone()
+
+    def _initialize_pinecone(self):
+        """Initialize the Pinecone client, leaving self.index as None when it is not configured."""
+        # Config may not define the PINECONE_* settings, so read them without raising AttributeError.
+        api_key = getattr(config, 'PINECONE_API_KEY', None)
+        environment = getattr(config, 'PINECONE_ENVIRONMENT', None)
+        index_name = getattr(config, 'PINECONE_INDEX_NAME', None)
+        if not api_key or not environment or not index_name:
+            print("Warning: Pinecone API key or environment not set. Vector storage will not be available.")
+            self.index = None
+            return
+
+        try:
+            pinecone.init(api_key=api_key, environment=environment)
+            # Check if index exists, create if it doesn't
+            if index_name not in pinecone.list_indexes():
+                pinecone.create_index(
+                    name=index_name,
+                    dimension=self.model.get_sentence_embedding_dimension(),
+                    metric="cosine"
+                )
+
+            self.index = pinecone.Index(index_name)
+            print(f"Connected to Pinecone index: {index_name}")
+        except Exception as e:
+            print(f"Error connecting to Pinecone: {str(e)}")
+            self.index = None
+
+    def generate_embedding(self, text: str) -> List[float]:
+        """
+        Generate an embedding for a text.
+
+        Args:
+            text: Text to embed.
+
+        Returns:
+            Embedding vector.
+        """
+        if self.use_mock:
+            # Generate a mock embedding vector (384 dimensions for consistency)
+            return [random.random() for _ in range(384)]
+
+        embedding = self.model.encode(text)
+        return embedding.tolist()
+
+    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings for multiple texts.
+
+        Args:
+            texts: List of texts to embed.
+
+        Returns:
+            List of embedding vectors.
+        """
+        if self.use_mock:
+            # Generate mock embedding vectors
+            return [[random.random() for _ in range(384)] for _ in texts]
+
+        embeddings = self.model.encode(texts)
+        return embeddings.tolist()
+
+    def store_embeddings(self, ids: List[str], embeddings: List[List[float]],
+                         metadata: Optional[List[Dict[str, Any]]] = None) -> bool:
+        """
+        Store embeddings in Pinecone.
+
+        Args:
+            ids: List of IDs for the embeddings.
+            embeddings: List of embedding vectors.
+            metadata: Optional list of metadata dictionaries.
+
+        Returns:
+            True if storage was successful, False otherwise.
+        """
+        if self.use_mock:
+            print(f"Mock: Stored {len(ids)} embeddings")
+            return True
+
+        if self.index is None:
+            print("Warning: Pinecone index not available. Embeddings not stored.")
+            return False
+
+        if metadata is None:
+            metadata = [{} for _ in ids]
+
+        vectors = [
+            (id, embedding, meta)
+            for id, embedding, meta in zip(ids, embeddings, metadata)
+        ]
+
+        try:
+            self.index.upsert(vectors=vectors)
+            return True
+        except Exception as e:
+            print(f"Error storing embeddings in Pinecone: {str(e)}")
+            return False
+
+    def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
+        """
+        Search for similar embeddings in Pinecone.
+
+        Args:
+            query_embedding: Query embedding vector.
+            top_k: Number of results to return.
+
+        Returns:
+            List of similar items with their metadata.
+        """
+        if self.use_mock:
+            # Generate mock search results
+            print(f"Mock: Searching for similar embeddings (top_k={top_k})")
+            mock_results = []
+            for i in range(min(top_k, 3)):  # Return at most 3 mock results
+                mock_results.append({
+                    'id': f"mock_doc_{i}",
+                    'score': 0.9 - (i * 0.1),  # Decreasing similarity scores
+                    'metadata': {
+                        'document_id': f"mock_doc_{i}",
+                        'chunk_index': i,
+                        'title': f"Mock Document {i}",
+                        'description': f"This is a mock document {i}",
+                        'chunk_text': f"This is the content of mock document {i}..."
+                    }
+                })
+            return mock_results
+
+        if self.index is None:
+            print("Warning: Pinecone index not available. Search not performed.")
+            return []
+
+        try:
+            results = self.index.query(
+                vector=query_embedding,
+                top_k=top_k,
+                include_metadata=True
+            )
+
+            return [
+                {
+                    'id': match['id'],
+                    'score': match['score'],
+                    'metadata': match.get('metadata', {})
+                }
+                for match in results.get('matches', [])
+            ]
+        except Exception as e:
+            print(f"Error searching in Pinecone: {str(e)}")
+            return []
+
+    def delete_embeddings(self, ids: List[str]) -> bool:
+        """
+        Delete embeddings from Pinecone.
+
+        Args:
+            ids: List of IDs to delete.
+
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        if self.use_mock:
+            print(f"Mock: Deleted {len(ids)} embeddings")
+            return True
+
+        if self.index is None:
+            print("Warning: Pinecone index not available. Deletion not performed.")
+            return False
+
+        try:
+            self.index.delete(ids=ids)
+            return True
+        except Exception as e:
+            print(f"Error deleting embeddings from Pinecone: {str(e)}")
+            return False
+
+
+# Create a singleton instance
+embedding_service = EmbeddingService()
diff --git a/ai_service/models/chat_service.py b/ai_service/models/chat_service.py
new file mode 100644
index 0000000..a583c3e
--- /dev/null
+++ b/ai_service/models/chat_service.py
@@ -0,0 +1,309 @@
+"""
+Service for chat functionality.
+"""
+
+import os
+import json
+import uuid
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.models.model_service import model_service
+from ai_service.models.model_parameters import ModelParameters
+
+class ChatService:
+    """Service for chat functionality, persisted to a JSON file."""
+
+    def __init__(self):
+        """Initialize the chat service and load any previously saved chats."""
+        # The chats file lives next to the SQLite database. Fall back to the
+        # current directory for a bare filename, since os.makedirs('') raises.
+        data_dir = os.path.dirname(config.SQLITE_DB_PATH) or '.'
+        os.makedirs(data_dir, exist_ok=True)
+
+        # For now, we'll store chat data in a simple JSON file
+        self.chats_file = os.path.join(data_dir, 'chats.json')
+        self._load_chats()
+
+    def _load_chats(self):
+        """Load chats from file, starting empty if it is missing or unusable."""
+        self.chats = {}
+        if not os.path.exists(self.chats_file):
+            return
+
+        try:
+            with open(self.chats_file, 'r') as f:
+                loaded = json.load(f)
+        except Exception as e:
+            print(f"Error loading chats: {str(e)}")
+            return
+
+        # The other methods index self.chats by chat ID, so only a JSON object is usable
+        if isinstance(loaded, dict):
+            self.chats = loaded
+        else:
+            print("Error loading chats: expected a JSON object")
+
+    def _save_chats(self):
+        """Save chats to file; failures are printed, never raised."""
+        # Write to a sibling temp file and swap it in, so a failure part-way
+        # through serialization does not truncate the existing chats file.
+        tmp_file = f"{self.chats_file}.tmp"
+        try:
+            with open(tmp_file, 'w') as f:
+                json.dump(self.chats, f, indent=2)
+            os.replace(tmp_file, self.chats_file)
+        except Exception as e:
+            print(f"Error saving chats: {str(e)}")
+
+    def create_chat(self, user_id: str, title: Optional[str] = None,
+                   model_id: Optional[str] = None, is_team_chat: bool = False) -> str:
+        """
+        Create a new chat.
+
+        Args:
+            user_id: ID of the user creating the chat.
+            title: Optional title for the chat.
+            model_id: Optional model ID to use for this chat.
+            is_team_chat: Whether this is a team chat.
+
+        Returns:
+            ID of the created chat.
+        """
+        # Generate a unique ID for the chat
+        chat_id = str(uuid.uuid4())
+
+        # One timestamp for both fields so a new chat has created_at == updated_at
+        now = datetime.utcnow().isoformat()
+
+        self.chats[chat_id] = {
+            'id': chat_id,
+            'title': title or f"Chat {len(self.chats) + 1}",
+            'user_id': user_id,
+            'model_id': model_id or config.DEFAULT_MODEL,
+            'is_team_chat': is_team_chat,
+            'created_at': now,
+            'updated_at': now,
+            'messages': [],
+            'team_members': [user_id] if is_team_chat else []
+        }
+
+        self._save_chats()
+
+        return chat_id
+
+    def add_message(self, chat_id: str, content: str, user_id: str,
+                   is_user_message: bool = True) -> Dict[str, Any]:
+        """
+        Add a message to a chat.
+
+        Args:
+            chat_id: ID of the chat.
+            content: Message content.
+            user_id: ID of the user sending the message.
+            is_user_message: Whether this is a user message (vs. bot message).
+
+        Returns:
+            Added message.
+
+        Raises:
+            ValueError: If the chat does not exist.
+        """
+        if chat_id not in self.chats:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+
+        chat = self.chats[chat_id]
+        now = datetime.utcnow().isoformat()
+
+        # Bot messages are stored without a user_id
+        message = {
+            'id': str(uuid.uuid4()),
+            'content': content,
+            'user_id': user_id if is_user_message else None,
+            'is_user_message': is_user_message,
+            'timestamp': now
+        }
+
+        chat['messages'].append(message)
+        chat['updated_at'] = now
+
+        self._save_chats()
+
+        return message
+
+    def get_chat(self, chat_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a chat by ID.
+
+        Args:
+            chat_id: ID of the chat.
+
+        Returns:
+            Chat data if found, None otherwise.
+        """
+        return self.chats.get(chat_id)
+
+    def get_user_chats(self, user_id: str) -> List[Dict[str, Any]]:
+        """
+        Get all chats for a user.
+
+        Args:
+            user_id: ID of the user.
+
+        Returns:
+            The user's private chats plus the team chats they belong to,
+            newest first by updated_at.
+        """
+        user_chats = []
+
+        for chat in self.chats.values():
+            if chat['is_team_chat']:
+                # Team chats are visible to their members
+                if user_id in chat['team_members']:
+                    user_chats.append(chat)
+            elif chat['user_id'] == user_id:
+                # Private chats are visible to their owner only
+                user_chats.append(chat)
+
+        # Sort by updated_at (newest first)
+        user_chats.sort(key=lambda x: x['updated_at'], reverse=True)
+
+        return user_chats
+
+    def add_team_member(self, chat_id: str, user_id: str) -> bool:
+        """
+        Add a user to a team chat.
+
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to add.
+
+        Returns:
+            True if the user is now a member, False if the chat does not
+            exist or is not a team chat.
+        """
+        chat = self.chats.get(chat_id)
+        if chat is None or not chat['is_team_chat']:
+            return False
+
+        if user_id not in chat['team_members']:
+            chat['team_members'].append(user_id)
+            self._save_chats()
+
+        return True
+
+    def remove_team_member(self, chat_id: str, user_id: str) -> bool:
+        """
+        Remove a user from a team chat.
+
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to remove.
+
+        Returns:
+            True if the user is no longer a member, False if the chat does
+            not exist or is not a team chat.
+        """
+        chat = self.chats.get(chat_id)
+        if chat is None or not chat['is_team_chat']:
+            return False
+
+        if user_id in chat['team_members']:
+            chat['team_members'].remove(user_id)
+            self._save_chats()
+
+        return True
+
+    def delete_chat(self, chat_id: str) -> bool:
+        """
+        Delete a chat.
+
+        Args:
+            chat_id: ID of the chat to delete.
+
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        if chat_id not in self.chats:
+            return False
+
+        del self.chats[chat_id]
+        self._save_chats()
+
+        return True
+
+    def _build_context(self, messages: List[Dict[str, Any]]) -> List[Dict[str, str]]:
+        """Convert the last 10 stored messages into role/content dicts."""
+        return [
+            {"role": "user" if m['is_user_message'] else "assistant", "content": m['content']}
+            for m in messages[-10:]
+        ]
+
+    def get_chat_response(self, chat_id: str, message: str, user_id: str,
+                         use_rag: bool = False, temperature: Optional[float] = None,
+                         max_tokens: Optional[int] = None, top_p: Optional[float] = None,
+                         frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
+                         stop_sequences: Optional[List[str]] = None, system_prompt: Optional[str] = None,
+                         min_p: Optional[float] = None, top_k: Optional[int] = None,
+                         repeat_penalty: Optional[float] = None, function_calling: Optional[bool] = None) -> Dict[str, Any]:
+        """
+        Get a response from the chatbot.
+
+        Args:
+            chat_id: ID of the chat.
+            message: User message.
+            user_id: ID of the user sending the message.
+            use_rag: Whether to use RAG (Retrieval Augmented Generation).
+            temperature: Controls randomness in the response.
+            max_tokens: Maximum number of tokens to generate.
+            top_p: Nucleus sampling parameter.
+            frequency_penalty: Penalizes repeated tokens.
+            presence_penalty: Penalizes repeated topics.
+            stop_sequences: Sequences where the API will stop generating.
+            system_prompt: System prompt to guide the model's behavior.
+            min_p: Minimum probability threshold for token selection.
+            top_k: Only sample from the top k tokens.
+            repeat_penalty: Penalty for repeating tokens.
+            function_calling: Whether to enable function calling.
+
+        Returns:
+            Bot response message.
+
+        Raises:
+            ValueError: If the chat does not exist.
+        """
+        if chat_id not in self.chats:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+
+        chat = self.chats[chat_id]
+
+        # Snapshot the history BEFORE storing the new message: the model
+        # service appends `prompt` itself, so including it here would send
+        # the user's message twice.
+        context = self._build_context(chat['messages'])
+
+        # Add user message to chat
+        self.add_message(chat_id, message, user_id, is_user_message=True)
+
+        # Create model parameters
+        model_params = ModelParameters(
+            temperature=temperature, max_tokens=max_tokens, top_p=top_p,
+            frequency_penalty=frequency_penalty, presence_penalty=presence_penalty,
+            stop_sequences=stop_sequences, system_prompt=system_prompt,
+            min_p=min_p, top_k=top_k, repeat_penalty=repeat_penalty,
+            function_calling=function_calling
+        )
+
+        # Get response from model
+        response_text = model_service.generate_response(
+            model_id=chat['model_id'], prompt=message, context=context,
+            use_rag=use_rag, model_params=model_params
+        )
+
+        # Add bot response to chat and return the stored message
+        return self.add_message(chat_id, response_text, user_id, is_user_message=False)
+
+
+# Shared module-level instance; constructing it loads the chats file at import time
+chat_service = ChatService()
diff --git a/ai_service/models/model_parameters.py b/ai_service/models/model_parameters.py
new file mode 100644
index 0000000..cc27724
--- /dev/null
+++ b/ai_service/models/model_parameters.py
@@ -0,0 +1,170 @@
+"""
+Model parameters for AI models.
+"""
+
+from typing import Dict, Any, Optional, List
+from pydantic import BaseModel, Field, validator
+
+
+class ModelParameters(BaseModel):
+    """Validated generation settings passed along with a model request."""
+
+    # Core generation controls
+    temperature: Optional[float] = Field(
+        default=0.7,
+        description="Controls randomness: 0 is deterministic, higher values are more random",
+        ge=0.0,
+        le=2.0
+    )
+
+    max_tokens: Optional[int] = Field(
+        default=1000,
+        description="Maximum number of tokens to generate",
+        gt=0
+    )
+
+    # Token sampling
+    top_p: Optional[float] = Field(
+        default=1.0,
+        description="Nucleus sampling: consider tokens with top_p probability mass",
+        ge=0.0,
+        le=1.0
+    )
+
+    top_k: Optional[int] = Field(
+        default=None,
+        description="Only sample from the top k tokens",
+        gt=0
+    )
+
+    # Repetition penalties
+    frequency_penalty: Optional[float] = Field(
+        default=0.0,
+        description="Penalizes repeated tokens",
+        ge=-2.0,
+        le=2.0
+    )
+
+    presence_penalty: Optional[float] = Field(
+        default=0.0,
+        description="Penalizes repeated topics",
+        ge=-2.0,
+        le=2.0
+    )
+
+    # Less common controls; all unset (None) by default
+    stop_sequences: Optional[List[str]] = Field(
+        default=None,
+        description="Sequences where the API will stop generating"
+    )
+
+    min_p: Optional[float] = Field(
+        default=None,
+        description="Minimum probability threshold for token selection",
+        ge=0.0,
+        le=1.0
+    )
+
+    repeat_penalty: Optional[float] = Field(
+        default=None,
+        description="Penalty for repeating tokens",
+        ge=0.0
+    )
+
+    presence_penalty_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of tokens to consider for presence penalty",
+        gt=0
+    )
+
+    # Prompt that frames the conversation
+    system_prompt: Optional[str] = Field(
+        default=None,
+        description="System prompt to guide the model's behavior"
+    )
+
+    # Function-calling switch
+    function_calling: Optional[bool] = Field(
+        default=None,
+        description="Whether to enable function calling"
+    )
+
+    # Free-form pass-through for model-specific settings
+    extra_params: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Additional model-specific parameters"
+    )
+
+    @validator('temperature', 'top_p', 'frequency_penalty', 'presence_penalty', pre=True)
+    def validate_float_params(cls, v):
+        """Coerce raw float-field input to float; None and bools pass through."""
+        # Avoid converting bool to float
+        if v is None or isinstance(v, bool):
+            return v
+        return float(v)
+
+    @validator('max_tokens', 'top_k', pre=True)
+    def validate_int_params(cls, v):
+        """Coerce raw int-field input to int; None and bools pass through."""
+        # Avoid converting bool to int
+        if v is None or isinstance(v, bool):
+            return v
+        return int(v)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Build a plain dict of the parameters that are set.
+
+        Fields whose value is None are left out. The entries of
+        extra_params are merged in last, so they take precedence over a
+        regular field with the same key.
+
+        Returns:
+            Dictionary of parameters.
+        """
+        own_fields = {
+            name: val
+            for name, val in self.dict().items()
+            if name != 'extra_params' and val is not None
+        }
+
+        extras = self.extra_params
+        if extras:
+            own_fields.update(extras)
+
+        return own_fields
+
+    def for_provider(self, provider: str) -> Dict[str, Any]:
+        """
+        Build the parameter dict adjusted for one provider.
+
+        - 'openai': 'stop_sequences' is renamed to 'stop'.
+        - 'ollama': only temperature, top_p, top_k, max_tokens and
+          stop_sequences are kept, and 'stop_sequences' is renamed to 'stop'.
+        - 'anthropic': temperature is capped at 1.0.
+        - any other provider (including 'cohere'): same as to_dict().
+
+        Args:
+            provider: Provider name (e.g., 'openai', 'ollama', 'anthropic').
+
+        Returns:
+            Dictionary of parameters formatted for the provider.
+        """
+        params = self.to_dict()
+
+        if provider == 'ollama':
+            # Keep only the parameters on the allow-list
+            supported = ('temperature', 'top_p', 'top_k', 'max_tokens', 'stop_sequences')
+            params = {k: v for k, v in params.items() if k in supported}
+
+        if provider in ('openai', 'ollama') and 'stop_sequences' in params:
+            # Rename stop_sequences to stop if present
+            params['stop'] = params.pop('stop_sequences')
+
+        if provider == 'anthropic' and 'temperature' in params:
+            # Anthropic's temperature is typically 0-1
+            params['temperature'] = min(params['temperature'], 1.0)
+
+        # Add more provider-specific conversions as needed
+
+        return params
diff --git a/ai_service/models/model_service.py b/ai_service/models/model_service.py
new file mode 100644
index 0000000..638feee
--- /dev/null
+++ b/ai_service/models/model_service.py
@@ -0,0 +1,243 @@
+"""
+Service for model management and interaction.
+"""
+
+import os
+import json
+import requests
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.embeddings.document_service import document_service
+from ai_service.models.model_parameters import ModelParameters
+
+class ModelService:
+    """Service for model management and interaction."""
+
+    # Available models
+    AVAILABLE_MODELS = {
+        'gemma3': {
+            'name': 'Gemma 3',
+            'description': 'Google Gemma 3 model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.3': {
+            'name': 'Llama 3 (70B)',
+            'description': 'Meta Llama 3 70B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.1': {
+            'name': 'Llama 3 (8B)',
+            'description': 'Meta Llama 3 8B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'mistral': {
+            'name': 'Mistral',
+            'description': 'Mistral AI model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'deepseek': {
+            'name': 'DeepSeek',
+            'description': 'DeepSeek model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        }
+    }
+
+    # ModelParameters key -> key inside Ollama's "options" object
+    OLLAMA_OPTION_NAMES = {
+        'temperature': 'temperature',
+        'top_p': 'top_p',
+        'top_k': 'top_k',
+        'max_tokens': 'num_predict'
+    }
+
+    def __init__(self):
+        """Initialize the model service."""
+        self.default_model = config.DEFAULT_MODEL
+        self.ollama_api_url = config.OLLAMA_API_URL
+        self.openwebui_url = config.OPENWEBUI_URL
+        self.openwebui_api_key = config.OPENWEBUI_API_KEY
+
+    def get_available_models(self) -> List[Dict[str, Any]]:
+        """
+        Get a list of available models.
+
+        Returns:
+            List of model information dictionaries.
+        """
+        return [self.get_model_info(model_id) for model_id in self.AVAILABLE_MODELS]
+
+    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get information about a specific model.
+
+        Args:
+            model_id: ID of the model.
+
+        Returns:
+            Model information dictionary if found, None otherwise.
+        """
+        if model_id not in self.AVAILABLE_MODELS:
+            return None
+
+        return {
+            'id': model_id,
+            'is_default': model_id == self.default_model,
+            **self.AVAILABLE_MODELS[model_id]
+        }
+
+    def _build_ollama_request(self, model_id: str, messages: List[Dict[str, str]],
+                              model_params: Optional[ModelParameters] = None) -> Dict[str, Any]:
+        """
+        Build the JSON body for Ollama's /api/chat endpoint.
+
+        Only the parameters listed in OLLAMA_OPTION_NAMES are forwarded.
+        """
+        request_json = {
+            "model": model_id,
+            "messages": messages,
+            "stream": False
+        }
+
+        if model_params:
+            params = model_params.to_dict()
+            # Ollama reads sampling settings from "options", not the top level,
+            # and calls the generation limit "num_predict".
+            options = {
+                ollama_key: params[key]
+                for key, ollama_key in self.OLLAMA_OPTION_NAMES.items()
+                if key in params
+            }
+            if options:
+                request_json["options"] = options
+
+        return request_json
+
+    @staticmethod
+    def _extract_content(result: Any) -> Optional[str]:
+        """
+        Return the reply text from a chat API response, or None if absent.
+        """
+        if not isinstance(result, dict):
+            return None
+
+        # OpenAI-compatible shape: {"choices": [{"message": {"content": ...}}]}
+        choices = result.get('choices')
+        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
+            message = choices[0].get('message')
+            if isinstance(message, dict) and 'content' in message:
+                return message['content']
+
+        # Ollama /api/chat shape: {"message": {"content": ...}}
+        message = result.get('message')
+        if isinstance(message, dict) and 'content' in message:
+            return message['content']
+
+        # Ollama /api/generate shape: {"response": ...}
+        if 'response' in result:
+            return result['response']
+
+        return None
+
+    def generate_response(self, model_id: str, prompt: str,
+                         context: Optional[List[Dict[str, str]]] = None,
+                         use_rag: bool = False,
+                         model_params: Optional[ModelParameters] = None) -> str:
+        """
+        Generate a response from the model.
+
+        Args:
+            model_id: ID of the model to use.
+            prompt: User prompt.
+            context: Optional conversation context.
+            use_rag: Whether to use RAG (Retrieval Augmented Generation).
+            model_params: Optional model parameters.
+
+        Returns:
+            Generated response. Failures are not raised: a string starting
+            with "Error" is returned instead.
+        """
+        if model_id not in self.AVAILABLE_MODELS:
+            model_id = self.default_model
+
+        # Use custom system prompt if provided, otherwise use default
+        system_content = "You are a helpful assistant."
+        if model_params and model_params.system_prompt:
+            system_content = model_params.system_prompt
+
+        # System prompt, then prior conversation, then the new user prompt
+        messages = [{"role": "system", "content": system_content}]
+        if context:
+            messages.extend(context)
+        messages.append({"role": "user", "content": prompt})
+
+        # If RAG is enabled, go through OpenWebUI instead of Ollama directly
+        if use_rag:
+            try:
+                openwebui_request = {
+                    "model": model_id,
+                    "messages": messages,
+                    # NOTE(review): "use_knowledge" is kept from the original
+                    # request; confirm OpenWebUI honours it for retrieval.
+                    "use_knowledge": True,
+                    "stream": False
+                }
+
+                if model_params:
+                    params = model_params.to_dict()
+                    for key in ('temperature', 'max_tokens', 'top_p'):
+                        if key in params:
+                            openwebui_request[key] = params[key]
+
+                headers = {"Content-Type": "application/json"}
+                if self.openwebui_api_key:
+                    headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+                response = requests.post(
+                    f"{self.openwebui_url}/api/chat/completions",
+                    headers=headers,
+                    json=openwebui_request,
+                    timeout=60  # Longer timeout for RAG
+                )
+                response.raise_for_status()
+
+                content = self._extract_content(response.json())
+                if content is not None:
+                    return content
+                return "Error: Unexpected response format from OpenWebUI"
+
+            except Exception as e:
+                print(f"Error calling OpenWebUI API: {str(e)}")
+                # Fall back to direct Ollama call without RAG
+                print("Falling back to direct Ollama call without RAG")
+
+        # Make the API call to Ollama. /api/chat is the endpoint that takes a
+        # "messages" list; /api/generate expects a single "prompt" string, so
+        # a messages payload posted there is not used as the conversation.
+        try:
+            response = requests.post(
+                f"{self.ollama_api_url}/api/chat",
+                headers={"Content-Type": "application/json"},
+                json=self._build_ollama_request(model_id, messages, model_params),
+                timeout=30
+            )
+            response.raise_for_status()
+
+            content = self._extract_content(response.json())
+            if content is not None:
+                return content
+            return "Error: Unexpected response format from Ollama"
+
+        except Exception as e:
+            print(f"Error calling Ollama API: {str(e)}")
+            return f"Error generating response: {str(e)}"
+
+
+# Module-level shared ModelService instance, constructed at import time
+model_service = ModelService()
diff --git a/ai_service/requirements.production.txt b/ai_service/requirements.production.txt
new file mode 100644
index 0000000..2007d95
--- /dev/null
+++ b/ai_service/requirements.production.txt
@@ -0,0 +1,28 @@
+# Core dependencies with fixed versions for stability
+fastapi==0.103.1
+uvicorn[standard]==0.23.2
+pydantic==2.3.0
+python-dotenv==1.0.0
+
+# AI/ML dependencies
+pinecone-client==2.2.2
+langchain==0.0.267
+sentence-transformers==2.2.2
+numpy==1.26.4
+openai==1.3.0
+
+# For local storage (will be replaced with MySQL in production)
+sqlalchemy==2.0.20
+pymysql==1.1.0
+cryptography==41.0.3  # Required for PyMySQL
+
+# Utilities
+tqdm==4.67.1
+requests==2.32.3
+tenacity==8.5.0  # For retrying API calls
+
+# Production dependencies
+gunicorn==21.2.0  # Production process manager (run with uvicorn workers; FastAPI is ASGI, not WSGI)
+python-json-logger==2.0.7  # Structured logging
+prometheus-client==0.17.1  # Metrics
+sentry-sdk==1.39.1  # Error tracking
diff --git a/ai_service/requirements.txt b/ai_service/requirements.txt
new file mode 100644
index 0000000..752109b
--- /dev/null
+++ b/ai_service/requirements.txt
@@ -0,0 +1,19 @@
+# Core dependencies
+fastapi==0.103.1
+uvicorn==0.23.2
+pydantic==2.3.0
+python-dotenv==1.0.0
+
+# AI/ML dependencies
+pinecone-client==2.2.2
+langchain==0.0.267
+sentence-transformers==2.2.2
+numpy==1.26.4
+
+# For local storage
+sqlalchemy==2.0.20
+# sqlite3 ships with Python's standard library, so it must not be listed as a pip requirement
+
+# Utilities
+tqdm==4.67.1
+requests==2.32.3
diff --git a/ai_service/run.py b/ai_service/run.py
new file mode 100644
index 0000000..c363a4f
--- /dev/null
+++ b/ai_service/run.py
@@ -0,0 +1,21 @@
+"""
+Script to run the AI service.
+"""
+
+import uvicorn
+import os
+import sys
+
+# Add the parent directory to the path so we can import ai_service
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from ai_service.config import config
+
+if __name__ == "__main__":
+    # Read the bind address once so the banner and the server use the same values
+    host = config.API_HOST
+    port = config.API_PORT
+    print(f"Starting AI service on {host}:{port}")
+
+    # The app is passed as an import string, and auto-reload is switched on
+    uvicorn.run("ai_service.api:app", host=host, port=port, reload=True)
diff --git a/ai_service_workflow.md b/ai_service_workflow.md
new file mode 100644
index 0000000..81ec340
--- /dev/null
+++ b/ai_service_workflow.md
@@ -0,0 +1,173 @@
+# AI Service Workflow and Architecture
+
+## Overview
+
+The AI Service is a modular, API-driven system that provides document processing, embedding, and chat functionality with multiple AI models. It's designed to support a chatbot application with document training, private/team chat options, and model switching capabilities.
+
+## System Architecture
+
+```
+┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
+│                 │     │                 │     │                 │
+│  Client Apps    │────▶│  AI Service API │────▶│  Vector Store   │
+│                 │     │                 │     │   (Pinecone)    │
+└─────────────────┘     └────────┬────────┘     └─────────────────┘
+                                 │
+                                 ▼
+                        ┌─────────────────┐     ┌─────────────────┐
+                        │                 │     │                 │
+                        │   AI Models     │────▶│  Local Storage  │
+                        │                 │     │                 │
+                        └─────────────────┘     └─────────────────┘
+```
+
+## Core Components
+
+1. **Document Service**: Processes documents, splits them into chunks, and stores embeddings
+2. **Embedding Service**: Generates vector embeddings for text using sentence transformers
+3. **Model Service**: Manages different AI models and generates responses
+4. **Chat Service**: Handles chat creation, message history, and team chat functionality
+
+## API Endpoints Workflow
+
+### Health Check
+
+- **Endpoint**: `GET /health`
+- **Purpose**: Simple health check to verify the service is running
+- **Response**: `{"status": "healthy"}`
+
+### Document Management Workflow
+
+1. **Process Document**
+   - **Endpoint**: `POST /documents`
+   - **Purpose**: Process a document for embedding
+   - **Workflow**:
+     - Client submits document content, title, and optional metadata
+     - Document is split into chunks
+     - Embeddings are generated for each chunk
+     - Embeddings are stored in Pinecone
+     - Document metadata is stored locally
+   - **Response**: Document metadata including ID and chunk count
+
+2. **Get All Documents**
+   - **Endpoint**: `GET /documents`
+   - **Purpose**: Retrieve all processed documents
+   - **Response**: List of document metadata
+
+3. **Get Document by ID**
+   - **Endpoint**: `GET /documents/{doc_id}`
+   - **Purpose**: Retrieve a specific document's metadata
+   - **Response**: Document metadata
+
+4. **Delete Document**
+   - **Endpoint**: `DELETE /documents/{doc_id}`
+   - **Purpose**: Remove a document and its embeddings
+   - **Workflow**:
+     - Document chunks are deleted from Pinecone
+     - Document metadata is removed from local storage
+   - **Response**: Success status
+
+5. **Search Documents**
+   - **Endpoint**: `POST /documents/search`
+   - **Purpose**: Semantic search across document embeddings
+   - **Workflow**:
+     - Query text is converted to an embedding
+     - Similar embeddings are found in Pinecone
+     - Results are returned with metadata and similarity scores
+   - **Response**: List of search results with metadata
+
+### Model Management Workflow
+
+1. **Get Available Models**
+   - **Endpoint**: `GET /models`
+   - **Purpose**: List all available AI models
+   - **Response**: List of model information (ID, name, description, etc.)
+
+2. **Get Model Information**
+   - **Endpoint**: `GET /models/{model_id}`
+   - **Purpose**: Get details about a specific model
+   - **Response**: Model information
+
+### Chat Workflow
+
+1. **Create Chat**
+   - **Endpoint**: `POST /chats`
+   - **Purpose**: Create a new chat session
+   - **Workflow**:
+     - Client provides user ID, optional title, and model ID
+     - System generates a unique chat ID
+     - Chat metadata is stored locally
+   - **Response**: Created chat information
+
+2. **Get User Chats**
+   - **Endpoint**: `GET /chats/user/{user_id}`
+   - **Purpose**: Get all chats for a specific user
+   - **Response**: List of chat information
+
+3. **Get Chat by ID**
+   - **Endpoint**: `GET /chats/{chat_id}`
+   - **Purpose**: Get a specific chat's information and messages
+   - **Response**: Chat information including message history
+
+4. **Send Message**
+   - **Endpoint**: `POST /chats/{chat_id}/messages`
+   - **Purpose**: Send a message and get AI response
+   - **Workflow**:
+     - Client sends message with user ID and optional model parameters
+     - User message is added to chat history
+     - If RAG is enabled, relevant documents are retrieved
+     - AI model generates a response based on chat history and context
+     - Bot response is added to chat history
+   - **Response**: Bot response message
+
+5. **Team Chat Management**
+   - **Add Team Member**: `POST /chats/{chat_id}/members/{user_id}`
+   - **Remove Team Member**: `DELETE /chats/{chat_id}/members/{user_id}`
+   - **Purpose**: Manage team chat participants
+   - **Response**: Success status
+
+6. **Delete Chat**
+   - **Endpoint**: `DELETE /chats/{chat_id}`
+   - **Purpose**: Remove a chat and its messages
+   - **Response**: Success status
+
+## Retrieval-Augmented Generation (RAG) Workflow
+
+When RAG is enabled in a chat message request:
+
+1. User message is processed
+2. Message is converted to an embedding
+3. Similar document chunks are retrieved from Pinecone
+4. Retrieved chunks are added as context to the prompt
+5. AI model generates a response using both the chat history and document context
+6. Response is returned to the user
+
+## Model Parameters
+
+The API supports customizing AI model behavior through parameters:
+
+- `temperature`: Controls randomness (0.0-2.0)
+- `max_tokens`: Maximum response length
+- `top_p`: Nucleus sampling parameter (0.0-1.0)
+- `frequency_penalty`: Penalizes repeated tokens (-2.0-2.0)
+- `presence_penalty`: Penalizes repeated topics (-2.0-2.0)
+- `stop_sequences`: Sequences where generation stops
+- `system_prompt`: Custom system prompt to guide the model
+
+## Deployment
+
+The service is deployed using uvicorn:
+
+```bash
+nohup uvicorn ai_service.api:app --host 0.0.0.0 --port 5251 &
+```
+
+## Example Usage Flow
+
+1. Process documents for knowledge base
+2. Create a new chat session
+3. Send messages with or without RAG
+4. Optionally add team members for collaborative chats
+5. Switch models as needed for different capabilities
+
+This architecture provides a flexible, scalable foundation for building AI-powered chat applications with document training capabilities.
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..3ee7c4e
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1,34 @@
+"""
+Main application package for the chatbot application.
+"""
+
+from flask import Flask
+
+from app.config.config import Config
+
+def create_app(config_class=Config):
+    """
+    Create and configure the Flask application.
+
+    Args:
+        config_class: Configuration class whose attributes are loaded via config.from_object().
+
+    Returns:
+        Flask application instance with the routes blueprint registered and the database bound.
+    """
+    # Initialize Flask app and load settings from the config class
+    flask_app = Flask(__name__)
+    flask_app.config.from_object(config_class)
+
+    # Register Flask routes (blueprint `bp` from app/api/routes.py)
+    from app.api import routes as flask_routes
+    flask_app.register_blueprint(flask_routes.bp)
+
+    # For now, we'll use only Flask routes and disable FastAPI integration
+    # until we resolve the integration issues
+
+    # Initialize database: `db` here is the app.database.db *module*; its init_app() binds SQLAlchemy to the app
+    from app.database import db
+    db.init_app(flask_app)
+
+    return flask_app
\ No newline at end of file
diff --git a/app/api/__init__.py b/app/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/api.py b/app/api/api.py
new file mode 100644
index 0000000..ffae7df
--- /dev/null
+++ b/app/api/api.py
@@ -0,0 +1,110 @@
+"""
+FastAPI routes for the application.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException, status
+from pydantic import BaseModel
+from typing import List, Dict, Any, Optional
+
+from app.services.chatbot_service import chatbot_service
+
+router = APIRouter()
+
+class MessageRequest(BaseModel):
+    """Request model for sending a message."""
+    message: str
+    user_id: str = "default_user"
+
+class MessageResponse(BaseModel):
+    """Response model for a message."""
+    content: str
+    is_user: bool
+    timestamp: str
+
+class ChatResponse(BaseModel):
+    """Response model for a chat."""
+    chat_id: int
+    messages: List[MessageResponse]
+
+@router.get("/health")
+async def health_check():
+    """
+    Health check endpoint.
+
+    Returns:
+        JSON response with health status.
+    """
+    return {"status": "healthy"}
+
+@router.post("/chat", response_model=ChatResponse)
+async def create_chat(user_id: str = "default_user"):
+    """
+    Create a new chat.
+
+    Args:
+        user_id: ID of the user creating the chat.
+
+    Returns:
+        Created chat.
+    """
+    chat_id = chatbot_service.create_chat(user_id)
+
+    return {
+        "chat_id": chat_id,
+        "messages": []
+    }
+
+@router.post("/chat/{chat_id}/message", response_model=MessageResponse)
+async def send_message(chat_id: int, request: MessageRequest):
+    """
+    Send a message to the chatbot.
+
+    Args:
+        chat_id: ID of the chat.
+        request: Message request.
+
+    Returns:
+        Bot response.
+    """
+    try:
+        response = chatbot_service.get_response(chat_id, request.message)
+
+        # Get the last message (bot response)
+        messages = chatbot_service.get_chat_messages(chat_id)
+        last_message = messages[-1]
+
+        return last_message
+
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+@router.get("/chat/{chat_id}", response_model=ChatResponse)
+async def get_chat(chat_id: int):
+    """
+    Get a chat by ID.
+
+    Args:
+        chat_id: ID of the chat.
+
+    Returns:
+        Chat with messages.
+    """
+    try:
+        messages = chatbot_service.get_chat_messages(chat_id)
+
+        return {
+            "chat_id": chat_id,
+            "messages": messages
+        }
+
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+def init_app(app):
+    """
+    Initialize FastAPI application with routes.
+
+    Args:
+        app: FastAPI application instance.
+    """
+    app.include_router(router, prefix="/api")
diff --git a/app/api/routes.py b/app/api/routes.py
new file mode 100644
index 0000000..2f50bfc
--- /dev/null
+++ b/app/api/routes.py
@@ -0,0 +1,100 @@
+"""
+Flask routes for the application.
+"""
+
+from flask import Blueprint, jsonify, request, abort
+
+from app.services.chatbot_service import chatbot_service
+
+bp = Blueprint('main', __name__)
+
+@bp.route('/')
+def index():
+    """
+    Root endpoint.
+
+    Returns:
+        JSON response with application information.
+    """
+    return jsonify({
+        'name': 'Chatbot Application',
+        'version': '1.0.0',
+        'status': 'running'
+    })
+
+@bp.route('/api/health')
+def health_check():
+    """
+    Health check endpoint.
+
+    Returns:
+        JSON response with health status.
+    """
+    return jsonify({
+        'status': 'healthy'
+    })
+
+@bp.route('/api/chat', methods=['POST'])
+def create_chat():
+    """
+    Create a new chat; a JSON body with 'user_id' is optional ('default_user' is used otherwise).
+
+    Returns:
+        JSON response with chat ID and an empty message list.
+    """
+    payload = request.get_json(silent=True)  # None (not an error) when the body is absent or not JSON
+    user_id = payload.get('user_id', 'default_user') if isinstance(payload, dict) else 'default_user'
+    chat_id = chatbot_service.create_chat(user_id)
+    return jsonify({
+        'chat_id': chat_id,
+        'messages': []
+    })
+
+@bp.route('/api/chat/<int:chat_id>/message', methods=['POST'])
+def send_message(chat_id):
+    """
+    Send a message to the chatbot.
+
+    Args:
+        chat_id: ID of the chat.
+
+    Returns:
+        JSON response with bot response.
+    """
+    if not request.json or 'message' not in request.json:
+        abort(400, description="Message is required")
+
+    try:
+        message = request.json['message']
+        response = chatbot_service.get_response(chat_id, message)
+
+        # Get the last message (bot response)
+        messages = chatbot_service.get_chat_messages(chat_id)
+        last_message = messages[-1]
+
+        return jsonify(last_message)
+
+    except ValueError as e:
+        abort(404, description=str(e))
+
+@bp.route('/api/chat/<int:chat_id>', methods=['GET'])
+def get_chat(chat_id):
+    """
+    Get a chat by ID.
+
+    Args:
+        chat_id: ID of the chat.
+
+    Returns:
+        JSON response with chat messages.
+    """
+    try:
+        messages = chatbot_service.get_chat_messages(chat_id)
+
+        return jsonify({
+            'chat_id': chat_id,
+            'messages': messages
+        })
+
+    except ValueError as e:
+        abort(404, description=str(e))
diff --git a/app/config/__init__.py b/app/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/config/config.py b/app/config/config.py
new file mode 100644
index 0000000..08856aa
--- /dev/null
+++ b/app/config/config.py
@@ -0,0 +1,79 @@
+"""
+Configuration settings for the application.
+"""
+
+import os
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+class Config:
+    """Base configuration."""
+
+    # Flask configuration
+    SECRET_KEY = os.environ.get('SECRET_KEY', 'dev-key-for-development-only')
+    DEBUG = False
+    TESTING = False
+
+    # Database configuration
+    SQLALCHEMY_DATABASE_URI = os.environ.get(
+        'DATABASE_URL',
+        'sqlite:///chatbot.db'
+    )
+    SQLALCHEMY_TRACK_MODIFICATIONS = False
+    INITIALIZE_DATABASE = os.environ.get('INITIALIZE_DATABASE', 'False').lower() == 'true'
+
+    # Pinecone configuration
+    PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
+    PINECONE_ENVIRONMENT = os.environ.get('PINECONE_ENVIRONMENT', '')
+    PINECONE_INDEX_NAME = os.environ.get('PINECONE_INDEX_NAME', 'chatbot-index')
+
+    # Model configuration
+    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'gpt-3.5-turbo')
+    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
+
+
+class DevelopmentConfig(Config):
+    """Development configuration."""
+
+    DEBUG = True
+
+
+class TestingConfig(Config):
+    """Testing configuration."""
+
+    TESTING = True
+    SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'
+
+
+class ProductionConfig(Config):
+    """Production configuration; inherits all settings from Config and adds a startup check."""
+
+    # NOTE(review): create_app() in app/__init__.py does not call init_app(); confirm a caller invokes it
+    @classmethod
+    def init_app(cls, app):
+        """Raise RuntimeError if any required environment variable is unset or empty (`app` is unused)."""
+        # Check for required environment variables
+        required_vars = [
+            'SECRET_KEY',
+            'DATABASE_URL',
+            'PINECONE_API_KEY',
+            'PINECONE_ENVIRONMENT',
+            'OPENAI_API_KEY'
+        ]
+
+        missing_vars = [var for var in required_vars if not os.environ.get(var)]
+        if missing_vars:
+            raise RuntimeError(
+                f"Missing required environment variables: {', '.join(missing_vars)}"
+            )
+
+
+# Configuration dictionary
+config = {
+    'development': DevelopmentConfig,
+    'testing': TestingConfig,
+    'production': ProductionConfig,
+    'default': DevelopmentConfig
+}
diff --git a/app/database/__init__.py b/app/database/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/database/db.py b/app/database/db.py
new file mode 100644
index 0000000..e32dc49
--- /dev/null
+++ b/app/database/db.py
@@ -0,0 +1,36 @@
+"""
+Database module for the application.
+"""
+
+from flask_sqlalchemy import SQLAlchemy
+from sqlalchemy import MetaData
+
+# Define naming convention for constraints
+convention = {
+    "ix": 'ix_%(column_0_label)s',
+    "uq": "uq_%(table_name)s_%(column_0_name)s",
+    "ck": "ck_%(table_name)s_%(constraint_name)s",
+    "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
+    "pk": "pk_%(table_name)s"
+}
+
+# Create SQLAlchemy instance with naming convention
+db = SQLAlchemy(metadata=MetaData(naming_convention=convention))
+
+def init_app(app):
+    """
+    Initialize the database with the Flask application.
+
+    Args:
+        app: Flask application instance.
+    """
+    db.init_app(app)
+
+    # Only initialize database if configured to do so
+    if app.config.get('INITIALIZE_DATABASE', False):
+        # Import models to ensure they are registered with SQLAlchemy
+        from app.models import user, chat, document
+
+        # Create tables if they don't exist
+        with app.app_context():
+            db.create_all()
diff --git a/app/models/__init__.py b/app/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/models/chat.py b/app/models/chat.py
new file mode 100644
index 0000000..f2dd303
--- /dev/null
+++ b/app/models/chat.py
@@ -0,0 +1,67 @@
+"""
+Chat models for the application.
+"""
+
+from datetime import datetime
+from app.database.db import db
+
+class Chat(db.Model):
+    """Chat model representing a chat session."""
+    
+    __tablename__ = 'chats'
+    
+    id = db.Column(db.Integer, primary_key=True)
+    title = db.Column(db.String(100), nullable=True)
+    is_team_chat = db.Column(db.Boolean, default=False)
+    model_name = db.Column(db.String(50), nullable=False)
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    
+    # Foreign keys
+    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)
+    
+    # Relationships
+    messages = db.relationship('Message', backref='chat', lazy='dynamic', cascade='all, delete-orphan')
+    team_members = db.relationship('TeamChatMember', backref='chat', lazy='dynamic', cascade='all, delete-orphan')
+    
+    def __repr__(self):
+        return f'<Chat {self.id}: {self.title or "Untitled"}>'
+
+
+class Message(db.Model):
+    """Message model representing a single message in a chat."""
+    
+    __tablename__ = 'messages'
+    
+    id = db.Column(db.Integer, primary_key=True)
+    content = db.Column(db.Text, nullable=False)
+    is_user_message = db.Column(db.Boolean, default=True)
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+    
+    # Foreign keys
+    chat_id = db.Column(db.Integer, db.ForeignKey('chats.id'), nullable=False)
+    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=True)
+    
+    def __repr__(self):
+        return f'<Message {self.id}: {self.content[:20]}...>'
+
+
+class TeamChatMember(db.Model):
+    """Model representing a member of a team chat."""
+    
+    __tablename__ = 'team_chat_members'
+    
+    id = db.Column(db.Integer, primary_key=True)
+    joined_at = db.Column(db.DateTime, default=datetime.utcnow)
+    
+    # Foreign keys
+    chat_id = db.Column(db.Integer, db.ForeignKey('chats.id'), nullable=False)
+    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)
+    
+    # Ensure a user can only be added to a team chat once
+    __table_args__ = (
+        db.UniqueConstraint('chat_id', 'user_id', name='uq_team_chat_member'),
+    )
+    
+    def __repr__(self):
+        return f'<TeamChatMember chat_id={self.chat_id}, user_id={self.user_id}>'
diff --git a/app/models/document.py b/app/models/document.py
new file mode 100644
index 0000000..eca6f7d
--- /dev/null
+++ b/app/models/document.py
@@ -0,0 +1,59 @@
+"""
+Document models for the application.
+"""
+
+from datetime import datetime
+import json
+from app.database.db import db
+
+class Document(db.Model):
+    """Document model representing a document in the library."""
+
+    __tablename__ = 'documents'
+
+    id = db.Column(db.Integer, primary_key=True)
+    title = db.Column(db.String(255), nullable=False)
+    description = db.Column(db.Text, nullable=True)
+    file_path = db.Column(db.String(255), nullable=True)
+    content_type = db.Column(db.String(50), nullable=False)
+    status = db.Column(db.String(20), default='pending')  # pending, processing, completed, error
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    # Foreign keys
+    uploaded_by = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)
+
+    # Relationships
+    chunks = db.relationship('DocumentChunk', backref='document', lazy='dynamic', cascade='all, delete-orphan')
+
+    def __repr__(self):
+        return f'<Document {self.id}: {self.title}>'
+
+
+class DocumentChunk(db.Model):
+    """Model representing a chunk of a document for embedding."""
+
+    __tablename__ = 'document_chunks'
+
+    id = db.Column(db.Integer, primary_key=True)
+    content = db.Column(db.Text, nullable=False)
+    chunk_index = db.Column(db.Integer, nullable=False)
+    embedding_id = db.Column(db.String(100), nullable=True)  # ID in Pinecone
+    meta_data = db.Column(db.Text, nullable=True)  # JSON string of metadata
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+
+    # Foreign keys
+    document_id = db.Column(db.Integer, db.ForeignKey('documents.id'), nullable=False)
+
+    def set_metadata(self, metadata_dict):
+        """Set metadata as JSON string."""
+        self.meta_data = json.dumps(metadata_dict)
+
+    def get_metadata(self):
+        """Get metadata as dictionary."""
+        if self.meta_data:
+            return json.loads(self.meta_data)
+        return {}
+
+    def __repr__(self):
+        return f'<DocumentChunk {self.id}: doc_id={self.document_id}, index={self.chunk_index}>'
diff --git a/app/models/user.py b/app/models/user.py
new file mode 100644
index 0000000..9d8f4a9
--- /dev/null
+++ b/app/models/user.py
@@ -0,0 +1,24 @@
+"""
+User model for the application.
+"""
+
+from datetime import datetime
+from app.database.db import db
+
+class User(db.Model):
+    """User model representing application users; owns chats via the `chats` relationship."""
+    
+    __tablename__ = 'users'
+    
+    id = db.Column(db.Integer, primary_key=True)
+    username = db.Column(db.String(64), unique=True, nullable=False, index=True)
+    email = db.Column(db.String(120), unique=True, nullable=False, index=True)
+    password_hash = db.Column(db.String(256), nullable=False)  # room for scrypt/pbkdf2 hash strings longer than 128 chars
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    
+    # Relationships
+    chats = db.relationship('Chat', backref='user', lazy='dynamic')
+    
+    def __repr__(self):
+        return f'<User {self.username}>'
diff --git a/app/services/__init__.py b/app/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/services/chat_service.py b/app/services/chat_service.py
new file mode 100644
index 0000000..d754c3b
--- /dev/null
+++ b/app/services/chat_service.py
@@ -0,0 +1,227 @@
+"""
+Service for chat functionality.
+"""
+
+from typing import List, Dict, Any, Optional
+from app.database.db import db
+from app.models.chat import Chat, Message, TeamChatMember
+from app.models.user import User
+
+class ChatService:
+    """Service for chat functionality."""
+    
+    def create_chat(self, user_id: int, title: Optional[str] = None, 
+                   is_team_chat: bool = False, model_name: Optional[str] = None) -> Chat:
+        """
+        Create a new chat.
+        
+        Args:
+            user_id: ID of the user creating the chat.
+            title: Optional title for the chat.
+            is_team_chat: Whether this is a team chat.
+            model_name: Name of the model to use for this chat.
+            
+        Returns:
+            Created chat.
+        """
+        from app.config.config import Config
+        
+        chat = Chat(
+            user_id=user_id,
+            title=title,
+            is_team_chat=is_team_chat,
+            model_name=model_name or Config().DEFAULT_MODEL
+        )
+        
+        db.session.add(chat)
+        db.session.commit()
+        
+        # If it's a team chat, add the creator as a member
+        if is_team_chat:
+            self.add_team_member(chat.id, user_id)
+        
+        return chat
+    
+    def get_chat(self, chat_id: int) -> Optional[Chat]:
+        """
+        Get a chat by ID.
+        
+        Args:
+            chat_id: ID of the chat.
+            
+        Returns:
+            Chat if found, None otherwise.
+        """
+        return Chat.query.get(chat_id)
+    
+    def get_user_chats(self, user_id: int) -> List[Chat]:
+        """
+        Get all chats for a user.
+        
+        Args:
+            user_id: ID of the user.
+            
+        Returns:
+            List of chats.
+        """
+        # Get private chats
+        private_chats = Chat.query.filter_by(
+            user_id=user_id, 
+            is_team_chat=False
+        ).order_by(Chat.updated_at.desc()).all()
+        
+        # Get team chats where user is a member
+        team_chat_ids = db.session.query(TeamChatMember.chat_id).filter_by(user_id=user_id).all()
+        team_chat_ids = [chat_id for (chat_id,) in team_chat_ids]
+        
+        team_chats = Chat.query.filter(
+            Chat.id.in_(team_chat_ids)
+        ).order_by(Chat.updated_at.desc()).all()
+        
+        # Combine and sort by updated_at
+        all_chats = private_chats + team_chats
+        all_chats.sort(key=lambda x: x.updated_at, reverse=True)
+        
+        return all_chats
+    
+    def add_message(self, chat_id: int, content: str, 
+                   is_user_message: bool = True, user_id: Optional[int] = None) -> Message:
+        """
+        Add a message to a chat and bump the chat's updated_at timestamp.
+
+        Args:
+            chat_id: ID of the chat.
+            content: Message content.
+            is_user_message: Whether this is a user message (vs. bot message).
+            user_id: ID of the sending user; stored only for user messages, ignored for bot messages.
+
+        Returns:
+            Created message (committed).
+        """
+        message = Message(
+            chat_id=chat_id,
+            content=content,
+            is_user_message=is_user_message,
+            user_id=user_id if is_user_message else None
+        )
+
+        db.session.add(message)
+        db.session.flush()  # created_at is a column default applied at INSERT; it is None until flushed
+        # Update chat's updated_at timestamp
+        chat = Chat.query.get(chat_id)
+        if chat:
+            chat.updated_at = message.created_at
+
+        db.session.commit()
+
+        return message
+    
+    def get_chat_messages(self, chat_id: int) -> List[Message]:
+        """
+        Get all messages for a chat.
+        
+        Args:
+            chat_id: ID of the chat.
+            
+        Returns:
+            List of messages.
+        """
+        return Message.query.filter_by(chat_id=chat_id).order_by(Message.created_at).all()
+    
+    def add_team_member(self, chat_id: int, user_id: int) -> Optional[TeamChatMember]:
+        """
+        Add a user to a team chat.
+        
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to add.
+            
+        Returns:
+            Created team chat member if successful, None otherwise.
+        """
+        chat = Chat.query.get(chat_id)
+        if not chat or not chat.is_team_chat:
+            return None
+        
+        # Check if user is already a member
+        existing_member = TeamChatMember.query.filter_by(
+            chat_id=chat_id, 
+            user_id=user_id
+        ).first()
+        
+        if existing_member:
+            return existing_member
+        
+        member = TeamChatMember(
+            chat_id=chat_id,
+            user_id=user_id
+        )
+        
+        db.session.add(member)
+        db.session.commit()
+        
+        return member
+    
+    def get_team_members(self, chat_id: int) -> List[User]:
+        """
+        Get all members of a team chat.
+        
+        Args:
+            chat_id: ID of the team chat.
+            
+        Returns:
+            List of users.
+        """
+        member_ids = db.session.query(TeamChatMember.user_id).filter_by(chat_id=chat_id).all()
+        member_ids = [user_id for (user_id,) in member_ids]
+        
+        return User.query.filter(User.id.in_(member_ids)).all()
+    
+    def remove_team_member(self, chat_id: int, user_id: int) -> bool:
+        """
+        Remove a user from a team chat.
+        
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to remove.
+            
+        Returns:
+            True if removal was successful, False otherwise.
+        """
+        member = TeamChatMember.query.filter_by(
+            chat_id=chat_id, 
+            user_id=user_id
+        ).first()
+        
+        if not member:
+            return False
+        
+        db.session.delete(member)
+        db.session.commit()
+        
+        return True
+    
+    def delete_chat(self, chat_id: int) -> bool:
+        """
+        Delete a chat and all its messages.
+        
+        Args:
+            chat_id: ID of the chat to delete.
+            
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        chat = Chat.query.get(chat_id)
+        if not chat:
+            return False
+        
+        try:
+            db.session.delete(chat)
+            db.session.commit()
+            return True
+            
+        except Exception as e:
+            # Log the error
+            print(f"Error deleting chat {chat_id}: {str(e)}")
+            db.session.rollback()
+            return False
diff --git a/app/services/chatbot_service.py b/app/services/chatbot_service.py
new file mode 100644
index 0000000..3dba74c
--- /dev/null
+++ b/app/services/chatbot_service.py
@@ -0,0 +1,105 @@
+"""
+Service for chatbot functionality without database dependency.
+"""
+
+from typing import List, Dict, Any, Optional
+
+class ChatbotService:
+    """Service for chatbot functionality."""
+    
+    def __init__(self):
+        """Initialize the chatbot service."""
+        # In-memory storage for chat history
+        self.chat_history = {}
+        self.current_chat_id = 0
+    
+    def create_chat(self, user_id: str) -> int:
+        """
+        Create a new chat session.
+        
+        Args:
+            user_id: ID of the user creating the chat.
+            
+        Returns:
+            ID of the created chat.
+        """
+        self.current_chat_id += 1
+        chat_id = self.current_chat_id
+        
+        self.chat_history[chat_id] = {
+            'user_id': user_id,
+            'messages': []
+        }
+        
+        return chat_id
+    
+    def add_message(self, chat_id: int, content: str, is_user: bool = True) -> Dict[str, Any]:
+        """
+        Add a message to a chat.
+        
+        Args:
+            chat_id: ID of the chat.
+            content: Message content.
+            is_user: Whether this is a user message (vs. bot message).
+            
+        Returns:
+            Added message.
+        """
+        if chat_id not in self.chat_history:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+        
+        message = {
+            'content': content,
+            'is_user': is_user,
+            'timestamp': self._get_timestamp()
+        }
+        
+        self.chat_history[chat_id]['messages'].append(message)
+        
+        return message
+    
+    def get_chat_messages(self, chat_id: int) -> List[Dict[str, Any]]:
+        """
+        Get all messages for a chat.
+        
+        Args:
+            chat_id: ID of the chat.
+            
+        Returns:
+            List of messages.
+        """
+        if chat_id not in self.chat_history:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+        
+        return self.chat_history[chat_id]['messages']
+    
+    def get_response(self, chat_id: int, message: str) -> str:
+        """
+        Get a response from the chatbot.
+        
+        Args:
+            chat_id: ID of the chat.
+            message: User message.
+            
+        Returns:
+            Bot response.
+        """
+        # Add user message to chat history
+        self.add_message(chat_id, message, is_user=True)
+        
+        # Simple echo response for now
+        response = f"You said: {message}"
+        
+        # Add bot response to chat history
+        self.add_message(chat_id, response, is_user=False)
+        
+        return response
+    
+    def _get_timestamp(self) -> str:
+        """Get current timestamp."""
+        from datetime import datetime
+        return datetime.utcnow().isoformat()
+
+
+# Create a singleton instance
+chatbot_service = ChatbotService()
diff --git a/app/services/document_service.py b/app/services/document_service.py
new file mode 100644
index 0000000..239bcd8
--- /dev/null
+++ b/app/services/document_service.py
@@ -0,0 +1,165 @@
+"""
+Service for document processing and embedding.
+"""
+
+import os
+from typing import List, Dict, Any, Optional
+import pinecone
+from app.database.db import db
+from app.models.document import Document, DocumentChunk
+from app.config.config import Config
+
+class DocumentService:
+    """Service for document processing and embedding."""
+    
+    def __init__(self, config: Config = None):
+        """
+        Initialize the document service.
+        
+        Args:
+            config: Configuration object.
+        """
+        self.config = config or Config()
+        self._initialize_pinecone()
+    
+    def _initialize_pinecone(self):
+        """Initialize Pinecone client."""
+        pinecone.init(
+            api_key=self.config.PINECONE_API_KEY,
+            environment=self.config.PINECONE_ENVIRONMENT
+        )
+        
+        # Check if index exists, create if it doesn't
+        if self.config.PINECONE_INDEX_NAME not in pinecone.list_indexes():
+            pinecone.create_index(
+                name=self.config.PINECONE_INDEX_NAME,
+                dimension=768,  # Default dimension for sentence-transformers
+                metric="cosine"
+            )
+        
+        self.index = pinecone.Index(self.config.PINECONE_INDEX_NAME)
+    
+    def create_document(self, title: str, file_path: str, content_type: str, 
+                        description: Optional[str], user_id: int) -> Document:
+        """
+        Create a new document record.
+        
+        Args:
+            title: Document title.
+            file_path: Path to the document file.
+            content_type: MIME type of the document.
+            description: Optional description of the document.
+            user_id: ID of the user who uploaded the document.
+            
+        Returns:
+            Created document.
+        """
+        document = Document(
+            title=title,
+            file_path=file_path,
+            content_type=content_type,
+            description=description,
+            uploaded_by=user_id,
+            status='pending'
+        )
+        
+        db.session.add(document)
+        db.session.commit()
+        
+        return document
+    
+    def process_document(self, document_id: int) -> bool:
+        """
+        Process a document for embedding, tracking progress in Document.status.
+
+        Args:
+            document_id: ID of the document to process.
+
+        Returns:
+            True if processing was successful, False if the document is missing or processing failed.
+        """
+        document = Document.query.get(document_id)
+        if not document:
+            return False
+
+        try:
+            # Update status to processing
+            document.status = 'processing'
+            db.session.commit()
+
+            # TODO: Implement document parsing and chunking
+            # This will be implemented in the next step
+
+            # Update status to completed
+            document.status = 'completed'
+            db.session.commit()
+            return True
+
+        except Exception as e:
+            db.session.rollback()  # clear any failed transaction so the error status can be committed
+            document.status = 'error'
+            db.session.commit()
+            # Log the error
+            print(f"Error processing document {document_id}: {str(e)}")
+            return False
+    
+    def get_document(self, document_id: int) -> Optional[Document]:
+        """
+        Get a document by ID.
+        
+        Args:
+            document_id: ID of the document.
+            
+        Returns:
+            Document if found, None otherwise.
+        """
+        return Document.query.get(document_id)
+    
+    def get_all_documents(self, user_id: Optional[int] = None) -> List[Document]:
+        """
+        Get all documents, newest first, optionally filtered by uploader.
+
+        Args:
+            user_id: Optional user ID to filter by; None means no filter.
+
+        Returns:
+            List of documents ordered by created_at descending.
+        """
+        query = Document.query
+        if user_id is not None:  # compare to None so an ID of 0 still filters
+            query = query.filter_by(uploaded_by=user_id)
+        return query.order_by(Document.created_at.desc()).all()
+    
+    def delete_document(self, document_id: int) -> bool:
+        """
+        Delete a document and its chunks.
+        
+        Args:
+            document_id: ID of the document to delete.
+            
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        document = Document.query.get(document_id)
+        if not document:
+            return False
+        
+        try:
+            # Delete document chunks from Pinecone
+            chunks = DocumentChunk.query.filter_by(document_id=document_id).all()
+            embedding_ids = [chunk.embedding_id for chunk in chunks if chunk.embedding_id]
+            
+            if embedding_ids:
+                self.index.delete(ids=embedding_ids)
+            
+            # Delete document from database
+            db.session.delete(document)
+            db.session.commit()
+            
+            return True
+            
+        except Exception as e:
+            # Log the error
+            print(f"Error deleting document {document_id}: {str(e)}")
+            db.session.rollback()
+            return False
diff --git a/app/services/model_service.py b/app/services/model_service.py
new file mode 100644
index 0000000..4a086e2
--- /dev/null
+++ b/app/services/model_service.py
@@ -0,0 +1,95 @@
+"""
+Service for model management and interaction.
+"""
+
+from typing import List, Dict, Any, Optional
+from app.config.config import Config
+
+class ModelService:
+    """Catalogue of selectable chat models plus a placeholder response generator."""
+
+    # Static registry keyed by model ID; each entry carries display metadata.
+    AVAILABLE_MODELS = {
+        'gpt-3.5-turbo': {
+            'name': 'GPT-3.5 Turbo',
+            'description': 'OpenAI GPT-3.5 Turbo model',
+            'provider': 'openai',
+            'max_tokens': 4096
+        },
+        'gpt-4': {
+            'name': 'GPT-4',
+            'description': 'OpenAI GPT-4 model',
+            'provider': 'openai',
+            'max_tokens': 8192
+        },
+        # Add more models as needed
+    }
+
+    def __init__(self, config: Optional["Config"] = None):
+        """
+        Initialize the model service.
+
+        Args:
+            config: Configuration object; a fresh Config() is created when omitted.
+        """
+        self.config = config or Config()
+        self.default_model = self.config.DEFAULT_MODEL
+
+    def get_available_models(self) -> List[Dict[str, Any]]:
+        """
+        Get a list of available models.
+
+        Returns:
+            One dictionary per registered model, in registry order, each with
+            'id', 'is_default' and the registry metadata.
+        """
+        # Delegate to get_model_info so both methods build entries the same way.
+        return [self.get_model_info(model_id) for model_id in self.AVAILABLE_MODELS]
+
+    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get information about a specific model.
+
+        Args:
+            model_id: ID of the model.
+
+        Returns:
+            Model information dictionary if found, None otherwise.
+        """
+        model_info = self.AVAILABLE_MODELS.get(model_id)
+        if model_info is None:
+            return None
+
+        return {
+            'id': model_id,
+            'is_default': model_id == self.default_model,
+            **model_info
+        }
+
+    def generate_response(self, model_id: str, prompt: str,
+                         context: Optional[List[Dict[str, str]]] = None) -> str:
+        """
+        Generate a response from the model.
+
+        Args:
+            model_id: ID of the model to use; unknown IDs fall back to the default model.
+            prompt: User prompt (not used by the placeholder yet).
+            context: Optional conversation context (not used by the placeholder yet).
+
+        Returns:
+            Generated response (currently a fixed placeholder naming the model).
+        """
+        # TODO: Implement actual model integration
+        # This is a placeholder that will be implemented in the next steps
+
+        if model_id not in self.AVAILABLE_MODELS:
+            model_id = self.default_model
+
+        # The configured default may itself be missing from the registry (it comes
+        # from Config, not from AVAILABLE_MODELS); fall back to the raw ID instead
+        # of raising KeyError.
+        model_info = self.AVAILABLE_MODELS.get(model_id)
+        model_name = model_info['name'] if model_info else model_id
+
+        # Placeholder response
+        return f"This is a placeholder response from {model_name}. The actual model integration will be implemented in the next steps."
diff --git a/app/utils/__init__.py b/app/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/deploy.sh b/deploy.sh
new file mode 100755
index 0000000..4cbe9ef
--- /dev/null
+++ b/deploy.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e  # abort instead of launching with a missing venv
+cd "$(dirname "$0")"  # venv/, run.py and app.log are relative to this script
+# Activate virtual environment
+source venv/bin/activate
+
+# Export environment variables
+export FLASK_APP=run.py
+export FLASK_ENV=production
+export FLASK_CONFIG=production
+
+# Run the application with uvicorn and nohup
+nohup uvicorn run:app --host 0.0.0.0 --port 5251 > app.log 2>&1 &
+echo "Application started on port 5251. Check app.log for output."
+echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
diff --git a/deploy_ai_service.sh b/deploy_ai_service.sh
new file mode 100755
index 0000000..6275b72
--- /dev/null
+++ b/deploy_ai_service.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Stop any existing service (the uvicorn variant or a previous simple_api.py run)
+pkill -f "uvicorn ai_service.api:app|simple_api.py" || true
+
+# Create data directory if it doesn't exist
+mkdir -p ai_service/data
+
+# Set placeholder environment variables for testing
+# Values already present in the environment (e.g. real API keys) are kept
+export PINECONE_API_KEY="${PINECONE_API_KEY:-test-pinecone-api-key}"
+export PINECONE_ENVIRONMENT="${PINECONE_ENVIRONMENT:-test-pinecone-environment}"
+export OPENAI_API_KEY="${OPENAI_API_KEY:-test-openai-api-key}"
+
+# Create empty files for local storage if they don't exist
+touch ai_service/data/chatbot.db
+touch ai_service/data/document_metadata.json
+touch ai_service/data/chats.json
+
+# For testing purposes, we'll use a simplified API
+echo "Starting Simple API Service on port 5251..."
+VENV_PATH="./venv"
+PYTHON_PATH="$VENV_PATH/bin/python"
+
+# Check if the virtual environment exists
+if [ -f "$PYTHON_PATH" ]; then
+    echo "Using Python from virtual environment: $PYTHON_PATH"
+    # Use the simplified API for testing
+    nohup "$PYTHON_PATH" simple_api.py > ai_service.log 2>&1 &
+else
+    echo "Virtual environment not found at $VENV_PATH, using system Python"
+    nohup python simple_api.py > ai_service.log 2>&1 &
+fi
+
+# Wait a moment for the service to start
+sleep 2
+
+# Check if the service is running
+if pgrep -f "simple_api.py" > /dev/null; then
+    echo "AI Service started successfully on port 5251"
+    echo "Check ai_service.log for output"
+    echo "To stop the service, run: pkill -f \"simple_api.py\""
+
+    # Test the health endpoint
+    echo -e "\nTesting health endpoint..."
+    curl -s http://localhost:5251/health
+    echo -e "\n"
+
+    # Test creating a chat and sending a message
+    echo "Testing chat creation and message sending..."
+    if [ -f "$PYTHON_PATH" ]; then
+        # Create a simple test script
+        cat > test_api.py << 'EOF'
+import requests
+import json
+
+# Create a chat
+response = requests.post(
+    "http://localhost:5251/chats",
+    json={
+        "user_id": "test_user",
+        "title": "Test Chat",
+        "model_id": "gpt-3.5-turbo"
+    }
+)
+
+if response.status_code == 200:
+    chat_id = response.json()["id"]
+    print(f"Chat created with ID: {chat_id}")
+
+    # Send a message with parameters
+    response = requests.post(
+        f"http://localhost:5251/chats/{chat_id}/messages",
+        json={
+            "message": "Hello, AI!",
+            "user_id": "test_user",
+            "temperature": 0.7,
+            "max_tokens": 100
+        }
+    )
+
+    if response.status_code == 200:
+        print("Message sent successfully")
+        print(f"Response: {response.json()['content'][:100]}...")
+    else:
+        print(f"Error sending message: {response.status_code}")
+        print(response.text)
+else:
+    print(f"Error creating chat: {response.status_code}")
+    print(response.text)
+EOF
+
+        # Run the test script with the virtual environment's Python
+        "$PYTHON_PATH" test_api.py
+        rm test_api.py
+    else
+        echo "Skipping API test as virtual environment Python is not available"
+    fi
+else
+    echo "Failed to start AI Service. Check ai_service.log for errors."
+    exit 1
+fi
diff --git a/deploy_production.sh b/deploy_production.sh
new file mode 100644
index 0000000..9a13258
--- /dev/null
+++ b/deploy_production.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Production deployment script for AI Service
+# This script deploys the AI service in a production environment
+
+# Exit on error
+set -e
+# Work from the script's directory: every path below (venv, ai_service/) is relative to it
+cd "$(dirname "$0")"
+echo "Starting AI Service deployment..."
+
+# Check if virtual environment exists
+if [ ! -d "venv" ]; then
+    echo "Creating virtual environment..."
+    python3 -m venv venv
+fi
+
+# Activate virtual environment
+source venv/bin/activate
+
+# Install dependencies
+echo "Installing dependencies..."
+pip install --upgrade pip
+pip install -r ai_service/requirements.txt
+
+# Check if .env file exists, if not copy from .env.production
+if [ ! -f "ai_service/.env" ]; then
+    echo "Creating .env file from .env.production..."
+    cp ai_service/.env.production ai_service/.env
+    echo "Please edit ai_service/.env to add your API keys before continuing."
+    echo "Then run this script again."
+    exit 1
+fi
+
+# Create data directory if it doesn't exist
+mkdir -p ai_service/data
+
+# Check if Pinecone API key is set
+PINECONE_API_KEY=$(grep PINECONE_API_KEY ai_service/.env | cut -d '=' -f2)
+if [ "$PINECONE_API_KEY" = "your-pinecone-api-key-here" ]; then
+    echo "Warning: Pinecone API key not set. Vector storage will not be available."
+    echo "Edit ai_service/.env to set your Pinecone API key."
+fi
+
+# Check if OpenAI API key is set
+OPENAI_API_KEY=$(grep OPENAI_API_KEY ai_service/.env | cut -d '=' -f2)
+if [ "$OPENAI_API_KEY" = "your-openai-api-key-here" ]; then
+    echo "Warning: OpenAI API key not set. AI responses will be placeholders."
+    echo "Edit ai_service/.env to set your OpenAI API key."
+fi
+
+# Stop any existing service
+echo "Stopping any existing AI service..."
+pkill -f "uvicorn ai_service.run:app" || true
+
+# Start the service with nohup
+echo "Starting AI service..."
+nohup uvicorn ai_service.run:app --host 0.0.0.0 --port 5251 > ai_service.log 2>&1 &
+
+# Wait for service to start
+sleep 2
+
+# Check if service is running
+if pgrep -f "uvicorn ai_service.run:app" > /dev/null; then
+    echo "AI service started successfully!"
+    echo "Service is running on http://0.0.0.0:5251"
+    echo "Logs are available in ai_service.log"
+else
+    echo "Failed to start AI service. Check ai_service.log for details."
+    exit 1
+fi
+
+# Test the service
+echo "Testing service health..."
+if curl -s http://localhost:5251/health | grep -q "healthy"; then
+    echo "Service is healthy!"
+else
+    echo "Service health check failed. Check ai_service.log for details."
+    exit 1
+fi
+
+echo "Deployment complete!"
diff --git a/remote_deploy.sh b/remote_deploy.sh
new file mode 100755
index 0000000..3c3ad9a
--- /dev/null
+++ b/remote_deploy.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Remote deployment script for the AI service
+# Usage: ./remote_deploy.sh [server_ip] [user] [port] [remote_dir]
+
+# Default values
+SERVER_IP=${1:-"104.225.217.215"}
+SERVER_USER=${2:-"root"}
+SERVER_PORT=${3:-"22"}
+REMOTE_DIR=${4:-"/root/openwebui"}
+LOCAL_DIR="."
+# user@host string shared by every ssh/rsync call below
+SSH_TARGET="$SERVER_USER@$SERVER_IP"
+
+echo "Deploying to server: $SERVER_IP"
+echo "Remote directory: $REMOTE_DIR"
+
+# Check if the server is reachable
+echo "Checking if server is reachable..."
+if ! ssh -q -o BatchMode=yes -o ConnectTimeout=5 -p "$SERVER_PORT" "$SSH_TARGET" exit; then
+    echo "Error: Cannot connect to server $SERVER_IP"
+    exit 1
+fi
+
+# Create a subdirectory for our AI service in the OpenWebUI directory
+echo "Creating AI service directory in OpenWebUI..."
+ssh -p "$SERVER_PORT" "$SSH_TARGET" "mkdir -p $REMOTE_DIR/ai_service_app"
+
+# Sync files to the server
+echo "Syncing files to server..."
+rsync -avz -e "ssh -p $SERVER_PORT" --exclude 'venv' --exclude '__pycache__' --exclude '*.pyc' --exclude '.git' \
+    "$LOCAL_DIR/" "$SSH_TARGET:$REMOTE_DIR/ai_service_app/"
+
+# Install dependencies on the server
+echo "Installing dependencies on the server..."
+ssh -p "$SERVER_PORT" "$SSH_TARGET" "cd $REMOTE_DIR/ai_service_app && \
+    (python3 -m venv venv || true) && \
+    source venv/bin/activate && \
+    pip install --upgrade pip && \
+    pip install -r requirements.txt && \
+    pip install python-dotenv langchain-text-splitters"
+
+# Stop any existing service
+echo "Stopping any existing service..."
+ssh -p "$SERVER_PORT" "$SSH_TARGET" "pkill -f 'uvicorn ai_service.run:app' || true"
+
+# Start the service
+echo "Starting the service..."
+ssh -p "$SERVER_PORT" "$SSH_TARGET" "cd $REMOTE_DIR/ai_service_app && \
+    source venv/bin/activate && \
+    bash ai_service/deploy.sh"
+
+# Check if the service is running
+echo "Checking if the service is running..."
+sleep 5
+if ssh -p "$SERVER_PORT" "$SSH_TARGET" "ps aux | grep 'uvicorn ai_service.run:app' | grep -v grep"; then
+    echo "Service is running!"
+    echo "You can access the API at: http://$SERVER_IP:5251"
+    echo "Check logs with: ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP 'tail -f $REMOTE_DIR/ai_service_app/logs/ai_service.log'"
+else
+    echo "Error: Service failed to start. Check logs on the server."
+    exit 1
+fi
+
+echo "Deployment completed successfully!"
diff --git a/requirements-deploy.txt b/requirements-deploy.txt
new file mode 100644
index 0000000..ff1d347
--- /dev/null
+++ b/requirements-deploy.txt
@@ -0,0 +1,10 @@
+# Core dependencies
+flask==2.3.3
+fastapi==0.103.1
+uvicorn==0.23.2
+flask-sqlalchemy==3.0.5
+python-dotenv==1.0.0
+pydantic==2.3.0
+
+# For future implementation
+pinecone-client==2.2.2
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8a5cdb5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,22 @@
+# Flask and FastAPI integration
+flask==2.3.3
+fastapi==0.103.1
+uvicorn==0.23.2
+
+# Database
+sqlalchemy==2.0.20
+flask-sqlalchemy==3.0.5
+pymysql==1.1.0
+cryptography==41.0.3  # Required for PyMySQL
+alembic==1.12.0
+
+# Document Processing
+langchain-text-splitters==0.3.8
+sentence-transformers==2.2.2
+
+# Utilities
+python-dotenv==1.0.0
+pydantic==2.3.0
+
+# Testing
+pytest==7.4.0
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..3a154ae
--- /dev/null
+++ b/run.py
@@ -0,0 +1,14 @@
+"""
+Application entry point.
+"""
+
+import os
+from app import create_app
+from app.config.config import config
+
+# Pick the configuration named by FLASK_CONFIG, falling back to 'default'
+config_name = os.getenv('FLASK_CONFIG', 'default')
+app = create_app(config[config_name])
+# Direct execution calls app.run on all interfaces, port 5000
+if __name__ == '__main__':
+    app.run(port=5000, host='0.0.0.0')
diff --git a/run_ai_service.py b/run_ai_service.py
new file mode 100644
index 0000000..5798ef7
--- /dev/null
+++ b/run_ai_service.py
@@ -0,0 +1,22 @@
+"""
+Script to run the AI service.
+"""
+
+import uvicorn
+import os
+import sys
+
+# Provide placeholder credentials for testing without clobbering real ones
+os.environ.setdefault('PINECONE_API_KEY', 'test-key')
+os.environ.setdefault('PINECONE_ENVIRONMENT', 'test-env')
+os.environ.setdefault('OPENAI_API_KEY', 'test-key')
+
+# Run the service
+if __name__ == "__main__":
+    print("Starting AI service on 0.0.0.0:5251")
+    uvicorn.run(
+        "ai_service.api:app",
+        host="0.0.0.0",
+        port=5251,
+        reload=True
+    )
diff --git a/simple_api.py b/simple_api.py
new file mode 100644
index 0000000..b09e3d3
--- /dev/null
+++ b/simple_api.py
@@ -0,0 +1,144 @@
+"""
+Simple API for testing deployment.
+"""
+
+import os
+import uuid
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+
+# Create FastAPI app
+app = FastAPI(
+    title="Simple AI Service API",
+    description="Simple API for testing deployment",
+    version="1.0.0"
+)
+
+# Add CORS middleware. NOTE(review): any origin, method and header is allowed together with credentials; confirm before exposing publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Define API models
+class MessageRequest(BaseModel):
+    """Request model for sending a message."""
+    message: str = Field(..., description="Message content")
+    user_id: str = Field(..., description="User ID")
+    
+    # Optional model parameters; each defaults to None when the client omits it
+    temperature: Optional[float] = Field(None, description="Controls randomness")
+    max_tokens: Optional[int] = Field(None, description="Maximum tokens to generate")
+    top_p: Optional[float] = Field(None, description="Nucleus sampling parameter")
+    frequency_penalty: Optional[float] = Field(None, description="Penalizes repeated tokens")
+    presence_penalty: Optional[float] = Field(None, description="Penalizes repeated topics")
+    system_prompt: Optional[str] = Field(None, description="System prompt")
+
+class Message(BaseModel):
+    """Model for a message."""
+    id: str = Field(..., description="Message ID")
+    content: str = Field(..., description="Message content")
+    user_id: Optional[str] = Field(None, description="User ID")  # send_message stores None here for bot replies
+    is_user_message: bool = Field(..., description="Whether this is a user message")
+    timestamp: str = Field(..., description="Message timestamp")  # send_message fills this with datetime.utcnow().isoformat()
+
+class ChatRequest(BaseModel):
+    """Request model for creating a chat."""
+    user_id: str = Field(..., description="User ID")
+    title: Optional[str] = Field(None, description="Chat title")  # create_chat substitutes "Chat N" when this is empty
+    model_id: Optional[str] = Field(None, description="Model ID")  # create_chat substitutes "gpt-3.5-turbo" when this is empty
+
+class Chat(BaseModel):
+    """Model for a chat."""
+    id: str = Field(..., description="Chat ID")
+    title: str = Field(..., description="Chat title")
+    user_id: str = Field(..., description="User ID")
+    model_id: str = Field(..., description="Model ID")
+    created_at: str = Field(..., description="Creation timestamp")  # create_chat stores datetime.utcnow().isoformat()
+    updated_at: str = Field(..., description="Update timestamp")  # rewritten by send_message after each bot reply
+    messages: List[Message] = Field(default=[], description="Chat messages")  # send_message appends the user message, then the bot reply
+
+# In-memory storage: chat ID -> chat dict built by create_chat; nothing is persisted
+chats: Dict[str, Dict[str, Any]] = {}
+
+# API endpoints
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    return dict(status="healthy")
+
+@app.post("/chats", response_model=Chat)
+async def create_chat(request: ChatRequest):
+    """Create a new chat."""
+    chat_id = str(uuid.uuid4())
+    # Single timestamp so created_at == updated_at on creation (two utcnow() calls drift)
+    now = datetime.utcnow().isoformat()
+    chat = {
+        "id": chat_id,
+        "title": request.title or f"Chat {len(chats) + 1}",
+        "user_id": request.user_id,
+        "model_id": request.model_id or "gpt-3.5-turbo",
+        "created_at": now,
+        "updated_at": now,
+        "messages": []
+    }
+    chats[chat_id] = chat
+    return chat
+
+@app.get("/chats/{chat_id}", response_model=Chat)
+async def get_chat(chat_id: str):
+    """Get a chat by ID."""
+    chat = chats.get(chat_id)
+    if chat is None:
+        raise HTTPException(status_code=404, detail="Chat not found")
+    return chat
+
+@app.post("/chats/{chat_id}/messages", response_model=Message)
+async def send_message(chat_id: str, request: MessageRequest):
+    """Send a message to a chat."""
+    if chat_id not in chats:
+        raise HTTPException(status_code=404, detail="Chat not found")
+    chat = chats[chat_id]
+    # Add user message
+    user_message = {
+        "id": str(uuid.uuid4()),
+        "content": request.message,
+        "user_id": request.user_id,
+        "is_user_message": True,
+        "timestamp": datetime.utcnow().isoformat()
+    }
+    chat["messages"].append(user_message)
+
+    # Generate bot response: echo the prompt plus whichever parameters were supplied
+    params = []
+    if request.temperature is not None:
+        params.append(f" (temperature={request.temperature})")
+    if request.max_tokens is not None:
+        params.append(f" (max_tokens={request.max_tokens})")
+    if request.system_prompt is not None:
+        params.append(" (using custom system prompt)")  # plain literal, no f-string needed
+    params_text = "".join(params)
+
+    # One timestamp for both fields so the chat's updated_at matches its last message
+    replied_at = datetime.utcnow().isoformat()
+    bot_message = {
+        "id": str(uuid.uuid4()),
+        "content": f"This is a test response to: '{request.message}'{params_text}",
+        "user_id": None,
+        "is_user_message": False,
+        "timestamp": replied_at
+    }
+    chat["messages"].append(bot_message)
+    chat["updated_at"] = replied_at
+    return bot_message
+
+if __name__ == "__main__":
+    import uvicorn  # imported here, so it is only loaded when the file is run as a script
+    uvicorn.run(app, host="0.0.0.0", port=5251)  # serve this app on all interfaces, port 5251
diff --git a/test_chat_with_params.py b/test_chat_with_params.py
new file mode 100644
index 0000000..f6b6bcc
--- /dev/null
+++ b/test_chat_with_params.py
@@ -0,0 +1,69 @@
+"""
+Test script for sending a message with advanced parameters.
+"""
+
+import requests
+import json
+import uuid
+
+# Create a new chat
+def create_chat():
+    """Create a chat on the local service; return its ID, or None on an error status."""
+    response = requests.post(
+        "http://localhost:5251/chats",
+        json={
+            "user_id": "test_user",
+            "title": "Test Chat",
+            "model_id": "gpt-3.5-turbo",
+            "is_team_chat": False
+        },
+        timeout=10,  # fail fast instead of hanging when the service is unresponsive
+    )
+    if response.status_code == 200:
+        return response.json()["id"]
+    print(f"Error creating chat: {response.status_code}")
+    print(response.text)
+    return None
+
+# Send a message with advanced parameters
+def send_message(chat_id):
+    """Post one message with explicit model parameters; return the reply JSON, or None on an error status."""
+    response = requests.post(
+        f"http://localhost:5251/chats/{chat_id}/messages",
+        json={
+            "message": "Tell me about artificial intelligence",
+            "user_id": "test_user",
+            "use_rag": False,
+            "temperature": 0.7,
+            "max_tokens": 500,
+            "top_p": 0.9,
+            "frequency_penalty": 0.5,
+            "presence_penalty": 0.5,
+            "system_prompt": "You are a helpful AI assistant that provides concise responses."
+        },
+        timeout=60,  # generous bound for a generated reply; without it the call can block forever
+    )
+    if response.status_code == 200:
+        return response.json()
+    print(f"Error sending message: {response.status_code}")
+    print(response.text)
+    return None
+
+# Main function
+def main():
+    """Create a chat, send one parameterised message, and print the reply."""
+    print("Creating a new chat...")
+    chat_id = create_chat()
+    if not chat_id:
+        return
+    print(f"Chat created with ID: {chat_id}")
+    print("\nSending a message with advanced parameters...")
+    reply = send_message(chat_id)
+    if not reply:
+        return
+    print("\nResponse received:")
+    print(f"Message ID: {reply['id']}")
+    print(f"Content: {reply['content']}")
+
+if __name__ == "__main__":
+    main()
diff --git a/test_ollama.py b/test_ollama.py
new file mode 100644
index 0000000..15e28dc
--- /dev/null
+++ b/test_ollama.py
@@ -0,0 +1,73 @@
+"""
+Test script for Ollama integration.
+"""
+
+import os
+import sys
+import requests
+import json
+
+# Add the parent directory to the path so we can import ai_service
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+from ai_service.config import config
+from ai_service.models.model_service import model_service
+
+def test_available_models():
+    """Print each model returned by model_service.get_available_models()."""
+    catalogue = model_service.get_available_models()
+    print("Available models:")
+    for entry in catalogue:
+        print(f"- {entry['name']} ({entry['id']}): {entry['description']}")
+    print()
+
+def test_generate_response():
+    """Ask a fixed question with RAG disabled and print the answer."""
+    # Pinned model rather than config.DEFAULT_MODEL
+    model_name = "llama3.1"
+    question = "What is the capital of France?"
+
+    print(f"Testing model: {model_name}")
+    print(f"Prompt: {question}")
+
+    # Keyword arguments gathered first, then passed through unchanged
+    call_args = {"model_id": model_name, "prompt": question, "use_rag": False}
+    answer = model_service.generate_response(**call_args)
+
+    # Show the answer, then a blank separator line
+    print("Response:")
+    print(answer)
+    print()
+
+def test_rag_response():
+    """Ask a knowledge-base question with RAG enabled and print the answer."""
+    # Pinned model rather than config.DEFAULT_MODEL
+    model_name = "llama3.1"
+    question = "Tell me about the documents in the knowledge base."
+
+    print(f"Testing RAG with model: {model_name}")
+    print(f"Prompt: {question}")
+
+    # Keyword arguments gathered first, then passed through unchanged
+    call_args = {"model_id": model_name, "prompt": question, "use_rag": True}
+    answer = model_service.generate_response(**call_args)
+
+    # Show the answer, then a blank separator line
+    print("Response with RAG:")
+    print(answer)
+    print()
+
+if __name__ == "__main__":
+    print("Testing Ollama integration")
+    print(f"OpenWebUI URL: {config.OPENWEBUI_URL}")
+
+    # Point the Ollama API URL at OpenWebUI's /ollama path
+    model_service.ollama_api_url = f"{config.OPENWEBUI_URL}/ollama"
+    print(f"Using Ollama API URL: {model_service.ollama_api_url}")
+
+    print(f"Default model: {config.DEFAULT_MODEL}")
+    print()
+
+    # Run the three checks in their original order
+    for check in (test_available_models, test_generate_response, test_rag_response):
+        check()