diff --git a/flask-langchain-app/.dockerignore b/flask-langchain-app/.dockerignore new file mode 100644 index 0000000..86291a2 --- /dev/null +++ b/flask-langchain-app/.dockerignore @@ -0,0 +1,11 @@ +venv +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +static/uploads/ +.env +.git +.gitignore +.DS_Store \ No newline at end of file diff --git a/flask-langchain-app/Dockerfile b/flask-langchain-app/Dockerfile new file mode 100644 index 0000000..d88564e --- /dev/null +++ b/flask-langchain-app/Dockerfile @@ -0,0 +1,30 @@ +# Use official Python image +FROM python:3.12-slim + +# Set work directory +WORKDIR /app + +# Install system dependencies for python-magic and pymupdf +RUN apt-get update && apt-get install -y \ + build-essential \ + libmagic1 \ + mupdf-tools \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements +COPY requirements.txt ./ + +# Install Python dependencies +RUN pip install --upgrade pip && pip install -r requirements.txt + +# Copy app code +COPY . . + +# Create uploads directory +RUN mkdir -p static/uploads + +# Expose port +EXPOSE 5000 + +# Run the app with Gunicorn +CMD ["gunicorn", "-b", "0.0.0.0:5000", "app:app"] \ No newline at end of file diff --git a/flask-langchain-app/README.md b/flask-langchain-app/README.md new file mode 100644 index 0000000..f2f07be --- /dev/null +++ b/flask-langchain-app/README.md @@ -0,0 +1,143 @@ +# Flask Document Chatbot + +[![Build Status](https://img.shields.io/badge/build-passing-brightgreen)](https://github.com/your-repo) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) + +A modern web application that allows users to upload documents (PDF, DOCX, TXT) and ask questions about their content using ChromaDB for document storage and retrieval. Powered by Flask, LangChain, and FAISS. 
+ +--- + +## 🚀 Quick Start + +### Run with Docker (Recommended) +```bash +git clone [repository-url] +cd flask-langchain-app +docker-compose up --build +``` +Visit: [http://localhost:5001](http://localhost:5001) + +### Run Locally (Python) +```bash +git clone [repository-url] +cd flask-langchain-app +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +pip install -r requirements.txt +python app.py +``` +Visit: [http://localhost:5000](http://localhost:5000) + +--- + +## ✨ Features +- Modern, responsive UI with smooth animations +- Drag-and-drop file upload +- Support for PDF, DOCX, and TXT files +- Real-time chat interface +- Document management system +- Semantic search using ChromaDB & FAISS +- Beautiful loading animations and transitions + +--- + +## 🖼️ Demo + + +--- + +## 📦 Project Structure +``` +flask-langchain-app/ +├── app.py # Main Flask application +├── requirements.txt # Python dependencies +├── Dockerfile # Docker build file +├── docker-compose.yml # Docker Compose config +├── .dockerignore # Docker ignore file +├── static/ +│ ├── css/ +│ │ └── style.css # Custom styles +│ ├── js/ +│ │ └── main.js # Frontend JavaScript +│ └── uploads/ # Uploaded documents +├── templates/ +│ └── index.html # Main template +└── db/ # ChromaDB storage +``` + +--- + +## ⚙️ Configuration & Customization +- **UI Customization:** Edit `static/css/style.css` and `templates/index.html` for branding and layout changes. +- **File Size Limit:** Adjust `MAX_CONTENT_LENGTH` in `app.py`. +- **Allowed File Types:** Update `ALLOWED_EXTENSIONS` in `app.py`. + +--- + +## 📝 Usage +1. **Upload** a document by dragging and dropping it or clicking "Browse Files". +2. **Wait** for the document to be processed (progress bar will show). +3. **Ask** a question in the chat input field. +4. **View** the chatbot's answer based on your document content. + +**Example Q&A:** +- Q: "What is the main topic of this document?" +- Q: "Summarize the second section." +- Q: "List all dates mentioned." 
+ +--- + +## 🛠️ Dependencies +- `flask` +- `langchain` +- `langchain-community` +- `faiss-cpu` +- `numpy==1.26.4` (required for FAISS compatibility) +- `python-docx`, `pymupdf`, `python-magic`, etc. + +--- + +## 🐳 Docker Notes +- The app runs on port **5001** by default (see `docker-compose.yml`). +- Uploaded files and ChromaDB data persist in `static/uploads` and `db`. +- To stop the app: `Ctrl+C` then `docker-compose down`. + +--- + +## ❓ FAQ & Troubleshooting + +**Q: I get `ModuleNotFoundError: No module named 'numpy.distutils'` or FAISS import errors.** +- A: Ensure your `requirements.txt` includes: + ``` + numpy==1.26.4 + faiss-cpu + ``` + Then rebuild Docker: `docker-compose build && docker-compose up` + +**Q: Port 5000 is already in use!** +- A: The Docker app is mapped to port **5001**. Visit [http://localhost:5001](http://localhost:5001) + +**Q: How do I change the upload size or allowed file types?** +- A: Edit `MAX_CONTENT_LENGTH` and `ALLOWED_EXTENSIONS` in `app.py`. + +--- + +## 🤝 Contributing +1. Fork the repository +2. Create a new branch for your feature +3. Commit your changes +4. Push to the branch +5. Create a Pull Request + +--- + +## 📄 License +This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. + +--- + +## 📬 Contact & Support +For questions, issues, or feature requests, please open an issue on GitHub or contact the maintainer at [your-email@example.com]. 
"""Flask document chatbot: upload PDF/DOCX/TXT files and ask questions.

Extracted text is held in a module-level list and searched through a FAISS
index built with LangChain's fake embeddings and fake LLM (demo wiring).
"""
import os
from datetime import datetime

import fitz  # PyMuPDF
import magic
from docx import Document
from flask import Flask, render_template, request, jsonify
from langchain.chains import RetrievalQA
from langchain.llms.fake import FakeListLLM
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import FAISS
from werkzeug.utils import secure_filename

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'static/uploads'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size
app.config['ALLOWED_EXTENSIONS'] = {'pdf', 'docx', 'txt'}

# In-memory document store: a list of {'filename': str, 'text': str} dicts.
# Nothing is persisted, so the list starts empty on every process start.
documents = []


# Helper functions

def allowed_file(filename):
    """Return True if *filename* has an extension in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive and looks only at the text after the
    last dot.
    """
    return (
        '.' in filename
        and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']
    )


def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*."""
    with fitz.open(file_path) as doc:
        # join() avoids rebuilding the string once per page.
        return "".join(page.get_text() for page in doc)


def extract_text_from_docx(file_path):
    """Return the paragraphs of the DOCX at *file_path*, one per line."""
    doc = Document(file_path)
    return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)


def extract_text_from_txt(file_path):
    """Return the contents of the text file at *file_path*, decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising, so a file in another encoding is still indexed.
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()


# Maps the MIME type sniffed from the saved file to its text extractor.
_EXTRACTORS = {
    'application/pdf': extract_text_from_pdf,
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
        extract_text_from_docx,
    'text/plain': extract_text_from_txt,
}


def _discard_upload(file_path):
    """Delete a rejected upload so it does not accumulate on disk."""
    try:
        os.remove(file_path)
    except OSError:
        app.logger.warning('Could not remove rejected upload %s', file_path)


@app.route('/')
def index():
    """Serve the single-page UI."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload_file():
    """Save an uploaded file, extract its text and add it to ``documents``.

    Expects a multipart form field named ``file``. Responds with JSON:
    ``message`` and ``filename`` on success, or ``error`` with HTTP 400 when
    the part is missing, the extension is not allowed, the sniffed MIME type
    has no extractor, or the file cannot be parsed.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if not (file and allowed_file(file.filename)):
        return jsonify({'error': 'Invalid file type'}), 400

    # The timestamp prefix keeps the sanitized client name from colliding
    # with earlier uploads of the same file.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_')
    filename = timestamp + secure_filename(file.filename)
    # The directory is otherwise only created under __main__, which does not
    # run when the module is imported by a WSGI server.
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    # Dispatch on the content-sniffed type rather than the client's extension.
    file_type = magic.from_file(file_path, mime=True)
    extractor = _EXTRACTORS.get(file_type)
    if extractor is None:
        _discard_upload(file_path)
        return jsonify({'error': 'Unsupported file type'}), 400

    try:
        text = extractor(file_path)
    except Exception:
        # Request boundary: the parsers raise library-specific errors on
        # corrupt input, so log the traceback and answer with a JSON error
        # instead of an HTML 500 page.
        app.logger.exception('Failed to extract text from %s', file_path)
        _discard_upload(file_path)
        return jsonify({'error': 'Could not read the uploaded file'}), 400

    documents.append({'filename': filename, 'text': text})
    return jsonify({
        'message': 'File uploaded and processed successfully',
        'filename': filename,
    })


@app.route('/query', methods=['POST'])
def query():
    """Answer a question against the text of all uploaded documents.

    Expects a JSON object with a ``query`` string. Responds with
    ``{'results': [answer]}``, or ``error`` with HTTP 400 when the query is
    missing, nothing has been uploaded, or no text could be extracted.
    """
    # silent=True yields None rather than raising on a missing or malformed
    # JSON body; a non-object body is treated the same as an empty one.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    query_text = data.get('query')
    if not isinstance(query_text, str) or not query_text.strip():
        return jsonify({'error': 'No query provided'}), 400
    if not documents:
        return jsonify({'error': 'No documents uploaded yet.'}), 400

    # Combine all docs for demo; in production, use per-doc QA
    all_text = '\n'.join(doc['text'] for doc in documents)
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = splitter.split_text(all_text)
    if not texts:
        # An index cannot be built from zero chunks (e.g. only scanned PDFs
        # with no text layer were uploaded).
        return jsonify({'error': 'No text could be extracted from the uploaded documents.'}), 400

    # Use fake embeddings and LLM for demo; replace with real ones for production
    embeddings = FakeEmbeddings(size=32)
    vectordb = FAISS.from_texts(texts, embeddings)
    retriever = vectordb.as_retriever()
    llm = FakeListLLM(responses=[f"Pretend answer for: {query_text}"])
    qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    answer = qa.run(query_text)
    return jsonify({'results': [answer]})


@app.route('/documents', methods=['GET'])
def list_documents():
    """Return the stored filename of every uploaded document as JSON."""
    return jsonify({'documents': [{'source': doc['filename']} for doc in documents]})


if __name__ == '__main__':
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    # The Werkzeug debugger lets a client execute code, so debug mode is
    # opt-in through FLASK_DEBUG instead of always on.
    app.run(debug=os.environ.get('FLASK_DEBUG', '').lower() in {'1', 'true'})
+ ports: + - "5001:5000" + volumes: + - ./static/uploads:/app/static/uploads + environment: + - FLASK_ENV=production + restart: unless-stopped \ No newline at end of file diff --git a/flask-langchain-app/requirements.txt b/flask-langchain-app/requirements.txt new file mode 100644 index 0000000..3b3570b --- /dev/null +++ b/flask-langchain-app/requirements.txt @@ -0,0 +1,13 @@ +flask==3.0.2 +langchain==0.1.12 +langchain-community +numpy==1.26.4 +faiss-cpu +python-dotenv==1.0.1 +python-docx==1.1.0 +pymupdf==1.23.26 +Werkzeug==3.0.1 +gunicorn==21.2.0 +python-magic==0.4.27 +flask-wtf==1.2.1 +python-magic-bin==0.4.14; sys_platform == 'win32' \ No newline at end of file diff --git a/flask-langchain-app/run.sh b/flask-langchain-app/run.sh new file mode 100755 index 0000000..33e16be --- /dev/null +++ b/flask-langchain-app/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Navigate to the project directory +cd "$(dirname "$0")" + +# Create virtual environment if it doesn't exist +if [ ! -d "venv" ]; then + python3 -m venv venv +fi + +# Activate virtual environment +source venv/bin/activate + +# Install dependencies +pip install --upgrade pip +pip install -r requirements.txt + +# Create uploads directory if it doesn't exist +mkdir -p static/uploads + +# Run the Flask app +export FLASK_APP=app.py +export FLASK_ENV=development +flask run \ No newline at end of file diff --git a/flask-langchain-app/static/css/style.css b/flask-langchain-app/static/css/style.css new file mode 100644 index 0000000..19a85f3 --- /dev/null +++ b/flask-langchain-app/static/css/style.css @@ -0,0 +1,96 @@ +/* Custom styles */ +.drop-zone-active { + border-color: #4f46e5 !important; + background-color: #f5f3ff; +} + +.chat-message { + max-width: 80%; + margin-bottom: 1rem; + padding: 1rem; + border-radius: 0.5rem; + animation: fadeIn 0.3s ease-in-out; +} + +.user-message { + background-color: #e0e7ff; + margin-left: auto; +} + +.bot-message { + background-color: #f3f4f6; + margin-right: auto; +} + +.document-card 
{ + transition: transform 0.2s ease-in-out; +} + +.document-card:hover { + transform: translateY(-2px); +} + +/* Animations */ +@keyframes fadeIn { + from { + opacity: 0; + transform: translateY(10px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +/* Loading animation */ +.loading-dots { + display: inline-block; +} + +.loading-dots:after { + content: '...'; + animation: dots 1.5s steps(5, end) infinite; +} + +@keyframes dots { + 0%, 20% { + content: '.'; + } + 40% { + content: '..'; + } + 60%, 100% { + content: '...'; + } +} + +/* Scrollbar styling */ +::-webkit-scrollbar { + width: 8px; +} + +::-webkit-scrollbar-track { + background: #f1f1f1; + border-radius: 4px; +} + +::-webkit-scrollbar-thumb { + background: #888; + border-radius: 4px; +} + +::-webkit-scrollbar-thumb:hover { + background: #555; +} + +/* File upload animation */ +.upload-progress { + transition: width 0.3s ease-in-out; +} + +/* Responsive adjustments */ +@media (max-width: 768px) { + .chat-message { + max-width: 90%; + } +} \ No newline at end of file diff --git a/flask-langchain-app/static/js/main.js b/flask-langchain-app/static/js/main.js new file mode 100644 index 0000000..55d5b47 --- /dev/null +++ b/flask-langchain-app/static/js/main.js @@ -0,0 +1,199 @@ +document.addEventListener('DOMContentLoaded', () => { + const dropZone = document.getElementById('dropZone'); + const fileInput = document.getElementById('fileInput'); + const uploadProgress = document.getElementById('uploadProgress'); + const progressBar = uploadProgress.querySelector('div > div'); + const chatContainer = document.getElementById('chatContainer'); + const queryInput = document.getElementById('queryInput'); + const sendButton = document.getElementById('sendButton'); + const documentList = document.getElementById('documentList'); + + // Initialize document list + loadDocuments(); + + // Drag and drop handlers + ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => { + 
dropZone.addEventListener(eventName, preventDefaults, false); + }); + + function preventDefaults(e) { + e.preventDefault(); + e.stopPropagation(); + } + + ['dragenter', 'dragover'].forEach(eventName => { + dropZone.addEventListener(eventName, highlight, false); + }); + + ['dragleave', 'drop'].forEach(eventName => { + dropZone.addEventListener(eventName, unhighlight, false); + }); + + function highlight(e) { + dropZone.classList.add('drop-zone-active'); + } + + function unhighlight(e) { + dropZone.classList.remove('drop-zone-active'); + } + + dropZone.addEventListener('drop', handleDrop, false); + + function handleDrop(e) { + const dt = e.dataTransfer; + const files = dt.files; + handleFiles(files); + } + + fileInput.addEventListener('change', function() { + handleFiles(this.files); + }); + + function handleFiles(files) { + if (files.length > 0) { + uploadFile(files[0]); + } + } + + function uploadFile(file) { + const formData = new FormData(); + formData.append('file', file); + + uploadProgress.classList.remove('hidden'); + progressBar.style.width = '0%'; + + fetch('/upload', { + method: 'POST', + body: formData + }) + .then(response => response.json()) + .then(data => { + if (data.error) { + showError(data.error); + } else { + progressBar.style.width = '100%'; + setTimeout(() => { + uploadProgress.classList.add('hidden'); + progressBar.style.width = '0%'; + }, 1000); + loadDocuments(); + addMessage('System', 'Document uploaded successfully! You can now ask questions about it.', 'bot-message'); + } + }) + .catch(error => { + showError('Upload failed. 
Please try again.'); + }); + } + + function showError(message) { + addMessage('System', message, 'bot-message'); + uploadProgress.classList.add('hidden'); + progressBar.style.width = '0%'; + } + + // Chat functionality + sendButton.addEventListener('click', sendMessage); + queryInput.addEventListener('keypress', (e) => { + if (e.key === 'Enter') { + sendMessage(); + } + }); + + function sendMessage() { + const query = queryInput.value.trim(); + if (!query) return; + + addMessage('You', query, 'user-message'); + queryInput.value = ''; + + // Show loading message + const loadingId = 'loading-' + Date.now(); + addMessage('Bot', 'Thinking', 'bot-message', loadingId); + + fetch('/query', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ query: query }) + }) + .then(response => response.json()) + .then(data => { + // Remove loading message + const loadingMessage = document.getElementById(loadingId); + if (loadingMessage) { + loadingMessage.remove(); + } + + if (data.error) { + addMessage('Bot', data.error, 'bot-message'); + } else { + const response = data.results[0] || 'No relevant information found.'; + addMessage('Bot', response, 'bot-message'); + } + }) + .catch(error => { + const loadingMessage = document.getElementById(loadingId); + if (loadingMessage) { + loadingMessage.remove(); + } + addMessage('Bot', 'Sorry, there was an error processing your request.', 'bot-message'); + }); + } + + function addMessage(sender, message, className, id = null) { + const messageDiv = document.createElement('div'); + messageDiv.className = `chat-message ${className}`; + if (id) messageDiv.id = id; + + const senderSpan = document.createElement('span'); + senderSpan.className = 'font-semibold text-gray-700'; + senderSpan.textContent = sender + ': '; + + const contentSpan = document.createElement('span'); + contentSpan.innerHTML = message; + + messageDiv.appendChild(senderSpan); + messageDiv.appendChild(contentSpan); + + 
chatContainer.appendChild(messageDiv); + chatContainer.scrollTop = chatContainer.scrollHeight; + } + + function loadDocuments() { + fetch('/documents') + .then(response => response.json()) + .then(data => { + documentList.innerHTML = ''; + data.documents.forEach(doc => { + const card = createDocumentCard(doc); + documentList.appendChild(card); + }); + }) + .catch(error => { + console.error('Error loading documents:', error); + }); + } + + function createDocumentCard(doc) { + const card = document.createElement('div'); + card.className = 'document-card bg-white p-4 rounded-lg shadow hover:shadow-md transition-shadow'; + + const fileName = doc.source.split('_').slice(2).join('_'); + const uploadDate = new Date(doc.source.split('_')[0] + doc.source.split('_')[1]).toLocaleDateString(); + + card.innerHTML = ` +
+ + + +
+

${fileName}

+

Uploaded on ${uploadDate}

+
+
+ `; + + return card; + } +}); \ No newline at end of file diff --git a/flask-langchain-app/static/uploads/20250618_192143_Algorithms_Data_Structures.docx b/flask-langchain-app/static/uploads/20250618_192143_Algorithms_Data_Structures.docx new file mode 100644 index 0000000..9af13de Binary files /dev/null and b/flask-langchain-app/static/uploads/20250618_192143_Algorithms_Data_Structures.docx differ diff --git a/flask-langchain-app/static/uploads/20250618_192418_Heaps__Priority_Queues__Comprehensive_Notes_with.pdf b/flask-langchain-app/static/uploads/20250618_192418_Heaps__Priority_Queues__Comprehensive_Notes_with.pdf new file mode 100644 index 0000000..04aeeed Binary files /dev/null and b/flask-langchain-app/static/uploads/20250618_192418_Heaps__Priority_Queues__Comprehensive_Notes_with.pdf differ diff --git a/flask-langchain-app/static/uploads/20250618_192635_10179322_2024_020.pdf b/flask-langchain-app/static/uploads/20250618_192635_10179322_2024_020.pdf new file mode 100644 index 0000000..1ba4a1b Binary files /dev/null and b/flask-langchain-app/static/uploads/20250618_192635_10179322_2024_020.pdf differ diff --git a/flask-langchain-app/templates/index.html b/flask-langchain-app/templates/index.html new file mode 100644 index 0000000..a349487 --- /dev/null +++ b/flask-langchain-app/templates/index.html @@ -0,0 +1,71 @@ + + + + + + Document Chatbot + + + + + +
+
+

Document Chatbot

+

Upload your documents and ask questions about them

+
+ +
+ +
+

Upload Document

+
+ +
+ + + +
+

Drag and drop your file here, or

+ +
+

Supported formats: PDF, DOCX, TXT

+
+
+ +
+ + +
+

Ask Questions

+
+ +
+
+ + +
+
+
+ + +
+

Uploaded Documents

+
+ +
+
+
+ + + + + \ No newline at end of file