#src/
import os
import json
import re
import PyPDF2
import docx
import requests
import pdfplumber
import docx
import hashlib
import io
import zipfile
import as ET
import openpyxl
import hashlib
import unicodedata
import regex
from typing import Pattern
from flask import Flask jsonify request
from import firestore
from flaskcors import CORS
from datetime import datetime timezone
from dotenv import loaddotenv
import firebaseadmin
from firebaseadmin import credentials firestore storage
from bs4 import BeautifulSoup # for URL text extraction
import PyPDF2
import unicodedata regex uuid
from import runanalysis
import asyncio
from import ThreadPoolExecutor
import uuid
from import unquote
from threading import Thread
from itertools import ziplongest
from crewai import Crew
from import capture
from import capturetask
from import extractorgname # adjust import if needed
import regex
import unicodedata
from typing import Union
import unicodedata
#
# NOTE(review): every statement in this initialisation section is missing
# tokens (assignment operators, commas, string quotes, and the dotted call
# targets such as the function that reads environment variables). None of it
# parses as Python in its current form -- restore from version control rather
# than hand-repairing, since the exact env-var names and call targets cannot
# be recovered from what is left.
# Load environment variables
#
loaddotenv()
#
# Initialize Flask app
#
app Flask(name) # MUST come first
#
# Configure CORS
#
# NOTE(review): presumably the allowed origins come from an ALLOWEDORIGINS
# setting with "*" as the fallback -- confirm the real variable name.
allowedorigins (ALLOWEDORIGINS *) # e.g. http://localhost:3000
CORS(app resourcesr/*: origins: allowedorigins)
# NOTE(review): the following lines look like Firebase Admin setup (service
# account -> credentials -> app init with a storage bucket -> Firestore client
# and bucket handle), judging by the "Firebase initialized" message below.
# The call targets are gone; confirm before relying on this reading.
serviceaccountinfo ((FIREBASESERVICEACCOUNT))
cred (serviceaccountinfo)
(cred
storageBucket: (storageBucket)
)
db ()
bucket ()
print(f Firebase initialized with bucket: )
#
# Azure OpenAI Setup
#
# NOTE(review): four settings (key, base URL, API version, deployment name)
# followed by a chat-completions URL assembled from three of them. The URL
# line has lost its f-string braces and the "?"/"=" of the query string.
AZUREAPIKEY (AZUREOPENAIAPIKEY)
AZUREAPIBASE (AZUREOPENAIAPIBASE)
AZUREAPIVERSION (AZUREOPENAIAPIVERSION)
AZUREDEPLOYMENTNAME (AZUREOPENAIDEPLOYMENTNAME)
AZUREAPIURL fAZUREAPIBASE/openai/deployments/AZUREDEPLOYMENTNAME/chat/completionsapi-versionAZUREAPIVERSION
def extractpdftext(content):
    """Extract the text of every page of an in-memory PDF.

    NOTE(review): the original lines had lost their operators, string quotes
    and dotted call targets. The calls used here (``pdfplumber.open``,
    ``io.BytesIO``, ``pdf.pages``, ``page.extract_text``) are reconstructed
    from the surviving ``with ... as pdf`` / ``for page in`` shape and from the
    modules the file imports -- confirm against version control. The log
    message may originally have carried a leading symbol that was stripped.

    Args:
        content: Raw bytes of the PDF document.

    Returns:
        The text of each non-empty page, each followed by a newline. Returns
        whatever was collected so far (possibly "") if parsing fails; the
        error is printed rather than raised, so callers get best-effort text.
    """
    page_texts = []
    try:
        with pdfplumber.open(io.BytesIO(content)) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                # Pages without extractable text yield None/"" and are skipped.
                if page_text:
                    page_texts.append(page_text + "\n")
    except Exception as e:
        # Deliberate best-effort: a broken PDF must not abort the caller.
        print("PDF parsing error:", str(e))
    return "".join(page_texts)
def extractdocxtext(content):
    """Extract paragraph and table text from an in-memory DOCX file.

    NOTE(review): the original lines had lost their operators, string quotes
    and dotted call targets. ``docx.Document``, ``doc.paragraphs``,
    ``doc.tables``, ``table.rows`` and ``row.cells`` are reconstructed from the
    surviving loop structure and the file's imports. The separator placed
    between the cells of a table row could not be recovered; a single space is
    assumed -- confirm against version control.

    Args:
        content: Raw bytes of the DOCX document.

    Returns:
        Paragraph texts joined by newlines, followed by one line per table
        row. Returns whatever was collected so far (possibly "") if parsing
        fails; the error is printed rather than raised.
    """
    lines = []
    try:
        doc = docx.Document(io.BytesIO(content))
        lines.append("\n".join(p.text for p in doc.paragraphs))
        for table in doc.tables:
            for row in table.rows:
                # Separator is an assumption (see docstring).
                lines.append(" ".join(cell.text for cell in row.cells))
    except Exception as e:
        # Deliberate best-effort: a broken DOCX must not abort the caller.
        print("DOCX parsing error:", str(e))
    return "\n".join(lines)
def extracttxttext(content):
    """Decode raw bytes of a plain-text file as UTF-8.

    NOTE(review): the original ``return (utf-8 errorsignore)`` had lost its
    receiver, quotes and keyword; it is restored here as
    ``content.decode("utf-8", errors="ignore")``. The bare ``return`` in the
    error path is restored as ``return ""`` to match the string result of the
    success path -- confirm against version control.

    Args:
        content: Raw bytes of the text file.

    Returns:
        The decoded text with undecodable bytes dropped, or "" if ``content``
        cannot be decoded at all (for example, it is not a bytes object). The
        error is printed rather than raised.
    """
    try:
        return content.decode("utf-8", errors="ignore")
    except Exception as e:
        print("TXT decoding error:", str(e))
        return ""
def extracttextbytype(filename, content):
    """Route a file's bytes to the extractor matching its extension.

    NOTE(review): the original lines had lost the parameter comma, the
    assignment, the ``endswith`` receivers, string quotes and f-string braces;
    they are restored here. Helper names are kept exactly as they appear in
    the file.

    Args:
        filename: Name of the file; only its (case-insensitive) suffix is used.
        content: Raw bytes of the file.

    Returns:
        The text produced by the matching extractor for .pdf, .docx, .txt,
        .pptx and .xlsx files, or "" (after printing a notice) for any other
        extension.
    """
    filename_lower = filename.lower()
    if filename_lower.endswith(".pdf"):
        return extractpdftext(content)
    if filename_lower.endswith(".docx"):
        return extractdocxtext(content)
    if filename_lower.endswith(".txt"):
        return extracttxttext(content)
    if filename_lower.endswith(".pptx"):
        return extractpptxtext(content)
    if filename_lower.endswith(".xlsx"):
        return extractxlsxtext(content)
    print(f"Unsupported file type: {filename}")
    return ""
def getfilesfromfirebasefolder(foldername):
Reads all files from Firebase Storage folder extracts text and returns as a single string.
texts
try:
blobs list((prefixfUpload/foldername/))
print(f