feat: Complete Smart Resume Formatter with R2 and Gemini AI integration
Some checks failed
Profile Linker Docker Build / Build and push Docker image (push) Failing after 3s

- Integrated Cloudflare R2 for template storage and converted file management
- Added Google Gemini AI for resume parsing and HTML generation
- Created backend API endpoints for templates, conversion, and history
- Refactored frontend to use real API instead of mock data
- Fixed Docker networking issues (IPv6/IPv4) for R2 connectivity
- Added resumeService.ts for frontend API integration
- Updated Vite configuration for proper asset serving in Docker
- Successfully tested with 13 templates from R2 bucket
This commit is contained in:
Laxmi Khilnani
2025-10-14 21:43:41 +05:30
parent ee030b70bc
commit cda50356b4
34 changed files with 2604 additions and 360 deletions

View File

@@ -1,6 +1,7 @@
from fastapi import APIRouter
from app.api.endpoints import people
from app.api.endpoints import people, resumes
# Top-level router; each endpoint module is mounted under its own URL prefix.
api_router = APIRouter()
# People endpoints are exposed under /people and tagged "people".
api_router.include_router(people.router, prefix="/people", tags=["people"])
# Resume endpoints are exposed under /resumes and tagged "resumes".
api_router.include_router(resumes.router, prefix="/resumes", tags=["resumes"])

View File

@@ -0,0 +1,179 @@
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from fastapi.responses import JSONResponse
from typing import List, Dict, Optional
import io
from app.services.r2_service import r2_service
from app.services.ai_service import ai_service
router = APIRouter()
@router.get("/templates", response_model=List[str])
async def get_templates():
    """
    Return the names of the resume templates available in R2.

    Raises:
        HTTPException: 500 if the storage service raises while listing.
    """
    try:
        return r2_service.list_templates()
    except Exception as e:
        failure_detail = f"Failed to fetch templates: {str(e)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.get("/templates/{template_name}")
async def get_template_content(template_name: str):
    """
    Return the HTML of one template as ``{"content": <html>}``.

    Raises:
        HTTPException: 404 if the storage service returns ``None`` for the
            template, 500 if it raises.
    """
    try:
        content = r2_service.get_template_content(template_name)
        if content is not None:
            return {"content": content}
        missing_detail = f"Template '{template_name}' not found"
        raise HTTPException(status_code=404, detail=missing_detail)
    except HTTPException:
        # Let the 404 above (or any HTTP error from below) pass through untouched.
        raise
    except Exception as e:
        failure_detail = f"Failed to fetch template content: {str(e)}"
        raise HTTPException(status_code=500, detail=failure_detail)
def _ascii_safe(value: str) -> str:
    """Return ``value`` with every non-ASCII character replaced by ``?``."""
    return value.encode('ascii', 'replace').decode('ascii')


@router.post("/convert")
async def convert_resume(
    file: UploadFile = File(...),
    template_name: str = Form(...)
):
    """
    Convert an uploaded resume into HTML using the specified template.

    Steps:
    1. Validate the upload (PDF or DOCX content type, non-empty body)
    2. Extract text from the resume using Gemini AI
    3. Get template content from R2
    4. Generate formatted HTML using Gemini AI
    5. Upload the generated HTML to R2
    6. Return the stored file URL together with the generated HTML

    Args:
        file: The uploaded resume (PDF or DOCX).
        template_name: Name of the template to merge the resume into.

    Returns:
        Dict with the keys ``success``, ``html_url``, ``html_content`` and
        ``message``.

    Raises:
        HTTPException: 400 for an unsupported content type or an empty file,
            404 when the template cannot be loaded, 500 when extraction,
            generation or upload fails.
    """
    try:
        # Validate file type
        allowed_types = (
            'application/pdf',
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        )
        if file.content_type not in allowed_types:
            raise HTTPException(
                status_code=400,
                detail="Invalid file type. Only PDF and DOCX files are allowed."
            )

        # Read file content
        file_content = await file.read()
        if not file_content:
            # Nothing to extract from; fail fast instead of calling the AI service.
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is empty."
            )

        # Step 1: Extract text from resume
        resume_text = await ai_service.extract_text_from_resume(
            file_content,
            file.content_type
        )
        if not resume_text:
            raise HTTPException(
                status_code=500,
                detail="Failed to extract text from resume"
            )

        # Step 2: Get template content
        template_html = r2_service.get_template_content(template_name)
        if not template_html:
            raise HTTPException(
                status_code=404,
                detail=f"Template '{template_name}' not found"
            )

        # Step 3: Generate formatted HTML
        generated_html = await ai_service.generate_html_from_template(
            resume_text,
            template_html
        )
        if not generated_html:
            raise HTTPException(
                status_code=500,
                detail="Failed to generate formatted HTML"
            )

        # Step 4: Upload HTML to R2
        # The client-supplied filename is optional and untrusted: it can be
        # missing, and some clients send a full path. Keep only the leaf name
        # so directory components never end up in the storage key.
        original_name = file.filename or "resume"
        leaf_name = original_name.replace("\\", "/").rsplit("/", 1)[-1]
        base_filename = leaf_name.rsplit('.', 1)[0] or "resume"
        html_filename = f"{base_filename}_{template_name}.html"
        html_url = r2_service.upload_converted_file(
            generated_html.encode('utf-8'),
            html_filename,
            'text/html',
            # boto3 rejects non-ASCII characters in S3 user metadata, so the
            # values are made ASCII-safe rather than failing the whole upload.
            metadata={
                'original_filename': _ascii_safe(original_name),
                'template': _ascii_safe(template_name)
            }
        )
        if not html_url:
            raise HTTPException(
                status_code=500,
                detail="Failed to upload HTML to storage"
            )

        # Return response
        return {
            "success": True,
            "html_url": html_url,
            "html_content": generated_html,
            "message": "Resume converted successfully"
        }
    except HTTPException:
        # Deliberate HTTP errors raised above must keep their status code.
        raise
    except Exception as e:
        print(f"Error converting resume: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred during conversion: {str(e)}"
        ) from e
@router.get("/history", response_model=List[Dict])
async def get_conversion_history(limit: int = 50):
    """
    Return metadata for previously converted resumes stored in R2.

    Args:
        limit: Maximum number of entries to request from the storage service.

    Raises:
        HTTPException: 500 if the storage service raises.
    """
    try:
        history = r2_service.list_converted_resumes(limit=limit)
    except Exception as e:
        raise HTTPException(
            detail=f"Failed to fetch conversion history: {str(e)}",
            status_code=500,
        )
    else:
        return history
@router.get("/download/{file_key:path}")
async def get_download_url(file_key: str):
    """
    Return ``{"url": <presigned url>}`` for the object stored under ``file_key``.

    Raises:
        HTTPException: 404 if the storage service returns no URL, 500 if it
            raises.
    """
    # NOTE(review): file_key is passed to storage unchecked, so any key in the
    # bucket can be requested here - confirm that is intended.
    try:
        presigned = r2_service.get_file_url(file_key)
        if presigned:
            return {"url": presigned}
        raise HTTPException(status_code=404, detail="File not found")
    except HTTPException:
        # Keep the 404 above as-is instead of converting it to a 500.
        raise
    except Exception as e:
        failure_detail = f"Failed to generate download URL: {str(e)}"
        raise HTTPException(status_code=500, detail=failure_detail)

View File

@@ -12,7 +12,7 @@ class Settings:
"""
APP_NAME: str = os.getenv("APP_NAME", "ResumeFormatter")
API_V1_STR: str = f"/{APP_NAME}/api"
PROJECT_NAME: str = "Profile Linker API"
PROJECT_NAME: str = "Smart Resume Formatter API"
# CORS settings
BACKEND_CORS_ORIGINS: List[str] = ["*"]
@@ -20,6 +20,15 @@ class Settings:
# Database settings - using in-memory database by default
# In a production environment, you would use a real database connection string
DATABASE_URL: Optional[str] = None
# Gemini AI settings
GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY", "")
# Cloudflare R2 settings
R2_ENDPOINT: str = os.getenv("R2_ENDPOINT", "")
R2_ACCESS_KEY_ID: str = os.getenv("R2_ACCESS_KEY_ID", "")
R2_SECRET_ACCESS_KEY: str = os.getenv("R2_SECRET_ACCESS_KEY", "")
R2_BUCKET_NAME: str = os.getenv("R2_BUCKET_NAME", "e-teams")
settings = Settings()

View File

@@ -0,0 +1 @@
# Services module

View File

@@ -0,0 +1,135 @@
"""
Gemini AI Service
Handles resume text extraction and HTML generation
"""
import google.generativeai as genai
from typing import Optional
import base64
from app.core.config import settings
class AIService:
    """Service for interacting with Google Gemini AI.

    Holds one ``GenerativeModel`` and exposes two coroutines: one that turns
    an uploaded resume file into plain text and one that merges that text
    into an HTML template. Both report failure by returning ``None``.
    """

    def __init__(self):
        """Initialize Gemini AI with the API key from settings.

        Raises:
            ValueError: If ``settings.GEMINI_API_KEY`` is empty.
        """
        if not settings.GEMINI_API_KEY:
            raise ValueError("GEMINI_API_KEY not configured")
        genai.configure(api_key=settings.GEMINI_API_KEY)
        self.model = genai.GenerativeModel('gemini-2.0-flash-exp')

    async def extract_text_from_resume(
        self,
        file_content: bytes,
        mime_type: str
    ) -> Optional[str]:
        """
        Extract text from a resume file using Gemini.

        Args:
            file_content: File content as bytes
            mime_type: MIME type of the file (application/pdf or
                application/vnd.openxmlformats-officedocument.wordprocessingml.document)

        Returns:
            Extracted text, or None if the request failed
        """
        try:
            # Convert bytes to base64
            base64_data = base64.b64encode(file_content).decode('utf-8')
            prompt = """Extract all text from this resume document.
Preserve the original structure, including sections, headings, bullet points, and line breaks, as plain text.
Focus on maintaining the hierarchical structure of the content."""
            # Use the async API: the blocking generate_content() would stall
            # the event loop for the whole duration of the model call.
            response = await self.model.generate_content_async([
                {
                    'mime_type': mime_type,
                    'data': base64_data
                },
                prompt
            ])
            return response.text
        except Exception as e:
            # Best-effort by design: callers treat None as "extraction failed".
            print(f"Error extracting text from resume: {e}")
            return None

    async def generate_html_from_template(
        self,
        resume_text: str,
        template_html: str
    ) -> Optional[str]:
        """
        Generate formatted HTML by merging resume content with a template.

        Args:
            resume_text: Extracted resume text
            template_html: HTML template content

        Returns:
            Generated HTML with any surrounding Markdown code fence removed,
            or None if the request failed
        """
        try:
            prompt = self._build_generation_prompt(resume_text, template_html)
            # Async call for the same reason as in extract_text_from_resume.
            response = await self.model.generate_content_async(prompt)
            return self._strip_code_fence(response.text)
        except Exception as e:
            # Best-effort by design: callers treat None as "generation failed".
            print(f"Error generating HTML: {e}")
            return None

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding Markdown code fence from a model response.

        Handles an opening fence with an ``html`` language tag in any letter
        case as well as a bare opening fence, and a closing fence. Text
        without fences is only stripped of surrounding whitespace.
        """
        cleaned = text.strip()
        if cleaned[:7].lower() == '```html':
            cleaned = cleaned[7:]  # Remove ```html
        elif cleaned.startswith('```'):
            cleaned = cleaned[3:]  # Remove a bare opening ```
        if cleaned.endswith('```'):
            cleaned = cleaned[:-3]  # Remove closing ```
        return cleaned.strip()

    def _build_generation_prompt(self, resume_text: str, template_html: str) -> str:
        """Build the prompt for HTML generation.

        The prompt embeds the resume text, then the template, then a fixed
        instruction block telling the model to keep the template structure
        and only substitute content.
        """
        instructions = """### 🎯 EXACT TEMPLATE PRESERVATION INSTRUCTIONS:
**🚨 RULE #1: COPY TEMPLATE EXACTLY - NO STRUCTURAL CHANGES! 🚨**
**🚨 RULE #2: ONLY REPLACE PLACEHOLDER TEXT - NOTHING ELSE! 🚨**
**YOU ARE A FIND-AND-REPLACE TOOL - NOT A DESIGNER!**
**SIMPLE 3-STEP PROCESS:**
1. **COPY**: Take the entire HTML template (every character from <!DOCTYPE to </html>).
2. **FIND**: Locate placeholder text in the template (like "{{name}}", "John Doe", "Software Engineer", "2020-2023", etc.).
3. **REPLACE**: Replace ONLY that placeholder text with the user's corresponding information.
**WHAT TO REPLACE:**
- Names, contact info
- Job titles, companies, dates, descriptions
- Education details
- Skills lists
**WHAT TO NEVER CHANGE:**
- HTML tags (div, p, h1, etc.), CSS classes, IDs, or any inline styles.
- The overall HTML structure, layout, nesting, alignment, spacing, colors, and fonts.
**FOR EXTRA USER CONTENT:**
If the user's resume has sections not present in the template (e.g., 'Projects', 'Certifications'):
- Find a similar section in the template (e.g., 'Experience').
- Copy that section's HTML structure.
- Add it at a logical place (usually at the end) with the user's content.
- Reuse the same CSS classes and styling patterns to maintain consistency.
**CRITICAL:** Ensure ALL information from the user's resume is included in the final HTML. Do not omit any details.
"""
        return f"""You are an expert HTML resume generator. Your task is to take the user's resume content and perfectly merge it into the provided company HTML template by acting as a precise find-and-replace tool.
**User's Resume Content:**
---
{resume_text}
---
**Company HTML Template:**
---
{template_html}
---
{instructions}
Now, generate the final, complete HTML file. Your entire output must be only the HTML code, starting with `<!DOCTYPE html>` and ending with `</html>`. Do not include any explanations or surrounding text."""
# Singleton instance, built when this module is imported. AIService() raises
# ValueError if GEMINI_API_KEY is not configured, so the import fails in that case.
ai_service = AIService()

View File

@@ -0,0 +1,203 @@
"""
Cloudflare R2 Storage Service
Handles all interactions with R2 bucket for templates and converted resumes
"""
import boto3
from botocore.client import Config
from typing import List, Dict, Optional
from datetime import datetime
import io
import socket
from app.core.config import settings
# Work around Docker IPv6 issues by making every name lookup IPv4-only.
# The stock resolver is kept so the wrapper below can delegate to it.
original_getaddrinfo = socket.getaddrinfo


def getaddrinfo_ipv4_only(host, port, family=0, type=0, proto=0, flags=0):
    """Resolve like ``socket.getaddrinfo`` but always with the AF_INET family.

    ``family`` is accepted only to keep the signature compatible; its value
    is ignored.
    """
    forced_family = socket.AF_INET
    lookup = original_getaddrinfo
    return lookup(host, port, forced_family, type, proto, flags)


# Process-wide patch: later socket.getaddrinfo calls go through the wrapper.
socket.getaddrinfo = getaddrinfo_ipv4_only
class R2Service:
    """Service for interacting with Cloudflare R2 storage.

    Wraps a boto3 S3 client pointed at the configured R2 endpoint. Templates
    live under ``templates/`` and converted resumes under
    ``converted_resumes/`` in a single bucket. Every public method is
    best-effort: storage errors are printed and reported through an empty
    list or ``None`` instead of being raised.
    """

    def __init__(self):
        """Initialize R2 client with credentials from settings"""
        self.s3_client = boto3.client(
            's3',
            endpoint_url=settings.R2_ENDPOINT,
            aws_access_key_id=settings.R2_ACCESS_KEY_ID,
            aws_secret_access_key=settings.R2_SECRET_ACCESS_KEY,
            config=Config(
                signature_version='s3v4',
                s3={'addressing_style': 'path'}
            ),
            region_name='auto'
        )
        self.bucket_name = settings.R2_BUCKET_NAME
        # Kept on the instance so URL building does not reach back into settings.
        self.endpoint_url = settings.R2_ENDPOINT
        self.templates_prefix = "templates/"
        self.converted_prefix = "converted_resumes/"

    def _iter_objects(self, prefix: str):
        """Yield every object entry listed under ``prefix``.

        Uses the client's paginator so results beyond the first listing page
        are included as well.
        """
        paginator = self.s3_client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix):
            # Pages without matching objects carry no 'Contents' entry.
            yield from page.get('Contents', [])

    def _template_name_from_key(self, key: str) -> Optional[str]:
        """Return the template name encoded in ``key``, or None.

        Only the leading templates prefix and the trailing ``.html`` are
        removed, so names that contain those substrings elsewhere survive
        intact. Keys that are not ``.html`` files, or that reduce to an empty
        name, yield None.
        """
        suffix = '.html'
        if not key.endswith(suffix):
            return None
        name = key[:-len(suffix)]
        if name.startswith(self.templates_prefix):
            name = name[len(self.templates_prefix):]
        return name or None

    def list_templates(self) -> List[str]:
        """
        List all available template names from R2

        Returns:
            List of template names (without .html extension); an empty list
            if nothing is stored or the listing fails
        """
        try:
            print(f"Attempting to list templates from bucket: {self.bucket_name}, prefix: {self.templates_prefix}")
            print(f"Using endpoint: {self.endpoint_url}")
            templates = []
            for obj in self._iter_objects(self.templates_prefix):
                template_name = self._template_name_from_key(obj['Key'])
                if template_name:
                    templates.append(template_name)
            if not templates:
                print(f"No contents found in bucket with prefix {self.templates_prefix}")
            print(f"Found templates: {templates}")
            return templates
        except Exception as e:
            print(f"Error listing templates: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_template_content(self, template_name: str) -> Optional[str]:
        """
        Get the HTML content of a specific template

        Args:
            template_name: Name of the template (without .html extension)

        Returns:
            HTML content decoded as UTF-8, or None if the object could not
            be fetched
        """
        try:
            key = f"{self.templates_prefix}{template_name}.html"
            response = self.s3_client.get_object(
                Bucket=self.bucket_name,
                Key=key
            )
            return response['Body'].read().decode('utf-8')
        except Exception as e:
            print(f"Error getting template content for {template_name}: {e}")
            return None

    def upload_converted_file(
        self,
        file_content: bytes,
        filename: str,
        content_type: str,
        metadata: Optional[Dict[str, str]] = None
    ) -> Optional[str]:
        """
        Upload a converted resume file to R2

        The object key is the converted-resumes prefix followed by a
        ``YYYYMMDD_HHMMSS`` timestamp and the filename.

        Args:
            file_content: File content as bytes
            filename: Name of the file
            content_type: MIME type (text/html or application/pdf)
            metadata: Optional metadata dict

        Returns:
            ``<endpoint>/<bucket>/<key>`` URL of the uploaded object, or None
            if the upload failed
        """
        # NOTE(review): the returned URL is unsigned - confirm the bucket is
        # readable that way; get_file_url() produces a presigned alternative.
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            key = f"{self.converted_prefix}{timestamp}_{filename}"
            upload_args = {
                'Bucket': self.bucket_name,
                'Key': key,
                'Body': file_content,
                'ContentType': content_type
            }
            if metadata:
                upload_args['Metadata'] = metadata
            self.s3_client.put_object(**upload_args)
            return f"{self.endpoint_url}/{self.bucket_name}/{key}"
        except Exception as e:
            print(f"Error uploading file {filename}: {e}")
            return None

    def list_converted_resumes(self, limit: int = 50) -> List[Dict]:
        """
        List the most recently modified converted resumes from R2

        Args:
            limit: Maximum number of files to return

        Returns:
            List of dicts with the keys ``id``, ``name``, ``url``, ``size``,
            ``lastModified`` (ISO string) and ``timestamp`` (datetime),
            newest first; an empty list if the listing fails
        """
        try:
            prefix = self.converted_prefix
            files = []
            # Collect everything before trimming: capping the listing itself
            # would keep the first keys returned rather than the newest files.
            for obj in self._iter_objects(prefix):
                key = obj['Key']
                # Skip directory markers
                if key.endswith('/'):
                    continue
                filename = key[len(prefix):] if key.startswith(prefix) else key
                files.append({
                    'id': key,
                    'name': filename,
                    'url': f"{self.endpoint_url}/{self.bucket_name}/{key}",
                    'size': obj['Size'],
                    'lastModified': obj['LastModified'].isoformat(),
                    'timestamp': obj['LastModified']
                })
            # Sort by timestamp, newest first, then apply the limit
            files.sort(key=lambda x: x['timestamp'], reverse=True)
            return files[:max(limit, 0)]
        except Exception as e:
            print(f"Error listing converted resumes: {e}")
            return []

    def get_file_url(self, key: str, expires_in: int = 3600) -> Optional[str]:
        """
        Generate a presigned URL for a file

        Args:
            key: Object key in R2
            expires_in: URL expiration time in seconds (default 1 hour)

        Returns:
            Presigned URL, or None if generating it failed
        """
        try:
            return self.s3_client.generate_presigned_url(
                'get_object',
                Params={'Bucket': self.bucket_name, 'Key': key},
                ExpiresIn=expires_in
            )
        except Exception as e:
            print(f"Error generating presigned URL for {key}: {e}")
            return None
# Singleton instance, built when this module is imported; the constructor
# creates the boto3 client from the R2 values in settings.
r2_service = R2Service()