feat: Complete Smart Resume Formatter with R2 and Gemini AI integration
Some checks failed
Profile Linker Docker Build / Build and push Docker image (push) Failing after 3s
Some checks failed
Profile Linker Docker Build / Build and push Docker image (push) Failing after 3s
- Integrated Cloudflare R2 for template storage and converted file management
- Added Google Gemini AI for resume parsing and HTML generation
- Created backend API endpoints for templates, conversion, and history
- Refactored frontend to use real API instead of mock data
- Fixed Docker networking issues (IPv6/IPv4) for R2 connectivity
- Added resumeService.ts for frontend API integration
- Updated Vite configuration for proper asset serving in Docker
- Successfully tested with 13 templates from R2 bucket
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from fastapi import APIRouter

from app.api.endpoints import people, resumes

# Top-level API router; each endpoint module is mounted under its own prefix.
api_router = APIRouter()

api_router.include_router(people.router, prefix="/people", tags=["people"])
api_router.include_router(resumes.router, prefix="/resumes", tags=["resumes"])
||||
179
backend/app/api/endpoints/resumes.py
Normal file
179
backend/app/api/endpoints/resumes.py
Normal file
@@ -0,0 +1,179 @@
|
||||
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from fastapi.responses import JSONResponse
from typing import List, Dict, Optional
import io

from app.services.r2_service import r2_service
from app.services.ai_service import ai_service

# Router for resume template listing, conversion, history, and download
# endpoints; mounted under the /resumes prefix by the parent API router.
router = APIRouter()
||||
@router.get("/templates", response_model=List[str])
async def get_templates():
    """Return the names of all resume templates available in R2 storage."""
    try:
        return r2_service.list_templates()
    except Exception as exc:
        # Any storage failure surfaces as a 500 with the underlying reason.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch templates: {str(exc)}"
        )
|
||||
|
||||
|
||||
@router.get("/templates/{template_name}")
async def get_template_content(template_name: str):
    """Return the raw HTML content of a single template.

    Responds 404 when the template does not exist and 500 on storage errors.
    """
    try:
        html = r2_service.get_template_content(template_name)
        if html is None:
            raise HTTPException(
                status_code=404,
                detail=f"Template '{template_name}' not found"
            )
        return {"content": html}
    except HTTPException:
        # Re-raise our own 404 untouched.
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch template content: {str(exc)}"
        )
|
||||
|
||||
|
||||
@router.post("/convert")
async def convert_resume(
    file: UploadFile = File(...),
    template_name: str = Form(...)
):
    """
    Convert an uploaded resume (PDF or DOCX) using the specified template.

    Pipeline:
    1. Extract text from the resume via Gemini AI
    2. Load the template HTML from R2
    3. Merge the resume content into the template via Gemini AI
    4. Upload the generated HTML to R2
    5. Return the uploaded file URL and the generated HTML

    Raises:
        HTTPException: 400 for unsupported file types, 404 when the template
            is missing, 500 for extraction/generation/upload failures.
    """
    try:
        # Validate the client-reported content type (PDF or DOCX only).
        # NOTE(review): this trusts the Content-Type header; the payload
        # itself is not sniffed.
        allowed_types = [
            'application/pdf',
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        ]
        if file.content_type not in allowed_types:
            raise HTTPException(
                status_code=400,
                detail="Invalid file type. Only PDF and DOCX files are allowed."
            )

        # Read the whole upload into memory.
        file_content = await file.read()

        # Step 1: extract plain text from the uploaded document.
        resume_text = await ai_service.extract_text_from_resume(
            file_content,
            file.content_type
        )
        if not resume_text:
            raise HTTPException(
                status_code=500,
                detail="Failed to extract text from resume"
            )

        # Step 2: fetch the template HTML from R2.
        template_html = r2_service.get_template_content(template_name)
        if not template_html:
            raise HTTPException(
                status_code=404,
                detail=f"Template '{template_name}' not found"
            )

        # Step 3: merge the resume text into the template.
        generated_html = await ai_service.generate_html_from_template(
            resume_text,
            template_html
        )
        if not generated_html:
            raise HTTPException(
                status_code=500,
                detail="Failed to generate formatted HTML"
            )

        # Step 4: upload the HTML to R2. UploadFile.filename is optional in
        # FastAPI, so fall back to a safe default before stripping the
        # extension (previously this crashed on a missing filename).
        original_name = file.filename or "resume"
        base_filename = original_name.rsplit('.', 1)[0]
        html_filename = f"{base_filename}_{template_name}.html"

        html_url = r2_service.upload_converted_file(
            generated_html.encode('utf-8'),
            html_filename,
            'text/html',
            metadata={
                'original_filename': original_name,
                'template': template_name
            }
        )

        if not html_url:
            raise HTTPException(
                status_code=500,
                detail="Failed to upload HTML to storage"
            )

        # Step 5: return the stored URL plus the HTML for immediate preview.
        return {
            "success": True,
            "html_url": html_url,
            "html_content": generated_html,
            "message": "Resume converted successfully"
        }

    except HTTPException:
        # Propagate deliberate HTTP errors unchanged.
        raise
    except Exception as e:
        print(f"Error converting resume: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred during conversion: {str(e)}"
        )
|
||||
|
||||
|
||||
@router.get("/history", response_model=List[Dict])
async def get_conversion_history(limit: int = 50):
    """Return metadata for previously converted resumes stored in R2."""
    try:
        return r2_service.list_converted_resumes(limit=limit)
    except Exception as exc:
        # Storage failures surface as a 500 with the underlying reason.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch conversion history: {str(exc)}"
        )
|
||||
|
||||
|
||||
@router.get("/download/{file_key:path}")
async def get_download_url(file_key: str):
    """Return a presigned download URL for the object stored under *file_key*.

    Responds 404 when no URL could be produced and 500 on unexpected errors.
    """
    try:
        presigned = r2_service.get_file_url(file_key)
        if not presigned:
            raise HTTPException(
                status_code=404,
                detail="File not found"
            )
        return {"url": presigned}
    except HTTPException:
        # Re-raise our own 404 untouched.
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate download URL: {str(exc)}"
        )
|
||||
@@ -12,7 +12,7 @@ class Settings:
|
||||
"""
|
||||
APP_NAME: str = os.getenv("APP_NAME", "ResumeFormatter")
|
||||
API_V1_STR: str = f"/{APP_NAME}/api"
|
||||
PROJECT_NAME: str = "Profile Linker API"
|
||||
PROJECT_NAME: str = "Smart Resume Formatter API"
|
||||
|
||||
# CORS settings
|
||||
BACKEND_CORS_ORIGINS: List[str] = ["*"]
|
||||
@@ -20,6 +20,15 @@ class Settings:
|
||||
# Database settings - using in-memory database by default
|
||||
# In a production environment, you would use a real database connection string
|
||||
DATABASE_URL: Optional[str] = None
|
||||
|
||||
# Gemini AI settings
|
||||
GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY", "")
|
||||
|
||||
# Cloudflare R2 settings
|
||||
R2_ENDPOINT: str = os.getenv("R2_ENDPOINT", "")
|
||||
R2_ACCESS_KEY_ID: str = os.getenv("R2_ACCESS_KEY_ID", "")
|
||||
R2_SECRET_ACCESS_KEY: str = os.getenv("R2_SECRET_ACCESS_KEY", "")
|
||||
R2_BUCKET_NAME: str = os.getenv("R2_BUCKET_NAME", "e-teams")
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
||||
1
backend/app/services/__init__.py
Normal file
1
backend/app/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Services module
|
||||
135
backend/app/services/ai_service.py
Normal file
135
backend/app/services/ai_service.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
Gemini AI Service
|
||||
Handles resume text extraction and HTML generation
|
||||
"""
|
||||
import google.generativeai as genai
|
||||
from typing import Optional
|
||||
import base64
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class AIService:
    """Service for interacting with Google Gemini AI.

    Provides resume text extraction (from PDF/DOCX bytes) and
    template-preserving HTML generation.
    """

    def __init__(self):
        """Initialize the Gemini client.

        Raises:
            ValueError: if GEMINI_API_KEY is not configured in settings.
        """
        if not settings.GEMINI_API_KEY:
            raise ValueError("GEMINI_API_KEY not configured")
        genai.configure(api_key=settings.GEMINI_API_KEY)
        self.model = genai.GenerativeModel('gemini-2.0-flash-exp')

    async def extract_text_from_resume(
        self,
        file_content: bytes,
        mime_type: str
    ) -> Optional[str]:
        """Extract plain text from a resume document via Gemini.

        Args:
            file_content: Raw file bytes.
            mime_type: MIME type of the file (application/pdf or the DOCX
                OpenXML MIME type).

        Returns:
            The extracted text, or None if extraction failed.
        """
        try:
            # Gemini's inline-data API takes base64-encoded payloads.
            base64_data = base64.b64encode(file_content).decode('utf-8')

            prompt = """Extract all text from this resume document.
Preserve the original structure, including sections, headings, bullet points, and line breaks, as plain text.
Focus on maintaining the hierarchical structure of the content."""

            response = self.model.generate_content([
                {
                    'mime_type': mime_type,
                    'data': base64_data
                },
                prompt
            ])

            return response.text
        except Exception as e:
            # Best-effort: callers treat None as "extraction failed".
            print(f"Error extracting text from resume: {e}")
            return None

    async def generate_html_from_template(
        self,
        resume_text: str,
        template_html: str
    ) -> Optional[str]:
        """Generate formatted HTML by merging resume content with a template.

        Args:
            resume_text: Extracted resume text.
            template_html: HTML template content.

        Returns:
            The generated HTML, or None if generation failed.
        """
        try:
            prompt = self._build_generation_prompt(resume_text, template_html)

            response = self.model.generate_content(prompt)

            # Strip a Markdown code fence if the model wrapped its output.
            # Handles both a ```html opener and a bare ``` opener (the
            # previous version missed the latter).
            html_content = response.text.strip()
            if html_content.startswith('```html'):
                html_content = html_content[7:]
            elif html_content.startswith('```'):
                html_content = html_content[3:]
            if html_content.endswith('```'):
                html_content = html_content[:-3]

            return html_content.strip()
        except Exception as e:
            # Best-effort: callers treat None as "generation failed".
            print(f"Error generating HTML: {e}")
            return None

    def _build_generation_prompt(self, resume_text: str, template_html: str) -> str:
        """Build the find-and-replace style prompt for HTML generation."""
        instructions = """### 🎯 EXACT TEMPLATE PRESERVATION INSTRUCTIONS:

**🚨 RULE #1: COPY TEMPLATE EXACTLY - NO STRUCTURAL CHANGES! 🚨**
**🚨 RULE #2: ONLY REPLACE PLACEHOLDER TEXT - NOTHING ELSE! 🚨**

**YOU ARE A FIND-AND-REPLACE TOOL - NOT A DESIGNER!**

**SIMPLE 3-STEP PROCESS:**
1. **COPY**: Take the entire HTML template (every character from <!DOCTYPE to </html>).
2. **FIND**: Locate placeholder text in the template (like "{{name}}", "John Doe", "Software Engineer", "2020-2023", etc.).
3. **REPLACE**: Replace ONLY that placeholder text with the user's corresponding information.

**WHAT TO REPLACE:**
- Names, contact info
- Job titles, companies, dates, descriptions
- Education details
- Skills lists

**WHAT TO NEVER CHANGE:**
- HTML tags (div, p, h1, etc.), CSS classes, IDs, or any inline styles.
- The overall HTML structure, layout, nesting, alignment, spacing, colors, and fonts.

**FOR EXTRA USER CONTENT:**
If the user's resume has sections not present in the template (e.g., 'Projects', 'Certifications'):
- Find a similar section in the template (e.g., 'Experience').
- Copy that section's HTML structure.
- Add it at a logical place (usually at the end) with the user's content.
- Reuse the same CSS classes and styling patterns to maintain consistency.

**CRITICAL:** Ensure ALL information from the user's resume is included in the final HTML. Do not omit any details.
"""

        return f"""You are an expert HTML resume generator. Your task is to take the user's resume content and perfectly merge it into the provided company HTML template by acting as a precise find-and-replace tool.

**User's Resume Content:**
---
{resume_text}
---

**Company HTML Template:**
---
{template_html}
---

{instructions}

Now, generate the final, complete HTML file. Your entire output must be only the HTML code, starting with `<!DOCTYPE html>` and ending with `</html>`. Do not include any explanations or surrounding text."""
|
||||
|
||||
|
||||
# Module-level singleton shared by the API endpoints; constructing it here
# means importing this module fails fast when GEMINI_API_KEY is missing.
ai_service = AIService()
|
||||
203
backend/app/services/r2_service.py
Normal file
203
backend/app/services/r2_service.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
Cloudflare R2 Storage Service
|
||||
Handles all interactions with R2 bucket for templates and converted resumes
|
||||
"""
|
||||
import boto3
|
||||
from botocore.client import Config
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime
|
||||
import io
|
||||
import socket
|
||||
from app.core.config import settings
|
||||
|
||||
# Force IPv4 to avoid Docker IPv6 issues: Docker's IPv6 networking broke R2
# connectivity, so DNS resolution is pinned to IPv4 by monkey-patching
# socket.getaddrinfo process-wide. Keep a reference to the real resolver.
original_getaddrinfo = socket.getaddrinfo


def getaddrinfo_ipv4_only(host, port, family=0, type=0, proto=0, flags=0):
    """Force IPv4 resolution only.

    Drop-in replacement for socket.getaddrinfo that deliberately ignores the
    requested address *family* and always queries AF_INET.
    """
    return original_getaddrinfo(host, port, socket.AF_INET, type, proto, flags)


# NOTE: global side effect — every resolver call in this process (not just
# this module's) goes through the IPv4-only shim from import time onward.
socket.getaddrinfo = getaddrinfo_ipv4_only
|
||||
|
||||
|
||||
class R2Service:
    """Service for interacting with Cloudflare R2 storage.

    Handles template reads, converted-resume uploads/listings, and
    presigned-URL generation against a single configured bucket.
    """

    def __init__(self):
        """Initialize the S3-compatible client from R2 settings."""
        self.s3_client = boto3.client(
            's3',
            endpoint_url=settings.R2_ENDPOINT,
            aws_access_key_id=settings.R2_ACCESS_KEY_ID,
            aws_secret_access_key=settings.R2_SECRET_ACCESS_KEY,
            config=Config(
                signature_version='s3v4',
                s3={'addressing_style': 'path'}
            ),
            region_name='auto'
        )
        self.bucket_name = settings.R2_BUCKET_NAME
        # Key prefixes acting as "folders" within the bucket.
        self.templates_prefix = "templates/"
        self.converted_prefix = "converted_resumes/"

    def list_templates(self) -> List[str]:
        """
        List all available template names from R2.

        Returns:
            List of template names (without the .html extension); empty list
            on any error.
        """
        try:
            # Verbose debug output retained intentionally (used to diagnose
            # Docker/R2 connectivity issues).
            print(f"Attempting to list templates from bucket: {self.bucket_name}, prefix: {self.templates_prefix}")
            print(f"Using endpoint: {settings.R2_ENDPOINT}")

            response = self.s3_client.list_objects_v2(
                Bucket=self.bucket_name,
                Prefix=self.templates_prefix
            )

            print(f"R2 Response: {response}")

            if 'Contents' not in response:
                print(f"No contents found in bucket with prefix {self.templates_prefix}")
                return []

            templates = []
            for obj in response['Contents']:
                key = obj['Key']
                # Extract template name (remove prefix and .html extension).
                if key.endswith('.html'):
                    template_name = key.replace(self.templates_prefix, '').replace('.html', '')
                    if template_name:  # Skip if empty (key was just the prefix)
                        templates.append(template_name)

            print(f"Found templates: {templates}")
            return templates
        except Exception as e:
            print(f"Error listing templates: {e}")
            import traceback
            traceback.print_exc()
            return []

    def get_template_content(self, template_name: str) -> Optional[str]:
        """
        Get the HTML content of a specific template.

        Args:
            template_name: Name of the template (without .html extension).

        Returns:
            HTML content as string, or None if not found or on error.
        """
        try:
            key = f"{self.templates_prefix}{template_name}.html"
            response = self.s3_client.get_object(
                Bucket=self.bucket_name,
                Key=key
            )
            return response['Body'].read().decode('utf-8')
        except Exception as e:
            print(f"Error getting template content for {template_name}: {e}")
            return None

    def upload_converted_file(
        self,
        file_content: bytes,
        filename: str,
        content_type: str,
        metadata: Optional[Dict[str, str]] = None
    ) -> Optional[str]:
        """
        Upload a converted resume file to R2.

        Args:
            file_content: File content as bytes.
            filename: Name of the file; a timestamp is prepended so repeated
                conversions of the same resume do not overwrite each other.
            content_type: MIME type (text/html or application/pdf).
            metadata: Optional metadata dict stored with the object.

        Returns:
            URL of the uploaded file, or None if the upload failed.
        """
        try:
            # Fix: the key must include the caller's filename — previously the
            # filename was dropped, so every upload collided on the same key.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            key = f"{self.converted_prefix}{timestamp}_{filename}"

            upload_args = {
                'Bucket': self.bucket_name,
                'Key': key,
                'Body': file_content,
                'ContentType': content_type
            }
            if metadata:
                upload_args['Metadata'] = metadata

            self.s3_client.put_object(**upload_args)

            # Build a direct URL from the endpoint. NOTE(review): this assumes
            # the bucket is reachable at this path — confirm against the R2
            # public-access configuration.
            url = f"{settings.R2_ENDPOINT}/{self.bucket_name}/{key}"
            return url
        except Exception as e:
            print(f"Error uploading file {filename}: {e}")
            return None

    def list_converted_resumes(self, limit: int = 50) -> List[Dict]:
        """
        List converted resumes from R2, newest first.

        Args:
            limit: Maximum number of objects to request.

        Returns:
            List of dicts with file metadata; empty list on any error.
        """
        try:
            response = self.s3_client.list_objects_v2(
                Bucket=self.bucket_name,
                Prefix=self.converted_prefix,
                MaxKeys=limit
            )

            if 'Contents' not in response:
                return []

            files = []
            for obj in response['Contents']:
                key = obj['Key']
                # Skip directory markers.
                if key.endswith('/'):
                    continue

                filename = key.replace(self.converted_prefix, '')
                file_url = f"{settings.R2_ENDPOINT}/{self.bucket_name}/{key}"

                files.append({
                    'id': key,
                    'name': filename,
                    'url': file_url,
                    'size': obj['Size'],
                    'lastModified': obj['LastModified'].isoformat(),
                    # Raw datetime kept alongside the ISO string for sorting.
                    'timestamp': obj['LastModified']
                })

            # Sort by timestamp, newest first.
            files.sort(key=lambda x: x['timestamp'], reverse=True)

            return files
        except Exception as e:
            print(f"Error listing converted resumes: {e}")
            return []

    def get_file_url(self, key: str, expires_in: int = 3600) -> Optional[str]:
        """
        Generate a presigned URL for a file.

        Args:
            key: Object key in R2.
            expires_in: URL expiration time in seconds (default 1 hour).

        Returns:
            Presigned URL, or None if generation failed.
        """
        try:
            return self.s3_client.generate_presigned_url(
                'get_object',
                Params={'Bucket': self.bucket_name, 'Key': key},
                ExpiresIn=expires_in
            )
        except Exception as e:
            print(f"Error generating presigned URL for {key}: {e}")
            return None
|
||||
|
||||
|
||||
# Singleton instance
|
||||
r2_service = R2Service()
|
||||
Reference in New Issue
Block a user