generative-ai-for-beginners/shared/python/input_validation.py
Claude 2bdc61d4cd
feat: Add comprehensive security fixes, code quality improvements, and documentation
Security Fixes (HIGH Severity):
- Fix hardcoded SECRET_KEY in Flask app - now uses environment variable
- Add function validation to prevent arbitrary function execution in JS
- Add path traversal protection in certificate handling
- Fix unsafe JSON parsing with proper error handling

Security Fixes (MEDIUM Severity):
- Add environment variable validation with helpful error messages
- Add request timeouts and proper error handling for HTTP calls
- Fix file handle leaks using context managers
- Add input validation and sanitization for user inputs

Code Quality Improvements:
- Add ESLint configuration for JavaScript/TypeScript linting
- Add Prettier configuration for consistent code formatting
- Add pyproject.toml with Black, Ruff, mypy, and pytest configuration
- Create shared Python utilities module with:
  - env_utils.py: Environment variable handling
  - input_validation.py: Input validation and sanitization
  - api_utils.py: Safe API request wrappers

Documentation:
- Add SECURITY_GUIDELINES.md with best practices for AI applications
- Add ENHANCED_FEATURES_ROADMAP.md with improvement recommendations
  including new lesson topics, API modernization, and CI/CD enhancements

Files Modified:
- 05-advanced-prompts/{python,javascript}/*
- 06-text-generation-apps/{python,js-githubmodels}/*
- 07-building-chat-applications/js-githubmodels/*
- 08-building-search-applications/{js-githubmodels,scripts}/*
- 09-building-image-applications/python/*
- 11-integrating-with-function-calling/{js-githubmodels,typescript}/*
2026-01-21 10:00:28 +00:00

215 lines
5.7 KiB
Python

"""
Input validation utilities for secure user input handling.
This module provides functions to validate and sanitize user input,
protecting against prompt injection and other input-based attacks.
"""
import re
from typing import Optional
def validate_number_input(
    value: str,
    min_val: int = 1,
    max_val: int = 100,
    field_name: str = "number"
) -> int:
    """
    Validate and convert string input to an integer within bounds.

    Surrounding whitespace is stripped before conversion.

    Args:
        value: The string value to validate.
        min_val: Minimum allowed value (inclusive).
        max_val: Maximum allowed value (inclusive).
        field_name: Name of the field for error messages.

    Returns:
        The validated integer value.

    Raises:
        ValueError: If the value is not a valid integer or is out of bounds.

    Example:
        >>> validate_number_input("5", min_val=1, max_val=20)
        5
    """
    # Only the conversion lives inside the ``try``. The range check is done
    # afterwards so the two failure modes never have to be told apart by
    # inspecting the exception message (user input such as
    # "must be between" would otherwise leak the raw ``int()`` error).
    try:
        num = int(value.strip())
    except (ValueError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover non-string input such as ``None``.
        raise ValueError(
            f"Please enter a valid {field_name} between {min_val} and {max_val}"
        ) from e

    if not min_val <= num <= max_val:
        raise ValueError(
            f"{field_name} must be between {min_val} and {max_val}, got {num}"
        )
    return num
def validate_text_input(
    value: Optional[str],
    max_length: int = 500,
    min_length: int = 1,
    allow_empty: bool = False,
    field_name: str = "input"
) -> str:
    """
    Validate and trim text input.

    Length limits are applied to the value after surrounding whitespace has
    been stripped.

    Args:
        value: The string value to validate. ``None`` is treated like an
            empty value.
        max_length: Maximum allowed length.
        min_length: Minimum required length. Not applied to an empty value
            that is accepted because ``allow_empty`` is True.
        allow_empty: Whether to allow empty (or whitespace-only) strings
            and ``None``; these are returned as ``""``.
        field_name: Name of the field for error messages.

    Returns:
        The validated and trimmed string.

    Raises:
        ValueError: If the value fails validation.

    Example:
        >>> validate_text_input("  Hello World ", max_length=100)
        'Hello World'
    """
    if value is None:
        if allow_empty:
            return ""
        raise ValueError(f"{field_name} cannot be None")

    trimmed = value.strip()

    if not trimmed:
        # Return early so that ``allow_empty=True`` really permits an empty
        # value; otherwise the default ``min_length=1`` would reject it below.
        if allow_empty:
            return ""
        raise ValueError(f"{field_name} cannot be empty")

    if len(trimmed) > max_length:
        raise ValueError(
            f"{field_name} is too long. Maximum {max_length} characters allowed, "
            f"got {len(trimmed)}"
        )

    if len(trimmed) < min_length:
        raise ValueError(
            f"{field_name} is too short. Minimum {min_length} characters required"
        )

    return trimmed
def sanitize_prompt_input(
    value: str,
    max_length: int = 1000,
    strict: bool = False
) -> str:
    """
    Sanitize user input intended for use in LLM prompts.

    The input is trimmed, control characters are removed, a small set of
    template/script patterns is stripped, and runs of whitespace (including
    newlines and tabs) are collapsed into single spaces. This is a
    best-effort filter for the listed patterns only, not a complete defence
    against prompt injection.

    Args:
        value: The string to sanitize.
        max_length: Maximum allowed length after sanitization.
        strict: If True, only allow word characters, whitespace, and the
            basic punctuation ``, . ' " - ? ! @ # $ % & * ( ) + = : ;``.

    Returns:
        The sanitized string, or ``""`` if ``value`` is empty or None.

    Raises:
        ValueError: If the sanitized input is longer than ``max_length`` or
            nothing is left after sanitization.

    Example:
        >>> sanitize_prompt_input("Hello, world!")
        'Hello, world!'
    """
    if not value:
        return ""

    sanitized = value.strip()

    # Remove null bytes and control characters. \t, \n and \r survive this
    # step but are folded into single spaces by the normalization below.
    sanitized = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', sanitized)

    dangerous_patterns = (
        r'\{\{.*?\}\}',              # Template injection: {{ ... }}
        r'\$\{.*?\}',                # Variable substitution: ${ ... }
        r'<script.*?>.*?</script>',  # Script tags
        r'javascript:',              # JavaScript URLs
    )
    pattern_flags = re.IGNORECASE | re.DOTALL

    # Removing a match can splice its neighbours into a new match
    # ("javajavascript:script:" -> "javascript:"), and so can dropping
    # characters in strict mode. Repeat until the text is stable; every
    # change shortens the string, so the loop always terminates.
    while True:
        previous = sanitized
        for pattern in dangerous_patterns:
            sanitized = re.sub(pattern, '', sanitized, flags=pattern_flags)
        if strict:
            # The hyphen is escaped so it is a literal "-" rather than the
            # start of a range (an unescaped "-? would span '"' through '?'
            # and let '<', '>' and '/' through).
            sanitized = re.sub(r'[^\w\s,.\'"\-?!@#$%&*()+=:;]', '', sanitized)
        if sanitized == previous:
            break

    # Normalize whitespace
    sanitized = re.sub(r'\s+', ' ', sanitized).strip()

    if len(sanitized) > max_length:
        raise ValueError(f"Input too long. Maximum {max_length} characters allowed.")

    if not sanitized:
        raise ValueError("Input contains only invalid characters")

    return sanitized
def validate_email(email: str) -> str:
    """
    Validate an email address format.

    This is a pragmatic syntax check, not a full RFC 5322 parser: the local
    part must be dot-separated runs of letters, digits and ``_ % + -``, and
    the domain must be dot-separated labels (letters, digits, inner hyphens)
    ending in an alphabetic top-level label of at least two characters.

    Args:
        email: The email address to validate.

    Returns:
        The validated email address (trimmed and lowercased).

    Raises:
        ValueError: If the email format is invalid or ``email`` is not a
            string.
    """
    if not isinstance(email, str):
        # Report bad types as a validation failure instead of letting
        # ``.strip()`` raise AttributeError.
        raise ValueError(f"Invalid email format: {email}")

    email = email.strip().lower()

    # Atoms/labels are joined by single dots, which rules out empty pieces
    # such as "a..b@example.com" or "a@..com".
    local_atom = r"[a-z0-9_%+-]+"
    domain_label = r"[a-z0-9](?:[a-z0-9-]*[a-z0-9])?"
    pattern = (
        rf"{local_atom}(?:\.{local_atom})*"
        rf"@(?:{domain_label}\.)+[a-z]{{2,}}"
    )

    if not re.fullmatch(pattern, email):
        raise ValueError(f"Invalid email format: {email}")

    return email
def validate_url(url: str, require_https: bool = True) -> str:
    """
    Validate a URL format.

    Accepted shape: ``scheme://host[:port][/path][?query][#fragment]`` where
    the host is made of dot-separated labels (letters, digits, inner
    hyphens). Userinfo (``user@host``) is not accepted. This is a syntax
    check only; the URL is not resolved or fetched.

    Args:
        url: The URL to validate.
        require_https: If True, only allow HTTPS URLs; otherwise allow both
            HTTP and HTTPS.

    Returns:
        The validated URL with surrounding whitespace removed.

    Raises:
        ValueError: If the URL format is invalid or ``url`` is not a string.
    """
    kind = "HTTPS URL" if require_https else "URL"

    if not isinstance(url, str):
        # Report bad types as a validation failure instead of letting
        # ``.strip()`` raise AttributeError.
        raise ValueError(f"Invalid {kind}: {url}")

    url = url.strip()

    scheme = r"https" if require_https else r"https?"
    # A label starts and ends with an alphanumeric character, so hosts like
    # "-" or "..." are rejected.
    host_label = r"[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?"
    pattern = (
        rf"{scheme}://"
        rf"{host_label}(?:\.{host_label})*\.?"  # host, optional root dot
        r"(?::(?P<port>\d{1,5}))?"              # optional explicit port
        r"(?:[/?#]\S*)?"                        # optional path/query/fragment
    )

    match = re.fullmatch(pattern, url)
    if match is None:
        raise ValueError(f"Invalid {kind}: {url}")

    port = match.group("port")
    if port is not None and int(port) > 65535:
        raise ValueError(f"Invalid {kind}: {url}")

    return url