--- /dev/null
+"""Open Food Facts API client.
+
+API Documentation: https://openfoodfacts.github.io/openfoodfacts-server/api/
+"""
+
+from typing import Optional
+import requests
+
+BASE_URL = "https://world.openfoodfacts.org/api/v2"
+
+
+class OpenFoodFactsClient:
+    """Client for the Open Food Facts API.
+
+    Public methods never raise on network or decoding problems; failures are
+    reported as ``{"error": ...}`` payloads so callers can show the message.
+
+    Args:
+        timeout: Per-request timeout in seconds.
+    """
+
+    # Normalized nutrient name -> OFF "nutriments" key (values are per 100 g).
+    _NUTRIENT_KEYS = {
+        "energy_kcal": "energy-kcal_100g",
+        "fat": "fat_100g",
+        "saturated_fat": "saturated-fat_100g",
+        "carbohydrates": "carbohydrates_100g",
+        "sugars": "sugars_100g",
+        "fiber": "fiber_100g",
+        "proteins": "proteins_100g",
+        "sodium": "sodium_100g",
+        "salt": "salt_100g",
+    }
+
+    def __init__(self, timeout: float = 10):
+        self.timeout = timeout
+        self.session = requests.Session()
+        # User-Agent required by OFF API
+        self.session.headers.update(
+            {"User-Agent": "NutraApp/0.1 (https://github.com/nutratech/gui-qt)"}
+        )
+
+    def get_by_barcode(self, barcode: str) -> dict:
+        """Get product by barcode/UPC.
+
+        Args:
+            barcode: UPC or EAN barcode
+
+        Returns:
+            Normalized product data, or a dict with an "error" key (and the
+            "barcode" that was looked up) when the lookup fails.
+        """
+        # Local import keeps this block self-contained.
+        from urllib.parse import quote
+
+        barcode = "" if barcode is None else str(barcode).strip()
+        if not barcode:
+            return {"error": "Empty barcode", "barcode": barcode}
+
+        # Escape the barcode so characters such as "/", "?" or "#" cannot
+        # change which endpoint is requested.
+        url = f"{BASE_URL}/product/{quote(barcode, safe='')}"
+
+        try:
+            response = self.session.get(url, timeout=self.timeout)
+            # NOTE(review): an unknown barcode may be answered with HTTP 404
+            # instead of a 200 carrying status=0 -- both are reported as
+            # "not found" here; confirm against the OFF API docs.
+            if response.status_code == 404:
+                return {"error": "Product not found", "barcode": barcode}
+            response.raise_for_status()
+            data = response.json()
+        except (requests.RequestException, ValueError) as e:
+            # ValueError covers JSON decode failures on requests versions
+            # where the decode error is not a RequestException.
+            return {"error": str(e), "barcode": barcode}
+
+        if not isinstance(data, dict) or data.get("status") != 1:
+            return {"error": "Product not found", "barcode": barcode}
+
+        # "product" may be present but null; treat that like an empty product.
+        return self._normalize_product(data.get("product") or {})
+
+    def search(self, query: str, page_size: int = 10) -> list[dict]:
+        """Search for products by name.
+
+        Args:
+            query: Search term
+            page_size: Number of results
+
+        Returns:
+            List of normalized products, or a one-element list holding an
+            ``{"error": ...}`` dict when the request fails.
+        """
+        # NOTE(review): OFF documents full-text "search_terms" for the v1
+        # search endpoint (cgi/search.pl); confirm /api/v2/search honours it.
+        url = f"{BASE_URL}/search"
+        params = {
+            "search_terms": query,
+            "page_size": page_size,
+            "json": 1,
+        }
+
+        try:
+            response = self.session.get(url, params=params, timeout=self.timeout)
+            response.raise_for_status()
+            data = response.json()
+        except (requests.RequestException, ValueError) as e:
+            return [{"error": str(e)}]
+
+        products = data.get("products") if isinstance(data, dict) else None
+        return [
+            self._normalize_product(product)
+            for product in products or []
+            if isinstance(product, dict)
+        ]
+
+    def _normalize_product(self, product: dict) -> dict:
+        """Normalize OFF product to standard format.
+
+        Maps OFF nutriment keys to more standard names. Missing nutrients are
+        reported as ``None``; a missing or empty product name becomes
+        "Unknown".
+        """
+        # The key can be present with a null value, so `.get(key, {})` alone
+        # is not enough.
+        nutriments = product.get("nutriments") or {}
+
+        return {
+            "barcode": product.get("code"),
+            "name": product.get("product_name") or "Unknown",
+            "brand": product.get("brands"),
+            "serving_size": product.get("serving_size"),
+            "nutriscore": product.get("nutriscore_grade"),
+            "nova_group": product.get("nova_group"),
+            "nutrients": {
+                name: nutriments.get(off_key)
+                for name, off_key in self._NUTRIENT_KEYS.items()
+            },
+        }
--- /dev/null
+"""USDA FoodData Central API client.
+
+API Documentation: https://fdc.nal.usda.gov/api-guide.html
+"""
+
+from typing import Optional
+import requests
+
+# Free API key (demo key has rate limits)
+# Users should get their own at https://fdc.nal.usda.gov/api-key-signup.html
+DEFAULT_API_KEY = "DEMO_KEY"
+
+BASE_URL = "https://api.nal.usda.gov/fdc/v1"
+
+
+class USDAClient:
+    """Client for USDA FoodData Central API.
+
+    Request failures are returned as ``{"error": ...}`` dicts rather than
+    raised.
+
+    Args:
+        api_key: FoodData Central API key; falls back to DEFAULT_API_KEY.
+    """
+
+    def __init__(self, api_key: Optional[str] = None):
+        self.api_key = api_key or DEFAULT_API_KEY
+
+    def _get(self, path: str, params: dict) -> dict:
+        """GET ``BASE_URL/path`` with the API key added; return the decoded JSON.
+
+        Returns ``{"error": message}`` on transport, HTTP or decoding errors.
+        """
+        try:
+            response = requests.get(
+                f"{BASE_URL}/{path}",
+                params={"api_key": self.api_key, **params},
+                timeout=10,
+            )
+            response.raise_for_status()
+            return response.json()
+        except (requests.RequestException, ValueError) as e:
+            # requests puts the request URL -- query string included -- into
+            # its error messages, so scrub the key before handing it back.
+            return {"error": str(e).replace(self.api_key, "***")}
+
+    def search(self, query: str, page_size: int = 10) -> dict:
+        """Search for foods by keyword.
+
+        Args:
+            query: Search term (e.g., "chicken breast")
+            page_size: Number of results to return
+
+        Returns:
+            API response with foods array containing fdcId, description, etc.,
+            or a dict with an "error" key.
+        """
+        return self._get(
+            "foods/search",
+            {
+                "query": query,
+                "pageSize": page_size,
+                "dataType": ["Foundation", "SR Legacy", "Branded"],
+            },
+        )
+
+    def get_food(self, fdc_id: int) -> dict:
+        """Get detailed food information by FDC ID.
+
+        Args:
+            fdc_id: FoodData Central ID
+
+        Returns:
+            Full food data including nutrients, or a dict with an "error" key.
+        """
+        return self._get(f"food/{fdc_id}", {})
+
+    def get_nutrients(self, fdc_id: int) -> list[dict]:
+        """Get nutrients for a food.
+
+        Args:
+            fdc_id: FoodData Central ID
+
+        Returns:
+            List of nutrients with id, name, amount, unit; empty when the
+            lookup failed or the food lists no nutrients.
+        """
+        data = self.get_food(fdc_id)
+        if "error" in data:
+            return []
+
+        nutrients = []
+        # Either key may be present with a null value, hence `or` over a
+        # `.get` default.
+        for entry in data.get("foodNutrients") or []:
+            info = entry.get("nutrient") or {}
+            nutrients.append(
+                {
+                    "id": info.get("id"),
+                    "name": info.get("name"),
+                    "amount": entry.get("amount"),
+                    "unit": info.get("unitName"),
+                }
+            )
+        return nutrients
-"""Natural language ingredient parser.
+"""Natural language ingredient parser with advanced NLP features.
-Parses strings like "2 cups flour" into structured data:
-{"quantity": 2.0, "unit": "cup", "food": "flour", "grams": 250.0}
+Features:
+- Multi-ingredient parsing ("2 cups flour and 1 tsp salt")
+- Fuzzy food matching to database
+- Optional spaCy integration for robust parsing
"""
import re
from typing import Optional
import pint
+from rapidfuzz import fuzz, process
# Unit registry for conversions
ureg = pint.UnitRegistry()
+# Try to load spaCy (optional). After this block `nlp` is either a loaded
+# "en_core_web_sm" pipeline or None, and SPACY_AVAILABLE says which.
+try:
+ # TODO: log warning, give some user feedback in status bar
+ import spacy
+
+ try:
+ nlp = spacy.load("en_core_web_sm")
+ SPACY_AVAILABLE = True
+ except OSError:
+ # spaCy imported but loading the model failed -- presumably the
+ # "en_core_web_sm" package is not installed.
+ nlp = None
+ SPACY_AVAILABLE = False
+except ImportError:
+ # spaCy itself is not importable; run without it.
+ nlp = None
+ SPACY_AVAILABLE = False
+
+
# Common cooking unit aliases
UNIT_ALIASES = {
"tbsp": "tablespoon",
}
# Approximate density conversions (grams per cup) for common ingredients
-# Used when converting volume to weight
DENSITY_MAP = {
"flour": 125,
"all-purpose flour": 125,
"honey": 340,
"oil": 218,
"salt": 288,
- # Default for unknown foods
"_default": 150,
}
+# Countable food/container words that the ingredient regex can capture in its
+# unit slot. They are food, not measurement units (e.g., "3 eggs" should end
+# up with food="eggs", not unit="egg"). Singular and plural are both listed.
+COUNT_NOUNS = {
+    # whole foods
+    "egg", "eggs",
+    "apple", "apples",
+    "banana", "bananas",
+    "orange", "oranges",
+    # portions of a food
+    "clove", "cloves",
+    "slice", "slices",
+    "piece", "pieces",
+    "stick", "sticks",
+    "head", "heads",
+    "bunch", "bunches",
+    "sprig", "sprigs",
+    "leaf", "leaves",
+    # containers
+    "can", "cans",
+    "package", "packages",
+    "bag", "bags",
+    "box", "boxes",
+    "bottle", "bottles",
+    "jar", "jars",
+}
+
+# Ingredient separators for multi-ingredient parsing.
+# One of three alternatives, with surrounding whitespace absorbed:
+#   - a comma, optionally followed by "and"       ("flour, and salt")
+#   - "and" between whitespace, unless the character right before that
+#     whitespace is a digit -- presumably so a quantity written like
+#     "1 and 1/2" is not split in two
+#   - a semicolon
+# Matching is case-insensitive.
+SEPARATORS = re.compile(
+ r"\s*(?:,\s*(?:and\s+)?|(?<!\d)\s+and\s+|\s*;\s*)\s*", re.IGNORECASE
+)
+
# Pattern to match: [quantity] [unit] [of] [food]
INGREDIENT_PATTERN = re.compile(
r"^\s*"
- r"(?P<quantity>[\d./]+(?:\s*[\d./]+)?)\s*" # quantity (e.g., "2", "1/2", "1 1/2")
+ r"(?P<quantity>[\d./]+(?:\s+[\d./]+)?)\s*" # quantity
r"(?P<unit>[a-zA-Z]+)?\s*" # optional unit
r"(?:of\s+)?" # optional "of"
- r"(?P<food>.+?)\s*$", # food name
+ r"(?P<food>.+?)\s*$",
re.IGNORECASE,
)
parts = s.split()
if len(parts) == 2:
- # Mixed number like "1 1/2"
whole = float(parts[0])
frac = parse_fraction(parts[1])
return whole + frac
"""Normalize unit to standard form."""
if not unit:
return None
- unit = unit.lower().rstrip("s") # Remove plural 's'
+ unit = unit.lower().rstrip("s")
return UNIT_ALIASES.get(unit, unit)
if not unit:
return None
- # If already in grams
if unit == "gram":
return quantity
try:
- # Try direct weight conversion
q = quantity * ureg(unit)
return q.to("gram").magnitude
except (pint.UndefinedUnitError, pint.DimensionalityError):
pass
- # Volume to weight conversion using density
food_lower = food.lower()
density = DENSITY_MAP.get(food_lower, DENSITY_MAP["_default"])
try:
- # Convert to cups first, then multiply by density
q = quantity * ureg(unit)
cups = q.to("cup").magnitude
return cups * density
return None
-def parse_ingredient(text: str) -> dict:
- """Parse an ingredient string into structured data.
-
- Args:
- text: Natural language ingredient string, e.g., "2 cups flour"
+def parse_single_ingredient(text: str) -> dict:
+ """Parse a single ingredient string into structured data."""
+ text = text.strip()
+ if not text:
+ return {"error": "Empty input"}
- Returns:
- Dict with keys: quantity, unit, food, grams (optional)
- """
match = INGREDIENT_PATTERN.match(text)
if not match:
+ # Try spaCy if available for difficult cases
+ if SPACY_AVAILABLE and nlp:
+ return _parse_with_spacy(text)
return {"error": "Could not parse ingredient", "text": text}
quantity_str = match.group("quantity")
except (ValueError, ZeroDivisionError):
return {"error": f"Invalid quantity: {quantity_str}", "text": text}
+ # Check if "unit" is actually a count noun (e.g., "3 eggs" → unit="egg", food="s")
+ # In that case, merge unit back into food
+ if unit and unit.lower() in COUNT_NOUNS:
+ food = unit + (" " + food if food else "")
+ unit = None
+
unit = normalize_unit(unit)
grams = get_grams(quantity, unit, food)
result["grams"] = round(grams, 1)
return result
+
+
+def _parse_with_spacy(text: str) -> dict:
+    """Use spaCy for more complex parsing when regex fails.
+
+    Args:
+        text: Ingredient text the regex parser could not handle.
+
+    Returns:
+        Dict with quantity (1.0 when no numeric token was found), unit
+        (always None), food and parsed_by="spacy"; or an error dict when no
+        food could be identified or the spaCy model is not loaded.
+    """
+    if nlp is None:
+        # Model not loaded; answer like any other unparseable input instead
+        # of failing with "NoneType is not callable".
+        return {"error": "Could not parse ingredient", "text": text}
+
+    doc = nlp(text)
+
+    # Quantity: first number-like token that float() can read. Tokens that
+    # look numeric but are not float literals are skipped.
+    quantity = None
+    for token in doc:
+        if not token.like_num:
+            continue
+        try:
+            quantity = float(token.text)
+        except ValueError:
+            continue
+        break
+
+    # Food: first noun chunk, falling back to the last noun in the text.
+    food = next((chunk.text for chunk in doc.noun_chunks), None)
+    if not food:
+        food = next(
+            (token.text for token in reversed(doc) if token.pos_ == "NOUN"), None
+        )
+
+    if not food:
+        return {"error": "Could not parse ingredient", "text": text}
+
+    return {
+        # Compare against None: `quantity or 1.0` would turn a parsed 0 into 1.
+        "quantity": 1.0 if quantity is None else quantity,
+        "unit": None,
+        "food": food,
+        "parsed_by": "spacy",
+    }
+
+
+def parse_ingredient(text: str) -> dict | list[dict]:
+    """Parse ingredient text that may list one or several ingredients.
+
+    The text is cut at SEPARATORS and every non-blank piece is handed to
+    parse_single_ingredient.
+
+    Args:
+        text: Natural language ingredient string
+            Single: "2 cups flour"
+            Multiple: "2 cups flour, 1 tsp salt, and 3 eggs"
+
+    Returns:
+        Exactly one piece: that piece's result dict.
+        Otherwise: a list of result dicts, one per piece (empty when the
+        text contains no pieces at all).
+    """
+    stripped = (piece.strip() for piece in SEPARATORS.split(text))
+    pieces = [piece for piece in stripped if piece]
+
+    if len(pieces) == 1:
+        return parse_single_ingredient(pieces[0])
+
+    return [parse_single_ingredient(piece) for piece in pieces]
+
+
+class FuzzyMatcher:
+    """Fuzzy matcher for connecting parsed foods to database entries."""
+
+    def __init__(self, food_names: list[str], min_score: int = 70):
+        """Initialize with list of food names from database.
+
+        Args:
+            food_names: List of food names to match against
+            min_score: Minimum fuzzy match score (0-100)
+        """
+        self.food_names = food_names
+        self.min_score = min_score
+
+    def match(self, query: str, limit: int = 5) -> list[dict]:
+        """Find closest matching foods.
+
+        Comparison ignores case and punctuation on both the query and the
+        stored names; returned names are the stored originals.
+
+        Args:
+            query: Parsed food name to match
+            limit: Max number of matches to return
+
+        Returns:
+            List of {name, score} dicts sorted by score descending, limited
+            to scores of at least min_score. Empty when there is nothing to
+            match against or the query is blank.
+        """
+        if not self.food_names or not query or not query.strip():
+            return []
+
+        # Local import keeps this block self-contained; rapidfuzz is already
+        # a dependency of this module.
+        from rapidfuzz.utils import default_process
+
+        # token_set_ratio copes with partial/reordered terms. The explicit
+        # processor matters because RapidFuzz 3.x no longer preprocesses
+        # strings by default, which would make "Flour" and "flour" differ.
+        matches = process.extract(
+            query,
+            self.food_names,
+            scorer=fuzz.token_set_ratio,
+            processor=default_process,
+            limit=limit,
+            score_cutoff=self.min_score,
+        )
+
+        return [{"name": name, "score": score} for name, score, _idx in matches]
+
+    def best_match(self, query: str) -> Optional[str]:
+        """Get single best matching food name.
+
+        Args:
+            query: Parsed food name to match
+
+        Returns:
+            Best matching name or None if no good match
+        """
+        matches = self.match(query, limit=1)
+        return matches[0]["name"] if matches else None
+
+
+# Kept so callers of the original single-dict API keep working.
+def parse_ingredient_legacy(text: str) -> dict:
+    """Legacy single-ingredient parser (original API).
+
+    Delegates to parse_ingredient and always hands back one dict: the result
+    itself when it is a dict, the first entry when it is a list, or an error
+    dict when that list is empty.
+    """
+    parsed = parse_ingredient(text)
+    if not isinstance(parsed, list):
+        return parsed
+    if parsed:
+        return parsed[0]
+    return {"error": "No ingredients found"}