- food_tokens = []
-
- # Track which tokens we've consumed
- consumed = set()
-
- # First pass: find numbers (quantity)
- for token in doc:
- if token.like_num or token.pos_ == "NUM":
- try:
- # Handle word numbers like "one", "two"
- if token.text.lower() in {
- "one": 1,
- "two": 2,
- "three": 3,
- "four": 4,
- "five": 5,
- "six": 6,
- "seven": 7,
- "eight": 8,
- "nine": 9,
- "ten": 10,
- "dozen": 12,
- }:
- quantity = {
- "one": 1,
- "two": 2,
- "three": 3,
- "four": 4,
- "five": 5,
- "six": 6,
- "seven": 7,
- "eight": 8,
- "nine": 9,
- "ten": 10,
- "dozen": 12,
- }[token.text.lower()]
- else:
- quantity = float(token.text)
- consumed.add(token.i)
- break
- except ValueError:
- pass
-
- # Second pass: find unit (must be a known measurement unit)
- for token in doc:
- if token.i not in consumed and is_measurement_unit(token.text):
- unit = token.text.lower()
- consumed.add(token.i)
- break
-
- # Third pass: remaining nouns/noun chunks are the food
- for chunk in doc.noun_chunks:
- # Skip chunks that only contain consumed tokens
- chunk_tokens = [t for t in chunk if t.i not in consumed]
- if chunk_tokens:
- # Skip determiners like "a", "the"
- food_text = " ".join(t.text for t in chunk_tokens if t.pos_ != "DET")
- if food_text:
- food_tokens.append(food_text)
- for t in chunk_tokens:
- consumed.add(t.i)
-
- # If no noun chunks, fall back to individual nouns
- if not food_tokens:
- for token in doc:
- if token.i not in consumed and token.pos_ in ("NOUN", "PROPN"):
- food_tokens.append(token.text)
- consumed.add(token.i)
-
- # Also include adjectives that modify food (e.g., "brown sugar")
- food_text = " ".join(food_tokens) if food_tokens else None
-
- if not food_text:
+ words = remaining.split()
+ if words and is_measurement_unit(words[0]):
+ unit = words[0]
+ remaining = " ".join(words[1:])
+
+ # Remove common filler words
+ remaining = re.sub(r"^(of|the)\s+", "", remaining, flags=re.IGNORECASE)
+
+ food = remaining.strip()
+
+ if not food: