Binary detection is a fast, cost-effective way to find documents matching a single category without running full taxonomy generation. Instead of classifying documents into multiple categories, it answers a simple question: “Does this document match my category?”
from delve import Delve

# Define the ONE category you're looking for
result = Delve.find_matches(
    "traces.csv",
    category={
        "name": "Refund Request",
        "description": "User asking for a refund, money back, order cancellation, or charge reversal",
        "keywords": ["refund", "money back", "cancel order", "charged twice"],
    },
    text_column="content",
    threshold=0.6,
)

# Summary counts come from the stats dict on the result
print(f"Found {result.stats['matches']} matches out of {result.stats['total_documents']}")

# All documents are returned with scores
# Matches have category="Refund Request", non-matches have category=None
# Preview the first five matches: id, confidence, and the first 60 characters
for doc in result.matched_documents[:5]:
    print(f" {doc.id}: {doc.confidence:.2f} - {doc.content[:60]}...")
# Overview of the attributes available on the object returned by find_matches
result = Delve.find_matches(...)

# ALL documents with scores (sorted by score descending)
result.documents  # List[Doc] - all docs with .category, .confidence

# Only matched documents (category != None)
result.matched_documents  # List[Doc] - only matches

# Only unmatched documents (category == None)
result.unmatched_documents  # List[Doc] - below threshold

# Category definition used
result.category  # Dict with name, description, keywords

# Statistics
result.stats  # Dict with counts, rates, score distribution
# To DataFrame (all documents)
df = result.to_dataframe()
print(df.head())

# Filter to just matches (non-matches have no category, so drop the nulls)
matched_df = df[df['category'].notna()]

# To files
paths = result.export("./output", formats=["csv", "json"])
print(f"Exported to: {paths}")
from delve import Delve

# Find traces where users ask about a specific feature
feature_traces = Delve.find_matches(
    "langsmith://my-project",  # LangSmith data source
    category={
        "name": "Dark Mode Questions",
        "description": "User asking about dark mode, theme settings, or display preferences",
        "keywords": ["dark mode", "theme", "night mode", "light mode", "display"],
    },
    threshold=0.6,
)

# See what users are asking
# Iterate matched_documents rather than documents: documents holds every
# trace (matches and non-matches), so slicing it could print unrelated
# traces when fewer than ten match.
for trace in feature_traces.matched_documents[:10]:
    print(f"Score: {trace.confidence:.2f}")
    print(f"Content: {trace.content[:200]}...")
    print("---")
# When your category is conceptual and keywords don't help
frustrated_users = Delve.find_matches(
    "feedback.csv",
    category={
        "name": "User Frustration",
        "description": "User expressing frustration, anger, disappointment, or dissatisfaction with the product or experience",
        # No keywords - rely entirely on semantic understanding
    },
    text_column="feedback",
    threshold=0.65,
    keyword_weight=0,  # Pure semantic matching
)
import pandas as pd

from delve import Delve

# Load your own DataFrame
df = pd.read_csv("data.csv")

# Filter for specific content
matches = Delve.find_matches(
    df,
    category={
        "name": "Feature Request",
        "description": "User suggesting a new feature or improvement",
        "keywords": ["would be nice", "please add", "feature request", "suggestion"],
    },
    text_column="message",
    id_column="msg_id",
)

# Merge results back
# Collect ids from matched_documents only: documents contains every row
# (matches and non-matches), which would flag the whole DataFrame as True.
matched_ids = {doc.id for doc in matches.matched_documents}
# The column is cast to str before the membership test, as in the original
# example. NOTE(review): this presumes doc.id values are strings - confirm.
df["is_feature_request"] = df["msg_id"].astype(str).isin(matched_ids)