import pandas as pd

from delve import Delve

# A tiny sample of customer messages to categorize.
df = pd.DataFrame({
    "id": ["1", "2", "3"],
    "text": [
        "How do I reset my password?",
        "What are your pricing plans?",
        "I love this product!",
    ],
})

delve = Delve(use_case="Categorize customer feedback")
result = delve.run_sync(df, text_column="text", id_column="id")

# Convert results back to DataFrame
results_df = pd.DataFrame([
    {"id": doc.id, "category": doc.category, "explanation": doc.explanation}
    for doc in result.labeled_documents
])

print(results_df["category"].value_counts())
Use a predefined taxonomy when you already know your categories, want consistent labeling across runs, or need to match an existing classification system.
When the classifier is used (sample_size < total docs):
from delve import Delve

delve = Delve(sample_size=100)  # Will use classifier if > 100 docs
result = delve.run_sync("large_dataset.csv", text_column="text")

# Check if classifier was used
if "classifier_metrics" in result.metadata:
    metrics = result.metadata["classifier_metrics"]
    print("Classifier Performance:")
    print(f" Test Accuracy: {metrics['test_accuracy']:.1%}")
    print(f" Test F1 Score: {metrics['test_f1']:.3f}")
    print(f" Train Accuracy: {metrics['train_accuracy']:.1%}")
else:
    print("All documents were labeled by LLM (no classifier needed)")
# Basic CSV
delve run data.csv --text-column message

# JSON with nested path
delve run data.json --json-path "$.items[*].content"

# LangSmith project
delve run langsmith://my-project --langsmith-key $LANGSMITH_API_KEY

# Custom configuration
delve run data.csv --text-column text --sample-size 200 --use-case "Categorize support tickets"
When your data has class imbalance (some categories much more common than others), you may need to adjust parameters to ensure good classifier performance.
from delve import Delve

# Run Delve and check for imbalance issues
delve = Delve(sample_size=100, output_dir="./results")
result = delve.run_sync("data.csv", text_column="text")

# Check classifier performance
metrics = result.metadata.get("classifier_metrics", {})
print(f"Test F1: {metrics.get('test_f1', 'N/A')}")

# Check sample distribution.
# FIX: sample_dist was previously fetched but never used, so this step
# printed nothing — surface it so readers actually see the distribution.
sample_dist = result.metadata.get("sample_distribution", {})
print(f"Sample distribution: {sample_dist}")

zero_cats = result.metadata.get("zero_sample_categories", [])
if zero_cats:
    print(f"Warning: {len(zero_cats)} categories had no training examples")
    print(f" Missing: {zero_cats}")

# Check per-class performance, worst categories first
per_class = metrics.get("per_class_f1", {})
for cat, f1 in sorted(per_class.items(), key=lambda x: x[1]):
    if f1 < 0.5:
        print(f" Low F1 ({f1:.2f}): {cat}")
See the Handling Class Imbalance guide for a complete explanation of these metrics and how to tune them.
import asyncio

from delve import Delve, Verbosity

# Run taxonomy generation
delve = Delve(sample_size=200, verbosity=Verbosity.VERBOSE)
result = delve.run_sync("training_data.csv", text_column="content")

# Save the classifier for later
result.save_classifier("classifier.joblib")

# Export labeled docs for review.
# FIX: `await result.export()` is a SyntaxError at the top level of a plain
# script (`await` is only valid inside an async function); drive the
# coroutine with asyncio.run() instead.
asyncio.run(result.export())  # Creates labeled_documents.csv
# Train improved classifier from corrected labels
result = Delve.train_from_labeled(
    "corrected_labels.csv",
    text_column="content",
    label_column="category",
    taxonomy="taxonomy.json",  # Use original taxonomy
)

print(f"Improved Test F1: {result.metrics['test_f1']:.2%}")
result.save_classifier("production_classifier.joblib")
# Clone and setup
git clone https://github.com/anthropics/delve.git
cd delve
pip install -e .

# Set API keys
export ANTHROPIC_API_KEY="your-key"
export OPENAI_API_KEY="your-key"  # Required for classifier embeddings

# Run examples
cd examples
python basic_csv_example.py