Examples¶
Comprehensive examples for common PyCompTox workflows.
Basic Examples¶
Example 1: Search and Display Chemical Information¶
from pycomptox import Chemical, ChemicalDetails
# Search for a chemical
chem = Chemical()
results = chem.search_by_name("aspirin")
if results:
chemical = results[0]
print(f"Name: {chemical['preferredName']}")
print(f"DTXSID: {chemical['dtxsid']}")
print(f"CASRN: {chemical.get('casrn', 'N/A')}")
# Get detailed information
details = ChemicalDetails()
info = details.get_chemical_by_dtxsid(
chemical['dtxsid'],
projection="chemicaldetailall"
)
print(f"\nDetailed Information:")
print(f"Formula: {info.get('molFormula', 'N/A')}")
print(f"Weight: {info.get('molWeight', 'N/A')}")
print(f"SMILES: {info.get('smiles', 'N/A')}")
Example 2: Property Analysis¶
from pycomptox import ChemicalProperties
props = ChemicalProperties()
# Get property summary
dtxsid = "DTXSID7020182" # Bisphenol A
summary = props.get_property_summary_by_dtxsid(dtxsid)
print(f"Properties for {dtxsid}:")
for prop in summary:
name = prop['propName']
exp_median = prop.get('experimentalMedian', 'N/A')
pred_median = prop.get('predictedMedian', 'N/A')
unit = prop.get('unit', '')
print(f" {name}: Exp={exp_median}, Pred={pred_median} {unit}")
Example 3: Batch Processing¶
from pycomptox import Chemical, ExtraData
# Get multiple chemicals
chem = Chemical()
chemicals = ["caffeine", "aspirin", "ibuprofen"]
# Search for all
dtxsids = []
for name in chemicals:
results = chem.search_by_name(name)
if results:
dtxsids.append(results[0]['dtxsid'])
# Get reference data in batch
extra = ExtraData()
ref_data = extra.get_data_by_dtxsid_batch(dtxsids)
# Display results
for data in sorted(ref_data, key=lambda x: x['pubmed'], reverse=True):
print(f"{data['dtxsid']}: {data['pubmed']} PubMed citations")
Advanced Examples¶
Example 4: Complete Chemical Profile¶
from pycomptox import Chemical, ChemicalDetails, ChemicalProperties, ExtraData
def get_chemical_profile(identifier, id_type='name'):
"""Get complete chemical profile."""
# Search
chem = Chemical()
if id_type == 'name':
results = chem.search_by_name(identifier)
elif id_type == 'casrn':
results = chem.search_by_casrn(identifier)
elif id_type == 'dtxsid':
results = [{'dtxsid': identifier}]
else:
raise ValueError(f"Unknown id_type: {id_type}")
if not results:
return None
dtxsid = results[0]['dtxsid']
# Gather all data
details = ChemicalDetails()
props = ChemicalProperties()
extra = ExtraData()
return {
'search_result': results[0],
'details': details.get_chemical_by_dtxsid(
dtxsid,
projection='chemicaldetailall'
),
'property_summary': props.get_property_summary_by_dtxsid(dtxsid),
'predicted_props': props.get_predicted_properties_by_dtxsid(dtxsid),
'experimental_props': props.get_experimental_properties_by_dtxsid(dtxsid),
'references': extra.get_data_by_dtxsid(dtxsid)
}
# Usage
profile = get_chemical_profile("bisphenol A")
if profile:
print(f"Name: {profile['details']['preferredName']}")
print(f"Formula: {profile['details']['molFormula']}")
print(f"Property summaries: {len(profile['property_summary'])}")
print(f"Predicted properties: {len(profile['predicted_props'])}")
print(f"Experimental properties: {len(profile['experimental_props'])}")
print(f"PubMed citations: {profile['references']['pubmed']}")
Example 5: Property Comparison Across Chemicals¶
from pycomptox.chemical import ChemicalProperties
import pandas as pd
def compare_properties(dtxsids, property_names):
"""Compare specific properties across chemicals."""
props = ChemicalProperties()
# Get property data
all_data = props.get_property_summary_by_dtxsid_batch(dtxsids)
# Extract specific properties
results = []
for dtxsid in dtxsids:
chem_props = [p for p in all_data if p.get('dtxsid') == dtxsid]
row = {'dtxsid': dtxsid}
for prop_name in property_names:
matching = [p for p in chem_props if p['propName'] == prop_name]
if matching:
row[prop_name] = matching[0].get('experimentalMedian', 'N/A')
else:
row[prop_name] = 'N/A'
results.append(row)
return pd.DataFrame(results)
# Usage
chemicals = ["DTXSID7020182", "DTXSID2021315", "DTXSID6020139"]
properties = ["Boiling Point", "Melting Point", "Log P"]
df = compare_properties(chemicals, properties)
print(df)
Example 6: Finding Chemicals by Property Range¶
from pycomptox import ChemicalProperties
props = ChemicalProperties()
# Find chemicals with Log P between 2 and 4
results = props.get_predicted_property_by_name_and_range("Log P", 2.0, 4.0)
print(f"Found {len(results)} chemicals with Log P between 2 and 4:")
for chem in results[:10]:
print(f" {chem['dtxsid']}: Log P = {chem['propValue']}")
Example 7: Literature Analysis¶
from pycomptox import Chemical, ExtraData
def analyze_chemical_class(search_term, top_n=10):
"""Analyze literature coverage for a chemical class."""
# Search for chemicals
chem = Chemical()
results = chem.search_by_name(search_term)
if not results:
return None
# Get DTXSIDs
dtxsids = [r['dtxsid'] for r in results[:50]]
# Get reference data
extra = ExtraData()
ref_data = extra.get_data_by_dtxsid_batch(dtxsids)
# Sort by total references
sorted_data = sorted(ref_data, key=lambda x: x['refs'], reverse=True)
# Display top N
print(f"Top {top_n} most-referenced chemicals for '{search_term}':")
for i, data in enumerate(sorted_data[:top_n], 1):
print(f"{i}. {data['dtxsid']}")
print(f" Total: {data['refs']}, PubMed: {data['pubmed']}, "
f"Patents: {data['googlePatent']}")
# Statistics
total_refs = sum(d['refs'] for d in ref_data)
avg_refs = total_refs / len(ref_data) if ref_data else 0
print(f"\nStatistics:")
print(f" Total chemicals analyzed: {len(ref_data)}")
print(f" Total references: {total_refs}")
print(f" Average references per chemical: {avg_refs:.1f}")
return ref_data
# Usage
analyze_chemical_class("phthalate", top_n=5)
Example 8: Caching Wrapper¶
from functools import lru_cache
from pycomptox import ChemicalDetails
class CachedChemicalClient:
"""Chemical client with caching."""
def __init__(self):
self.details = ChemicalDetails(time_delay_between_calls=0.5)
@lru_cache(maxsize=256)
def get_chemical_cached(self, dtxsid, projection='chemicalidentifier'):
"""Get chemical with caching."""
return self.details.get_chemical_by_dtxsid(dtxsid, projection)
def clear_cache(self):
"""Clear the cache."""
self.get_chemical_cached.cache_clear()
def cache_info(self):
"""Get cache statistics."""
return self.get_chemical_cached.cache_info()
# Usage
client = CachedChemicalClient()
# First call - fetches from API
data1 = client.get_chemical_cached("DTXSID7020182")
# Second call - returns cached result (fast!)
data2 = client.get_chemical_cached("DTXSID7020182")
# Check cache statistics
print(client.cache_info())
Example 9: DataFrame Integration¶
from pycomptox import Chemical, ChemicalDetails, ExtraData
import pandas as pd
def create_chemical_dataframe(chemical_names):
"""Create pandas DataFrame from chemical data."""
chem = Chemical()
details = ChemicalDetails()
extra = ExtraData()
data = []
for name in chemical_names:
# Search
results = chem.search_by_name(name)
if not results:
continue
dtxsid = results[0]['dtxsid']
# Get details
info = details.get_chemical_by_dtxsid(dtxsid)
refs = extra.get_data_by_dtxsid(dtxsid)
# Combine
data.append({
'search_name': name,
'preferred_name': info.get('preferredName', ''),
'dtxsid': dtxsid,
'casrn': info.get('casrn', ''),
'formula': info.get('molFormula', ''),
'weight': info.get('molWeight', ''),
'total_refs': refs['refs'],
'pubmed': refs['pubmed'],
'patents': refs['googlePatent']
})
return pd.DataFrame(data)
# Usage
chemicals = ["caffeine", "aspirin", "ibuprofen", "acetaminophen"]
df = create_chemical_dataframe(chemicals)
# Analyze
print(df)
print(f"\nAverage PubMed citations: {df['pubmed'].mean():.1f}")
print(f"Most referenced: {df.loc[df['pubmed'].idxmax(), 'preferred_name']}")
Example 10: Error-Resilient Batch Processing¶
from pycomptox import Chemical
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def robust_batch_search(identifiers, id_type='name'):
"""Robust batch search with error handling."""
chem = Chemical(time_delay_between_calls=0.5)
results = []
errors = []
for identifier in identifiers:
try:
if id_type == 'name':
res = chem.search_by_name(identifier)
elif id_type == 'casrn':
res = chem.search_by_casrn(identifier)
else:
logger.warning(f"Unknown id_type: {id_type}")
continue
if res:
results.append({
'identifier': identifier,
'dtxsid': res[0]['dtxsid'],
'preferred_name': res[0].get('preferredName', ''),
'success': True
})
logger.info(f"✓ Found {identifier}")
else:
errors.append({
'identifier': identifier,
'error': 'No results found',
'success': False
})
logger.warning(f"✗ No results for {identifier}")
except Exception as e:
errors.append({
'identifier': identifier,
'error': str(e),
'success': False
})
logger.error(f"✗ Error for {identifier}: {e}")
return {
'results': results,
'errors': errors,
'success_rate': len(results) / len(identifiers) * 100
}
# Usage
chemicals = ["caffeine", "invalid123", "aspirin", "fake_chemical", "benzene"]
output = robust_batch_search(chemicals)
print(f"\nSuccess rate: {output['success_rate']:.1f}%")
print(f"Successful: {len(output['results'])}")
print(f"Failed: {len(output['errors'])}")