Skip to content

Best Practices

Guidelines for effective use of PyCompTox.

API Key Management

✅ Do

  • Save API key using save_api_key() for persistent storage
  • Use environment variables for CI/CD pipelines
  • Keep API keys confidential and out of version control

❌ Don't

  • Hardcode API keys in scripts
  • Commit API keys to Git repositories
  • Share API keys publicly
# ✅ Good
from pycomptox import save_api_key
save_api_key("your-key")  # Save once, use everywhere

# ❌ Bad
chem = Chemical(api_key="hardcoded-key-123")  # Don't do this!

Rate Limiting

✅ Do

  • Use rate limiting for bulk operations
  • Respect API limits to avoid throttling
  • Use batch methods when querying multiple chemicals

❌ Don't

  • Make rapid-fire requests without delays
  • Query one chemical at a time when batch methods are available
# ✅ Good - Use batch methods
dtxsids = ["DTXSID7020182", "DTXSID2021315", "DTXSID5020001"]
results = chem.get_chemical_by_dtxsid_batch(dtxsids)

# ❌ Bad - Individual requests
results = [chem.get_chemical_by_dtxsid(d) for d in dtxsids]

Batch Operations

Optimize Batch Sizes

def process_large_dataset(dtxsids, batch_size=500):
    """Process large datasets efficiently."""
    from pycomptox.chemical import Chemical

    chem = Chemical(time_delay_between_calls=1.0)
    all_results = []

    for i in range(0, len(dtxsids), batch_size):
        batch = dtxsids[i:i + batch_size]
        # API max is 1000, but smaller batches are more reliable
        results = chem.get_chemical_by_dtxsid_batch(batch[:1000])
        all_results.extend(results)

    return all_results

Error Handling

✅ Do

  • Always wrap API calls in try-except blocks
  • Handle specific exceptions appropriately
  • Log errors for debugging
import logging

logger = logging.getLogger(__name__)

def safe_search(name):
    """Safely search for a chemical."""
    try:
        chem = Chemical()
        results = chem.search_by_name(name)
        return results
    except ValueError as e:
        logger.warning(f"Chemical not found: {name}")
        return []
    except RuntimeError as e:
        logger.error(f"API error: {e}")
        return []

❌ Don't

  • Use bare except clauses
  • Silently ignore errors
  • Let exceptions propagate without handling
# ❌ Bad
try:
    results = chem.search_by_name(name)
except:  # Too broad!
    pass  # Silent failure!

Performance Optimization

Use Projections

Only request the data you need:

from pycomptox.chemical import ChemicalDetails

details = ChemicalDetails()

# ✅ Good - Request only what you need
info = details.get_chemical_by_dtxsid(
    "DTXSID7020182",
    projection="chemicalidentifier"  # Minimal data
)

# ⚠️ Less efficient - Gets everything
info = details.get_chemical_by_dtxsid(
    "DTXSID7020182",
    projection="chemicaldetailall"  # All data
)

Cache Results

Cache frequently accessed data:

from functools import lru_cache

@lru_cache(maxsize=128)
def get_chemical_cached(dtxsid):
    """Get chemical with caching."""
    details = ChemicalDetails()
    return details.get_chemical_by_dtxsid(dtxsid)

# First call - fetches from API
data1 = get_chemical_cached("DTXSID7020182")

# Second call - returns cached result
data2 = get_chemical_cached("DTXSID7020182")  # Fast!

Reuse Clients

Reuse client instances to benefit from connection pooling:

# ✅ Good - Reuse client
chem = Chemical()
for name in chemical_names:
    results = chem.search_by_name(name)

# ❌ Bad - New client each time
for name in chemical_names:
    chem = Chemical()  # Wasteful!
    results = chem.search_by_name(name)

Data Validation

Validate DTXSIDs

def is_valid_dtxsid(dtxsid):
    """Check if DTXSID format is valid."""
    import re
    pattern = r'^DTXSID\d+$'
    return bool(re.match(pattern, dtxsid))

# Use before API calls
if is_valid_dtxsid(dtxsid):
    data = chem.get_chemical_by_dtxsid(dtxsid)

Handle Missing Data

# ✅ Good - Check for None/missing values
result = chem.search_by_name("chemical")
if result and result[0].get('casrn'):
    casrn = result[0]['casrn']
else:
    casrn = "Not available"

# ❌ Bad - Assumes data exists
casrn = result[0]['casrn']  # May raise KeyError!

Code Organization

Create Wrapper Classes

class ChemicalAnalyzer:
    """Unified interface for chemical analysis."""

    def __init__(self, rate_limit=1.0):
        self.search = Chemical(time_delay_between_calls=rate_limit)
        self.details = ChemicalDetails(time_delay_between_calls=rate_limit)
        self.properties = ChemicalProperties(time_delay_between_calls=rate_limit)
        self.extra = ExtraData(time_delay_between_calls=rate_limit)

    def analyze_chemical(self, identifier, id_type='name'):
        """Complete chemical analysis."""
        # Search
        if id_type == 'name':
            results = self.search.search_by_name(identifier)
        elif id_type == 'casrn':
            results = self.search.search_by_casrn(identifier)

        if not results:
            return None

        dtxsid = results[0]['dtxsid']

        # Gather all data
        return {
            'basic': results[0],
            'details': self.details.get_chemical_by_dtxsid(dtxsid),
            'properties': self.properties.get_property_summary_by_dtxsid(dtxsid),
            'references': self.extra.get_data_by_dtxsid(dtxsid)
        }

Testing

Write Unit Tests

import pytest
from pycomptox.chemical import Chemical

@pytest.fixture
def chem_client():
    return Chemical()

def test_search_by_name(chem_client):
    results = chem_client.search_by_name("caffeine")
    assert len(results) > 0
    assert 'dtxsid' in results[0]

def test_invalid_search(chem_client):
    results = chem_client.search_by_name("xyz123notachemical")
    # Should return empty list, not raise error
    assert results == [] or len(results) == 0

Use Mocking for Tests

from unittest.mock import patch, MagicMock

def test_with_mock():
    with patch('pycomptox.search.Chemical.search_by_name') as mock_search:
        mock_search.return_value = [
            {'dtxsid': 'DTXSID123', 'preferredName': 'Test'}
        ]

        chem = Chemical()
        results = chem.search_by_name("test")

        assert len(results) == 1
        assert results[0]['dtxsid'] == 'DTXSID123'

Documentation

Document Your Code

def analyze_toxicity(dtxsid: str) -> dict:
    """
    Analyze toxicity profile for a chemical.

    Args:
        dtxsid: DSSTox Substance Identifier

    Returns:
        Dictionary containing toxicity analysis:
            - properties: Property data
            - predictions: QSAR predictions
            - experimental: Experimental measurements

    Raises:
        ValueError: If chemical not found
        RuntimeError: If API request fails

    Example:
        >>> profile = analyze_toxicity("DTXSID7020182")
        >>> print(profile['properties'])
    """
    pass

Logging

Implement Structured Logging

import logging
import json

class JSONFormatter(logging.Formatter):
    def format(self, record):
        log_data = {
            'timestamp': self.formatTime(record),
            'level': record.levelname,
            'message': record.getMessage(),
            'module': record.module
        }
        return json.dumps(log_data)

# Configure logger
logger = logging.getLogger('pycomptox')
handler = logging.FileHandler('pycomptox.json')
handler.setFormatter(JSONFormatter())
logger.addHandler(handler)

Security

Protect Sensitive Data

import os
from pathlib import Path

def get_api_key():
    """Securely load API key."""
    # Try environment variable first
    key = os.environ.get('COMPTOX_API_KEY')

    if not key:
        # Try config file with restricted permissions
        key_file = Path.home() / '.pycomptox' / 'api_key.txt'
        if key_file.exists():
            # Check file permissions (Unix)
            if key_file.stat().st_mode & 0o077:
                raise PermissionError("API key file has insecure permissions")
            key = key_file.read_text().strip()

    return key

See Also