Skip to content

CAS Common Chemistry API

The CASCommonChem class provides an interface to the CAS (Chemical Abstracts Service) Common Chemistry database, which offers free access to chemical substance information.

Overview

CAS Common Chemistry provides reliable chemical information for over 500,000 chemical substances from CAS REGISTRY. This class enables searches by CAS Registry Number, chemical name, and SMILES notation.

Class: CASCommonChem

Initialization

from provesid.cascommonchem import CASCommonChem

cas_api = CASCommonChem()

The CASCommonChem class initializes with:

  • base_url: "https://commonchemistry.cas.org/api"
  • Proper headers for API requests
  • Error handling and retry logic

Methods

cas_to_detail(cas_number)

Retrieve detailed information for a compound using its CAS Registry Number.

Parameters:

  • cas_number (str): CAS Registry Number (e.g., "64-17-5")

Returns:

  • dict or None: Detailed compound information if found; None if the compound is not found or an error occurs

Example:

# Create the API client
cas_api = CASCommonChem()

# Get details for ethanol (CAS: 64-17-5)
ethanol_data = cas_api.cas_to_detail("64-17-5")

# cas_to_detail returns None when the compound is not found or an error
# occurs, so check the result before indexing into it
if ethanol_data:
    print(f"Name: {ethanol_data['name']}")
    print(f"Molecular Formula: {ethanol_data['molecularFormula']}")
    print(f"SMILES: {ethanol_data['smile']}")
    print(f"InChI: {ethanol_data['inchi']}")
    print(f"InChI Key: {ethanol_data['inchiKey']}")
else:
    print("Compound not found")

name_to_detail(name)

Search for compound information using chemical name.

Parameters:

  • name (str): Chemical name (common or systematic)

Returns:

  • dict or None: Detailed compound information if found; None if the compound is not found or an error occurs

Example:

# Create the API client
cas_api = CASCommonChem()

# Search by the primary name
ethanol_data = cas_api.name_to_detail("ethanol")

# Search by a synonym. This rebinds ethanol_data, so only the result of
# this second search is inspected below.
ethanol_data = cas_api.name_to_detail("ethyl alcohol")

# name_to_detail returns None when the compound is not found or an error
# occurs, so check the result before indexing into it
if ethanol_data:
    print(f"CAS Number: {ethanol_data['rn']}")
    print(f"Molecular Formula: {ethanol_data['molecularFormula']}")

smiles_to_detail(smiles)

Search for compound information using SMILES notation.

Parameters:

  • smiles (str): SMILES string representation

Returns:

  • dict or None: Detailed compound information if found; None if the compound is not found or an error occurs

Example:

# Create the API client
cas_api = CASCommonChem()

# Search by SMILES
compound_data = cas_api.smiles_to_detail("CCO")  # Ethanol

# smiles_to_detail returns None when the compound is not found or an error
# occurs, so check the result before indexing into it
if compound_data:
    print(f"Name: {compound_data['name']}")
    print(f"CAS Number: {compound_data['rn']}")

Data Structure

The API returns detailed compound information in the following structure:

{
    "uri": "substance/pt/64175",
    "rn": "64-17-5",                    # CAS Registry Number
    "name": "Ethanol",                  # Primary name
    "image": "<svg>...</svg>",          # Chemical structure image (SVG)
    "inchi": "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3",
    "inchiKey": "LFQSCWFLJHTTHZ-UHFFFAOYSA-N",
    "smile": "CCO",                     # SMILES notation
    "canonicalSmile": "CCO",            # Canonical SMILES
    "molecularFormula": "C<sub>2</sub>H<sub>6</sub>O",
    "molecularMass": "46.07",           # Molecular mass in g/mol
    "experimentalProperties": [         # Experimental data
        {
            "name": "Boiling Point",
            "property": "78.37 °C",
            "sourceNumber": 1
        }
    ],
    "propertyCitations": [              # Literature references
        {
            "docId": 1,
            "title": "CRC Handbook of Chemistry and Physics"
        }
    ],
    "synonyms": [                       # Alternative names
        "Ethyl alcohol",
        "Grain alcohol",
        "EtOH"
    ],
    "replacedRns": [],                  # Historical CAS numbers
    "hasMolfile": true                  # Molfile availability
}

Comprehensive Usage Examples

Single Compound Lookup

from provesid.cascommonchem import CASCommonChem

cas_api = CASCommonChem()

def get_compound_info(identifier, search_type="cas"):
    """Look up one compound, print a short report, and return its record.

    Parameters:
    - identifier (str): Value to search for (CAS number, name, or SMILES)
    - search_type (str): Which lookup to run: "cas", "name", or "smiles"

    Returns:
    - dict or None: The record returned by the lookup; None when the search
      type is not recognised or the lookup yields no data
    """
    # Name of the method on the module-level ``cas_api`` client that serves
    # each supported search type. The client is only touched once a type has
    # matched.
    lookup_methods = (
        ("cas", "cas_to_detail"),
        ("name", "name_to_detail"),
        ("smiles", "smiles_to_detail"),
    )
    method_name = next(
        (attr for kind, attr in lookup_methods if search_type == kind),
        None,
    )
    if method_name is None:
        print("Invalid search type")
        return None

    record = getattr(cas_api, method_name)(identifier)
    if not record:
        print(f"No data found for {identifier}")
        return None

    # Core identifiers
    print(f"=== {record['name']} ===")
    print(f"CAS Number: {record['rn']}")
    print(f"Molecular Formula: {record['molecularFormula']}")
    print(f"Molecular Mass: {record['molecularMass']} g/mol")
    print(f"SMILES: {record['smile']}")
    print(f"InChI Key: {record['inchiKey']}")

    # Synonyms: only shown when the list is present and non-empty, and
    # capped at the first five entries.
    if 'synonyms' in record and record['synonyms']:
        leading_synonyms = record['synonyms'][:5]
        print(f"Synonyms: {', '.join(leading_synonyms)}")

    # Experimental properties: the heading is printed whenever the key
    # exists, even if the list itself is empty.
    if 'experimentalProperties' in record:
        print("Experimental Properties:")
        for entry in record['experimentalProperties']:
            print(f"  - {entry['name']}: {entry['property']}")

    return record

# Usage examples
# Each call prints a report and returns the record, or None if the lookup
# found nothing.
ethanol_by_cas = get_compound_info("64-17-5", "cas")
ethanol_by_name = get_compound_info("ethanol", "name")
ethanol_by_smiles = get_compound_info("CCO", "smiles")

Batch Processing

def batch_cas_lookup(cas_numbers, delay=0.5):
    """Look up several CAS Registry Numbers and summarise each result.

    Parameters:
    - cas_numbers (iterable of str): CAS Registry Numbers to look up
    - delay (float): Seconds to pause between consecutive requests
      (default 0.5); a value <= 0 disables the pause

    Returns:
    - dict: Maps each CAS number to a summary dict with the keys 'name',
      'formula', 'smiles' and 'mass', or to None when the compound was not
      found or the lookup raised an exception
    """
    # Imported locally so the function is self-contained: it calls
    # time.sleep() and must not depend on an import made elsewhere.
    import time

    pending = list(cas_numbers)
    results = {}
    if not pending:
        # Nothing to look up, so there is no need to create a client.
        return results

    cas_api = CASCommonChem()
    last_index = len(pending) - 1

    for index, cas_num in enumerate(pending):
        print(f"Processing CAS: {cas_num}")

        try:
            data = cas_api.cas_to_detail(cas_num)

            if data:
                results[cas_num] = {
                    'name': data['name'],
                    'formula': data['molecularFormula'],
                    'smiles': data['smile'],
                    'mass': data['molecularMass']
                }
                print(f"  ✓ Found: {data['name']}")
            else:
                results[cas_num] = None
                print("  ✗ Not found")

        except Exception as e:
            # Deliberately broad: one failing record (request error or a
            # missing field) must not abort the rest of the batch.
            print(f"  ✗ Error: {e}")
            results[cas_num] = None

        # Rate limiting - be respectful to the API. The pause only separates
        # consecutive requests, so none is needed after the final one.
        if delay > 0 and index < last_index:
            time.sleep(delay)

    return results

# Example usage
cas_list = ["64-17-5", "67-56-1", "108-88-3", "71-43-2"]
batch_results = batch_cas_lookup(cas_list)

# batch_cas_lookup maps each CAS number to a summary dict, or to None when
# the compound was not found or the lookup raised an exception
for cas, data in batch_results.items():
    if data:
        print(f"{cas}: {data['name']} ({data['formula']})")
    else:
        print(f"{cas}: Not found")

Cross-Reference with Other APIs

from provesid.cascommonchem import CASCommonChem
from provesid.pubchem import PubChemAPI
from provesid.opsin import OPSIN

def comprehensive_compound_lookup(compound_name):
    """Cross-reference a compound across OPSIN, CAS Common Chemistry and PubChem.

    The name is first resolved to a SMILES string via OPSIN. That SMILES is
    used to find the CAS record, and the CAS Registry Number from that record
    is then passed to PubChem. If no CAS record was obtained this way, a
    direct CAS search by name is attempted as a fallback.

    Parameters:
    - compound_name (str): Chemical name to look up

    Returns:
    - dict: Keys 'input_name', 'cas_data', 'pubchem_data', 'opsin_data' and
      'cross_validated'. The data entries stay None for lookups that were
      not run or returned nothing. 'cross_validated' is True only when the
      OPSIN -> CAS -> PubChem chain succeeded end to end.
    """

    results = {
        'input_name': compound_name,
        'cas_data': None,
        'pubchem_data': None,
        'opsin_data': None,
        'cross_validated': False
    }

    # Create the CAS client up front: it is needed both by the SMILES search
    # inside the OPSIN branch and by the name-based fallback below. Binding
    # it only inside the branch would leave it undefined for the fallback
    # whenever OPSIN does not succeed.
    cas_api = CASCommonChem()

    # 1. Get structure from OPSIN
    opsin = OPSIN()
    opsin_result = opsin.get_id(compound_name)

    if opsin_result['status'] == 'SUCCESS':
        results['opsin_data'] = opsin_result
        smiles = opsin_result['smiles']

        # 2. Search CAS by SMILES
        cas_data = cas_api.smiles_to_detail(smiles)

        if cas_data:
            results['cas_data'] = cas_data

            # 3. Search PubChem, using the CAS Registry Number as the query
            pubchem = PubChemAPI()
            pubchem_data = pubchem.get_compound_by_name(cas_data['rn'])

            if pubchem_data:
                results['pubchem_data'] = pubchem_data
                results['cross_validated'] = True

    # Alternative: try direct name search in CAS. PubChem is not queried on
    # this path, so 'cross_validated' stays False.
    if not results['cas_data']:
        cas_data = cas_api.name_to_detail(compound_name)
        if cas_data:
            results['cas_data'] = cas_data

    return results

# Usage
compound_info = comprehensive_compound_lookup("caffeine")

# 'cross_validated' is only True when the OPSIN, CAS and PubChem lookups all
# returned data, so the three entries read below are populated in that case
if compound_info['cross_validated']:
    print("Successfully cross-validated across all databases!")
    print(f"CAS: {compound_info['cas_data']['rn']}")
    print(f"SMILES: {compound_info['opsin_data']['smiles']}")
    print(f"PubChem CID: {compound_info['pubchem_data'].get('CID')}")

Property Analysis

def analyze_experimental_properties(cas_number):
    """Fetch a compound and print its experimental properties by category.

    Each property is classified by keywords found in its lower-cased name:
    thermal (boiling, melting, flash), physical (density, refractive,
    viscosity), or other. Thermal keywords are checked first.

    Parameters:
    - cas_number (str): CAS Registry Number of the compound

    Returns:
    - dict or None: Lists of property entries under the keys 'thermal',
      'physical' and 'other'; None when the compound has no record or the
      record carries no 'experimentalProperties' key
    """
    client = CASCommonChem()
    record = client.cas_to_detail(cas_number)

    if not (record and 'experimentalProperties' in record):
        print("No experimental properties available")
        return None

    entries = record['experimentalProperties']

    print(f"=== Properties for {record['name']} ===")

    # Keyword groups in priority order; anything unmatched lands in 'other'.
    keyword_groups = (
        ('thermal', ('boiling', 'melting', 'flash')),
        ('physical', ('density', 'refractive', 'viscosity')),
    )
    grouped = {'thermal': [], 'physical': [], 'other': []}

    for entry in entries:
        label = entry['name'].lower()
        bucket = next(
            (
                key
                for key, keywords in keyword_groups
                if any(keyword in label for keyword in keywords)
            ),
            'other',
        )
        grouped[bucket].append(entry)

    # Print each non-empty category under its heading.
    headings = (
        ("Thermal Properties", 'thermal'),
        ("Physical Properties", 'physical'),
        ("Other Properties", 'other'),
    )
    for heading, key in headings:
        members = grouped[key]
        if not members:
            continue
        print(f"\n{heading}:")
        for entry in members:
            print(f"  {entry['name']}: {entry['property']}")

    return grouped

# Example
analyze_experimental_properties("64-17-5")  # Ethanol

Best Practices

1. Error Handling and Validation

def safe_cas_lookup(cas_number):
    """Validate the shape of a CAS number, then look it up without raising.

    Parameters:
    - cas_number (str): CAS Registry Number such as "64-17-5"

    Returns:
    - dict or None: The compound record; None when the input is not a
      well-formed CAS number, nothing was found, or the lookup raised
    """
    import re

    # Validate CAS number format.
    # - Non-string input is rejected here instead of raising TypeError.
    # - fullmatch is used because ``$`` with match() would also accept a
    #   trailing newline ("64-17-5\n").
    # - [0-9] is used because ``\d`` also matches non-ASCII digits.
    cas_pattern = r'[0-9]{1,7}-[0-9]{2}-[0-9]'
    if not isinstance(cas_number, str) or not re.fullmatch(cas_pattern, cas_number):
        print(f"Invalid CAS format: {cas_number}")
        return None

    cas_api = CASCommonChem()

    try:
        data = cas_api.cas_to_detail(cas_number)

        if data:
            # Validate essential fields. Missing ones only produce a warning;
            # the record is still returned.
            required_fields = ['name', 'molecularFormula', 'smile']
            missing_fields = [field for field in required_fields if field not in data]

            if missing_fields:
                print(f"Warning: Missing fields: {missing_fields}")

            return data
        else:
            print(f"No data found for CAS {cas_number}")
            return None

    except Exception as e:
        # Broad on purpose: this helper promises not to raise on lookup
        # failures, so the error is reported and None is returned.
        print(f"Error looking up CAS {cas_number}: {e}")
        return None

# Usage
data = safe_cas_lookup("64-17-5")

2. Rate Limiting and Caching

import time
from functools import lru_cache

class CachedCASCommonChem:
    """CAS API with caching and rate limiting

    Results are cached per instance (up to 1000 entries per lookup type,
    least-recently-used entries evicted first). Requests that miss the cache
    are spaced at least ``delay`` seconds apart.
    """

    def __init__(self, delay=0.5, api=None):
        """Set up the client, the rate limiter and the per-instance caches.

        Parameters:
        - delay (float): Minimum number of seconds between API requests
        - api: Optional client object providing cas_to_detail(cas_number)
          and name_to_detail(name); a new CASCommonChem is created when
          omitted
        """
        self.api = CASCommonChem() if api is None else api
        self.delay = delay
        self.last_request_time = 0

        # The caches wrap bound methods and live on the instance. Decorating
        # the methods with lru_cache at class level would create one shared
        # cache keyed on ``self`` that keeps every instance alive for as long
        # as the class exists.
        self._cas_cache = lru_cache(maxsize=1000)(self._fetch_cas)
        self._name_cache = lru_cache(maxsize=1000)(self._fetch_name)

    def _rate_limit(self):
        """Implement rate limiting"""
        elapsed = time.time() - self.last_request_time
        if elapsed < self.delay:
            # Never wait longer than one full interval: if the wall clock was
            # set backwards, ``elapsed`` is negative and the raw difference
            # could be arbitrarily large.
            time.sleep(min(self.delay - elapsed, self.delay))
        self.last_request_time = time.time()

    def _fetch_cas(self, cas_number):
        """Uncached, rate-limited CAS lookup"""
        self._rate_limit()
        return self.api.cas_to_detail(cas_number)

    def _fetch_name(self, name):
        """Uncached, rate-limited name lookup"""
        self._rate_limit()
        return self.api.name_to_detail(name)

    def cached_cas_lookup(self, cas_number):
        """Cached CAS lookup"""
        return self._cas_cache(cas_number)

    def cached_name_lookup(self, name):
        """Cached name lookup"""
        return self._name_cache(name)

# Usage
# delay=1.0 spaces API requests at least one second apart
cached_api = CachedCASCommonChem(delay=1.0)

# First call - hits API
data1 = cached_api.cached_cas_lookup("64-17-5")

# Second call - uses cache
data2 = cached_api.cached_cas_lookup("64-17-5")

3. Data Export and Analysis

import pandas as pd
import json

def export_cas_data(cas_numbers, output_format='csv'):
    """Fetch several compounds and write a flattened summary to disk.

    Parameters:
    - cas_numbers (iterable of str): CAS Registry Numbers to export
    - output_format (str): 'csv' writes cas_data.csv, 'json' writes
      cas_data.json; any other value writes no file

    Returns:
    - list of dict: One flattened row per compound that was found
    """
    client = CASCommonChem()
    rows = []

    for cas_num in cas_numbers:
        record = client.cas_to_detail(cas_num)
        if not record:
            # Compounds without data are left out of the export.
            continue

        # Flatten data for tabular export
        row = {
            'cas_number': record['rn'],
            'name': record['name'],
            'molecular_formula': record['molecularFormula'],
            'molecular_mass': record['molecularMass'],
            'smiles': record['smile'],
            'inchi_key': record['inchiKey'],
            'synonyms_count': len(record.get('synonyms', [])),
            'properties_count': len(record.get('experimentalProperties', [])),
        }

        # Add up to three synonyms as synonym_1 .. synonym_3 columns
        synonyms = record.get('synonyms', [])
        row.update(
            {
                f'synonym_{position}': synonym
                for position, synonym in enumerate(synonyms[:3], start=1)
            }
        )

        rows.append(row)

    if output_format == 'csv':
        pd.DataFrame(rows).to_csv('cas_data.csv', index=False)
        print(f"Exported {len(rows)} compounds to cas_data.csv")

    elif output_format == 'json':
        with open('cas_data.json', 'w') as handle:
            json.dump(rows, handle, indent=2)
        print(f"Exported {len(rows)} compounds to cas_data.json")

    return rows

# Usage
cas_list = ["64-17-5", "67-56-1", "108-88-3"]
exported_data = export_cas_data(cas_list, 'csv')

Integration Patterns

Database Integration

import sqlite3

def create_compound_database(db_path='compounds.db'):
    """Open the compound database and make sure the schema exists.

    Parameters:
    - db_path (str): SQLite database file to open (default 'compounds.db');
      ':memory:' gives a throwaway in-memory database

    Returns:
    - sqlite3.Connection: Open connection with the ``compounds`` table
      present. The caller is responsible for closing it.

    Raises:
    - sqlite3.Error: If the table cannot be created; the connection is
      closed before the error propagates
    """
    conn = sqlite3.connect(db_path)

    try:
        conn.execute('''
        CREATE TABLE IF NOT EXISTS compounds (
            cas_number TEXT PRIMARY KEY,
            name TEXT,
            molecular_formula TEXT,
            molecular_mass REAL,
            smiles TEXT,
            inchi_key TEXT,
            data_json TEXT,
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')

        conn.commit()
    except sqlite3.Error:
        # Do not hand back (or leak) a connection whose schema setup failed.
        conn.close()
        raise

    return conn

def store_cas_data(cas_number, force_update=False):
    """Store or update CAS data in database

    Parameters:
    - cas_number (str): CAS Registry Number to fetch and store
    - force_update (bool): Re-fetch and overwrite the row even when the CAS
      number is already stored (default False)

    Returns:
    - None. Nothing is written when the API returns no data.
    """
    # Local imports keep this example self-contained.
    import json
    from contextlib import closing

    # closing() guarantees the connection is released on every path: the
    # early return below, a failed API call, or a failed insert.
    with closing(create_compound_database()) as conn:
        # Check if already exists
        cursor = conn.execute(
            'SELECT cas_number FROM compounds WHERE cas_number = ?',
            (cas_number,)
        )
        exists = cursor.fetchone() is not None

        if exists and not force_update:
            print(f"CAS {cas_number} already in database")
            return

        # Fetch from API
        cas_api = CASCommonChem()
        data = cas_api.cas_to_detail(cas_number)

        if not data:
            return

        # The mass arrives as text. Store NULL rather than crash when it is
        # missing, empty or otherwise not a number.
        try:
            molecular_mass = float(data.get('molecularMass'))
        except (TypeError, ValueError):
            molecular_mass = None

        # NOTE(review): the row is keyed on data['rn'] while the existence
        # check above uses the caller's cas_number; confirm the API always
        # echoes back the queried number.
        conn.execute('''
            INSERT OR REPLACE INTO compounds 
            (cas_number, name, molecular_formula, molecular_mass, 
             smiles, inchi_key, data_json)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', (
            data['rn'],
            data['name'],
            data['molecularFormula'],
            molecular_mass,
            data['smile'],
            data['inchiKey'],
            json.dumps(data)
        ))

        conn.commit()
        print(f"Stored {data['name']} (CAS: {cas_number})")

# Usage
store_cas_data("64-17-5")

Limitations and Considerations

Data Coverage

  • Free tier covers ~500,000 substances
  • Focus on commonly used chemicals
  • May not include very specialized or proprietary compounds

Rate Limiting

  • No official documented limits
  • Recommended to implement delays between requests
  • Monitor for HTTP 429 (Too Many Requests) responses

Data Quality

  • High-quality data from CAS REGISTRY
  • Experimental properties may vary in precision
  • Cross-validation with other sources recommended

API Stability

  • Production-grade API with good uptime
  • Data structure may evolve over time
  • Always check for API updates and changes