Skip to content

Experimental Data

PyEPISuite provides access to curated experimental datasets for model validation and comparison.

Overview

The experimental data module includes:

  • Henry's Law Constants
  • Boiling Points
  • Melting Points
  • Vapor Pressures
  • Water Solubility data

Available Datasets

pyepisuite.expdata

BoilingPointData

Source code in src/pyepisuite/expdata.py
class BoilingPointData:
    """
    Lookup table of boiling points keyed by CAS number.

    The table is read once, at construction time, from the CSV file whose
    path is returned by ``boiling_point_data_file()`` and is kept as a
    pandas DataFrame in ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(boiling_point_data_file())

    def boiling_point(self, cas: str) -> dict:
        """
        Returns the boiling point for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
        'name': 'Ethanol',
        'value': 78.37,
        'unit': 'C'}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Raises:
            IndexError: if no row of the table has this CAS number.
        """

        # Build the boolean mask once and reuse the matching rows for every column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type the bare ``.values[0]`` lookup produced,
            # but with a message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in boiling point data")

        # When several rows share a CAS number, the first one wins.
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['Boiling Pt (deg C)'].values[0],
            'unit': 'C',
        }

boiling_point(cas)

Returns the boiling point for a given CAS number with the following dictionary format: {'CASRN': '64-17-5', 'name': 'Ethanol', 'value': 78.37, 'unit': 'C'}

Source code in src/pyepisuite/expdata.py
def boiling_point(self, cas: str) -> dict:
    """
    Returns the boiling point for a given CAS number with the following dictionary format:
    {'CASRN': '64-17-5',
    'name': 'Ethanol',
    'value': 78.37,
    'unit': 'C'}

    Args:
        cas: CAS number, matched by exact equality against the 'CAS' column.

    Raises:
        IndexError: if no row of the table has this CAS number.
    """

    # Build the boolean mask once and reuse the matching rows for every column.
    rows = self.data[self.data['CAS'] == cas]
    if rows.empty:
        # Same exception type the bare ``.values[0]`` lookup produced,
        # but with a message that names the missing CAS number.
        raise IndexError(f"CAS number {cas!r} not found in boiling point data")

    # When several rows share a CAS number, the first one wins.
    return {
        'CASRN': cas,
        'name': rows['Name'].values[0],
        'value': rows['Boiling Pt (deg C)'].values[0],
        'unit': 'C',
    }

HenryData

Source code in src/pyepisuite/expdata.py
class HenryData:
    """
    Lookup table of Henry's Law Constants keyed by CAS number.

    The table is read once, at construction time, from the CSV file whose
    path is returned by ``henry_data_file()`` and is kept as a pandas
    DataFrame in ``self.data``.
    """

    def __init__(self) -> None:
        # A single-argument os.path.join() is a no-op, so the path is passed straight through.
        self.data = pd.read_csv(henry_data_file())

    def HLC(self, cas: str) -> dict:
        """
        Returns the Henry's Law Constant for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
        'name': 'Ethanol',
        'value': 0.0001,
        'unit': 'atm-m3/mole',
        'Temp (C)': 25.0,
        'type': 'EXP' or 'EST'}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS Number' column.

        Raises:
            IndexError: if no row of the table has this CAS number.
        """

        # Build the boolean mask once and reuse the matching rows for every column.
        rows = self.data[self.data['CAS Number'] == cas]
        if rows.empty:
            # Same exception type the bare ``.values[0]`` lookup produced,
            # but with a message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in Henry's Law Constant data")

        # When several rows share a CAS number, the first one wins.
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['HenryLC (atm-m3/mole)'].values[0],
            'unit': 'atm-m3/mole',
            'Temp (C)': rows['HLC Temp'].values[0],
            'type': rows['HLC type'].values[0],
        }

HLC(cas)

Returns the Henry's Law Constant for a given CAS number with the following dictionary format: {'CASRN': '64-17-5', 'name': 'Ethanol', 'value': 0.0001, 'unit': 'atm-m3/mole', 'Temp (C)': 25.0, 'type': 'EXP' or 'EST'}

Source code in src/pyepisuite/expdata.py
def HLC(self, cas: str) -> dict:
    """
    Returns the Henry's Law Constant for a given CAS number with the following dictionary format:
    {'CASRN': '64-17-5',
    'name': 'Ethanol',
    'value': 0.0001,
    'unit': 'atm-m3/mole',
    'Temp (C)': 25.0,
    'type': 'EXP' or 'EST'}

    Args:
        cas: CAS number, matched by exact equality against the 'CAS Number' column.

    Raises:
        IndexError: if no row of the table has this CAS number.
    """

    # Build the boolean mask once and reuse the matching rows for every column.
    rows = self.data[self.data['CAS Number'] == cas]
    if rows.empty:
        # Same exception type the bare ``.values[0]`` lookup produced,
        # but with a message that names the missing CAS number.
        raise IndexError(f"CAS number {cas!r} not found in Henry's Law Constant data")

    # When several rows share a CAS number, the first one wins.
    return {
        'CASRN': cas,
        'name': rows['Name'].values[0],
        'value': rows['HenryLC (atm-m3/mole)'].values[0],
        'unit': 'atm-m3/mole',
        'Temp (C)': rows['HLC Temp'].values[0],
        'type': rows['HLC type'].values[0],
    }

MeltingPointData

Source code in src/pyepisuite/expdata.py
class MeltingPointData:
    """
    Lookup table of melting points keyed by CAS number.

    The table is read once, at construction time, from the CSV file whose
    path is returned by ``melting_point_data_file()`` and is kept as a
    pandas DataFrame in ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(melting_point_data_file())

    def melting_point(self, cas: str) -> dict:
        """
        Returns the melting point for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
        'name': 'Ethanol',
        'value': -114.1,
        'unit': 'C'}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Raises:
            IndexError: if no row of the table has this CAS number.
        """

        # Build the boolean mask once and reuse the matching rows for every column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type the bare ``.values[0]`` lookup produced,
            # but with a message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in melting point data")

        # When several rows share a CAS number, the first one wins.
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['Melt Pt (deg C)'].values[0],
            'unit': 'C',
        }

melting_point(cas)

Returns the melting point for a given CAS number with the following dictionary format: {'CASRN': '64-17-5', 'name': 'Ethanol', 'value': -114.1, 'unit': 'C'}

Source code in src/pyepisuite/expdata.py
def melting_point(self, cas: str) -> dict:
    """
    Returns the melting point for a given CAS number with the following dictionary format:
    {'CASRN': '64-17-5',
    'name': 'Ethanol',
    'value': -114.1,
    'unit': 'C'}

    Args:
        cas: CAS number, matched by exact equality against the 'CAS' column.

    Raises:
        IndexError: if no row of the table has this CAS number.
    """

    # Build the boolean mask once and reuse the matching rows for every column.
    rows = self.data[self.data['CAS'] == cas]
    if rows.empty:
        # Same exception type the bare ``.values[0]`` lookup produced,
        # but with a message that names the missing CAS number.
        raise IndexError(f"CAS number {cas!r} not found in melting point data")

    # When several rows share a CAS number, the first one wins.
    return {
        'CASRN': cas,
        'name': rows['Name'].values[0],
        'value': rows['Melt Pt (deg C)'].values[0],
        'unit': 'C',
    }

SolubilityData

Source code in src/pyepisuite/expdata.py
class SolubilityData:
    """
    Lookup table of water solubility records keyed by CAS number.

    The table is read once, at construction time, from the CSV file whose
    path is returned by ``solubility_data_file()`` and is kept as a pandas
    DataFrame in ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(solubility_data_file())

    def solubility(self, cas: str) -> dict:
        """
        Returns the solubility for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
        'name': 'Ethanol',
        'class': 'Alcohol',
        'logKow': -0.24,
        'water_solubility_mg_per_L': 50.0,
        'log_mol_per_L': -3.1}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Raises:
            IndexError: if no row of the table has this CAS number.
        """

        # Build the boolean mask once and reuse the matching rows for every column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type the bare ``.values[0]`` lookup produced,
            # but with a message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in solubility data")

        # When several rows share a CAS number, the first one wins.
        return {
            'CASRN': cas,
            'name': rows['NAME'].values[0],
            'class': rows['class_name'].values[0],
            'logKow': rows['LOGP'].values[0],
            'water_solubility_mg_per_L': rows['WSOL'].values[0],
            'log_mol_per_L': rows['LOGMOLAR'].values[0],
        }

solubility(cas)

Returns the solubility for a given CAS number with the following dictionary format: {'CASRN': '64-17-5', 'name': 'Ethanol', 'class': 'Alcohol', 'logKow': -0.24, 'water_solubility_mg_per_L': 50.0, 'log_mol_per_L': -3.1}

Source code in src/pyepisuite/expdata.py
def solubility(self, cas: str) -> dict:
    """
    Returns the solubility for a given CAS number with the following dictionary format:
    {'CASRN': '64-17-5',
    'name': 'Ethanol',
    'class': 'Alcohol',
    'logKow': -0.24,
    'water_solubility_mg_per_L': 50.0,
    'log_mol_per_L': -3.1}

    Args:
        cas: CAS number, matched by exact equality against the 'CAS' column.

    Raises:
        IndexError: if no row of the table has this CAS number.
    """

    # Build the boolean mask once and reuse the matching rows for every column.
    rows = self.data[self.data['CAS'] == cas]
    if rows.empty:
        # Same exception type the bare ``.values[0]`` lookup produced,
        # but with a message that names the missing CAS number.
        raise IndexError(f"CAS number {cas!r} not found in solubility data")

    # When several rows share a CAS number, the first one wins.
    return {
        'CASRN': cas,
        'name': rows['NAME'].values[0],
        'class': rows['class_name'].values[0],
        'logKow': rows['LOGP'].values[0],
        'water_solubility_mg_per_L': rows['WSOL'].values[0],
        'log_mol_per_L': rows['LOGMOLAR'].values[0],
    }

VaporPressureData

Source code in src/pyepisuite/expdata.py
class VaporPressureData:
    """
    Lookup table of vapor pressures keyed by CAS number.

    The table is read once, at construction time, from the CSV file whose
    path is returned by ``vapor_pressure_data_file()`` and is kept as a
    pandas DataFrame in ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(vapor_pressure_data_file())

    def vapor_pressure(self, cas: str) -> dict:
        """
        Returns the vapor pressure for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
        'name': 'Ethanol',
        'value': 59.3,
        'unit': 'mmHg',
        'Temp (C)': 20.0,
        'type': 'EXP' or 'EXT'}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Raises:
            IndexError: if no row of the table has this CAS number.
        """

        # Build the boolean mask once and reuse the matching rows for every column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type the bare ``.values[0]`` lookup produced,
            # but with a message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in vapor pressure data")

        # When several rows share a CAS number, the first one wins.
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['VP (mm Hg)'].values[0],
            'unit': 'mmHg',
            'Temp (C)': rows['VP temp (degC)'].values[0],
            'type': rows['VP type'].values[0],
        }

vapor_pressure(cas)

Returns the vapor pressure for a given CAS number with the following dictionary format: {'CASRN': '64-17-5', 'name': 'Ethanol', 'value': 59.3, 'unit': 'mmHg', 'Temp (C)': 20.0, 'type': 'EXP' or 'EXT'}

Source code in src/pyepisuite/expdata.py
def vapor_pressure(self, cas: str) -> dict:
    """
    Returns the vapor pressure for a given CAS number with the following dictionary format:
    {'CASRN': '64-17-5',
    'name': 'Ethanol',
    'value': 59.3,
    'unit': 'mmHg',
    'Temp (C)': 20.0,
    'type': 'EXP' or 'EXT'}

    Args:
        cas: CAS number, matched by exact equality against the 'CAS' column.

    Raises:
        IndexError: if no row of the table has this CAS number.
    """

    # Build the boolean mask once and reuse the matching rows for every column.
    rows = self.data[self.data['CAS'] == cas]
    if rows.empty:
        # Same exception type the bare ``.values[0]`` lookup produced,
        # but with a message that names the missing CAS number.
        raise IndexError(f"CAS number {cas!r} not found in vapor pressure data")

    # When several rows share a CAS number, the first one wins.
    return {
        'CASRN': cas,
        'name': rows['Name'].values[0],
        'value': rows['VP (mm Hg)'].values[0],
        'unit': 'mmHg',
        'Temp (C)': rows['VP temp (degC)'].values[0],
        'type': rows['VP type'].values[0],
    }

logKowData

Source code in src/pyepisuite/expdata.py
class logKowData:
    """
    Lookup tables of experimental logKow values keyed by CAS number.

    ``kow_data_files()`` supplies three CSV paths, which are read at
    construction time into ``self.params``, ``self.data`` and
    ``self.zwitterionic_data`` respectively.
    """

    def __init__(self) -> None:
        params, kow, kow_zwitterionic = kow_data_files()
        self.params = pd.read_csv(params)
        self.data = pd.read_csv(kow)
        self.zwitterionic_data = pd.read_csv(kow_zwitterionic)

    @staticmethod
    def _lookup(table, cas: str, label: str) -> dict:
        """Return {'CASRN': cas, 'logKow': value} from the first row of *table* matching *cas*."""
        rows = table[table['CASRN'] == cas]
        if rows.empty:
            # Same exception type the bare ``.values[0]`` lookup produced,
            # but with a message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in {label} data")
        return {'CASRN': cas, 'logKow': rows['logKow_exp'].values[0]}

    def logKow(self, cas: str) -> dict:
        """
        Returns the experimental logKow for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
        'logKow': -0.31}

        Raises:
            IndexError: if no row of ``self.data`` has this CAS number.
        """

        return self._lookup(self.data, cas, 'logKow')

    def logKow_zwitterionic(self, cas: str) -> dict:
        """
        Returns the experimental logKow for a given CAS number, read from the
        zwitterionic table, in the same dictionary format as ``logKow``:
        {'CASRN': ..., 'logKow': ...}

        Raises:
            IndexError: if no row of ``self.zwitterionic_data`` has this CAS number.
        """

        return self._lookup(self.zwitterionic_data, cas, 'zwitterionic logKow')

logKow(cas)

Returns the experimental logKow for a given CAS number as a dictionary of the form {'CASRN': cas, 'logKow': value}.

Source code in src/pyepisuite/expdata.py
def logKow(self, cas: str) -> dict:
    """
    Returns the experimental logKow for a given CAS number with the following dictionary format:
    {'CASRN': '64-17-5',
    'logKow': -0.31}

    Raises:
        IndexError: if no row of ``self.data`` has this CAS number.
    """

    rows = self.data[self.data['CASRN'] == cas]
    if rows.empty:
        # Same exception type the bare ``.values[0]`` lookup produced,
        # but with a message that names the missing CAS number.
        raise IndexError(f"CAS number {cas!r} not found in logKow data")
    return {'CASRN': cas, 'logKow': rows['logKow_exp'].values[0]}

logKow_zwitterionic(cas)

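Returns the experimental logKow for a given CAS number from the zwitterionic dataset, as a dictionary of the form {'CASRN': cas, 'logKow': value}.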

Source code in src/pyepisuite/expdata.py
def logKow_zwitterionic(self, cas: str) -> dict:
    """
    Returns the experimental logKow for a given CAS number, read from the
    zwitterionic table, with the following dictionary format:
    {'CASRN': ..., 'logKow': ...}

    Raises:
        IndexError: if no row of ``self.zwitterionic_data`` has this CAS number.
    """

    rows = self.zwitterionic_data[self.zwitterionic_data['CASRN'] == cas]
    if rows.empty:
        # Same exception type the bare ``.values[0]`` lookup produced,
        # but with a message that names the missing CAS number.
        raise IndexError(f"CAS number {cas!r} not found in zwitterionic logKow data")
    return {'CASRN': cas, 'logKow': rows['logKow_exp'].values[0]}

Usage Examples

Henry's Law Constants

from pyepisuite.expdata import HenryData

# Instantiate the dataset wrapper (the constructor reads the Henry's law CSV)
henry_table = HenryData()

# Look up one compound by CAS number; the result is a dictionary
formaldehyde_hlc = henry_table.HLC("50-00-0")  # Formaldehyde
print(f"Henry's Law Constant: {formaldehyde_hlc}")

# The underlying pandas DataFrame is exposed as .data
print(henry_table.data.head())

Solubility Data

from pyepisuite.expdata import SolubilityData

# Instantiate the dataset wrapper (the constructor reads the solubility CSV)
sol_table = SolubilityData()

# Look up one compound by CAS number; the result is a dictionary
record = sol_table.solubility("50-00-0")
print(f"Water Solubility: {record}")

Physical Properties

from pyepisuite.expdata import (
    BoilingPointData,
    MeltingPointData, 
    VaporPressureData
)

# One CAS number queried against each property table
cas = "50-00-0"

# Each wrapper loads its CSV on construction; each lookup returns a dictionary
bp = BoilingPointData().boiling_point(cas)
mp = MeltingPointData().melting_point(cas)
vp = VaporPressureData().vapor_pressure(cas)

Data Validation

Use experimental data to validate model predictions:

from pyepisuite import search_episuite_by_cas, submit_to_episuite
from pyepisuite.dataframe_utils import episuite_to_dataframe
from pyepisuite.expdata import HenryData

# Get model predictions
cas_list = ["50-00-0", "67-56-1"]  # Formaldehyde, Methanol
ids = search_episuite_by_cas(cas_list)
epi_results, _ = submit_to_episuite(ids)
df = episuite_to_dataframe(epi_results)

# Get experimental data
henry = HenryData()

# Compare predictions vs experimental
for _, row in df.iterrows():
    cas = row['cas']
    predicted = row['henrys_law_constant_estimated']

    # HLC() returns a dictionary and raises IndexError (it never returns
    # None) when the CAS number is absent from the experimental table.
    try:
        record = henry.HLC(cas)
    except IndexError:
        continue

    # The numeric constant lives under 'value'; dividing by the whole
    # dictionary would raise TypeError.
    experimental = record['value']

    print(f"{row['name']} (CAS: {cas})")
    print(f"  Predicted: {predicted}")
    print(f"  Experimental: {experimental} {record['unit']}")
    # NOTE(review): the ratio assumes the predicted value is also in atm-m3/mole - confirm.
    print(f"  Ratio: {predicted/experimental:.2f}")

Data Sources

The experimental datasets are curated from:

  • EPA's experimental databases
  • Peer-reviewed literature
  • Standard reference sources
  • Quality-controlled measurements

Data Quality

All experimental data includes:

  • Source attribution
  • Quality flags
  • Uncertainty information where available
  • Units and conditions