Experimental Data
PyEPISuite provides access to curated experimental datasets for model validation and comparison.
Overview
The experimental data module includes:
- Henry's Law Constants
- Boiling Points
- Melting Points
- Vapor Pressures
- Water Solubility data
- Octanol-water partition coefficients (logKow)
Available Datasets
pyepisuite.expdata
BoilingPointData
Source code in src/pyepisuite/expdata.py
class BoilingPointData:
    """Lookup of boiling points by CAS number.

    The table is read once, at construction, from the CSV file located by
    ``boiling_point_data_file()`` and is exposed as ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(boiling_point_data_file())

    def boiling_point(self, cas: str) -> dict:
        """
        Returns the boiling point for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
         'name': 'Ethanol',
         'value': 78.37,
         'unit': 'C'}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Returns:
            dict holding the queried CAS number, the 'Name' and
            'Boiling Pt (deg C)' entries of the first matching row, and the
            fixed unit 'C'.

        Raises:
            IndexError: if no row of the table matches ``cas``.
        """
        # Filter once and reuse the match instead of re-scanning per column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in boiling point data")
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['Boiling Pt (deg C)'].values[0],
            'unit': 'C',
        }
|
boiling_point(cas)
Returns the boiling point for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': 78.37,
'unit': 'C'}
Source code in src/pyepisuite/expdata.py
| def boiling_point(self, cas: str) -> dict:
"""
Returns the boiling point for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': 78.37,
'unit': 'C'
"""
val = self.data[self.data['CAS'] == cas]['Boiling Pt (deg C)'].values[0]
name = self.data[self.data['CAS'] == cas]['Name'].values[0]
unit = 'C'
return {'CASRN': cas, 'name': name, 'value': val, 'unit': unit}
|
HenryData
Source code in src/pyepisuite/expdata.py
class HenryData:
    """Lookup of Henry's Law Constants by CAS number.

    The table is read once, at construction, from the CSV file located by
    ``henry_data_file()`` and is exposed as ``self.data``.
    """

    def __init__(self) -> None:
        # A single-argument os.path.join() returns its argument unchanged, so
        # the path is passed straight through, as the sibling data classes do.
        self.data = pd.read_csv(henry_data_file())

    def HLC(self, cas: str) -> dict:
        """
        Returns the Henry's Law Constant for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
         'name': 'Ethanol',
         'value': 0.0001,
         'unit': 'atm-m3/mole',
         'Temp (C)': 25.0,
         'type': 'EXP' or 'EST'}

        Args:
            cas: CAS number, matched by exact equality against the
                'CAS Number' column.

        Returns:
            dict holding the queried CAS number, the 'Name',
            'HenryLC (atm-m3/mole)', 'HLC Temp' and 'HLC type' entries of the
            first matching row, and the fixed unit 'atm-m3/mole'.

        Raises:
            IndexError: if no row of the table matches ``cas``.
        """
        # Filter once and reuse the match instead of re-scanning per column.
        rows = self.data[self.data['CAS Number'] == cas]
        if rows.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in Henry's Law Constant data")
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['HenryLC (atm-m3/mole)'].values[0],
            'unit': 'atm-m3/mole',
            'Temp (C)': rows['HLC Temp'].values[0],
            'type': rows['HLC type'].values[0],
        }
|
HLC(cas)
Returns the Henry's Law Constant for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': 0.0001,
'unit': 'atm-m3/mole',
'Temp (C)': 25.0,
'type': 'EXP' or 'EST'}
Source code in src/pyepisuite/expdata.py
| def HLC(self, cas: str) -> dict:
"""
Returns the Henry's Law Constant for a given CAS number with th following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': 0.0001,
'unit': 'atm-m3/mole',
'Temp (C)': 25.0,
'type': 'EXP' or 'EST'}
"""
val = self.data[self.data['CAS Number'] == cas]['HenryLC (atm-m3/mole)'].values[0]
name = self.data[self.data['CAS Number'] == cas]['Name'].values[0]
T = self.data[self.data['CAS Number'] == cas]['HLC Temp'].values[0]
unit = 'atm-m3/mole'
type_hlc = self.data[self.data['CAS Number'] == cas]['HLC type'].values[0]
return {'CASRN': cas, 'name': name, 'value': val, 'unit': unit, 'Temp (C)': T, 'type': type_hlc}
|
MeltingPointData
Source code in src/pyepisuite/expdata.py
class MeltingPointData:
    """Lookup of melting points by CAS number.

    The table is read once, at construction, from the CSV file located by
    ``melting_point_data_file()`` and is exposed as ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(melting_point_data_file())

    def melting_point(self, cas: str) -> dict:
        """
        Returns the melting point for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
         'name': 'Ethanol',
         'value': -114.1,
         'unit': 'C'}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Returns:
            dict (not a bare float) holding the queried CAS number, the
            'Name' and 'Melt Pt (deg C)' entries of the first matching row,
            and the fixed unit 'C'.

        Raises:
            IndexError: if no row of the table matches ``cas``.
        """
        # Filter once and reuse the match instead of re-scanning per column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in melting point data")
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['Melt Pt (deg C)'].values[0],
            'unit': 'C',
        }
|
melting_point(cas)
Returns the melting point for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': -114.1,
'unit': 'C'}
Source code in src/pyepisuite/expdata.py
| def melting_point(self, cas: str) -> float:
"""
Returns the melting point for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': -114.1,
'unit': 'C'}
"""
val = self.data[self.data['CAS'] == cas]['Melt Pt (deg C)'].values[0]
name = self.data[self.data['CAS'] == cas]['Name'].values[0]
unit = 'C'
return {'CASRN': cas, 'name': name, 'value': val, 'unit': unit}
|
SolubilityData
Source code in src/pyepisuite/expdata.py
class SolubilityData:
    """Lookup of water solubility records by CAS number.

    The table is read once, at construction, from the CSV file located by
    ``solubility_data_file()`` and is exposed as ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(solubility_data_file())

    def solubility(self, cas: str) -> dict:
        """
        Returns the solubility for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
         'name': 'Ethanol',
         'class': 'Alcohol',
         'logKow': -0.24,
         'water_solubility_mg_per_L': 50.0,
         'log_mol_per_L': -3.1}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Returns:
            dict holding the queried CAS number and, from the first matching
            row, the 'NAME', 'class_name', 'LOGP', 'WSOL' and 'LOGMOLAR'
            entries under the keys shown above.

        Raises:
            IndexError: if no row of the table matches ``cas``.
        """
        # Filter once and reuse the match instead of re-scanning per column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in solubility data")
        return {
            'CASRN': cas,
            'name': rows['NAME'].values[0],
            'class': rows['class_name'].values[0],
            'logKow': rows['LOGP'].values[0],
            'water_solubility_mg_per_L': rows['WSOL'].values[0],
            'log_mol_per_L': rows['LOGMOLAR'].values[0],
        }
|
solubility(cas)
Returns the solubility for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'class': 'Alcohol',
'logKow': -0.24,
'water_solubility_mg_per_L': 50.0,
'log_mol_per_L': -3.1}
Source code in src/pyepisuite/expdata.py
| def solubility(self, cas: str) -> dict:
"""
Returns the solubility for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'class': 'Alcohol',
'logKow': -0.24,
'water_solubility_mg_per_L': 50.0,
'log_mol_per_L': -3.1,
"""
name = self.data[self.data['CAS'] == cas]['NAME'].values[0]
class_compound = self.data[self.data['CAS'] == cas]['class_name'].values[0]
logKow = self.data[self.data['CAS'] == cas]['LOGP'].values[0]
water_solubility_mg_per_L = self.data[self.data['CAS'] == cas]['WSOL'].values[0]
log_mol_per_L = self.data[self.data['CAS'] == cas]['LOGMOLAR'].values[0]
return {'CASRN': cas,
'name': name,
'class': class_compound,
'logKow': logKow,
'water_solubility_mg_per_L': water_solubility_mg_per_L,
'log_mol_per_L': log_mol_per_L}
|
VaporPressureData
Source code in src/pyepisuite/expdata.py
class VaporPressureData:
    """Lookup of vapor pressures by CAS number.

    The table is read once, at construction, from the CSV file located by
    ``vapor_pressure_data_file()`` and is exposed as ``self.data``.
    """

    def __init__(self) -> None:
        self.data = pd.read_csv(vapor_pressure_data_file())

    def vapor_pressure(self, cas: str) -> dict:
        """
        Returns the vapor pressure for a given CAS number with the following dictionary format:
        {'CASRN': '64-17-5',
         'name': 'Ethanol',
         'value': 59.3,
         'unit': 'mmHg',
         'Temp (C)': 20.0,
         'type': 'EXP' or 'EXT'}

        Args:
            cas: CAS number, matched by exact equality against the 'CAS' column.

        Returns:
            dict holding the queried CAS number, the 'Name', 'VP (mm Hg)',
            'VP temp (degC)' and 'VP type' entries of the first matching row,
            and the fixed unit 'mmHg'.

        Raises:
            IndexError: if no row of the table matches ``cas``.
        """
        # Filter once and reuse the match instead of re-scanning per column.
        rows = self.data[self.data['CAS'] == cas]
        if rows.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in vapor pressure data")
        return {
            'CASRN': cas,
            'name': rows['Name'].values[0],
            'value': rows['VP (mm Hg)'].values[0],
            'unit': 'mmHg',
            'Temp (C)': rows['VP temp (degC)'].values[0],
            'type': rows['VP type'].values[0],
        }
|
vapor_pressure(cas)
Returns the vapor pressure for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': 59.3,
'unit': 'mmHg',
'Temp (C)': 20.0,
'type': 'EXP' or 'EXT'}
Source code in src/pyepisuite/expdata.py
| def vapor_pressure(self, cas: str) -> dict:
"""
Returns the vapor pressure for a given CAS number with the following dictionary format:
{'CASRN': '64-17-5',
'name': 'Ethanol',
'value': 59.3,
'unit': 'mmHg',
'Temp (C)': 20.0,
'type': 'EXP' or 'EXT'}
"""
val = self.data[self.data['CAS'] == cas]['VP (mm Hg)'].values[0]
name = self.data[self.data['CAS'] == cas]['Name'].values[0]
T = self.data[self.data['CAS'] == cas]['VP temp (degC)'].values[0]
unit = 'mmHg'
type_vp = self.data[self.data['CAS'] == cas]['VP type'].values[0]
return {'CASRN': cas, 'name': name, 'value': val, 'unit': unit, 'Temp (C)': T, 'type': type_vp}
|
logKowData
Source code in src/pyepisuite/expdata.py
class logKowData:
    """Lookup of experimental logKow values by CAS number.

    ``kow_data_files()`` supplies three CSV paths, loaded at construction as
    ``self.params``, ``self.data`` and ``self.zwitterionic_data``.
    """

    def __init__(self) -> None:
        params, kow, kow_zwitterionic = kow_data_files()
        self.params = pd.read_csv(params)
        self.data = pd.read_csv(kow)
        self.zwitterionic_data = pd.read_csv(kow_zwitterionic)

    def logKow(self, cas: str) -> dict:
        """
        Returns the logKow for a given CAS number, read from ``self.data``.

        Args:
            cas: CAS number, matched by exact equality against the 'CASRN' column.

        Returns:
            dict of the form {'CASRN': cas, 'logKow': value}, where value is
            the 'logKow_exp' entry of the first matching row.

        Raises:
            IndexError: if no row of the table matches ``cas``.
        """
        rows = self.data[self.data['CASRN'] == cas]
        if rows.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in logKow data")
        return {'CASRN': cas, 'logKow': rows['logKow_exp'].values[0]}

    def logKow_zwitterionic(self, cas: str) -> dict:
        """
        Returns the logKow for a given CAS number, read from
        ``self.zwitterionic_data``.

        Args:
            cas: CAS number, matched by exact equality against the 'CASRN' column.

        Returns:
            dict of the form {'CASRN': cas, 'logKow': value}, where value is
            the 'logKow_exp' entry of the first matching row.

        Raises:
            IndexError: if no row of the table matches ``cas``.
        """
        rows = self.zwitterionic_data[self.zwitterionic_data['CASRN'] == cas]
        if rows.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the missing CAS number.
            raise IndexError(f"CAS number {cas!r} not found in zwitterionic logKow data")
        return {'CASRN': cas, 'logKow': rows['logKow_exp'].values[0]}
|
logKow(cas)
Returns the experimental logKow for a given CAS number as a dictionary of the form {'CASRN': cas, 'logKow': value}.
Source code in src/pyepisuite/expdata.py
| def logKow(self, cas: str) -> float:
"""
Returns the logKow for a given CAS number
"""
logKow = self.data[self.data['CASRN'] == cas]['logKow_exp'].values[0]
return {'CASRN': cas, 'logKow': logKow}
|
logKow_zwitterionic(cas)
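Returns the experimental logKow for a given CAS number, looked up in the zwitterionic dataset, as a dictionary of the form {'CASRN': cas, 'logKow': value}.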
Source code in src/pyepisuite/expdata.py
| def logKow_zwitterionic(self, cas: str) -> float:
"""
Returns the logKow for a given CAS number
"""
logKow = self.zwitterionic_data[self.zwitterionic_data['CASRN'] == cas]['logKow_exp'].values[0]
return {'CASRN': cas, 'logKow': logKow}
|
Usage Examples
Henry's Law Constants
from pyepisuite.expdata import HenryData

# Constructing the object reads the Henry's law constant table
henry_table = HenryData()

# Look up one chemical by CAS number (50-00-0 is formaldehyde)
formaldehyde_cas = "50-00-0"
record = henry_table.HLC(formaldehyde_cas)
print(f"Henry's Law Constant: {record}")

# The complete table is available as a DataFrame on `.data`
full_table = henry_table.data
print(full_table.head())
Solubility Data
from pyepisuite.expdata import SolubilityData

# Constructing the object reads the solubility table
solubility_table = SolubilityData()

# Look up the solubility record for one CAS number
record = solubility_table.solubility("50-00-0")
print(f"Water Solubility: {record}")
Physical Properties
from pyepisuite.expdata import BoilingPointData, MeltingPointData, VaporPressureData

# Each constructor reads its own physical-property table
boiling_points = BoilingPointData()
melting_points = MeltingPointData()
vapor_pressures = VaporPressureData()

# Query all three tables for the same CAS number
target_cas = "50-00-0"
bp = boiling_points.boiling_point(target_cas)
mp = melting_points.melting_point(target_cas)
vp = vapor_pressures.vapor_pressure(target_cas)
Data Validation
Use experimental data to validate model predictions:
from pyepisuite import search_episuite_by_cas, submit_to_episuite
from pyepisuite.dataframe_utils import episuite_to_dataframe
from pyepisuite.expdata import HenryData

# Get model predictions
cas_list = ["50-00-0", "67-56-1"]  # Formaldehyde, Methanol
ids = search_episuite_by_cas(cas_list)
epi_results, _ = submit_to_episuite(ids)
df = episuite_to_dataframe(epi_results)

# Get experimental data
henry = HenryData()

# Compare predictions vs experimental
for _, row in df.iterrows():
    cas = row['cas']
    predicted = row['henrys_law_constant_estimated']
    try:
        # HLC() returns a dictionary; the numeric constant is under 'value'
        experimental = henry.HLC(cas)['value']
    except IndexError:
        # HLC() raises IndexError (it does not return None) when the CAS
        # number has no entry in the experimental table, so skip it
        continue
    print(f"{row['name']} (CAS: {cas})")
    print(f" Predicted: {predicted}")
    print(f" Experimental: {experimental}")
    print(f" Ratio: {predicted/experimental:.2f}")
Data Sources
The experimental datasets are curated from:
- EPA's experimental databases
- Peer-reviewed literature
- Standard reference sources
- Quality-controlled measurements
Data Quality
All experimental data includes:
- Source attribution
- Quality flags
- Uncertainty information where available
- Units and conditions