Skip to content

USEtox Input API Reference

pyepisuite.usetox_input.USEtoxInput

Simple class for populating USEtox Excel templates with PyEPISuite data.

The workflow has four steps:

1. Call the EPI Suite API for each chemical.
2. Open the Excel template with openpyxl.
3. Add chemical data row by row, starting from row 6.
4. Use column mappings to place data in the correct columns.

Source code in src/pyepisuite/usetox_input.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
class USEtoxInput:
    """
    Populate a USEtox Excel template with PyEPISuite data.

    Workflow:
    1. Call the EPI Suite API for each chemical
    2. Open the Excel template with openpyxl
    3. Add chemical data row by row, starting from row 6
    4. Use column mappings to place data in the correct columns
    """

    # Excel column mapping: an alias of the module-level column_names_dict,
    # exposed as a class attribute.
    EXCEL_COLUMN_MAPPING = column_names_dict

    # Experimental data priority: for each USEtox property, the candidate
    # sources, listed with the experimental one ahead of the estimate.
    # NOTE(review): no method of this class reads this table - confirm it is
    # consumed elsewhere in the module.
    EXPERIMENTAL_PROPERTY_PRIORITY = {
        'Sol25': ['experimental_solubility', 'estimated_solubility'],
        'KOW': ['experimental_kow', 'estimated_kow'],
        'Pvap25': ['experimental_vapor_pressure', 'estimated_vapor_pressure'],
        'KH25C': ['experimental_henrys_constant', 'estimated_henrys_constant'],
        'Koc': ['experimental_koc', 'estimated_koc']
    }

    # Mapping from EPI Suite DataFrame columns to USEtox properties.
    # NOTE(review): no method of this class reads this table, and its three
    # 'fugacity_*_half_life' keys differ from the keys that
    # _populate_basic_properties actually looks up
    # ('water_biodegradation_half_life_unacclimated', 'sediment_half_life_hours',
    # 'soil_half_life_hours') - confirm which set of names is intended.
    EPISUITE_TO_USETOX_MAPPING = {
        'cas': 'CAS RN',
        'name': 'Name',
        'molecular_weight': 'MW',
        'log_kow_estimated': 'KOW',  # Will be converted from log to linear
        'log_koc_estimated': 'Koc',  # Will be converted from log to linear
        'henrys_law_constant_estimated': 'KH25C',
        'vapor_pressure_estimated': 'Pvap25',
        'water_solubility_logkow_estimated': 'Sol25',
        'atmospheric_half_life_estimated': 'T1/2A',
        'fugacity_water_half_life': 'T1/2W',
        'fugacity_sediment_half_life': 'T1/2Sd',
        'fugacity_soil_half_life': 'T1/2Sl'
    }

    def __init__(self, template_path: Optional[str] = None):
        """
        Create a populator bound to a USEtox template workbook.

        Args:
            template_path: Path to the USEtox template Excel file. When it is
                omitted or empty, the default template path is used.
        """
        # Workbook handles start out unset; data rows begin at row 6.
        self.current_row = 6
        self.worksheet = None
        self.workbook = None

        if template_path:
            self.template_path = template_path
        else:
            self.template_path = self._get_default_template_path()

        # Open the workbook immediately so later calls can write into it.
        self._load_template()

    def _get_default_template_path(self) -> str:
        """Return the default USEtox template location, relative to this module, as a string."""
        module_dir = Path(__file__).parent
        relative_parts = ("..", "..", "data", "usetox3", "AA_Model_substance_data_Default.xlsx")
        return str(module_dir.joinpath(*relative_parts))

    def _load_template(self):
        """
        Open the template workbook with openpyxl and select its
        "Substance inputs" sheet.

        Any failure is logged and then re-raised unchanged.
        """
        path = self.template_path
        try:
            self.workbook = load_workbook(path)
            self.worksheet = self.workbook["Substance inputs"]
            logger.info(f"Loaded USEtox template from {path}")
        except Exception as exc:
            # Record the problem, then let the caller see the original error.
            logger.error(f"Failed to load template: {exc}")
            raise

    def add_chemical_from_episuite(self, episuite_result: Dict[str, Any]) -> int:
        """
        Write one chemical's EPI Suite results into the next free template row.

        Args:
            episuite_result: Dictionary containing EPI Suite results for one chemical

        Returns:
            Row number where the chemical was added

        Raises:
            ValueError: If no worksheet is loaded.
        """
        if not self.worksheet:
            raise ValueError("Template not loaded")

        target_row = self.current_row

        # Column A carries the running substance number (row 6 -> 1, row 7 -> 2, ...).
        self.worksheet[f'A{target_row}'] = target_row - 5

        # Each populator fills its own group of columns for this row.
        populators = (
            self._populate_basic_properties,
            self._populate_baf_properties,
            self._populate_toxicity_properties,
        )
        for populate in populators:
            populate(episuite_result, target_row)

        # Advance the cursor so the next chemical lands on the following row.
        self.current_row = self.current_row + 1

        chemical_name = episuite_result.get('name', 'Unknown')
        logger.info(f"Added chemical {chemical_name} at row {target_row}")
        return target_row

    def _populate_basic_properties(self, episuite_result: Dict[str, Any], row_num: int):
        """
        Write the basic physico-chemical properties of one chemical into a row.

        Values are read from ``episuite_result``, run through the unit/log
        conversion helpers where applicable, and written to the column that
        column_names_dict assigns to each USEtox property. Properties whose
        value is None, or that have no column assigned, are skipped.

        Args:
            episuite_result: Dictionary containing EPI Suite results for one chemical
            row_num: Worksheet row to write into
        """
        read = episuite_result.get
        hours_to_days = self._convert_hours_to_days

        # USEtox property name -> value to write (None means "leave the cell alone").
        values_by_property = {
            'CAS RN': read('cas', ''),
            'Name': read('name', ''),
            'MW': read('molecular_weight'),
            'KOW': self._convert_log_kow_to_kow(read('log_kow_estimated')),
            'Koc': self._convert_log_koc_to_koc(read('log_koc_estimated')),
            'KH25C': read('henrys_law_constant_estimated'),
            'Pvap25': self._convert_mmhg_to_pa(read('vapor_pressure_estimated')),
            'Sol25': self._convert_mg_l_to_g_l(read('water_solubility_logkow_estimated')),
            'T1/2A': read('atmospheric_half_life_estimated'),
            'T1/2W': hours_to_days(read('water_biodegradation_half_life_unacclimated')),
            'T1/2Sd': hours_to_days(read('sediment_half_life_hours')),
            'T1/2Sl': hours_to_days(read('soil_half_life_hours')),
        }

        for usetox_property, cell_value in values_by_property.items():
            if cell_value is None:
                continue
            if usetox_property not in column_names_dict:
                continue
            column_letter = column_names_dict[usetox_property]
            self.worksheet[f'{column_letter}{row_num}'] = cell_value

    def _populate_baf_properties(self, episuite_result: Dict[str, Any], row_num: int):
        """
        Write the bioaccumulation value of one chemical into a row.

        The EPI Suite 'bioconcentration_factor' entry is written to the column
        that BAF_dict assigns to 'BAFfish'. Nothing is written when the value
        is None or BAF_dict has no 'BAFfish' entry.
        """
        bcf = episuite_result.get('bioconcentration_factor')
        if bcf is None or 'BAFfish' not in BAF_dict:
            return

        column_letter = BAF_dict['BAFfish']
        self.worksheet[f'{column_letter}{row_num}'] = bcf

    def _populate_toxicity_properties(self, episuite_result: Dict[str, Any], row_num: int):
        """
        Write the flag columns of one chemical into a row.

        Two columns from flagged_indicative_dict are filled when present:
        'Data source' always receives "PyEPISuite", and 'Inorganics' receives
        "Y" when the inorganic heuristic fires and "" otherwise.
        """
        if 'Data source' in flagged_indicative_dict:
            source_column = flagged_indicative_dict['Data source']
            self.worksheet[f'{source_column}{row_num}'] = "PyEPISuite"

        # The heuristic runs regardless of whether its column is mapped.
        if self._check_if_inorganic(episuite_result):
            inorganic_flag = "Y"
        else:
            inorganic_flag = ""

        if 'Inorganics' in flagged_indicative_dict:
            inorganic_column = flagged_indicative_dict['Inorganics']
            self.worksheet[f'{inorganic_column}{row_num}'] = inorganic_flag

    def _check_if_inorganic(self, episuite_result: Dict[str, Any]) -> bool:
        """
        Heuristically decide whether a chemical might be inorganic.

        The checks run in this order:
        1. The lower-cased name contains an inorganic keyword -> True
        2. The lower-cased name contains a known organic indicator -> False
        3. The molecular weight is below 30 -> True
        4. Otherwise -> False

        This is a simple substring heuristic and can misclassify compounds.

        Args:
            episuite_result: Dictionary containing EPI Suite results for one
                chemical. A missing or non-string 'name' (for example None or
                NaN coming from a DataFrame row) is treated as an empty name,
                and a 'molecular_weight' that cannot be converted to a number
                is ignored.

        Returns:
            True if the chemical looks inorganic, False otherwise.
        """
        raw_name = episuite_result.get('name')
        # Guard against None/NaN names, which have no .lower().
        name = raw_name.lower() if isinstance(raw_name, str) else ''

        # Common inorganic indicators in chemical names
        inorganic_keywords = (
            'chloride', 'sulfate', 'phosphate', 'nitrate', 'oxide', 'hydroxide',
            'carbonate', 'bicarbonate', 'sulfide', 'fluoride', 'bromide', 'iodide',
            'sodium', 'potassium', 'calcium', 'magnesium', 'aluminum', 'iron',
            'zinc', 'copper', 'lead', 'mercury', 'silver', 'gold', 'platinum',
        )
        if any(keyword in name for keyword in inorganic_keywords):
            return True

        # Small organic compounds that the molecular-weight rule below would
        # otherwise flag (e.g. methane) are excluded by name first.
        organic_indicators = (
            'methane', 'ethane', 'propane', 'butane', 'methanol', 'ethanol',
            'propanol', 'butanol', 'formaldehyde', 'acetaldehyde', 'acetone',
            'benzene', 'toluene', 'xylene', 'phenol',
        )
        if any(indicator in name for indicator in organic_indicators):
            return False

        # Very low molecular weight (< 30) might indicate an inorganic species.
        try:
            molecular_weight = float(episuite_result.get('molecular_weight'))
        except (TypeError, ValueError):
            # Missing or non-numeric molecular weight: no basis for the rule.
            return False
        return molecular_weight < 30

    def _convert_log_kow_to_kow(self, log_kow):
        """Return 10 raised to ``log_kow``, or None when the input is None or not numeric."""
        if log_kow is None:
            return None
        try:
            exponent = float(log_kow)
        except (ValueError, TypeError):
            return None
        return 10 ** exponent

    def _convert_log_koc_to_koc(self, log_koc):
        """Return 10 raised to ``log_koc``, or None when the input is None or not numeric."""
        if log_koc is None:
            return None
        try:
            exponent = float(log_koc)
        except (ValueError, TypeError):
            return None
        return 10 ** exponent

    def _convert_mmhg_to_pa(self, mmhg):
        """Convert a pressure from mmHg to Pa; None or non-numeric input yields None."""
        if mmhg is None:
            return None
        try:
            pressure_mmhg = float(mmhg)
        except (ValueError, TypeError):
            return None
        return pressure_mmhg * 133.322

    def _convert_mg_l_to_g_l(self, mg_l):
        """Convert a concentration from mg/L to g/L; None or non-numeric input yields None."""
        if mg_l is None:
            return None
        try:
            concentration_mg_l = float(mg_l)
        except (ValueError, TypeError):
            return None
        return concentration_mg_l / 1000

    def _convert_hours_to_days(self, hours):
        """Convert a duration from hours to days; None or non-numeric input yields None."""
        if hours is None:
            return None
        try:
            duration_hours = float(hours)
        except (ValueError, TypeError):
            return None
        return duration_hours / 24

    def add_chemicals_from_cas_list(self, cas_list: List[str]) -> Dict[str, int]:
        """
        Look up a list of CAS numbers through EPI Suite and add every chemical found.

        All CAS numbers are searched in one call, the identifiers found are
        submitted for prediction in one call, and each returned prediction is
        paired by position with its identifier and written to the template.

        Args:
            cas_list: List of CAS numbers to look up

        Returns:
            Dictionary mapping CAS numbers to row numbers where they were added.
            A chemical whose EPI Suite CAS cannot be matched back to an entry of
            ``cas_list`` is still written to the sheet but is absent from this
            dictionary.
        """
        from .utils import search_episuite_by_cas, submit_to_episuite

        rows_by_cas = {}

        # One search request covers every CAS number.
        identifiers = search_episuite_by_cas(cas_list)
        if not identifiers:
            return rows_by_cas

        # One submission covers every identifier that was found.
        predictions = submit_to_episuite(identifiers)

        # Expected shape: a tuple whose first element is the list of result objects.
        if not predictions or not isinstance(predictions, tuple) or len(predictions) <= 0:
            return rows_by_cas

        # Pair results with identifiers by position; surplus results are ignored.
        for prediction, identifier in zip(predictions[0], identifiers):
            flattened = self._convert_episuite_result_to_dict(prediction, identifier)
            added_row = self.add_chemical_from_episuite(flattened)

            # Report the row under the CAS string the caller originally supplied.
            original_cas = self._find_original_cas(identifier.cas, cas_list)
            if original_cas:
                rows_by_cas[original_cas] = added_row
                logger.info(f"Successfully added {original_cas} at row {added_row}")

        return rows_by_cas

    def _find_original_cas(self, episuite_cas: str, original_cas_list: List[str]) -> Optional[str]:
        """
        Return the entry of ``original_cas_list`` that matches ``episuite_cas``.

        EPI Suite adds leading zeros, so both sides are compared after leading
        zeros are stripped from the first two CAS segments. The first matching
        entry is returned as the caller wrote it; None is returned when
        nothing matches.
        """
        def strip_leading_zeros(cas_number):
            """Drop leading zeros from the first two parts of a three-part CAS number."""
            segments = cas_number.split('-')
            if len(segments) != 3:
                # Not in the usual three-part form: compare it as-is.
                return cas_number
            first, second, check_digit = segments
            return f"{first.lstrip('0') or '0'}-{second.lstrip('0') or '0'}-{check_digit}"

        target = strip_leading_zeros(episuite_cas)
        return next(
            (candidate for candidate in original_cas_list
             if strip_leading_zeros(candidate) == target),
            None,
        )

    def _convert_episuite_result_to_dict(self, episuite_result, identifier) -> Dict[str, Any]:
        """
        Flatten an EPI Suite result object into the dictionary format that
        add_chemical_from_episuite consumes.

        Args:
            episuite_result: Result object for one chemical. Nested attributes
                are read defensively; anything missing becomes None.
            identifier: Search identifier supplying ``cas`` and ``name``.

        Returns:
            Dictionary keyed by the field names the ``_populate_*`` methods read.
        """

        def safe_get_value(obj, path: str):
            """Follow a dotted attribute path; return None as soon as a step is missing or None."""
            current = obj
            try:
                for part in path.split('.'):
                    current = getattr(current, part, None)
                    if current is None:
                        return None
            except (AttributeError, TypeError):
                return None
            return current

        # Biowin does not give a direct half-life, so a default of 30 days is
        # used for water. It is stored in HOURS because
        # _populate_basic_properties passes this field through
        # _convert_hours_to_days; storing the bare number 30 would end up as
        # 1.25 days in the sheet instead of the intended 30 days.
        default_water_half_life_hours = 30 * 24

        result_dict = {
            'cas': identifier.cas,
            'name': identifier.name or '',

            # Molecular weight from chemical properties
            'molecular_weight': safe_get_value(episuite_result, 'chemicalProperties.molecularWeight'),

            # Log Kow from LogKow response (using selectedValue which includes experimental data)
            'log_kow_estimated': safe_get_value(episuite_result, 'logKow.selectedValue.value'),

            # Log Koc from LogKoc response (using selectedValue which includes experimental data)
            'log_koc_estimated': safe_get_value(episuite_result, 'logKoc.selectedValue.value'),

            # Henry's law constant (using selectedValue which includes experimental data)
            'henrys_law_constant_estimated': safe_get_value(episuite_result, 'henrysLawConstant.selectedValue.value'),

            # Vapor pressure (using selectedValue which includes experimental data)
            'vapor_pressure_estimated': safe_get_value(episuite_result, 'vaporPressure.selectedValue.value'),

            # Water solubility from LogKow (using selectedValue which includes experimental data)
            'water_solubility_logkow_estimated': safe_get_value(episuite_result, 'waterSolubilityFromLogKow.selectedValue.value'),

            # Atmospheric half-life (using estimatedValue because no experimental data usually available)
            'atmospheric_half_life_estimated': safe_get_value(episuite_result, 'atmosphericHalfLife.estimatedValue.value'),

            # Water biodegradation half-life: default of 30 days, expressed in hours (see above)
            'water_biodegradation_half_life_unacclimated': default_water_half_life_hours,

            # Sediment and soil half-lives from fugacity model (in hours)
            'sediment_half_life_hours': self._safe_extract_fugacity_half_life(episuite_result, 'Sediment'),
            'soil_half_life_hours': self._safe_extract_fugacity_half_life(episuite_result, 'Soil'),

            # Bioconcentration factor (using default BCF estimate)
            'bioconcentration_factor': safe_get_value(episuite_result, 'bioconcentration.bioconcentrationFactor'),
        }
        return result_dict

    def _safe_extract_fugacity_half_life(self, episuite_result, compartment: str):
        """
        Read the half-life of one compartment from the fugacity model.

        Follows ``episuite_result.fugacityModel.model.<compartment>[0].HalfLife``
        and returns None if any step along that path is missing, empty or falsy.

        Args:
            episuite_result: EPI Suite result object for one chemical
            compartment: Attribute name of the compartment (Sediment or Soil)
        """
        try:
            fugacity = getattr(episuite_result, 'fugacityModel', None)
            if not fugacity:
                return None

            model = getattr(fugacity, 'model', None)
            if not model:
                return None

            entries = getattr(model, compartment, None)
            if not entries or not len(entries) > 0:
                return None

            first_entry = entries[0]
            if not first_entry:
                return None
            return first_entry.HalfLife
        except (AttributeError, IndexError, TypeError):
            return None

    def save_excel(self, output_path: str):
        """
        Write the populated workbook to disk.

        Args:
            output_path: Path for the output Excel file

        Raises:
            ValueError: If no workbook is loaded.
        """
        workbook = self.workbook
        if not workbook:
            raise ValueError("No workbook loaded")

        workbook.save(output_path)
        logger.info(f"Saved USEtox Excel file to {output_path}")

    def get_summary(self) -> Dict[str, Any]:
        """
        Summarise what has been written so far.

        Returns:
            An empty dict when no worksheet is loaded; otherwise the number of
            chemicals added, the next row to be written and the template path.
        """
        if not self.worksheet:
            return {}

        next_row = self.current_row
        return dict(
            chemicals_added=next_row - 6,  # data rows start at row 6
            current_row=next_row,
            template_path=self.template_path,
        )

    def get_summary_statistics(self) -> Dict[str, Any]:
        """
        Count filled and empty property cells across the populated rows.

        Every column listed in column_names_dict is inspected for each row
        from row 6 up to (not including) the current row. A cell counts as
        populated when its value is neither None nor an empty string.

        Returns:
            Dict with 'total_chemicals', 'properties_populated' and
            'missing_values'; all zero when no worksheet is loaded.
        """
        if not self.worksheet:
            return dict.fromkeys(
                ('total_chemicals', 'properties_populated', 'missing_values'), 0
            )

        filled = 0
        inspected = 0
        for row in range(6, self.current_row):
            for column_letter in column_names_dict.values():
                inspected += 1
                content = self.worksheet[f"{column_letter}{row}"].value
                if content is not None and content != "":
                    filled += 1

        return {
            'total_chemicals': self.current_row - 6,
            'properties_populated': filled,
            'missing_values': inspected - filled,
        }

    def validate_data(self) -> Dict[str, List[str]]:
        """
        Check the populated rows and collect warnings/errors.

        For every populated row a warning is recorded when the CAS number or
        the name is empty, or when the molecular weight is negative. A missing
        worksheet is reported as the only error.

        Returns:
            Dict with the lists 'warnings' and 'errors'.
        """
        report = {'warnings': [], 'errors': []}

        sheet = self.worksheet
        if not sheet:
            report['errors'].append("No worksheet loaded")
            return report

        found = report['warnings']
        for row in range(6, self.current_row):
            # Required identification fields
            cas_value = sheet[f"{column_names_dict['CAS RN']}{row}"].value
            name_value = sheet[f"{column_names_dict['Name']}{row}"].value
            if not cas_value:
                found.append(f"Row {row}: Missing CAS number")
            if not name_value:
                found.append(f"Row {row}: Missing chemical name")

            # Plausibility check on the molecular weight
            molecular_weight = sheet[f"{column_names_dict['MW']}{row}"].value
            if molecular_weight and molecular_weight < 0:
                found.append(f"Row {row}: Negative molecular weight")

        return report

    def get_excel_column_letter(self, property_name: str) -> Optional[str]:
        """
        Get the Excel column letter for a given property name.

        Args:
            property_name: Key to look up in column_names_dict

        Returns:
            The mapped value from column_names_dict, or None when the property
            name has no entry.
        """
        return column_names_dict.get(property_name)

    def get_data_source_analysis(self) -> Dict[str, int]:
        """
        Report how many chemicals came from each kind of data source.

        Simplified: data sources are not tracked during population, so every
        added chemical is counted as 'estimated' and the other counters stay 0.

        Returns:
            An empty dict when no worksheet is loaded; otherwise counts for
            'estimated', 'experimental' and 'manual'.
        """
        if not self.worksheet:
            return {}

        counts = dict.fromkeys(('estimated', 'experimental', 'manual'), 0)
        counts['estimated'] = self.current_row - 6  # data rows start at row 6
        return counts

    def populate_from_episuite_dataframe(self, episuite_df):
        """
        Populate the USEtox template from an EPI Suite DataFrame.

        Each DataFrame row is converted to a dictionary and added to the
        template through add_chemical_from_episuite, in DataFrame order.

        Args:
            episuite_df: pandas DataFrame with one chemical per row

        Returns:
            The input DataFrame, unchanged.

        Raises:
            ValueError: If ``episuite_df`` is not a pandas DataFrame.
        """
        import pandas as pd

        if not isinstance(episuite_df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # The row index is not needed; only the row contents are written.
        for _, row in episuite_df.iterrows():
            self.add_chemical_from_episuite(row.to_dict())

        return episuite_df

    def export_to_excel(self, output_path: str, include_headers: bool = True, include_original_template: bool = True):
        """
        Export the populated workbook to an Excel file.

        Delegates to save_excel.

        Args:
            output_path: Path for the output Excel file
            include_headers: Accepted but not used by this implementation
            include_original_template: Accepted but not used by this implementation
        """
        self.save_excel(output_path)

    @property
    def populated_df(self):
        """
        Placeholder for a DataFrame view of the populated sheet.

        Simplified: returns the string "populated" when a worksheet is loaded
        and None otherwise; no DataFrame is built.
        """
        if self.worksheet:
            return "populated"
        return None

populated_df property

Get populated DataFrame representation.

__init__(template_path=None)

Initialize USEtoxInput with template path.

Parameters:

Name Type Description Default
template_path Optional[str]

Path to USEtox template Excel file

None
Source code in src/pyepisuite/usetox_input.py
def __init__(self, template_path: Optional[str] = None):
    """
    Create a populator bound to a USEtox template workbook.

    Args:
        template_path: Path to the USEtox template Excel file. When it is
            omitted or empty, the default template path is used.
    """
    # Workbook handles start out unset; data rows begin at row 6.
    self.current_row = 6
    self.worksheet = None
    self.workbook = None

    if template_path:
        self.template_path = template_path
    else:
        self.template_path = self._get_default_template_path()

    # Open the workbook immediately so later calls can write into it.
    self._load_template()

add_chemical_from_episuite(episuite_result)

Add a single chemical from EPI Suite results to the Excel template.

Parameters:

Name Type Description Default
episuite_result Dict[str, Any]

Dictionary containing EPI Suite results for one chemical

required

Returns:

Type Description
int

Row number where the chemical was added

Source code in src/pyepisuite/usetox_input.py
def add_chemical_from_episuite(self, episuite_result: Dict[str, Any]) -> int:
    """
    Write one chemical's EPI Suite results into the next free template row.

    Args:
        episuite_result: Dictionary containing EPI Suite results for one chemical

    Returns:
        Row number where the chemical was added

    Raises:
        ValueError: If no worksheet is loaded.
    """
    if not self.worksheet:
        raise ValueError("Template not loaded")

    target_row = self.current_row

    # Column A carries the running substance number (row 6 -> 1, row 7 -> 2, ...).
    self.worksheet[f'A{target_row}'] = target_row - 5

    # Each populator fills its own group of columns for this row.
    populators = (
        self._populate_basic_properties,
        self._populate_baf_properties,
        self._populate_toxicity_properties,
    )
    for populate in populators:
        populate(episuite_result, target_row)

    # Advance the cursor so the next chemical lands on the following row.
    self.current_row = self.current_row + 1

    chemical_name = episuite_result.get('name', 'Unknown')
    logger.info(f"Added chemical {chemical_name} at row {target_row}")
    return target_row

add_chemicals_from_cas_list(cas_list)

Add multiple chemicals by calling EPI Suite API for each CAS number.

Parameters:

Name Type Description Default
cas_list List[str]

List of CAS numbers to look up

required

Returns:

Type Description
Dict[str, int]

Dictionary mapping CAS numbers to row numbers where they were added

Source code in src/pyepisuite/usetox_input.py
def add_chemicals_from_cas_list(self, cas_list: List[str]) -> Dict[str, int]:
    """
    Look up a list of CAS numbers through EPI Suite and add every chemical found.

    All CAS numbers are searched in one call, the identifiers found are
    submitted for prediction in one call, and each returned prediction is
    paired by position with its identifier and written to the template.

    Args:
        cas_list: List of CAS numbers to look up

    Returns:
        Dictionary mapping CAS numbers to row numbers where they were added.
        A chemical whose EPI Suite CAS cannot be matched back to an entry of
        ``cas_list`` is still written to the sheet but is absent from this
        dictionary.
    """
    from .utils import search_episuite_by_cas, submit_to_episuite

    rows_by_cas = {}

    # One search request covers every CAS number.
    identifiers = search_episuite_by_cas(cas_list)
    if not identifiers:
        return rows_by_cas

    # One submission covers every identifier that was found.
    predictions = submit_to_episuite(identifiers)

    # Expected shape: a tuple whose first element is the list of result objects.
    if not predictions or not isinstance(predictions, tuple) or len(predictions) <= 0:
        return rows_by_cas

    # Pair results with identifiers by position; surplus results are ignored.
    for prediction, identifier in zip(predictions[0], identifiers):
        flattened = self._convert_episuite_result_to_dict(prediction, identifier)
        added_row = self.add_chemical_from_episuite(flattened)

        # Report the row under the CAS string the caller originally supplied.
        original_cas = self._find_original_cas(identifier.cas, cas_list)
        if original_cas:
            rows_by_cas[original_cas] = added_row
            logger.info(f"Successfully added {original_cas} at row {added_row}")

    return rows_by_cas

export_to_excel(output_path, include_headers=True, include_original_template=True)

Export to Excel with additional options.

Source code in src/pyepisuite/usetox_input.py
def export_to_excel(self, output_path: str, include_headers: bool = True, include_original_template: bool = True):
    """
    Export the populated workbook to an Excel file.

    Delegates to save_excel.

    Args:
        output_path: Path for the output Excel file
        include_headers: Accepted but not used by this implementation
        include_original_template: Accepted but not used by this implementation
    """
    self.save_excel(output_path)

get_data_source_analysis()

Analyze data sources in the populated sheet.

Source code in src/pyepisuite/usetox_input.py
def get_data_source_analysis(self) -> Dict[str, int]:
    """
    Report how many chemicals came from each kind of data source.

    Simplified: data sources are not tracked during population, so every
    added chemical is counted as 'estimated' and the other counters stay 0.

    Returns:
        An empty dict when no worksheet is loaded; otherwise counts for
        'estimated', 'experimental' and 'manual'.
    """
    if not self.worksheet:
        return {}

    counts = dict.fromkeys(('estimated', 'experimental', 'manual'), 0)
    counts['estimated'] = self.current_row - 6  # data rows start at row 6
    return counts

get_excel_column_letter(property_name)

Get the Excel column letter for a given property name.

Source code in src/pyepisuite/usetox_input.py
def get_excel_column_letter(self, property_name: str) -> Optional[str]:
    """
    Get the Excel column letter for a given property name.

    Args:
        property_name: Key to look up in column_names_dict

    Returns:
        The mapped value from column_names_dict, or None when the property
        name has no entry.
    """
    return column_names_dict.get(property_name)

get_summary()

Get summary of populated data.

Source code in src/pyepisuite/usetox_input.py
def get_summary(self) -> Dict[str, Any]:
    """
    Summarise what has been written so far.

    Returns:
        An empty dict when no worksheet is loaded; otherwise the number of
        chemicals added, the next row to be written and the template path.
    """
    if not self.worksheet:
        return {}

    next_row = self.current_row
    return dict(
        chemicals_added=next_row - 6,  # data rows start at row 6
        current_row=next_row,
        template_path=self.template_path,
    )

get_summary_statistics()

Get summary statistics of populated data.

Source code in src/pyepisuite/usetox_input.py
def get_summary_statistics(self) -> Dict[str, Any]:
    """
    Count filled and empty property cells across the populated rows.

    Every column listed in column_names_dict is inspected for each row
    from row 6 up to (not including) the current row. A cell counts as
    populated when its value is neither None nor an empty string.

    Returns:
        Dict with 'total_chemicals', 'properties_populated' and
        'missing_values'; all zero when no worksheet is loaded.
    """
    if not self.worksheet:
        return dict.fromkeys(
            ('total_chemicals', 'properties_populated', 'missing_values'), 0
        )

    filled = 0
    inspected = 0
    for row in range(6, self.current_row):
        for column_letter in column_names_dict.values():
            inspected += 1
            content = self.worksheet[f"{column_letter}{row}"].value
            if content is not None and content != "":
                filled += 1

    return {
        'total_chemicals': self.current_row - 6,
        'properties_populated': filled,
        'missing_values': inspected - filled,
    }

populate_from_episuite_dataframe(episuite_df)

Populate USEtox template from EPI Suite DataFrame.

Source code in src/pyepisuite/usetox_input.py
def populate_from_episuite_dataframe(self, episuite_df):
    """
    Populate the USEtox template from an EPI Suite DataFrame.

    Each DataFrame row is converted to a dictionary and added to the
    template through add_chemical_from_episuite, in DataFrame order.

    Args:
        episuite_df: pandas DataFrame with one chemical per row

    Returns:
        The input DataFrame, unchanged.

    Raises:
        ValueError: If ``episuite_df`` is not a pandas DataFrame.
    """
    import pandas as pd

    if not isinstance(episuite_df, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame")

    # The row index is not needed; only the row contents are written.
    for _, row in episuite_df.iterrows():
        self.add_chemical_from_episuite(row.to_dict())

    return episuite_df

save_excel(output_path)

Save the populated Excel file.

Parameters:

Name Type Description Default
output_path str

Path for the output Excel file

required
Source code in src/pyepisuite/usetox_input.py
def save_excel(self, output_path: str):
    """
    Write the populated workbook to disk.

    Args:
        output_path: Path for the output Excel file

    Raises:
        ValueError: If no workbook is loaded.
    """
    workbook = self.workbook
    if not workbook:
        raise ValueError("No workbook loaded")

    workbook.save(output_path)
    logger.info(f"Saved USEtox Excel file to {output_path}")

validate_data()

Validate the populated data and return warnings/errors.

Source code in src/pyepisuite/usetox_input.py
def validate_data(self) -> Dict[str, List[str]]:
    """Validate the populated data and return warnings/errors.

    Walks every row from row 6 up to (but not including)
    ``self.current_row`` and checks:

    * the CAS number cell is filled,
    * the chemical name cell is filled,
    * the molecular weight, when present, is numeric and not negative.

    A molecular weight that cannot be interpreted as a number is reported
    as a warning instead of aborting validation with a ``TypeError``.

    Returns:
        Dict with two keys, ``'warnings'`` and ``'errors'``, each mapping
        to a list of human-readable messages. ``'errors'`` contains a
        single entry when no worksheet is loaded; otherwise it is empty.
    """
    warnings: List[str] = []
    errors: List[str] = []

    if not self.worksheet:
        errors.append("No worksheet loaded")
        return {'warnings': warnings, 'errors': errors}

    # Column letters are loop-invariant, so look them up once.
    cas_col = column_names_dict['CAS RN']
    name_col = column_names_dict['Name']
    mw_col = column_names_dict['MW']

    for row_num in range(6, self.current_row):
        # Check for required fields
        if not self.worksheet[f"{cas_col}{row_num}"].value:
            warnings.append(f"Row {row_num}: Missing CAS number")
        if not self.worksheet[f"{name_col}{row_num}"].value:
            warnings.append(f"Row {row_num}: Missing chemical name")

        # Check for unrealistic values
        mw_value = self.worksheet[f"{mw_col}{row_num}"].value
        if mw_value is None or mw_value == "":
            continue  # an empty MW cell is not validated here

        try:
            mw_number = float(mw_value)
        except (TypeError, ValueError):
            # Comparing a non-numeric cell against 0 would raise; report it.
            warnings.append(f"Row {row_num}: Non-numeric molecular weight")
            continue

        if mw_number < 0:
            warnings.append(f"Row {row_num}: Negative molecular weight")

    return {'warnings': warnings, 'errors': errors}

pyepisuite.usetox_input.create_usetox_input_from_episuite(episuite_df, output_path, template_path=None, experimental_data=None)

Convenience function to create USEtox input from EPI Suite DataFrame results.

Parameters:

Name Type Description Default
episuite_df Any

DataFrame with EPI Suite results (from episuite_to_dataframe)

required
output_path str

Path for output Excel file

required
template_path Optional[str]

Optional custom template path

None
experimental_data Optional[Dict[str, Dict[str, Any]]]

Optional dict of experimental data to override estimates

None

Returns:

Type Description
USEtoxInput

USEtoxInput instance with populated data

Example

>>> from pyepisuite.utils import search_episuite_by_cas, submit_to_episuite
>>> from pyepisuite.dataframe_utils import episuite_to_dataframe
>>> ids = search_episuite_by_cas(['50-00-0', '67-56-1'])
>>> epi_results, _ = submit_to_episuite(ids)
>>> epi_df = episuite_to_dataframe(epi_results)
>>> usetox_input = create_usetox_input_from_episuite(
...     episuite_df=epi_df,
...     output_path='usetox_results.xlsx'
... )

Source code in src/pyepisuite/usetox_input.py
def create_usetox_input_from_episuite(episuite_df: Any,
                                     output_path: str,
                                     template_path: Optional[str] = None,
                                     experimental_data: Optional[Dict[str, Dict[str, Any]]] = None) -> USEtoxInput:
    """
    Convenience function to create USEtox input from EPI Suite DataFrame results.

    Every DataFrame row is added to a fresh ``USEtoxInput``. When
    ``experimental_data`` has an entry keyed by the row's ``'cas'`` value,
    each property in that entry whose name is a key of
    ``column_names_dict`` overwrites the corresponding cell of the row.
    The workbook is then saved to ``output_path`` and a short summary is
    printed.

    Args:
        episuite_df: DataFrame with EPI Suite results (from episuite_to_dataframe)
        output_path: Path for output Excel file
        template_path: Optional custom template path
        experimental_data: Optional dict of experimental data to override
            estimates, keyed by CAS number and then by property name

    Returns:
        USEtoxInput instance with populated data

    Raises:
        Exception: Any error raised while building or saving the workbook
            is logged and re-raised unchanged.

    Example:
        >>> from pyepisuite.utils import search_episuite_by_cas, submit_to_episuite
        >>> from pyepisuite.dataframe_utils import episuite_to_dataframe
        >>> ids = search_episuite_by_cas(['50-00-0', '67-56-1'])
        >>> epi_results, _ = submit_to_episuite(ids)
        >>> epi_df = episuite_to_dataframe(epi_results)
        >>> usetox_input = create_usetox_input_from_episuite(
        ...     episuite_df=epi_df,
        ...     output_path='usetox_results.xlsx'
        ... )
    """
    try:
        # Create USEtoxInput instance
        usetox_input = USEtoxInput(template_path)

        # Process each chemical in the DataFrame
        chemicals_added = 0
        for _, row in episuite_df.iterrows():
            # Convert DataFrame row to dictionary format expected by USEtoxInput
            episuite_dict = row.to_dict()

            # Add chemical to USEtox template
            row_num = usetox_input.add_chemical_from_episuite(episuite_dict)
            if row_num <= 0:
                # A non-positive row number is treated as "nothing was
                # written" (it is not counted as added), so there is no
                # valid worksheet row to apply overrides to.
                continue
            chemicals_added += 1

            # Apply experimental data if provided
            if not experimental_data or 'cas' not in episuite_dict:
                continue
            exp_data = experimental_data.get(episuite_dict['cas'])
            if not exp_data:
                continue

            # Override with experimental data; unknown property names are ignored
            for prop, value in exp_data.items():
                col_letter = column_names_dict.get(prop)
                if col_letter is not None:
                    usetox_input.worksheet[f"{col_letter}{row_num}"] = value

        # Save to Excel
        usetox_input.save_excel(output_path)

        # Print summary
        summary = usetox_input.get_summary()
        logger.info(f"USEtox input summary: {summary}")
        print(f"✅ Successfully created USEtox input with {chemicals_added} chemicals")
        print(f"📋 Excel file exported to: {output_path}")

        return usetox_input

    except Exception as e:
        logger.error(f"Error creating USEtox input from EPI Suite DataFrame: {e}")
        raise