Source code for pylhc_submitter.submitter.mask

"""
Mask Resolver
-------------

This module provides functionality to resolve and write script masks for ``HTCondor`` jobs
submission.
"""

from __future__ import annotations

import logging
import re
from pathlib import Path
from typing import TYPE_CHECKING

from pylhc_submitter.constants.job_submitter import COLUMN_JOB_DIRECTORY, COLUMN_JOB_FILE

if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

    import pandas as pd
    from numpy.typing import ArrayLike

LOG = logging.getLogger(__name__)



[docs]
def create_job_scripts_from_mask(
    job_df: pd.DataFrame, maskfile: Path, replace_keys: dict, file_ext: str
) -> pd.DataFrame:
    """
    Takes path to mask file, list of parameter to be replaced and pandas dataframe containg per job
    the job directory where processed mask is to be put, and columns containing the parameter values
    with column named like replace parameters. Job directories have to be created beforehand.
    Processed (madx) mask has the same filename as mask but with the given file extension.
    Input Dataframe is returned with additional column containing path to the processed script
    files.

    Args:
        job_df (pd.DataFrame): Job parameters as defined in description.
        maskfile: `Path` object to the mask file.
        replace_keys: keys to be replaced (must correspond to columns in ``job_df``).
        file_ext: file extention to use (defaults to **madx**).

    Returns:
        The provided ``job_df`` but with added path to the scripts.
    """
    with maskfile.open("r") as mfile:
        template = mfile.read()

    jobname = maskfile.with_suffix("").name
    jobs = [None] * len(job_df)
    for idx, (jobid, values) in enumerate(job_df.iterrows()):
        jobfile_fullpath = (Path(values[COLUMN_JOB_DIRECTORY]) / jobname).with_suffix(file_ext)

        with jobfile_fullpath.open("w") as job_file:
            job_file.write(template % dict(zip(replace_keys, values[list(replace_keys)])))
        jobs[idx] = jobfile_fullpath.name
    job_df[COLUMN_JOB_FILE] = jobs
    return job_df




[docs]
def find_named_variables_in_mask(mask: str) -> set[str]:
    """Find all variable-names in the mask."""
    return set(re.findall(r"%\((\w+)\)", mask))




[docs]
def check_percentage_signs_in_mask(mask: str) -> None:
    """Checks for '%' in the mask, that are not replacement variables."""
    cleaned_mask = re.sub(r"%\((\w+)\)", "", mask)
    n_signs = cleaned_mask.count("%")
    if n_signs == 0:
        return

    # Help the user find the %
    for idx, line in enumerate(cleaned_mask.split("\n")):
        if "%" in line:
            positions = [str(i) for i, char in enumerate(line) if char == "%"]
            LOG.error(f"Problematic '%' sign(s) in line {idx}, pos {' ,'.join(positions)}.")
    raise KeyError(f"{n_signs} problematic '%' signs found in template. Please remove.")




[docs]
def generate_jobdf_index(
    old_df: pd.DataFrame, jobid_mask: str, keys: Sequence[str], values: ArrayLike
) -> list[str] | Iterable[int]:
    """Generates index for jobdf from mask for job_id naming.

    Args:
        old_df (pd.DataFrame): Existing jobdf.
        jobid_mask (str): Mask for naming the jobs.
        keys (Sequence[str]): Keys to be replaced in the mask.
        values (np.array_like): Values-Grid to be replaced in the mask.

    Returns:
        List[str]: Index for jobdf, either list of strings (the filled jobid_masks) or integer-range.
    """
    if not jobid_mask:
        # Use integer-range as index, if no mask is given
        # Start with last index if old_df is not None.
        nold = len(old_df.index) if old_df is not None else 0
        start = nold - 1 if nold > 0 else 0
        return range(start, start + values.shape[0])

    # Fill job-id mask
    return [jobid_mask % dict(zip(keys, v)) for v in values]




[docs]
def is_mask_file(mask: str) -> bool:
    """Check if given string points to a file."""
    try:
        return Path(mask).is_file()
    except OSError:
        return False




[docs]
def is_mask_string(mask: str) -> bool:
    """Checks that given string does not point to a file."""
    return not is_mask_file(mask)



if __name__ == "__main__":
    raise OSError(f"{__file__} is not supposed to run as main.")