Snakefile "ymp/rules/00_download.rules"

localrules: download_file_ftp
rule download_file_ftp:
    """
    Downloads remote file using *wget*
    """
    message:
        "Downloading {params.url}"
    output:
        "{:dir.downloads:}/{protocol}/{server}/{path}"
    log:
        "{:dir.downloads:}/{protocol}/{server}/{path}.log"
    wildcard_constraints:
        protocol="(ftp|ftps)",
        server="[^/]+"
    params:
        url="{protocol}://{server}/{path}"
    conda: "wget.yml"
    shell: """
    wget  "{params.url}" -O "{output[0]}" > {log} 2>&1
    """

rule download_file_http:
    """
    Downloads remote file using internal downloader
    """
    message:
        "Downloading {params.url}"
    output:
        "{:dir.downloads:}/{protocol}/{server}/{path}"
    log:
        "{:dir.downloads:}/{protocol}/{server}/{path}.log"
    wildcard_constraints:
        protocol="(http|https)",
        server="[^/]+"
    params:
        url="{protocol}://{server}/{path}"
    run:
        from ymp.download import FileDownloader
        FileDownloader(loglevel=0).get(params.url, output[0])


with Stage("references") as S:
    S.doc("""
    This is a "virtual" stage. It does not process read data, but comprises
    rules used for reference provisioning.
    """)
    localrules: prepare_reference
    rule prepare_reference:
        """
        Provisions files in ``<reference_dir>/<reference_name>``

        - Creates symlinks to downloaded references
        - Compresses references provided uncompressed upstream
        - Connects files requested by stages with downloaded files and unpacked archives
        """
        message:
            "Preparing {output}"
        input:
            files = lambda wc: ymp.get_config().ref[wc.refname].get_file(wc.path)
        output:
            "{:dir.references:}/{refname}/{path}"
        wildcard_constraints:
            path = "(?!_unpacked_).*",
            refname = str("({})".format("|".join(ymp.get_config().ref)))
        run:
            if isinstance(input.files, str):
                infile = input.files
            else:
                infiles = [fn for fn in input.files if fn.endswith(wildcards.path)]
                if len(infiles) == 1:
                    infile = infiles[0]
                elif not infiles:
                    raise RuleException(
                        "No reference file found matching path '{}' in reference '{}'"
                        "\nCandidates: {}"
                        "".format(wildcards.path, wildcards.refname, input.files)
                    )
                else:
                    raise RuleException(
                        "Ambiguous reference file in reference '{}' for path '{}': '{}'"
                        "".format(wildcards.refname, wildcards.path, infiles)
                    )

            if (wildcards.path.endswith(".gz")
                and not infile.endswith(".gz")):
                    shell("gzip -c '{infile}' > '{output}'")
            elif (not wildcards.path.endswith(".gz")
                  and infile.endswith(".gz")):
                    shell("gunzip -c '{infile}' > '{output}'")
            else:
                outdir = os.path.dirname(output[0])
                input_relpath = os.path.relpath(infile, outdir)
                os.symlink(input_relpath, output[0])


    localrules: unpack_archive
    rule unpack_archive:
        """
        Template rule for unpacking references provisioned upstream as archive.
        """
        message:
            "Unpacking {input.tar} into {params.prefix}"
        shell: """
        rm -rf {params.prefix}
        mkdir -p {params.prefix}
        tar -xf '{input.tar}' --strip-components {params.strip} -C '{params.prefix}'
        """
    ruleorder: unpack_archive > prepare_reference

    for ref in ymp.get_config().ref.values():
        for unpack_rule in ref.make_unpack_rules(workflow._rules['unpack_archive']):
            unpack_rule