Snakefile "ymp/rules/metaphlan2.rules"

# requires bowtie2

Env(name="metaphlan2", base="bioconda", packages=[
    "metaphlan2 =2.6*"  # newer versions clobber site-libraries
])


with Stage("metaphlan2") as S:
    S.doc("""
    Assess metagenome community composition using Metaphlan 2
    """)
    metaphlan_db = "{:ref.metaphlan2.dir:}/db_v20/mpa_v20_m200"
    BT2IDX_SUFFIXES = "1.bt2 2.bt2 3.bt2 4.bt2 rev.1.bt2 rev.2.bt2".split()
    rule metaphlan2_map:
        """
        Align reads to Metaphlan's custom reference database.
        """
        message:
            "Metaphlan2: Mapping reads with Bowtie2"
        input:
            fq = "{:prev:}/{:target:}.{:pairnames:}.fq.gz",
            index = [".".join((metaphlan_db, ext)) for ext in BT2IDX_SUFFIXES]
        output:
            "{:this:}/{sample}.bam"
        log:
            "{:this:}/{sample}.bam.log"
        conda:
            "bowtie2"
        threads:
            16
        params:
            bt2_base = metaphlan_db,
        resources:
            mem = "128g",
        shell: """
        bowtie2 \
        --threads {threads} \
        -x {params.bt2_base} \
        --no-unal \
        --very-sensitive \
        -U {input.fq[0]} -U {input.fq[1]} \
        2>{log} \
        | samtools view -b -o {output} -
        """

    localrules: metaphlan2
    rule metaphlan2:
        """ 
        Computes community profile from mapped reads and Metaphlan's
        custom reference database.  
        """
        message:
            "Metaphlan2: Computing community profile"
        input:
            bam = "{:this:}/{sample}.bam",
            pkl = metaphlan_db + ".pkl"
        output:
            "{:this:}/{sample}_profile.txt"
        log:
            "{:this:}/{sample}_profile.log"
        conda:
            "metaphlan2"
        threads:
            1
        shell: """
        samtools view {input.bam} | \
        metaphlan2.py \
        --mpa_pkl {input.pkl} \
        --input_type sam \
        -o {output} \
        --nproc {threads} \
        --sample_id {wildcards.sample} \
        >{log} 2>&1
        """

    localrules: metaphlan2_merge
    rule metaphlan2_merge:
        """
        Merges Metaphlan community profiles.
        """
        message:
            "Metaphlan2: Merging community profiles"
        input:
            "{:this:}/{:runs:}_profile.txt"
        output:
            "{:this:}/merged_abundance_table.txt"
        log:
            "{:this:}/merged_abundance_table.txt.log"
        conda:
            "metaphlan2"
        threads:
            1
        shell: """
        merge_metaphlan_tables.py {input} > {output} 2>{log}
        """