import os, sys, re, io, argparse
from tqdm import tqdm
import csv
import psycopg2
import pysam
import json

# Data Access Tools
from tools.get_common import get_common_from_taxid
from tools.get_taxid import get_taxid_from_accession
from tools.get_latin import get_latin_from_taxid
from tools.util import run_cmd, file_to_ref

# Shared database connection handed to the taxonomy lookup helpers below.
# The host must be a string literal: written bare, Python parses it as an
# attribute chain on an undefined name and raises NameError at start-up.
# NOTE(review): credentials are hard-coded in source; consider moving them
# to environment variables or a config file kept out of version control.
conn = psycopg2.connect(
    host="lovelace.cluster.earlham.edu",
    dbname="ncbi",
    user="fieldsci",
    password="skalanes",
)


def count_reads_per_accession(sam_lines):
    """Tally alignment records per reference name.

    Args:
        sam_lines: Iterable of text lines as printed by ``samtools view``.

    Returns:
        A ``collections.Counter`` mapping the third whitespace-separated
        field of each line (the reference name column in SAM output) to the
        number of lines carrying it. Keys appear in first-seen order.
        Lines with fewer than three fields are ignored.
    """
    # Imported locally so the helper does not depend on the file-level imports.
    from collections import Counter

    counts = Counter()
    for line in sam_lines:
        fields = line.split()
        # Index 2 is read below, so at least three fields are required;
        # blank lines (e.g. the trailing one after the final newline) and
        # truncated records are skipped instead of raising IndexError.
        if len(fields) > 2:
            counts[fields[2]] += 1
    return counts


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <alignment-file>")

    file = sys.argv[1]
    # NOTE(review): the path is interpolated into the command string as-is;
    # whether paths containing spaces or shell metacharacters are handled
    # depends on how run_cmd executes the command -- confirm.
    output = run_cmd(f"samtools view {file}")
    data = count_reads_per_accession(output.split('\n'))

    try:
        for acc, count in data.items():
            taxid = get_taxid_from_accession(acc, conn)

            # Names stay "Unknown" unless the accession resolved to a taxid.
            common_name = "Unknown"
            latin_name = "Unknown"

            if taxid != "Unknown":
                common_name = get_common_from_taxid(taxid, conn)
                latin_name = get_latin_from_taxid(taxid, conn)

            print(f"{acc} ({count} reads)")
            print(f"   TaxID: {taxid}")
            print(f"   Latin Name: {latin_name}, Common Name: {common_name}")
    finally:
        # Release the database connection even if a lookup fails midway.
        conn.close()
        
