import itertools
import pandas as pd
import urllib
import pandas as pd
from pandas import DataFrame
import numpy as np
def tRNADB_CE(url):
    """
    Get the tRNA genes count from tRNADB-CE database

    Args:
        url (string): a url to anticodon table for organism from tRNADB_CE database (http://trna.ie.niigata-u.ac.jp/cgi-bin/trnadb/index.cgi)

    Returns:
        A dictionary of each anticodon and its gene count

    Raises:
        ValueError if the URL does not contain "http://trna.ie.niigata-u.ac.jp"
        ValueError if no anticodon is left after parsing the page

    Note:
        Anticodons are reported in RNA letters (T is replaced by U). The
        anticodons listed in ``A_to_I_anticodon`` get their first base
        rewritten to "I"; any other anticodon that still starts with "A"
        is dropped from the result.

    Example:
        > tRNADB_CE("http://trna.ie.niigata-u.ac.jp/cgi-bin/trnadb/whole_anticodon.cgi?GID=|CP001631&DTYPE=CMP&VTYPE=1")
        # Return an anticodon table of Acidimicrobium ferrooxidans DSM 10331
    """
    if "http://trna.ie.niigata-u.ac.jp" not in url:
        raise ValueError ("url is not includes in tRNADB-CE database")

    # The second table on the page is used; from row 3 onwards it is read as
    # four side-by-side (anticodon, count) column pairs.
    summary_table = pd.read_html(url)[1]
    column_pairs = ((3, 5), (6, 8), (9, 11), (12, 14))
    blocks = []
    for first_col, end_col in column_pairs:
        block = summary_table.iloc[3:, first_col:end_col].copy()
        block.columns = ["anti_codon", "count"]
        blocks.append(block)
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the four
    # blocks in the same order with a fresh index.
    all_trna_df = pd.concat(blocks, ignore_index=True, sort=False)

    # codons which will convert its A to I in Wobble pos.
    A_to_I_anticodon = {"AGA", "AAG", "AGG", "ACG", "AAU", "AGU", "AAC", "AGC", "ACC"}

    anti_codon_dict = {}
    for codon, count in zip(all_trna_df["anti_codon"], all_trna_df["count"]):
        codon = codon.replace("T", "U")
        if codon in A_to_I_anticodon:
            codon = "I" + codon[1:]
        anti_codon_dict[codon] = int(count)

    # remove anticodons that still carry an A in the first position
    anti_codon_dict = {
        anticodon: count
        for anticodon, count in anti_codon_dict.items()
        if not anticodon.startswith("A")
    }

    if len(anti_codon_dict) == 0:
        raise ValueError ("No tables were found in this URL")
    return anti_codon_dict
########################################################
#########################
########################################################
def GtRNAdb(url):
    """
    Get the tRNA genes count from GtRNAdb database

    Args:
        url (string): a url to anticodon table for organism from GtRNAdb database (http://gtrnadb.ucsc.edu/)

    Returns:
        A dictionary of each anticodon and its gene count, or None when none
        of the tried table offsets yields a dictionary free of amino-acid
        names.

    Raises:
        ValueError if the URL does not contain "gtrnadb.ucsc.edu"
        ValueError if no anticodon could be parsed from the page tables

    Note:
        The page tables are parsed starting at table 1, then 2, then 3; the
        first attempt whose result contains no three-letter amino-acid name
        as a key is returned. The "CAU" entry is the sum of every numeric
        count seen for CAU, including both halves of an "a/b" cell.

    Example:
        #example 1
        > GtRNAdb("http://gtrnadb.ucsc.edu/genomes/eukaryota/Hsapi19/")
    """
    # Validate before touching the network.
    if "gtrnadb.ucsc.edu" not in url:
        raise ValueError ("url is not includes in GtRNAdb database")

    amino_acid_list = ['Val', 'Ile', 'Leu', 'Glu', 'Gln', 'Asp', 'Asn', 'His', 'Trp', 'Phe', 'Tyr', 'Arg', 'Lys', 'Ser', 'Thr', 'Met', 'Ala', 'Gly', 'Pro', 'Cys']
    # codons which will convert its A to I in Wobble pos.
    A_to_I_anticodon = {"AGA", "AAG", "AGG", "ACG", "AAU", "AGU", "AAC", "AGC", "ACC"}

    # Download and parse the page once; every attempt below reuses it.
    tables = pd.read_html(url)

    for start_from_table in range(1, 4):
        anti_codon_dict = {}
        cau_counts = []  # raw count strings collected for the CAU anticodon

        for table in tables[start_from_table:]:
            for row_number in range(len(table.index)):
                # the first column of each row is skipped
                for cell in table.iloc[row_number, 1:]:
                    fields = str(cell).split(" ")
                    label = fields[0]
                    if not label.strip() or label.isdigit():
                        continue

                    anticodon = label.replace("T", "U")
                    if anticodon in A_to_I_anticodon:
                        anticodon = "I" + anticodon[1:]

                    if len(fields) < 2:
                        # label without a count
                        anti_codon_dict[anticodon] = 0
                        continue

                    raw_count = fields[1]
                    if anticodon == "CAU":
                        # "a/b" contributes both a and b; a plain cell
                        # contributes itself
                        cau_counts.extend(raw_count.split("/")[:2])
                    try:
                        anti_codon_dict[anticodon] = int(raw_count)
                    except ValueError:
                        anti_codon_dict[anticodon] = 0

        # remove anticodons that still carry an A in the first position
        anti_codon_dict = {
            anticodon: count
            for anticodon, count in anti_codon_dict.items()
            if not anticodon.startswith("A")
        }

        # Check for emptiness before the CAU total is forced in, otherwise
        # the dictionary can never be empty and this error is unreachable.
        if not anti_codon_dict:
            raise ValueError ("No tables were found in this URL")

        cau_total = 0
        for raw_count in cau_counts:
            try:
                cau_total += int(raw_count)
            except ValueError:
                continue
        anti_codon_dict["CAU"] = cau_total

        # Amino-acid names among the keys mean a non-anticodon table was
        # parsed; retry starting one table later.
        if not any(name in anti_codon_dict for name in amino_acid_list):
            return anti_codon_dict

    # No offset produced a clean table.
    return None
########################################################
#########################
########################################################
def dict_codon_anticodon(anti_codon_dict):
    """
    Identify all potential anticodons for each codon

    Args:
        anti_codon_dict (dict): a dictionary of all anticodons for an organism ( returned from ( tRNADB_CE() ) or ( GtRNAdb() ) )

    Returns:
        A dictionary mapping each codon (RNA letters) to a tuple of the
        anticodons present in anti_codon_dict that can read it; the
        Watson-Crick candidate comes first, the wobble candidate second.
        Codons with no matching anticodon are left out.

    Raises:
        ValueError if the length of anti_codon_dict equal to zero

    Example:
        > anticodon_dict = tRNADB_CE("http://trna.ie.niigata-u.ac.jp/cgi-bin/trnadb/whole_anticodon.cgi?GID=|CP001631&DTYPE=CMP&VTYPE=1")
        # Return an anticodon table of Acidimicrobium ferrooxidans DSM 10331
        > dict_codon_anticodon ( anticodon_dict )
    """
    if len(anti_codon_dict) == 0:
        raise ValueError ("The dictionary can not be with length equal to zero")

    complement = {"A": "U", "U": "A", "C": "G", "G": "C"}
    # anticodon first base that wobble-pairs with each codon third base
    wobble_partner = {"U": "G", "C": "I", "A": "I", "G": "U"}

    pairing = {}
    for first, second, third in itertools.product('AUGC', repeat=3):
        # Anticodon positions 2 and 3 pair with codon positions 2 and 1.
        tail = complement[second] + complement[first]
        # A codon ending in U takes "I" as its primary anticodon first base.
        primary_head = "I" if third == "U" else complement[third]
        candidates = (primary_head + tail, wobble_partner[third] + tail)

        found = tuple(
            anticodon for anticodon in candidates if anticodon in anti_codon_dict
        )
        if found:
            pairing[first + second + third] = found
    return pairing
########################################################
#########################
########################################################
def dict_codon_anticodon_count(dic_codon_anticodon,dict_tGCN_main, bacteria = False):
    """
    Merge anticodon-codon dictionary with each anticodon tRNA gene copy number.

    Args:
        dic_codon_anticodon (dict): A dictionary of all potential anticodons for each codon returned from dict_codon_anticodon() function.
        dict_tGCN_main (dict): a dictionary of all anticodons for an organism ( returned from ( tRNADB_CE() function ) or ( GtRNAdb() function ) )
        bacteria (bool): True If the tested organism is prokaryotic or archaea, else equal to False ( default = False )

    Returns:
        a merged dictionary of anticodon-codon with each anticodon tRNA gene copy number:
        {codon: {anticodon: gene copy number}}.

    Raises:
        ValueError if the length of dic_codon_anticodon equal to zero
        ValueError if the length of dict_tGCN_main equal to zero
        KeyError if an anticodon listed in dic_codon_anticodon is missing from dict_tGCN_main

    Example:
        > anticodon_dict = tRNADB_CE("http://trna.ie.niigata-u.ac.jp/cgi-bin/trnadb/whole_anticodon.cgi?GID=|CP001631&DTYPE=CMP&VTYPE=1")
        # Return an anticodon table of Acidimicrobium ferrooxidans DSM 10331
        > anticodon_codon = dict_codon_anticodon ( anticodon_dict )
        > dict_codon_anticodon_count(anticodon_codon,anticodon_dict,bacteria = True)
    """
    if len(dic_codon_anticodon) == 0 or len(dict_tGCN_main) == 0:
        raise ValueError ("Both dic_codon_anticodon or dict_tGCN_main can not be with length equal to zero")

    dict_anticodon_number = {
        codon: {anticodon: dict_tGCN_main[anticodon] for anticodon in anticodons}
        for codon, anticodons in dic_codon_anticodon.items()
    }

    # With bacteria=True the CAU anticodon is also attached to the AUA codon.
    # This is skipped when either AUA or CAU is absent.
    if bacteria and "AUA" in dict_anticodon_number and "CAU" in dict_tGCN_main:
        dict_anticodon_number["AUA"]["CAU"] = dict_tGCN_main["CAU"]

    return dict_anticodon_number
########################################################
#########################
########################################################
def abs_Wi(dict_anticodon_number, Sug,Sci,Sai,Sgu,Sal, bacteria=False):
    """
    Calculate the absolute adaptiveness values for each codon.

    For every codon the value is the sum, over its anticodons, of
    ``(1 - Sij) * gene copy number``, where Sij depends on the codon's third
    base and the anticodon's first base. Watson-Crick pairs (and U:I) use
    Sij = 0.

    Args:
        dict_anticodon_number (dict): a merged dictionary of anticodon-codon with each anticodon tRNA gene copy number returned from dict_codon_anticodon_count() function.
        Sug (int): the S-value for codon with (U) in the third position and (G) in first anticodon position
        Sci (int): the S-value for codon with (C) in the third position and (I) in first anticodon position
        Sai (int): the S-value for codon with (A) in the third position and (I) in first anticodon position
        Sgu (int): the S-value for codon with (G) in the third position and (U) in first anticodon position
        Sal (int): the S-value for codon with (A) in the third position and (L) in first anticodon position ( if bacteria = True)
        bacteria (bool): True If the tested organism is prokaryotic or archaea, else equal to False ( default = False )

    Returns:
        A dictionary of the absolute adaptiveness values for each codon

    Note:
        All Sij values (as Sug) should be a number from 0 to 1.
        Sal is only validated and used when bacteria is True; it weights the
        "CAU" anticodon attached to the "AUA" codon.
        The input dictionary is not modified.

    Raises:
        ValueError if the length of dict_anticodon_number equal to zero
        ValueError if any of Sij values are less than 0 or greater than 1

    Example:
        > anticodon_dict = tRNADB_CE("http://trna.ie.niigata-u.ac.jp/cgi-bin/trnadb/whole_anticodon.cgi?GID=|CP001631&DTYPE=CMP&VTYPE=1")
        # Return an anticodon table of Acidimicrobium ferrooxidans DSM 10331
        > anticodon_codon = dict_codon_anticodon ( anticodon_dict )
        > codon_anticodon_count = dict_codon_anticodon_count(anticodon_codon,anticodon_dict,bacteria = True)
        > abs_Wi(codon_anticodon_count, Sug=1, Sci=1, Sai=1, Sgu=1, Sal=1, bacteria=True)
    """
    if len(dict_anticodon_number) == 0:
        raise ValueError ("dict_anticodon_number can not be with length equal to zero")

    # `0 > s > 1` can never be true; test the closed interval explicitly.
    checked_values = [Sug, Sci, Sai, Sgu]
    if bacteria:
        checked_values.append(Sal)
    if any(not 0 <= value <= 1 for value in checked_values):
        raise ValueError ("All Sij values (as Sug) should be a number from 0 to 1")

    # Sij keyed by (codon third base, anticodon first base).
    # Sij of WC interaction is equal to 0
    selective_constraint = {
        ("U", "I"): 0, ("C", "G"): 0, ("A", "U"): 0, ("G", "C"): 0,
        ("U", "G"): Sug, ("C", "I"): Sci, ("A", "I"): Sai, ("G", "U"): Sgu,
    }

    Wi_dict = {}
    for i_codon, anticodon_counts in dict_anticodon_number.items():
        third_base = i_codon[2]
        if third_base not in ("U", "C", "A", "G"):
            continue

        weight = 0
        for anticodon, gene_count in anticodon_counts.items():
            if bacteria and i_codon == "AUA" and anticodon == "CAU":
                # Lysidine (L): CAU paired with the AUA codon, weighted by Sal.
                weight += (1 - Sal) * gene_count
                continue
            pair = (third_base, anticodon[:1])
            if pair in selective_constraint:
                weight += (1 - selective_constraint[pair]) * gene_count
            # Any other anticodon is ignored for this codon.
        Wi_dict[i_codon] = weight
    return Wi_dict
########################################################
#########################
########################################################
def rel_Wi(dict_abs_Wi,genetic_code_number=1):
    """
    Calculate the relative adaptiveness values for each codon.

    Each non-zero absolute value is divided by the largest absolute value.
    Codons that are missing or have a zero value receive the geometric mean
    of the non-zero relative values. Stop codons of the selected genetic
    code are excluded from the result.

    Args:
        dict_abs_Wi (dict): dictionary of the absolute adaptiveness values for each codon returned from abs_Wi() function
        genetic_code_number (int): default = 1, The Genetic Codes number described by NCBI (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)

    Returns:
        a dictionary of the relative adaptiveness values for each codon

    Raises:
        ValueError if the length of dict_abs_Wi equal to zero
        ValueError if no sense codon has a non-zero absolute value

    Example:
        > anticodon_dict = tRNADB_CE("http://trna.ie.niigata-u.ac.jp/cgi-bin/trnadb/whole_anticodon.cgi?GID=|CP001631&DTYPE=CMP&VTYPE=1")
        # Return an anticodon table of Acidimicrobium ferrooxidans DSM 10331
        > anticodon_codon = dict_codon_anticodon ( anticodon_dict )
        > codon_anticodon_count = dict_codon_anticodon_count(anticodon_codon,anticodon_dict,bacteria = True)
        > absolute_wi = abs_Wi(codon_anticodon_count, Sug=1, Sci=1, Sai=1, Sgu=1, Sal=1, bacteria=True)
        > rel_Wi(absolute_wi, 11)
    """
    if len(dict_abs_Wi) == 0:
        raise ValueError ("dict_abs_Wi can not be with length equal to zero")

    # Imported here so the module itself does not require Biopython.
    from Bio.Data import CodonTable
    genetic_table = CodonTable.unambiguous_dna_by_id[genetic_code_number]
    stop_codons = {codon.replace("T", "U") for codon in genetic_table.stop_codons}

    sense_codons = [
        "".join(bases)
        for bases in itertools.product('AUGC', repeat=3)
        if "".join(bases) not in stop_codons
    ]

    # NOTE(review): the maximum is taken over every supplied value, stop
    # codons included, exactly as before - confirm that is intended.
    max_Wi = max(dict_abs_Wi.values())

    # rel_wi if Wi != 0
    dict_rel_Wi = {
        codon: dict_abs_Wi[codon] / max_Wi
        for codon in sense_codons
        if dict_abs_Wi.get(codon, 0) != 0
    }

    wi_not_zero = [value for value in dict_rel_Wi.values() if value != 0]
    if not wi_not_zero:
        # Previously this fell through to a 0/0 geometric mean and returned
        # NaN for every codon.
        raise ValueError ("dict_abs_Wi must contain at least one non-zero value for a sense codon")

    # geometric mean of the non-zero relative values
    log_values = np.log(wi_not_zero)
    geo_rel_wi = np.exp(log_values.sum() / len(log_values))

    # rel_wi if Wi == 0 (or the codon is missing)
    for codon in sense_codons:
        if dict_rel_Wi.get(codon, 0) == 0:
            dict_rel_Wi[codon] = geo_rel_wi
    return dict_rel_Wi
########################################################
#########################
########################################################
def calc_Tai(DNA,rel_dict_wi,genetic_code_number=1):
    """
    Calculate the tRNA adaptation index of a gene.

    The sequence is read in frame from its first base; a trailing partial
    codon is ignored, a leading ATG is dropped, and stop codons of the
    selected genetic code are skipped. The index is the geometric mean of
    the relative adaptiveness values of the remaining codons.

    Args:
        DNA (str): a coding sequence of DNA ( should only contain A, C, T, and G )
        rel_dict_wi (dict): dictionary of the relative adaptiveness values for each codon returned from rel_Wi() function
        genetic_code_number (int): default = 1, The Genetic Codes number described by NCBI (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)

    Returns:
        the tRNA adaptation index of a gene, or 0 when there is no codon to
        score (sequence shorter than one codon, only a start codon, only
        stop codons) or the selected values sum to zero

    Raises:
        ValueError if the length of rel_dict_wi equal to zero
        ValueError if DNA contains any character other than A, T, C and G
        KeyError if a scored codon is missing from rel_dict_wi

    Example:
        > anticodon_dict = tRNADB_CE("http://trna.ie.niigata-u.ac.jp/cgi-bin/trnadb/whole_anticodon.cgi?GID=|CP001631&DTYPE=CMP&VTYPE=1")
        # Return an anticodon table of Acidimicrobium ferrooxidans DSM 10331
        > anticodon_codon = dict_codon_anticodon ( anticodon_dict )
        > codon_anticodon_count = dict_codon_anticodon_count(anticodon_codon,anticodon_dict,bacteria = True)
        > absolute_wi = abs_Wi(codon_anticodon_count, Sug=1, Sci=1, Sai=1, Sgu=1, Sal=1, bacteria=True)
        > relative_wi = rel_Wi(absolute_wi, 11)
        > calc_Tai("ATGGCTGCCTAA", relative_wi, 11)
    """
    if len(rel_dict_wi) == 0:
        raise ValueError ("rel_dict_wi can not be with length equal to zero")
    if any(base not in ("A", "T", "C", "G") for base in DNA):
        raise ValueError("The DNA can not include any nucleotides expect (A, T, C, and G)")

    # Complete codons only, written in RNA letters.
    codon_DNA = [
        DNA[start:start + 3].replace("T", "U")
        for start in range(0, len(DNA) - 2, 3)
    ]
    # start codon remove it
    if codon_DNA and codon_DNA[0] == "AUG":
        codon_DNA = codon_DNA[1:]
    # Nothing left to score; indexing the empty list used to raise IndexError.
    if not codon_DNA:
        return 0

    # Imported here so the module itself does not require Biopython.
    from Bio.Data import CodonTable
    genetic_table = CodonTable.unambiguous_dna_by_id[genetic_code_number]
    stop_codons = {codon.replace("T", "U") for codon in genetic_table.stop_codons}

    codon_wi = [rel_dict_wi[codon] for codon in codon_DNA if codon not in stop_codons]
    if sum(codon_wi) == 0:
        return 0

    # geometric mean of the per-codon values
    log_values = np.log(codon_wi)
    return np.exp(log_values.sum() / len(log_values))