Biopipeline ouverte – Muhammad Umar Ali

Biopipeline ouverte – Muhammad Umar Ali

    def blast():
        """Run a remote BLASTN search for the first collected FASTA file.

        Scans the ``fasta`` directory, prints every file name ending in
        ``.fasta`` and appends it to the ``fasta_file`` list (defined outside
        this function).  The sequence in ``fasta_file[0]`` is then submitted
        to NCBI with ``NCBIWWW.qblast("blastn", "nt", ...)`` and the raw
        response is written to ``results.xml`` in the current directory.

        Raises:
            IndexError: if ``fasta_file`` is still empty after the scan
                (assuming it is a list), i.e. no ``.fasta`` file is available.
        """
        for file in os.listdir('fasta'):
            filename = os.fsdecode(file)
            if filename.endswith(".fasta"):
                print(filename)
                fasta_file.append(filename)

        # Only the first collected file is searched.
        record = SeqIO.read('fasta/' + fasta_file[0], format="fasta")
        result_handle = NCBIWWW.qblast("blastn", "nt", record.seq)
        try:
            # The context manager closes the output file even if reading the
            # response or writing it to disk fails part-way through.
            with open("results.xml", "w") as blast_result:
                blast_result.write(result_handle.read())
        finally:
            result_handle.close()
    def get_pathway(protein_name):
        """Download the KEGG pathway image found for ``protein_name``.

        Searches KEGG pathways by keyword via ``scrape`` (defined outside this
        function), follows the link wrapped around the first ``img`` result,
        and saves an image from that page to
        ``pathway_img/<protein_name>_pathway.jpg``.

        Args:
            protein_name: Keyword inserted verbatim into the KEGG search URL
                and into the output file name.

        Returns:
            The absolute URL of the image that was downloaded.

        Raises:
            IndexError: if the search yields no results, or the pathway page
                holds fewer than three images.
        """
        base_url = 'https://www.kegg.jp'
        search_url = (base_url
                      + '/kegg-bin/search_pathway_text?map=map&keyword='
                      + protein_name + '&mode=1&viewImage=true')

        items = scrape(search_url, 'img')
        # Each result image is expected to sit inside a link to its pathway page.
        url_ext = [item.parent['href'] for item in items]
        if not url_ext:
            raise IndexError('no KEGG pathway result for %r' % (protein_name,))

        images = scrape(base_url + url_ext[0], 'img')
        if len(images) < 3:
            raise IndexError(
                'KEGG pathway page for %r has fewer than three images'
                % (protein_name,))

        # NOTE(review): the third <img> on the page is taken to be the pathway
        # map itself -- confirm against the current KEGG page layout.
        image_url = base_url + images[2]['src']
        urllib.request.urlretrieve(
            image_url, 'pathway_img/' + protein_name + "_pathway.jpg")
        return image_url
    Figure 3. Image de la cascade de signaux cellulaires pour le gène VEGFA (résultat de la figure 2)
    def protein_atlas_parser(xml_file):
        """Print a short summary for every entry of a Protein Atlas XML file.

        For each child of the document root, four values are read from fixed
        positions in the element tree and printed together with the entry's
        index: protein name, tissue expression summary, description and RNA
        cancer specificity.

        Args:
            xml_file: Path or file object accepted by ``ET.parse``.

        If an entry lacks one of the expected positions, a notice is printed
        and the values that could not be read are shown as ``None`` rather
        than being carried over from the previous entry.
        """
        root = ET.parse(xml_file).getroot()
        for i, entry in enumerate(root):
            # Reset per entry so a failed lookup can neither raise NameError
            # nor silently repeat values from the previous entry.
            prot_name = tissue_expr = desc = path_expr = None
            try:
                prot_name = entry[0].text
                tissue_expr = entry[20][3][0].text
                desc = entry[7][1].get('description')
                path_expr = entry[8][0].text
            except IndexError:
                # Raised when the entry has fewer children than the fixed
                # positions above assume.
                print("\nAn error has been thrown, please handle\n")
            print(i, 'Protein Name:', prot_name,
                  '\n Tissue expression summary: ', tissue_expr,
                  '\n Description: ', desc,
                  '\n RNA Cancer Specificity:', path_expr)
    Figure 5. Organigramme du pipeline bioinformatique, affichant le flux des entrées / sorties
    >>> from blast import * 
    >>> blast()
    >>> from xml_parser import blast_parser
    >>> blast_parser('results.xml')
    'VEGFA'
    >>> VEGFA = 'VEGFA'
    >>> from fetch_uniprot_metadata import *
    >>> gene_entry, genes = protein_entry(VEGFA)
    >>> protein_function = protein_function(gene_entry) # saves function as string
    >>> amino_acid_seq = protein_AASeq(gene_entry) # amino acid sequence
    >>> from get_kegg_pathway import get_pathway
    >>> get_pathway(VEGFA)
    >>> from protein_atlas import get_atlas_xml
    >>> get_atlas_xml(VEGFA)
    >>> from xml_parser import protein_atlas_parser
    >>> protein_atlas_parser('xml/protein_atlas/VEGFA_protein_atlas_data.xml')
    0 Protein Name: VEGFA
    Tissue expression summary: Most cancers showed strong cytoplasmic immunoreactivity. Lymphomas were in general moderately stained.
    Description: Antibody staining mainly consistent with RNA expression data. At least one protein variant secreted, tissue location of RNA and protein might differ and correlation is complex.
    RNA Cancer Specificity: Low cancer specificity
    ...