#!/usr/bin/python
# -*- coding:utf-8 -*-
'''Import fasta-format database information (by Bio.SeqIO from Biopython)
into Sqlite3 database


by Wubin Qu <quwubin@gmail.com>,
Copyright @ 2010, All Rights Reserved.
'''

Author  = 'Wubin Qu  <quwubin@gmail.com> CZlab, BIRM, China'
Date    = 'Dec-20-2010 16:33:42'
License = 'New-BSD License'
Version = '1.0'

import sys
import os
from optparse import OptionParser
import sqlite3
import pprint
from Bio import SeqIO

def get_opt():
    '''Handle options'''
    usage = '''Usage: %prog -i Infile -d Dbname -t TableName
    
    '''

    version = '%prog Version: ' + '%s [%s]' % (Version, Date)
    parser = OptionParser(usage=usage, version=version)
    parser.add_option('-i', '--infile', dest='infile', help='Input file name. [String]')
    parser.add_option('-d', '--dbname', dest='dbname', help='Database file name. [String]')
    parser.add_option('-t', '--table_name', dest='table_name', help='Table name in SQlite3 database. [String]')
    [options, args] = parser.parse_args()

    if len(args) > 1:
        parser.error('Incorrect argument, add" "-h" for help.')

    if not options.infile:
        parser.error('Input file (created by infoseq program) needed, add" "-h" for help.')

    if not options.dbname:
        parser.error('SQlite3 database name needed, add" "-h" for help.')

    if not options.table_name:
        parser.error('Table name in SQlite3 database needed, add" "-h" for help.')

    return options

def process(options):
    '''Process'''
    conn = sqlite3.connect(options.dbname)
    cur = conn.cursor()
    create_sent = 'create table %s (id integer, acc text, length integer, name text, description text)' % (options.table_name)
    try:
        cur.execute(create_sent)
    except:
        pass

    sn = 0
    value_list = []
    for record in SeqIO.parse(open(options.infile, 'Ur'), 'fasta'):
        fasta_id = record.id
        if fasta_id.startswith('gi'):
            fasta_id = fasta_id.split('|')[3].split('.')[0]
        else:
            if fasat_id.find('|') >= 0:
                fasta_id = fasta.replace('|', '-')
        name = record.name
        length = len(record.seq)
        description = record.description.partition(' ')[2]
        value_list.append([str(sn), str(fasta_id), str(length), str(name), str(description)])
        sn += 1

    for value in value_list:
        value_str = ['"%s"' % (tmp_value) for tmp_value in value]
        insert_sent = "insert into %s values (%s)" % (options.table_name, ', '.join(value_str))
        cur.execute(insert_sent)

    conn.commit()
    cur.close()
    conn.close()

def main ():
    '''Main'''
    options = get_opt()
    process(options)

if __name__ == '__main__':
    main()


