#!/usr/bin/env python
#
# desc: import data into Elasticsearch
# auth: Sheeva Plug
#

# Target Elasticsearch index; used as the `_index` of every bulk action.
ES_INDEX = 'data'
# Mapping type; used as the `_type` of every bulk action.
# NOTE(review): the value looks like a date string -- confirm this is the
# intended type name and not a leftover from a specific import run.
ES_TYPE = '2018-03-10'

from elasticsearch import Elasticsearch, helpers
from argparse import ArgumentParser
import dateutil.parser
import csv
import sys

def toDatetime(d):
    """Convert a ``<date>_<time>`` string into a datetime.

    The input is split on ``_`` into a date part and a time part, every
    ``-`` in the time part is turned into ``:``, and the two parts are
    joined with a single space and handed to ``dateutil.parser.parse``.

    Presumably the input looks like ``2018-03-10_12-30-45`` -- confirm
    against the data files.

    :param d: string containing exactly one ``_`` separator
    :returns: whatever ``dateutil.parser.parse`` returns for the rebuilt
        string
    :raises ValueError: if ``d`` does not split into exactly two parts
    """
    date_part, time_part = d.split('_')
    stamp = ' '.join((date_part, time_part.replace('-', ':')))
    return dateutil.parser.parse(stamp)


def iterActions(filenames):
    """Yield one Elasticsearch bulk ``index`` action per CSV data row.

    Every file is read with ``csv.reader``.  Its first row is the header;
    the column names are lower-cased and used as the document field names.
    The ``time`` field of each row is converted with ``toDatetime`` and the
    row is wrapped into an action targeting ``ES_INDEX`` / ``ES_TYPE``.

    Files without any rows and blank lines inside a file are skipped
    instead of aborting the whole import.

    :param filenames: iterable of paths to comma-separated files
    :raises KeyError: if a non-blank data row has no ``time`` column
    """
    for filename in filenames:
        with open(filename, 'r') as ifh:
            reader = csv.reader(ifh, delimiter=',')
            # A default avoids StopIteration on a completely empty file.
            first_row = next(reader, None)
            if first_row is None:
                continue
            header = [name.lower() for name in first_row]
            for line in reader:
                if not line:
                    # csv.reader returns [] for a blank line; there is
                    # nothing to index for it.
                    continue
                source = dict(zip(header, line))
                source['time'] = toDatetime(source['time'])
                yield {
                    '_op_type': 'index',
                    '_index': ES_INDEX,
                    '_type': ES_TYPE,
                    '_source': source,
                }


if __name__ == '__main__':
    parser = ArgumentParser(description='import data into ES')
    parser.add_argument('filenames', nargs='+', help='filenames')
    args = parser.parse_args()

    es = Elasticsearch()

    # helpers.bulk accepts any iterable of actions and splits it into bulk
    # requests itself, so the rows are streamed straight from the files
    # without manual batching.
    helpers.bulk(es, iterActions(args.filenames))
