-
Notifications
You must be signed in to change notification settings - Fork 0
/
solr.py
executable file
·51 lines (43 loc) · 1.37 KB
/
solr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
from __future__ import division
import optparse
import os
import sys
import dwca
import json
import pprint
import sunburnt
# Command-line options: -f/--file names the archive to read; -q/--quiet
# stores False into options.verbose (which defaults to True).
parser = optparse.OptionParser()
parser.add_option("-f", "--file", help="DWC-a file to read")
parser.add_option(
    "-q",
    "--quiet",
    dest="verbose",
    action="store_false",
    default=True,
    help="don't print status messages to stdout"
)
options, args = parser.parse_args()

# Solr handle shared by the rest of the script (used for add and commit).
si = sunburnt.SolrInterface("http://localhost:8080/solr/")

# Maps a key looked up in each input record to the Solr field it is stored in.
fieldList = {
    "dwc:scientificName": "dwc_scientific_name_t",
    "dwc:locality": "dwc_locality_t",
    "dwc:verbatimLocality": "dwc_verbatim_locality_t",
    "dwc:recordedBy": "dwc_recorded_by_t"
}
def solrize(record):
    """Build a Solr document from ``record`` and add it to the index.

    Every key of ``fieldList`` that is present in ``record`` is copied to its
    mapped Solr field.  Values whose target field name ends in ``_t`` are
    also appended, each preceded by a single space, to the ``text`` field.
    The document id is ``record["coreid"]`` when that key is present,
    otherwise ``record["id"]``.

    Adding is best-effort: if ``si.add`` raises, the error is written to
    stderr, the document is pretty-printed to stdout, and the function
    returns normally so the caller can continue with the next record.

    Args:
        record: object supporting ``in`` and ``[]`` lookups by field name
            (presumably a row from the archive reader -- confirm against
            the ``dwca`` module).
    """
    doc = {"text": ""}
    for source_key, solr_field in fieldList.items():
        if source_key not in record:
            continue
        value = record[source_key]
        doc[solr_field] = value
        if solr_field.endswith("_t"):
            doc["text"] += " " + value

    # Prefer "coreid" when the record carries one; fall back to "id".
    doc["id"] = record["coreid"] if "coreid" in record else record["id"]

    try:
        si.add(doc)
    except Exception as err:
        # Not a bare except: KeyboardInterrupt and SystemExit must still
        # propagate.  Report why the add failed, then dump the document.
        sys.stderr.write("failed to add document to Solr: %s\n" % (err,))
        pprint.pprint(doc)
# Without an input file there is nothing to index: print usage and exit
# with a non-zero status.
if options.file is None:
    parser.print_help()
    sys.exit(1)

# Index every core record of the archive, then commit once at the end.
dwcaobj = dwca.Dwca(options.file)
for record in dwcaobj.core:
    solrize(record)
# Extension records are currently not indexed; the loop is left disabled.
#for dwcrf in dwcaobj.extensions:
#    for record in dwcrf:
#        solrize(record)
si.commit()