| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf8 -*-
2 """Some HL7 handling."""
3 #============================================================
4 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
5 __license__ = "GPL v2 or later"
6
7
8 import sys
9 import os
10 import codecs
11 import logging
12 import time
13 import datetime as pyDT
14 import hl7 as pyhl7
15 from xml.etree import ElementTree as pyxml
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmTools
21 from Gnumed.pycommon import gmBusinessDBObject
22 from Gnumed.pycommon import gmPG2
23 from Gnumed.pycommon import gmDateTime
24 from Gnumed.business import gmPathLab
25
26
# module-wide logger
_log = logging.getLogger('gm.hl7')

# constants

# terminator appended to every segment written by this module
HL7_EOL = u'\r'

# segment types accepted as the start of a line when repairing HL7 files
HL7_SEGMENTS = [u'FHS', u'BHS', u'MSH', u'PID', u'PV1', u'OBX', u'NTE', u'ORC', u'OBR']

# MSH: field index of the sending lab
MSH_sending_lab = 3

# PID: field indices, plus component indices inside the name field
PID_name = 5
PID_lastname = 0
PID_firstname = 1
PID_middlename = 2
PID_dob = 7
PID_gender = 8

# OBX: field indices, plus component indices inside the type field
OBX_type = 3
OBX_LOINC = 0
OBX_name = 1
OBX_value = 5
OBX_unit = 6

# human readable labels, keyed by segment type and then by field index
HL7_field_labels = {
    'PID': {
        0: 'Segment Type',
        1: '<PID> Set ID',
        2: 'Patient ID',
        5: 'Patient name',
        7: 'Date/Time of birth',
        8: 'Administrative gender',
    },
    'OBR': {
        0: 'Segment Type',
    },
    'OBX': {
        0: 'Segment Type',
        1: 'Set ID',
        2: 'Value Type',
        3: 'Identifier (LOINC)',
        4: 'Observation Sub-ID',
        5: 'Value',
        6: 'Units',
        7: 'References Range (Low - High)',
        8: 'Abnormal Flags',
        11: 'Result Status',
        14: 'Date/Time of Observation',
    },
}

#============================================================
# class to handle unmatched incoming clinical data
#------------------------------------------------------------
# the WHERE condition gets interpolated by the users of this template
_SQL_get_incoming_data = u"""SELECT * FROM clin.v_incoming_data_unmatched WHERE %s"""
80
"""Represents items of incoming data, say, HL7 snippets."""

# NOTE(review): class-level attributes of cIncomingData; the "class" line
# itself is not part of this listing.

# fetches one row of the view by its primary key
_cmd_fetch_payload = _SQL_get_incoming_data % u"pk_incoming_data_unmatched = %s"

# the UPDATE only matches while the row's XMIN is unchanged and hands
# back the new XMIN together with the size of the stored data
_cmds_store_payload = [
    u"""UPDATE clin.incoming_data_unmatched SET
        fk_patient_candidates = %(pk_patient_candidates)s,
        fk_identity_disambiguated = %(pk_identity_disambiguated)s,
        fk_provider_disambiguated = %(pk_provider_disambiguated)s,
        request_id = gm.nullify_empty_string(%(request_id)s),
        firstnames = gm.nullify_empty_string(%(firstnames)s),
        lastnames = gm.nullify_empty_string(%(lastnames)s),
        dob = %(dob)s,
        postcode = gm.nullify_empty_string(%(postcode)s),
        other_info = gm.nullify_empty_string(%(other_info)s),
        type = gm.nullify_empty_string(%(data_type)s),
        gender = gm.nullify_empty_string(%(gender)s),
        requestor = gm.nullify_empty_string(%(requestor)s),
        external_data_id = gm.nullify_empty_string(%(external_data_id)s),
        comment = gm.nullify_empty_string(%(comment)s)
    WHERE
        pk = %(pk_incoming_data_unmatched)s
            AND
        xmin = %(xmin_incoming_data_unmatched)s
    RETURNING
        xmin as xmin_incoming_data_unmatched,
        octet_length(data) as data_size
    """
]

# view columns that can be updated:
_updatable_fields = [
    u'pk_patient_candidates',
    # request ID as found in <data>
    u'request_id',
    u'firstnames',
    u'lastnames',
    u'dob',
    u'postcode',
    # other identifying info in .data
    u'other_info',
    u'data_type',
    u'gender',
    # requestor of data (e.g. who ordered test results) if available in source data
    u'requestor',
    # ID of content of .data in external system (e.g. importer) where appropriate
    u'external_data_id',
    # a free text comment on this row, e.g. why is it here, error logs etc
    u'comment',
    u'pk_identity_disambiguated',
    # the provider the data is relevant to
    u'pk_provider_disambiguated'
]
#--------------------------------------------------------
#--------------------------------------------------------
127 #--------------------------------------------------------
130 #--------------------------------------------------------
132 # sanity check
133 if not (os.access(fname, os.R_OK) and os.path.isfile(fname)):
134 _log.error('[%s] is not a readable file' % fname)
135 return False
136
137 gmPG2.file2bytea (
138 query = u"UPDATE clin.incoming_data_unmatched SET data = %(data)s::bytea WHERE pk = %(pk)s",
139 filename = fname,
140 args = {'pk': self.pk_obj}
141 )
142
143 # must update XMIN now ...
144 self.refetch_payload()
145 return True
146 #--------------------------------------------------------
148
149 if self._payload[self._idx['data_size']] == 0:
150 return None
151
152 if self._payload[self._idx['data_size']] is None:
153 return None
154
155 if filename is None:
156 filename = gmTools.get_unique_filename(prefix = 'gm-incoming_data_unmatched-')
157
158 success = gmPG2.bytea2file (
159 data_query = {
160 'cmd': u'SELECT substring(data from %(start)s for %(size)s) FROM clin.incoming_data_unmatched WHERE pk = %(pk)s',
161 'args': {'pk': self.pk_obj}
162 },
163 filename = filename,
164 chunk_size = aChunkSize,
165 data_size = self._payload[self._idx['data_size']]
166 )
167
168 if not success:
169 return None
170
171 return filename
172
173 #------------------------------------------------------------
175 if order_by is None:
176 order_by = u'true'
177 else:
178 order_by = u'true ORDER BY %s' % order_by
179 cmd = _SQL_get_incoming_data % order_by
180 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = True)
181 return [ cIncomingData(row = {'data': r, 'idx': idx, 'pk_field': 'pk_incoming_data_unmatched'}) for r in rows ]
182
183 #------------------------------------------------------------
185 args = {'typ': data_type}
186 cmd = u"""
187 INSERT INTO clin.incoming_data_unmatched (type, data)
188 VALUES (%(typ)s, 'new data'::bytea)
189 RETURNING pk"""
190 rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}], return_data = True, get_col_idx = False)
191 pk = rows[0]['pk']
192 incoming = cIncomingData(aPK_obj = pk)
193 if not incoming.update_data_from_file(fname = filename):
194 delete_incoming_data(incoming_data = pk)
195 return None
196 return incoming
197
198 #------------------------------------------------------------
200 args = {'pk': pk_incoming_data}
201 cmd = u"DELETE FROM clin.incoming_data_unmatched WHERE pk = %(pk)s"
202 gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}])
203 return True
204
205 #------------------------------------------------------------
206
207 #============================================================
209
210 out_fname = gmTools.get_unique_filename (
211 prefix = u'%s-fixed-' % gmTools.fname_stem(filename),
212 suffix = '.hl7'
213 )
214 _log.debug('fixing HL7 [%s] -> [%s]', filename, out_fname)
215 hl7_in = codecs.open(filename, 'rb', encoding)
216 hl7_out = codecs.open(out_fname, 'wb', 'utf8')
217
218 line_idx = 0
219 prev_line = None
220 for line in hl7_in:
221 line_idx += 1
222 # suspicious for non-terminating line ?
223 if line.endswith(u' \n'):
224 _log.debug('#%s: suspicious non-terminating line ("...<SPACE>\\n"): [%s...%s]', line_idx, line[:4], line[-7:])
225 if prev_line is None:
226 prev_line = line[:-1]
227 else:
228 prev_line = prev_line + line[:-1]
229 continue
230
231 line = line.strip('\r').strip('\n').strip('\r').strip('\n')
232
233 # final continuation line ?
234 if line[3] != u'|':
235 if prev_line is None:
236 raise ValueError('line #%s does not start with "<SEGMENT>|" but previous line did not end with BLANK either: [%s]' % (line_idx, line))
237 hl7_out.write(prev_line)
238 prev_line = None
239 hl7_out.write(line + HL7_EOL)
240 continue
241
242 # start of a known segment ?
243 if line[:3] in HL7_SEGMENTS:
244 if prev_line is not None:
245 hl7_out.write(prev_line + HL7_EOL)
246 prev_line = None
247 hl7_out.write(line + HL7_EOL)
248 continue
249
250 hl7_out.close()
251 hl7_in.close()
252
253 return out_fname
254 #============================================================
256
257 _log.debug('extracting HL7 from CDATA of <%s> nodes in XML file [%s]', xml_path, filename)
258
259 hl7_xml = pyxml.ElementTree()
260 try:
261 hl7_xml.parse(filename)
262 except pyxml.ParseError:
263 _log.exception('cannot parse [%s]' % filename)
264 return None
265 nodes = hl7_xml.findall(xml_path)
266 if len(nodes) == 0:
267 _log.debug('no data found')
268 return None
269
270 out_fname = gmTools.get_unique_filename(prefix = u'%s-' % gmTools.fname_stem(filename), suffix = '.hl7')
271 _log.debug('writing HL7 to [%s]', out_fname)
272 hl7_file = codecs.open(out_fname, 'wb', 'utf8')
273 for node in nodes:
274 hl7_file.write(node.text)
275
276 return out_fname
277 #============================================================
279
280 _log.debug('splitting [%s]', filename)
281
282 hl7_in = codecs.open(filename, 'rb', encoding)
283
284 idx = 0
285 first_line = True
286 MSH_file = None
287 MSH_fnames = []
288 for line in hl7_in:
289 # first line must be MSH
290 if first_line:
291 # ignore empty / FHS / BHS lines
292 if line.strip() == u'':
293 continue
294 if line.startswith(u'FHS|'):
295 _log.debug('ignoring FHS')
296 continue
297 if line.startswith(u'BHS|'):
298 _log.debug('ignoring BHS')
299 continue
300 if not line.startswith(u'MSH|'):
301 raise ValueError('HL7 file <%s> does not start with "MSH" line' % filename)
302 first_line = False
303 # start new file
304 if line.startswith(u'MSH|'):
305 if MSH_file is not None:
306 MSH_file.close()
307 idx += 1
308 out_fname = gmTools.get_unique_filename(prefix = u'%s-MSH_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
309 _log.debug('writing message %s to [%s]', idx, out_fname)
310 MSH_fnames.append(out_fname)
311 MSH_file = codecs.open(out_fname, 'wb', 'utf8')
312 # ignore BTS / FTS lines
313 if line.startswith(u'BTS|'):
314 _log.debug('ignoring BTS')
315 continue
316 if line.startswith(u'FTS|'):
317 _log.debug('ignoring FTS')
318 continue
319 # else write line to new file
320 MSH_file.write(line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r')
321
322 if MSH_file is not None:
323 MSH_file.close()
324 hl7_in.close()
325
326 return MSH_fnames
327
328 #============================================================
330 """Assumes:
331 - ONE MSH per file
332 - utf8 encoding
333 - first non-empty line must be MSH line
334
335 - anything between MSH and PID is lost
336
337 IOW, what's created by split_HL7_into_MSH()
338 """
339 _log.debug('splitting [%s]', filename)
340
341 MSH_in = codecs.open(filename, 'rb', 'utf8')
342
343 looking_for_MSH = True
344 MSH_line = None
345 looking_for_first_PID = True
346 PID_file = None
347 PID_fnames = []
348 idx = 0
349 for line in MSH_in:
350 # ignore empty
351 if line.strip() == u'':
352 continue
353
354 # first non-empty line must be MSH
355 if looking_for_MSH:
356 if line.startswith(u'MSH|'):
357 looking_for_MSH = False
358 MSH_line = line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r'
359 continue
360 raise ValueError('HL7 MSH file <%s> does not start with "MSH" line' % filename)
361
362 # first non-empty line after MSH must be PID
363 if looking_for_first_PID:
364 if not line.startswith(u'PID|'):
365 raise ValueError('HL7 MSH file <%s> does not have "PID" line follow "MSH" line' % filename)
366 looking_for_first_PID = False
367
368 # start new file if line is PID
369 if line.startswith(u'PID|'):
370 if PID_file is not None:
371 PID_file.close()
372 idx += 1
373 out_fname = gmTools.get_unique_filename(prefix = u'%s-PID_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
374 _log.debug('writing message for PID %s to [%s]', idx, out_fname)
375 PID_fnames.append(out_fname)
376 PID_file = codecs.open(out_fname, 'wb', 'utf8')
377 PID_file.write(MSH_line)
378 # else write line to new file
379 PID_file.write(line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r')
380
381 if PID_file is not None:
382 PID_file.close()
383 MSH_in.close()
384
385 return PID_fnames
386
387 #============================================================
389
390 PID_fnames = []
391 for MSH_fname in split_HL7_by_MSH(filename, encoding):
392 PID_fnames.extend(flatten_MSH_by_PID(MSH_fname))
393
394 return PID_fnames
395
396 #============================================================
398 comment_tag = u'[HL7 name::%s]' % hl7_lab
399 for gm_lab in gmPathLab.get_test_orgs():
400 if comment_tag in gmTools.coalesce(gm_lab['comment'], u''):
401 return gm_lab
402 _log.debug('lab not found: %s', hl7_lab)
403 gm_lab = gmPathLab.create_test_org(name = hl7_lab, comment = comment_tag)
404 if gm_lab is None:
405 raise ValueError('cannot create lab [%s] in GNUmed' % hl7_lab)
406 _log.debug('created lab: %s', gm_lab)
407 return gm_lab
408
409 #------------------------------------------------------------
411
412 tt = gmPathLab.find_measurement_type(lab = pk_lab, name = name)
413 if tt is None:
414 _log.debug('test type [%s %s (%s)] not found for lab #%s, creating', name, unit, loinc, pk_lab)
415 tt = gmPathLab.create_measurement_type(lab = pk_lab, abbrev = name, unit = unit, name = name)
416
417 if loinc is None:
418 return tt
419 if loinc.strip() == u'':
420 return tt
421 if tt['loinc'] is None:
422 tt['loinc'] = loinc
423 tt.save()
424 return tt
425 if tt['loinc'] != loinc:
426 # raise ValueError('LOINC code mismatch between GM (%s) and HL7 (%s) for result type [%s]' % (tt['loinc'], loinc, name))
427 _log.error('LOINC code mismatch between GM (%s) and HL7 (%s) for result type [%s]', tt['loinc'], loinc, name)
428
429 return tt
430
431 #------------------------------------------------------------
# Body of import_MSH(); the "def" line is not part of this listing.
"""Assumes what's produced by flatten_MSH_by_PID()."""

_log.debug('importing HL7 from [%s]', filename)

# read the file, 5 MB max
max_bytes = 1024 * 1024 * 5
MSH_file = codecs.open(filename, 'rb', 'utf8')
HL7 = pyhl7.parse(MSH_file.read(max_bytes))
MSH_file.close()

# verify lab is in database
sending_lab = HL7.segment('MSH')[MSH_sending_lab][0]
gm_lab = __find_or_create_lab(sending_lab)

# verify test types are in database
for OBX in HL7.segments('OBX'):
    test_type = OBX[OBX_type]
    __find_or_create_test_type (
        test_type[OBX_LOINC],
        test_type[OBX_name],
        gm_lab['pk_test_org'],
        OBX[OBX_unit][0]
    )

# find patient
name = HL7.segment('PID')[PID_name]
pat_lname = name[PID_lastname]
pat_fname = name[PID_firstname]
# the middle name component is optional
pat_mname = name[PID_middlename] if len(name) > 2 else None
print(" Patient: %s (%s) %s" % (pat_fname, pat_mname, pat_lname))
459
460 #------------------------------------------------------------
462 """Assumes what's produced by flatten_MSH_by_PID()."""
463
464 _log.debug('staging HL7%s from [%s]', gmTools.coalesce(source, u'', u' (%s)'), filename)
465
466 # parse HL7
467 MSH_file = codecs.open(filename, 'rb', 'utf8')
468 HL7 = pyhl7.parse(MSH_file.read(1024 * 1024 * 5)) # 5 MB max
469 MSH_file.close()
470
471 # import file
472 inc = create_incoming_data(u'HL7%s' % gmTools.coalesce(source, u'', u' (%s)'), filename)
473 if inc is None:
474 return None
475
476 try:
477 # set fields if known
478 PID = HL7.segment('PID')
479 name = PID[PID_name]
480 inc['lastnames'] = gmTools.coalesce(name[PID_lastname], u'')
481 inc['firstnames'] = gmTools.coalesce(name[PID_firstname], u'')
482 if len(name) > 2:
483 inc['firstnames'] += u' '
484 inc['firstnames'] += name[PID_middlename]
485 if PID[PID_dob] is not None:
486 tmp = time.strptime(PID[PID_dob][0], '%Y%m%d')
487 inc['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
488 if PID[PID_gender] is not None:
489 inc['gender'] = PID[PID_gender][0]
490 inc['external_data_id'] = filename
491 #u'fk_patient_candidates',
492 # u'request_id', # request ID as found in <data>
493 # u'postcode',
494 # u'other_info', # other identifying info in .data
495 # u'requestor', # Requestor of data (e.g. who ordered test results) if available in source data.
496 # u'fk_identity_disambiguated',
497 # u'comment', # a free text comment on this row, eg. why is it here, error logs etc
498 # u'fk_provider_disambiguated' # The provider the data is relevant to.
499 inc.save()
500 except:
501 delete_incoming_data(pk_incoming_data = inc['pk_incoming_data_unmatched'])
502 raise
503
504 return inc
505
506 #------------------------------------------------------------
508 # a segment is a line starting with a type
509
510 msg = pyhl7.parse(message)
511
512 output = [[_('HL7 Message'), _(' %s segments (lines)%s') % (len(msg), gmTools.bool2subst(skip_empty_fields, _(', skipping empty fields'), u''))]]
513
514 max_len = 0
515 for s_idx in range(len(msg)):
516 seg = msg[s_idx]
517 seg_type = seg[0][0]
518
519 output.append([_('Segment #%s <%s>') % (s_idx, seg_type), _('%s fields') % len(seg)])
520
521 for f_idx in range(len(seg)):
522 field = seg[f_idx]
523 try:
524 label = HL7_field_labels[seg_type][f_idx]
525 except KeyError:
526 label = _('HL7 %s field') % seg_type
527
528 max_len = max(max_len, len(label))
529
530 if len(field) == 0:
531 if not skip_empty_fields:
532 output.append([u'%2s - %s' % (f_idx, label), _('<EMTPY>')])
533 continue
534 if (len(field) == 1) and (field[0].strip() == u''):
535 if not skip_empty_fields:
536 output.append([u'%2s - %s' % (f_idx, label), _('<EMTPY>')])
537 continue
538
539 output.append([u'%2s - %s' % (f_idx, label), u'%s' % field])
540
541 if eol is None:
542 return output
543
544 max_len += 7
545 return eol.join([ u'%s: %s' % ((o[0] + (u' ' * max_len))[:max_len], o[1]) for o in output ])
546
547 #------------------------------------------------------------
549 hl7_file = codecs.open(filename, 'rb', 'utf8')
550 output = format_hl7_message (
551 message = hl7_file.read(1024 * 1024 * 5), # 5 MB max
552 skip_empty_fields = skip_empty_fields,
553 eol = eol
554 )
555 hl7_file.close()
556
557 if not return_filename:
558 return output
559
560 if eol is None:
561 output = u'\n '.join([ u'%s: %s' % ((o[0] + (u' ' * max_len))[:max_len], o[1]) for o in output ])
562
563 out_name = gmTools.get_unique_filename(prefix = 'gm-formatted_hl7-', suffix = u'.hl7')
564 out_file = codecs.open(out_name, 'wb', 'utf8')
565 out_file.write(output)
566 out_file.close()
567
568 return out_name
569 #============================================================
570 # main
571 #------------------------------------------------------------
if __name__ == "__main__":

    # the self-tests only run when "test" is given as first argument
    if (len(sys.argv) < 2) or (sys.argv[1] != 'test'):
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    # set up date/time handling and translations before any test runs
    gmDateTime.init()
    gmI18N.activate_locale()
    gmI18N.install_domain()
586
587 #-------------------------------------------------------
# Body of a self-test helper; the "def" line is not part of this listing.
# Splits the file given as 2nd command line argument and imports each part.
for name in split_HL7_by_PID(sys.argv[2], encoding='utf8'):
    print(name)
    import_MSH(name)
593 #-------------------------------------------------------
# Body of a self-test helper; the "def" line is not part of this listing.
# Runs extract -> fix -> split on the XML file given as 2nd command line
# argument and shows the intermediate file names.
hl7 = extract_HL7_from_CDATA(sys.argv[2], u'.//Message')
print("%s %s" % ("HL7:", hl7))
fixed = fix_HL7_stupidities(hl7)
print("%s %s" % ("fixed HL7:", fixed))
PID_names = split_HL7_by_PID(fixed, encoding='utf8')
print("per-PID MSH files:")
for name in PID_names:
    print("%s %s" % (" ", name))
603 #-------------------------------------------------------
607 #-------------------------------------------------------
# Body of a self-test helper; the "def" line is not part of this listing.
# Runs extract -> fix -> split on the XML file given as 2nd command line
# argument and stages every resulting file as incoming data.
hl7 = extract_HL7_from_CDATA(sys.argv[2], u'.//Message')
print("%s %s" % ("HL7:", hl7))
fixed = fix_HL7_stupidities(hl7)
print("%s %s" % ("fixed HL7:", fixed))
PID_names = split_HL7_by_PID(fixed, encoding='utf8')
print("staging per-PID HL7 files:")
for name in PID_names:
    print("%s %s" % (" file:", name))
    staged = stage_MSH_as_incoming_data(name, source = u'Excelleris')
    print("%s %s" % ("", staged))
618 #-------------------------------------------------------
# Body of a self-test helper; the "def" line is not part of this listing.
# Runs fix -> split on the HL7 file given as 2nd command line argument
# and stages every resulting file as incoming data.
fixed = fix_HL7_stupidities(sys.argv[2])
print("%s %s" % ("fixed HL7:", fixed))
PID_names = split_HL7_by_PID(fixed, encoding='utf8')
print("staging per-PID HL7 files:")
for name in PID_names:
    print("%s %s" % (" file:", name))
    staged = stage_MSH_as_incoming_data(name, source = u'?')
    print("%s %s" % ("", staged))
627 #-------------------------------------------------------
# Body of a self-test helper; the "def" line is not part of this listing.
# Formats two sample segments (one OBR, one OBX) and prints the result.
sample_OBR = "OBR|1||03-1350023-LIP-0|LIP^Lipids||20031004073300|20031004073300|||||||20031004073300||22333^MEDIC^IAN^TEST||031350023||03-1350023|031350023|20031004131600||CHEM|F|||22333^MEDIC^IAN^TEST"
sample_OBX = "OBX|2|NM|22748-8^LDL Cholesterol||4.0|mmol/L|1.5 - 3.4|H|||F|||20031004073300"
for test in (sample_OBR, sample_OBX):
    # skip_empty_fields is deliberately left at its default
    formatted = format_hl7_message(message = test)
    print(formatted)
638 #-------------------------------------------------------
644 #-------------------------------------------------------
645 #test_import_HL7()
646 #test_xml_extract()
647 #test_incoming_data()
648 #test_stage_hl7_from_xml()
649 #test_stage_hl7()
650 #test_format_hl7_message()
651 test_format_hl7_file()
652
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sat Oct 5 03:57:03 2013 | http://epydoc.sourceforge.net |