#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scraping helpers for the NTUH web portal, built on spynner and PyQuery.

Every public function drives the shared module-level ``browser`` (a
``spynner.Browser``) through one portal page and returns plain dicts/lists.

NOTE(review): the text this module was restored from had its line breaks
collapsed and every ``<...>`` span stripped out of its string literals.
Regular expressions that matched HTML markup are therefore incomplete.  Each
affected literal is tagged ``NOTE(review)`` below and has to be restored from
a pristine copy before the surrounding function can return useful data.
"""

from datetime import *  # noqa: F401,F403 - kept: provides ``date``/``datetime``
import calendar
import os
import re
import urllib

import spynner
from pyquery import PyQuery

user_agent = ("Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36")

# Shared browser; ``debug_level=spynner.DEBUG`` or ``user_agent=user_agent``
# can be passed here when troubleshooting.
browser = spynner.Browser()

# Cached portal session token; falsy until Login() succeeds.
SESSION = False

_VIEWER_URL = ("http://ihisaw.ntuh.gov.tw/WebApplication/"
               "ElectronicMedicalReportViewer/MobileMasterPage.aspx"
               "?SESSION=%s")
_RECORD_LIST_URL = ("http://ihisaw.ntuh.gov.tw/WebApplication/"
                    "OtherIndependentProj/PatientBasicInfoEdit/"
                    "PatientMedicalRecordListQuery.aspx"
                    "?QueryBySelf=N&SESSION=%s")
_CHART_INPUT = 'input[name="NTUHWeb1$PatientBasicInfoQueryByIDAndName1$ctl01"]'
_CHART_SUBMIT = 'input[name="NTUHWeb1$PatientBasicInfoQueryByIDAndName1$ctl09"]'


class LoginError(IndexError):
    """Raised when the portal login cannot be completed.

    Derives from ``IndexError`` because a failed login used to surface as an
    ``IndexError`` from indexing an empty match list, so existing handlers
    keep working.
    """


def _resolve_session(session):
    """Return *session*, logging in first when the caller passed ``None``."""
    if session is None:
        return Login()
    return session


def remove_space(s):
    """Return *s* with every ``u' '`` character removed and the rest stripped."""
    return s.replace(u' ', '').strip()


_MINGUO_PATTERNS = (
    re.compile(r'(\d+)\.([ 0-9]{1,2})\.([ 0-9]{1,2})'),
    re.compile(r'(\d+)/([ 0-9]{1,2})/([ 0-9]{1,2})'),
)


def _int_or_one(text):
    """Parse *text* as an int, falling back to 1 for blank fields."""
    try:
        return int(text)
    except ValueError:
        return 1


def minguo2ce(minguo):
    """Convert a Minguo-calendar date string to a ``datetime.date``.

    The first ``Y.M.D`` occurrence is used; if there is none, the first
    ``Y/M/D`` occurrence is used.  The year is offset by 1911 and a blank
    month or day counts as 1.

    Returns:
        A ``date``, or ``None`` when *minguo* contains neither form.

    Raises:
        ValueError: if the parsed numbers do not form a valid date.
    """
    for regex in _MINGUO_PATTERNS:
        found = regex.search(minguo)
        if found:
            return date(int(found.group(1)) + 1911,
                        _int_or_one(found.group(2)),
                        _int_or_one(found.group(3)))
    return None


def Login():
    """Log in to the portal once and return the cached session token.

    Credentials are read from the ``NTUH_USER_ID`` and ``NTUH_PASSWORD``
    environment variables so that no secret lives in the source tree.

    Raises:
        LoginError: if credentials are missing or the page returned after
            submitting the form carries no ``SESSION=`` token.
    """
    global SESSION
    if SESSION:
        return SESSION

    user_id = os.environ.get('NTUH_USER_ID')
    password = os.environ.get('NTUH_PASSWORD')
    if not user_id or not password:
        raise LoginError('NTUH_USER_ID and NTUH_PASSWORD must be set')

    try:
        browser.load("http://portal.ntuh.gov.tw/General/Login.aspx")
    except Exception:
        # Best effort: the load times out while images are still loading,
        # yet the login form is already usable.
        pass

    browser.wk_fill("input[name=txtUserID]", user_id)
    browser.wk_fill("input[name=txtPass]", password)
    browser.click("input[id=rdblQuickMenu_0]")
    browser.click("input[name=imgBtnSubmitNew]")
    browser.wait_load()

    matches = re.findall(r'SESSION=(\w*)', browser.html)
    if not matches:
        raise LoginError('login failed: no SESSION token in the response')
    SESSION = matches[0]
    return SESSION


def ReportPathology(ID, SESSION=None):
    """Collect pathology reports for a chart number or personal ID.

    Uses a private browser instance, not the shared one.

    Returns:
        A list of report dicts (empty when nothing matched).
    """
    SESSION = _resolve_session(SESSION)
    viewer = spynner.Browser()
    viewer.load(_VIEWER_URL % SESSION)
    viewer.wk_fill("#txbIDInput", str(ID))
    viewer.wk_click("#btnQueryAction")
    viewer.wait_load(timeout=30)

    # NOTE(review): row pattern truncated in SOURCE (markup stripped).
    rows = re.findall('', viewer.html, re.DOTALL)
    report = []
    for row in rows:
        if u'病理報告' not in row:
            continue
        if row.find('>*<') != -1:
            # skip records without a report date
            continue
        # NOTE(review): the statements between here and SpecimenGetDate were
        # lost with the stripped markup; ChartNo, ReportKey, ReportCode,
        # PersonID and PathCode were assigned there and are undefined until
        # that code is restored.
        matches = re.findall('(.*?)', viewer.html)[0]
        SpecimenGetDate = re.findall('lblSpecimenGetDate">(.*?)', viewer.html)[0]
        ReportDate = re.findall('lblReportDate">(.*?)', viewer.html)[0]
        SpecimenCode = re.findall('lblSpecimenCode">(.*?)', viewer.html)[0]
        DepCode = re.findall('lblDepCode">(.*?)', viewer.html)[0]
        WardNoRoomCoBedNo = re.findall('lblWardNoRoomCoBedNo">(.*?)', viewer.html)[0]
        Result = re.findall('lblResult">(.*?)<', viewer.html)[0]
        report.append({
            'url': viewer.url,
            'html': viewer.html,
            'ChartNo': ChartNo,
            'ReportKey': ReportKey,
            'ReportCode': ReportCode,
            'PersonID': PersonID,
            'PathCode': PathCode,
            'SpecimenGetDate': SpecimenGetDate,
            'ReportDate': ReportDate,
            'SpecimenCode': SpecimenCode,
            'DepCode': DepCode,
            'WardNoRoomCoBedNo': WardNoRoomCoBedNo,
            'Result': Result,
        })
    return report


def GetPatientList(ID, year, month, SESSION=None):
    """List in-patients cared for by employee *ID* during one calendar month.

    The query range runs from day 1 to the last day of *month* in *year*.

    Returns:
        A list of dicts with keys Ward, Room, Bed, Name, ChartNo, Sex,
        Birthday, Age, HospitalDays and Enter.
    """
    SESSION = _resolve_session(SESSION)
    _, last_day = calendar.monthrange(year, month)
    year = str(year)
    month = str(month)
    last_day = str(last_day)

    browser.load('http://ihisaw.ntuh.gov.tw/WebApplication/InPatient/Ward/'
                 'OpenWard.aspx?SESSION=%s' % SESSION)
    control = 'input[id=NTUHWeb1_QueryInPatientPersonAccountControl1_'
    browser.fill(control + 'IDInputTextBox]', ID)
    # UI1 is the start date, UI2 the end date of the query range.
    for ui, day in (('1', '1'), ('2', last_day)):
        date_box = control + 'DateTextBoxYearMonthDayInputUI' + ui
        browser.fill(date_box + '_YearInput]', year)
        browser.fill(date_box + '_MonthInput]', month)
        browser.fill(date_box + '_DayInput]', day)
    browser.click(control + 'EmpNoCareQueryButton]')
    browser.wait_load()

    # NOTE(review): both the row pattern and the field pattern lost their
    # markup in SOURCE; the pieces below are what remained of each line.
    rows = re.findall('', browser.html, re.DOTALL)
    pattern = u''.join([
        u'_WardLabel">(.*?)',
        u'(.*?)_RoomLabel">(.*?)',
        u'(.*?)_BedLabel">(.*?)',
        u'(.*?)_LinkPatientName" title="(.*?)"',
        u'(.*?)_PatChartNo">(.*?)',
        u'(.*?)_PatientSex">(.*?)',
        u'(.*?)_PatientAge" title="生日:(.*?)">(.*?)',
        u'(.*?)"住院總天數:(.*?)天">(.*?)',
    ])
    pattern = pattern.replace('"', '\\"')

    PatientList = []
    for row in rows:
        matches = re.findall(pattern, row, re.DOTALL)
        if not matches:
            continue
        fields = matches[0]
        PatientList.append({
            'Ward': remove_space(fields[0]),
            'Room': remove_space(fields[2]),
            'Bed': remove_space(fields[4]),
            'Name': remove_space(fields[6]),
            'ChartNo': remove_space(fields[8]),
            'Sex': remove_space(fields[10]),
            'Birthday': remove_space(fields[12]).replace('_', '-'),
            'Age': remove_space(fields[13]),
            'HospitalDays': remove_space(fields[15]),
            'Enter': remove_space(fields[16]).replace('/', '-'),
        })
    return PatientList


def QueryAccountNoByWardInput(Ward, SESSION=None):
    """Return one dict per patient row shown for *Ward* on the ward page.

    Keys come from the last ``_``-separated part of each span/anchor id, or
    from the text before ``:`` in a span title.
    """
    SESSION = _resolve_session(SESSION)
    browser.load('http://ihisaw.ntuh.gov.tw/WebApplication/InPatient/Ward/'
                 'OpenWard.aspx?SESSION=%s' % SESSION)
    browser.load_jquery(True)
    browser.select('#NTUHWeb1_QueryInPatientPersonAccountControl1_DropListWard '
                   'option[value="%s"]' % Ward)
    checkbox = ('#NTUHWeb1_QueryInPatientPersonAccountControl1_'
                'CheckBoxShowDrMainColumn')
    browser.click(checkbox, wait_load=True)
    if 'LinkConfirmDiagnosisOrder' in browser.html:
        # Click again while the page still contains this marker.
        browser.click(checkbox, wait_load=True)

    ret = []
    for tr in PyQuery(browser.html)('tr'):
        row = PyQuery(tr)
        if row.attr['class'] not in ('tableText', 'tableText2'):
            continue
        r = {}
        for td in row('td'):
            cell = PyQuery(td)
            span = cell('span')
            anchor = cell('a')
            if span.attr['id']:
                r[span.attr['id'].split('_')[-1]] = span.text()
            if anchor.attr['id']:
                r[anchor.attr['id'].split('_')[-1]] = anchor.text()
            title = span.attr['title']
            if title:
                parts = title.split(':')
                if len(parts) > 1:
                    r[parts[0].strip()] = parts[1].strip()
                else:
                    r[span.attr['id'].split('_')[-1] + 'Title'] = title
        r['LinkPatientName'] = r['LinkPatientName'].replace(u'[禁]', '').strip()
        ret.append(r)
    return ret


def get_dischargenotelist(html):
    """Extract the discharge-note form from *html*.

    NOTE(review): only the first statements of this function survive in
    SOURCE, so it currently returns ``None``.  The form pattern lost its
    markup and everything after ``q = {}`` is missing.
    """
    form = re.findall('', html)[0]
    url = re.findall('action=\\"(.*?)\\"', form)[0]
    q = {}


# NOTE(review): orphaned fragment.  SOURCE contains the tail of a function
# that parsed clinic schedule rows, but its ``def`` line, the initialisation
# of ``ret`` and the named groups of its pattern were lost.  It is kept here
# verbatim until the original can be recovered:
#
#     pattern = '(.*)(.*)(?P.*) (?P.*)(.*)(.*)(?P.*)(.*)(?P.*)(.*)(.*)(.*)(.*)(.*)'
#     pattern = pattern.replace('"', '\\"')
#     matches = [m.groupdict() for m in re.finditer(pattern, browser.html)]
#     for m in matches:
#         ret.append({
#             'date': minguo2ce(m['date']),
#             'ampm': m['ampm'].split(' ')[1],
#             'dept': m['dept'],
#             'clinic': m['clinic'],
#         })
#     return ret


def OpenClinicsPatientList(clinic, SESSION=None):
    """List the patients booked into one clinic session.

    Args:
        clinic: dict with keys ``dept``, ``date`` (a date), ``ampm``,
            ``clinic`` and ``physician``.

    Returns:
        A list of ``(patient name, clinic['physician'])`` tuples, or ``None``
        when the "all patients" checkbox cannot be clicked.
    """
    SESSION = _resolve_session(SESSION)
    browser.load('http://hisaw.ntuh.gov.tw/WebApplication/Clinics/'
                 'OpenClinics.aspx?SESSION=%s' % SESSION)

    # Map the visible text of each drop-down option to its value attribute.
    # NOTE(review): group names restored from the m['val']/m['text'] lookups;
    # the markup around the pattern was stripped in SOURCE and must be put
    # back, otherwise the trailing lazy group only ever matches ''.
    pattern = 'value="(?P<val>.*?)">(?P<text>.*?)'
    pattern = pattern.replace('"', '\\"')
    OptionValue = {}
    for found in re.finditer(pattern, browser.html):
        text = found.group('text').strip()
        value = found.group('val').strip()
        if text != value:
            OptionValue[text] = value

    visit_date = clinic['date']
    date_input = 'input[name="NTUHWeb1$DateTextBoxYearMonthDayInputUI1$%s"]'
    browser.wk_select('#NTUHWeb1_DeptDropList', OptionValue[clinic['dept']])
    browser.wk_fill(date_input % 'YearInput', str(visit_date.year))
    browser.wk_fill(date_input % 'MonthInput', str(visit_date.month))
    browser.wk_fill(date_input % 'DayInput', str(visit_date.day))
    browser.wk_select('#NTUHWeb1_AMPMDropList', OptionValue[clinic['ampm']])
    browser.wk_fill('input[name="NTUHWeb1$ClinicNoInput"]', clinic['clinic'])
    browser.wk_click('input[name="NTUHWeb1$QueryScheduleList"]',
                     wait_load=True, timeout=9)
    try:
        browser.wk_click('input[name="NTUHWeb1$CheckBoxAllPatient"]',
                         wait_load=True, timeout=9)
    except Exception:
        return None

    pattern = 'VisitSeqnoPlusName" value="(?P<SeqnoPlusName>.*?)"'
    pattern = pattern.replace('"', '\\"')
    PatientList = []
    for found in re.finditer(pattern, browser.html):
        # maxsplit=1 keeps names that themselves contain a space intact.
        seq, name = found.group('SeqnoPlusName').split(' ', 1)
        # Unicode literal: a byte-string separator raises UnicodeDecodeError
        # on Python 2 when the page text is unicode.
        PatientList.append((name.split(u'【')[0], clinic['physician']))
    return PatientList


def PACSImageShowList(PersonID, SESSION=None):
    """Show the list of PACS images recorded for *PersonID*.

    Returns:
        A list of dicts with keys PatChartNo, RequestSheetNo, ExamDate,
        LinkOrderName, Modality and VerifiedStateString.
    """
    SESSION = _resolve_session(SESSION)
    url = ("http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/"
           "PatientBasicInfoEdit/PACSImageShowList.aspx"
           "?SESSION=%s&PatClass=I&PersonID=%s&Hosp=T0&EMRPop=Y"
           % (SESSION, PersonID))
    browser.load(url)

    # NOTE(review): the markup around every group was stripped in SOURCE and
    # the group names are restored from the keys read below, in the order
    # they are read - confirm both against the original.
    columns = [
        '(?P<PatChartNo>.*?)',
        '(?P<RequestSheetNo>.*?)',
        '(?P<ExamDate>.*?)',
        '(?P<LinkOrderName>.*?)',
        '(.*?)',
        '(?P<Modality>.*?)',
        '(?P<VerifiedStateString>.*?)',
    ]
    # Each original line break stands for optional whitespace.
    pattern = '\\s*' + '\\s*'.join(columns) + '\\s*'
    pattern = pattern.replace('"', '\\"')

    keys = ('PatChartNo', 'RequestSheetNo', 'ExamDate', 'LinkOrderName',
            'Modality', 'VerifiedStateString')
    results = []
    for found in re.finditer(pattern, browser.html, re.DOTALL):
        results.append(dict((key, remove_space(found.group(key)))
                            for key in keys))
    return results


def _td_text_by_span_id(doc, fragment):
    """Return the text of the ``td`` cells whose child span id has *fragment*.

    ``this`` is not a local name: PyQuery's ``filter`` injects the current
    element under that name into the callback's globals.
    """
    return doc('td').filter(
        lambda i: PyQuery(this).children('span').attr.id is not None
        and fragment in PyQuery(this).children('span').attr.id).text()


def PACSImageShowReport(ID, RequestSheetNoExcluded=(), Modality=('CT', 'MR'),
                        SESSION=None):
    """Fetch issued imaging reports for *ID*, skipping excluded sheets.

    Args:
        ID: chart number or personal ID typed into the viewer.
        RequestSheetNoExcluded: report items whose ``param`` attribute
            contains any of these strings are skipped.
        Modality: an item is kept only if its first cell text contains one
            of these strings.

    Returns:
        A list of report dicts; ``ExamDate`` is a ``datetime`` and
        ``Report`` holds the raw page HTML.
    """
    SESSION = _resolve_session(SESSION)
    browser.load(_VIEWER_URL % SESSION)
    browser.wk_fill("input[name=txbIDInput]", ID)
    browser.click_link("input[name=btnQueryAction]")

    d = PyQuery(browser.html)
    group = d('div').filter(
        lambda i: PyQuery(this).children('div').attr.reportgroup == u'影核醫')
    reportitems = []
    for ri in group('.reportitem'):
        if not any(mod in ri.find('td').text for mod in Modality):
            continue
        if any(rsn in ri.attrib['param'] for rsn in RequestSheetNoExcluded):
            continue
        reportitems.append(ri)

    urls = []
    if reportitems:
        # Open the first report to learn the frame URL, then derive the
        # other URLs by swapping in each item's ``param`` value.
        ri0 = reportitems[0]
        link = "a[id=%s]" % ri0.attrib['id'].replace('itemHolder',
                                                     'lbnSelectPrintIEH')
        browser.click_link(link)
        url = PyQuery(browser.html)('#Reportifrm').attr['src']
        for ri in reportitems:
            urls.append(url.replace(ri0.attrib['param'], ri.attrib['param']))

    reports = []
    for url in urls:
        browser.load(url)
        if u'已發報告' not in browser.html:
            continue
        d = PyQuery(browser.html)
        r = {}
        r['PatChartNo'] = d('div.patientInfo').find('td')[1].text
        r['RequestSheetNo'] = _td_text_by_span_id(d, 'AccessNo')
        r['ExamDate'] = _td_text_by_span_id(d, 'ExamDate')
        r['LinkOrderName'] = _td_text_by_span_id(d, 'OrderDesc')
        r['Modality'] = r['LinkOrderName'][:2]
        r['VerifiedStateString'] = _td_text_by_span_id(d, 'Status')
        r['Exam'] = d('span').filter(
            lambda i: PyQuery(this).attr.id is not None
            and PyQuery(this).attr.id.endswith('TitleExam')
        ).parents('tr').nextAll().children('td')[0].text_content().strip()
        r['Impression'] = d('span').filter(
            lambda i: PyQuery(this).attr.id is not None
            and 'TitleImpression' in PyQuery(this).attr.id
        ).parents('tr').nextAll().children('td')[0].text_content().strip()
        r['Report'] = browser.html
        r['ExamDate'] = datetime.strptime(r['ExamDate'], "%Y/%m/%d")
        reports.append(r)
        print(r['RequestSheetNo'])
    return reports


def _cell_date(cells, index):
    """Parse ``cells[index].text`` as ``Y/m/d``; ``None`` if absent or invalid."""
    try:
        return datetime.strptime(cells[index].text.strip(), "%Y/%m/%d").date()
    except (IndexError, AttributeError, ValueError):
        return None


def ElectronicMedicalReportViewer(ID, ReportKeyExcluded=(), SESSION=None):
    """Download every report listed for *ID* in the report viewer.

    Args:
        ID: chart number or personal ID typed into the viewer.
        ReportKeyExcluded: report items whose ``param`` attribute contains
            any of these strings are skipped.

    Returns:
        A list of dicts with keys URL, ReportClass, CheckDate, ReportDate,
        ReportKey, ReportCode and Report (raw HTML).  Returns ``[]`` when
        the viewer page cannot be loaded.
    """
    SESSION = _resolve_session(SESSION)
    try:
        browser.load(_VIEWER_URL % SESSION)
    except Exception:
        return []
    browser.wk_fill("input[name=txbIDInput]", ID)
    browser.click_link("input[name=btnQueryAction]")

    d = PyQuery(browser.html)
    holders = d('div').filter(
        lambda i: PyQuery(this).attr.id is not None
        and PyQuery(this).attr.id.endswith('groupHolder'))

    ret1 = []
    for groupHolder in holders:
        d2 = PyQuery(groupHolder)
        print(d2.attr.id)
        reportitems = []
        for ri in d2('.reportitem'):
            td = PyQuery(ri).children('td')
            ReportClass = td[0].text.strip()
            CheckDate = _cell_date(td, 1)
            ReportDate = _cell_date(td, 2)
            if any(rk in ri.attrib['param'] for rk in ReportKeyExcluded):
                continue
            reportitems.append((ri, ReportClass, CheckDate, ReportDate))
        if not reportitems:
            continue

        ri0 = reportitems[0][0]
        link = "a[id=%s]" % ri0.attrib['id'].replace('itemHolder',
                                                     'lbnSelectPrintIEH')
        try:
            browser.click_link(link, timeout=30)
            url = PyQuery(browser.html)('#Reportifrm').attr['src']
            browser.click_link("input[name=backToHome]")
        except Exception:
            # Give up on this group and return the viewer to the result
            # list so the next group can still be opened.
            print('timeout')
            reportitems = []
            browser.load(_VIEWER_URL % SESSION, load_timeout=30)
            browser.wk_fill("input[name=txbIDInput]", ID)
            browser.click_link("input[name=btnQueryAction]")

        for ri, ReportClass, CheckDate, ReportDate in reportitems:
            ret1.append({
                'URL': url.replace(ri0.attrib['param'], ri.attrib['param']),
                'ReportClass': ReportClass,
                'CheckDate': CheckDate,
                'ReportDate': ReportDate,
            })

    ret2 = []
    for ret in ret1:
        ReportKey = re.search(r'ReportKey=(.+)&ReportCode', ret['URL']).group(1)
        ReportCode = re.search(r'ReportCode=(.+)&PersonID', ret['URL']).group(1)
        try:
            # A report that times out is skipped; the run continues.
            browser.load(ret['URL'])
        except Exception:
            continue
        ret['ReportKey'] = ReportKey
        ret['ReportCode'] = ReportCode
        ret['Report'] = browser.html
        ret2.append(ret)
    return ret2


# Header line format: name(sex,birth date,age) [death date 已死亡] (hospital).
# NOTE(review): the group names of this pattern were stripped in SOURCE, so
# it raises ``re.error`` as written.  The code below reads the group called
# 'extra'; every named group is copied into the result dict, so the other
# names must be restored from the original before use.
_PATIENT_HEADER_PATTERN = (
    r'(?P.*?)\((?P.*?),(?P.*?),(?P.*?)\) (?P .*?)? \((?P.*?)\)')


def _parse_patient_header(body):
    """Return the first patient header found in *body*, or ``None``.

    The returned dict holds the pattern's named groups plus ``Dead``, the
    ``Y/m/d`` text preceding the deceased marker, or ``None``.
    """
    found = re.search(_PATIENT_HEADER_PATTERN, body)
    if found is None:
        return None
    header = found.groupdict()
    header['Dead'] = None
    if header['extra']:
        matches = re.findall(u'(..../../..) 已死亡', header['extra'])
        if matches:
            header['Dead'] = matches[0]
    return header


# (grid id fragment, regex capturing the field name from a span id)
_RECORD_GRIDS = (
    ('GridViewInPatRecord', r'InLabel(.*)$'),
    ('GridViewEmergencyContent', r'LabelEmer(.*)$'),
    ('GridViewOutPatRecord', r'_Label(.*)$'),
)


def _fetch_record_docs(records, button_suffix, form_name, KeyCodeExcluded,
                       doc_browser):
    """Download the document behind every entry of one visit grid.

    Clicks the ``button_suffix`` button of one record, reads the hidden form
    called *form_name*, then loads one URL per key code with *doc_browser*
    and stores ``KeyCode``, ``Func`` and ``DOC`` on the matching record.
    Documents that fail to load are skipped.
    """
    first = next(iter(records.values()))
    browser.click_link('input[id="%s_%s"]' % (first['spanid'], button_suffix))
    form = PyQuery(browser.html)('form[name="%s"]' % form_name)
    url = form.attr.action
    data = {}
    for field in form.children('input'):
        field = PyQuery(field)
        data[field.attr.name] = field.attr.value

    pairs = zip(data['KeyCodeList'].split('|'), data['KeyNameList'].split('|'))
    for KeyCode, KeyName in pairs:
        if KeyCode == '' or KeyCode in KeyCodeExcluded:
            continue
        # Copy so the form fields read from the page stay untouched.
        query = dict(data)
        query['AccountIDSE'] = KeyCode
        query['KeyCodeList'] = KeyCode
        query['KeyNameList'] = KeyName.encode('utf8')
        print(query)
        posturl = '%s&%s' % (url, urllib.urlencode(query))
        try:
            doc_browser.load(posturl)
        except Exception:
            continue
        records[KeyName]['KeyCode'] = KeyCode
        records[KeyName]['Func'] = query['Func']
        records[KeyName]['DOC'] = doc_browser.html


def PatientMedicalRecordList(Chart, KeyCodeExcluded=(), KeyNameExcluded=(),
                             SESSION=None):
    """Collect in-patient, emergency and out-patient visits with documents.

    Args:
        Chart: chart number typed into the query box.
        KeyCodeExcluded: key codes whose document is not downloaded.
        KeyNameExcluded: a visit type is only downloaded when at least one
            of its visit names is not in this collection.

    Returns:
        A dict with the patient header fields plus ``InPat``, ``Emergency``
        and ``OutPat``, each mapping a visit name to its field dict.
    """
    SESSION = _resolve_session(SESSION)
    result = {}
    browser.load(_RECORD_LIST_URL % SESSION)
    browser.wk_fill(_CHART_INPUT, Chart)
    browser.click_link(_CHART_SUBMIT)

    header = _parse_patient_header(browser.html)
    if header is None:
        result['Dead'] = None
    else:
        result.update(header)

    InPat, Emergency, OutPat = {}, {}, {}
    InPatList, EmergencyList, OutPatList = [], [], []
    for tr in PyQuery(browser.html)('tr'):
        record = PyQuery(tr)
        if record.attr.onmouseover is None:
            continue
        ieo = {}  # fields of one in-patient / emergency / out-patient row
        for span in record('span'):
            span_id = PyQuery(span).attr.id
            if span_id is None:
                continue
            for grid, key_regex in _RECORD_GRIDS:
                if grid not in span_id:
                    continue
                key = re.search(key_regex, span_id).group(1)
                value = PyQuery(span).text().strip()
                if value:
                    ieo[key] = value
                ieo['spanid'] = re.search(r'(.*)_(.*)', span_id).group(1)

        spanid = ieo.get('spanid')
        if spanid is None:
            continue
        if 'GridViewInPatRecord' in spanid:
            KeyName = u'%s_%s' % (ieo['DeptName'], ieo['InDate'])
            InPat[KeyName] = ieo
            if KeyName not in KeyNameExcluded:
                InPatList.append(KeyName)
        if 'GridViewEmergencyContent' in spanid:
            KeyName = u'急%s_%s' % (ieo['DeptName'], ieo['ComeClinicDate'])
            Emergency[KeyName] = ieo
            if KeyName not in KeyNameExcluded:
                EmergencyList.append(KeyName)
        if 'GridViewOutPatRecord' in spanid:
            KeyName = u'%s_%s' % (ieo['DeptName'], ieo['ComeClinicDate'])
            OutPat[KeyName] = ieo
            if KeyName not in KeyNameExcluded:
                OutPatList.append(KeyName)

    browser2 = spynner.Browser()
    if InPatList:
        _fetch_record_docs(InPat, 'ShowDischargeNote', 'dischargenotelist',
                           KeyCodeExcluded, browser2)
    if EmergencyList:
        _fetch_record_docs(Emergency, 'ShowEmergencyClinicHistory',
                           'emerrecordlist', KeyCodeExcluded, browser2)
    if OutPatList:
        _fetch_record_docs(OutPat, 'ShowMedicalRecord', 'dischargenotelist',
                           KeyCodeExcluded, browser2)

    result['InPat'] = InPat
    result['Emergency'] = Emergency
    result['OutPat'] = OutPat
    return result


def _row_pattern(group_count):
    r"""Build the row regex: *group_count* groups separated by ``.*?`` lines.

    Every original line break is replaced by ``\s*?``.

    NOTE(review): the markup that anchored each group was stripped in
    SOURCE; what is left matches almost anything and has to be restored.
    """
    pieces = ['(.*?)'] + ['.*?', '(.*?)'] * (group_count - 1)
    return '\\s*?' + '\\s*?'.join(pieces) + '\\s*?'


def _optional_ymd(text):
    """Parse ``Y/m/d`` text, returning ``None`` when it is blank or invalid."""
    try:
        return datetime.strptime(text, '%Y/%m/%d')
    except ValueError:
        return None


def PatientMedicalRecordListQuery(Chart, SESSION=None):
    """Show hospital visits for *Chart* (older, regex-based variant).

    Returns:
        A dict with the patient header fields plus ``In``, ``Emer`` and
        ``OutPat`` lists of visit dicts.

    Raises:
        ValueError: if an admission or visit date is not ``Y/m/d``.
    """
    SESSION = _resolve_session(SESSION)
    browser.load(_RECORD_LIST_URL % SESSION)
    browser.wk_fill(_CHART_INPUT, Chart)
    browser.wk_click(_CHART_SUBMIT, wait_load=True, timeout=9)
    body = browser.html
    result = {}

    header = _parse_patient_header(body)
    if header is None:
        result['Dead'] = None
    else:
        print(header)
        result.update(header)

    # In-patient stays
    In = []
    for match in re.findall(_row_pattern(10), body):
        cells = [remove_space(cell) for cell in match]
        In.append({
            'HospName': cells[0],
            'DeptName': cells[1],
            'InDate': datetime.strptime(cells[2], '%Y/%m/%d'),
            'OutDate': _optional_ymd(cells[3]),
            'WardName': cells[4],
            'RoomName': cells[5],
            'BedName': cells[6],
            'MainDrName': cells[7],
            'MainDiagnosisName': cells[8],
            'StatusName': cells[9],
        })
    result['In'] = In

    # Emergency visits
    Emer = []
    for match in re.findall(_row_pattern(8), body):
        cells = [remove_space(cell) for cell in match]
        Emer.append({
            'HospName': cells[0],
            'DeptName': cells[1],
            'ComeClinicDate': datetime.strptime(cells[2], '%Y/%m/%d'),
            'DischargeDate': _optional_ymd(cells[3]),
            'MainDrName': cells[4],
            'MainDiagnosisName': cells[5],
            'StatusName': cells[6],
            'TempBedID': cells[7],
        })
    result['Emer'] = Emer

    # Out-patient visits
    OutPat = []
    for match in re.findall(_row_pattern(7), body):
        cells = [remove_space(cell) for cell in match]
        OutPat.append({
            'HospName': cells[0],
            'DeptName': cells[1],
            'ComeClinicDate': datetime.strptime(cells[2], '%Y/%m/%d'),
            'SpecialCureName': cells[3],
            'MainDrName': cells[4],
            'MainDiagnosisName': cells[5],
            'AccountStatusName': cells[6],
        })
    result['OutPat'] = OutPat
    return result


def ShowOperationList(Chart, KeyCodeExcluded=(), KeyNameExcluded=(),
                      SESSION=None):
    """Download the operation notes listed for *Chart*.

    Returns:
        A list of dicts with keys KeyCode, KeyName and DOC (raw HTML);
        ``[]`` when the page offers no operation-list link.
    """
    SESSION = _resolve_session(SESSION)
    browser.load(_RECORD_LIST_URL % SESSION)
    browser.wk_fill(_CHART_INPUT, Chart)
    browser.click_link(_CHART_SUBMIT)
    browser.click_link(
        'input[name="NTUHWeb1$PatAccountListRecord1$ShowOperationList"]')

    match = re.search(
        r"'(http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/"
        r"PatientBasicInfoEdit/SimpleInfoShowUsingPlaceHolder.aspx.+?)'",
        browser.html)
    if match is None:
        return []
    browser.load(match.group(1))

    # NOTE(review): the end of this pattern was stripped in SOURCE, so the
    # last lazy group (the key name) only ever matches ''.
    pattern = r"'TreeViewItem','(.*?)'\)(.*?)>(.*?)"
    KeyList = []
    for found in re.findall(pattern, browser.html):
        KeyCode = found[0][1:]
        KeyName = found[2]
        if KeyCode in KeyCodeExcluded or KeyName in KeyNameExcluded:
            continue
        KeyList.append((KeyCode, KeyName))

    url = ('http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/'
           'PatientBasicInfoEdit/SimpleInfoShowUsingPlaceHolder.aspx'
           '?SESSION=%s' % SESSION)
    ret = []
    for KeyCode, KeyName in KeyList:
        data = {
            'KeyCodeList': KeyCode,
            # Encoded like in PatientMedicalRecordList: urlencode cannot
            # take non-ASCII unicode on Python 2.
            'KeyNameList': KeyName.encode('utf8'),
            'Func': 'OPNoteList',
        }
        browser.load('%s&%s' % (url, urllib.urlencode(data)))
        ret.append({
            'KeyCode': KeyCode,
            'KeyName': KeyName,
            'DOC': browser.html,
        })
    return ret