o
    i                     @   s|  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlm Z  d dlm!Z! d	d
ii i dZ"g dZ#de$ddfddZ%de$dee$ef fddZ&dee$ef dee$ef fddZ'de(fddZ)dee$ef de$defddZ*de$de$fddZ+de$dee$ fd d!Z,d"e$dee$ fd#d$Z-d%ee$ d&ee$ de$fd'd(Z.d)ee$ de$fd*d+Z/d,ed-e$ddfd.d/Z0d,ed0e$dej1fd1d2Z2d3ej3dej3fd4d5Z4dee$ef d6ej5d7ej5deeej5 ej5f fd8d9Z6	dd:ej1d;ee$ d<eej5 d=ej5d"e$d>ee$ d?eee$  deej1ej1ee$e7f f fd@dAZ8d3ej3dej3fdBdCZ9dDede(fdEdFZ:dGeej5 dHeej5 dIe;dee7 fdJdKZ<dLee; dMeee7  dee$ fdNdOZ=dDedPe$dee$ fdQdRZ>dDedee$ fdSdTZ?d:ej1dUee$ dPe$dej1fdVdWZ@dDedefdXdYZA	Zdd[eee$ef  d\ee$ d]e$d^e7deeee$ef  e7f f
d_d`ZBd:ej1dae$dbee$ee$ f d]e$dcee$ee$e$f f deej1e7f fdddeZCdfej3dgee$ef deee( ee$ ee; eej5 ee$ f fdhdiZDd:ej1dej1fdjdkZEd:ej1dej1fdldmZFd:ej1dej1fdndoZGdpej1dej1fdqdrZHdsej1dtej5duej5dej1fdvdwZIdpej1dgee$ef dej1fdxdyZJdpej1dgee$ef dej1fdzd{ZKdpej1d7ej5dgee$ef dej1fd|d}ZL	dd~ej1dpej1dee$e7f deee$ef  dej1f
ddZMde$dee$e$f dee$ dee$ fddZNdDedee$e$f dee$ dee$ fddZOdpej1dee$ef dej1fddZPd~ej1dpej1dej1dej1dee$ef d6ej5d7ej5dee$ej1f fddZQd:ej1de$ddfddZRde7fddZSeTdkr<e	UeS  dS dS )    N)DecimalInvalidOperation)	dataclass)AnyDictIterableListOptionalTuple)text)Engine)URL)create_engineDate of Birthdate_of_birth_rawpatientsncd_pt_registersncd_followups)v_ncd_base_joinv_ncd_followups_clean!v_ncd_latest_followup_per_patientv_ncd_monthly_summary_seedlevelreturnc                 C   s    t jtt |  t jdd d S )Nz%%(asctime)s %(levelname)s %(message)s)r   format)loggingbasicConfiggetattrupperINFO)r    r!   !/var/www/html/mam/ncd_analysis.pysetup_logging'   s   
r#   pathc                 C   s<   t | ddd}t|W  d    S 1 sw   Y  d S Nrutf-8)encoding)openyaml	safe_loadr$   handler!   r!   r"   load_config.   s   $r.   configc                 C   s   t | di }ddddddddddd
}| D ]\}}t|}|r(|||< q|| d< td}|r8|| d	< td
}|rKdd |dD | d< | S )Nmysqlhostportuserpasswordsocket)
DB_HOSTDB_PORTDB_USERNAMEDB_PASSWORD	DB_SOCKETNCD_DB_HOSTNCD_DB_PORTNCD_DB_USERNCD_DB_PASSWORDNCD_DB_SOCKETNCD_LARAVEL_PATHlaravel_pathNCD_DATABASESc                 S   s   g | ]
}|  r|  qS r!   )strip).0db_namer!   r!   r"   
<listcomp>N   s    z'apply_env_overrides.<locals>.<listcomp>,	databases)dictgetitemsosgetenvsplit)r/   	mysql_cfgenv_mapenv_keycfg_keyvaluerA   databases_overrider!   r!   r"   apply_env_overrides3   s6   



rU   c                  C   s:   t d} | d u st|  dkrdS t|   dvS )NNCD_WRITE_OVERALL T>   0noofffalse)rL   rM   strrC   lower)rawr!   r!   r"   should_write_overall_outputW   s   
r_   rO   databasec              
   C   sh   t jd| d | d | d t| dd|d}dt| dd	i}| d
}|r,||d< t|dd|dS )Nzmysql+pymysqlr3   r4   r1   r2   i  )
drivernameusernamer4   r1   r2   r`   connect_timeout
   r5   unix_socketTi  )pool_pre_pingpool_recycleconnect_args)r   createintrJ   r   )rO   r`   urlrh   socket_pathr!   r!   r"   build_engine^   s&   	
rm   c                 C   s:   t | ddd}| W  d    S 1 sw   Y  d S r%   )r)   readr,   r!   r!   r"   read_sql_filev   s   $ro   sql_textc                 C   sr   g }g }|   D ]$}| }|ds|dkrq|| |dr,|d| g }q|r7|d| |S )Nz--rW   ;
)
splitlinesrC   
startswithappendendswithjoin)rp   
statementsbufferlinestrippedr!   r!   r"   split_sql_statements{   s   

r|   
table_namec                 C   s(   |  td| d }dd |D S )NzSHOW COLUMNS FROM ``c                 S   s   g | ]}|d  qS )r   r!   rD   rowr!   r!   r"   rF          z'fetch_table_columns.<locals>.<listcomp>)executer   fetchall)connr}   rowsr!   r!   r"   fetch_table_columns   s   r   register_colspatient_colsc                    sD  dt dt dt dt f fdd}|ddd	|dd
d|ddd|ddd|ddd|ddd|ddd|ddd|ddd|ddd|ddd|ddd|dd d!|dd"d#|dd$d%|dd&d'|dd(d)|dd*d+|dd,d-|dd.d/|dd0d1|dd2d3|dd4d5|dd6d7|dd8d9|dd:d;|d<d=d>g}d?d@| dA S )BNtable_aliascolumnaliasr   c                    sR   |v r| dkr|  d| d| S | v r$| dkr$|  d| d| S d| S )Nr&   z.`` AS pNULL AS r!   )r   r   r   r   r   r!   r"   
select_col   s
   
z,build_base_join_view_sql.<locals>.select_colr&   Clinic_codeclinic_codePidpidFuchiaID	fuchia_idGendergenderReg_Datereg_dateArea_Divisionarea_divisionTownshiptownship	visit_Age	visit_ageCurrent_Agecurrent_age1stBPfirst_bp
1stBP_datefirst_bp_date2ndBP	second_bp
2ndBP_datesecond_bp_date3rdBPthird_bp
3rdBP_datethird_bp_date1stHypertensionfirst_hypertension1st_DiagDatefirst_diag_datestaging_Hypertensionstaging_hypertension1st_tot_Diabetesfirst_dm_test_type1st_RBSfirst_dm_value1st_RBS_datefirst_dm_date2nd_tot_Diabetessecond_dm_test_type2nd_RBSsecond_dm_value2nd_RBS_datesecond_dm_date2nd_Hypertensionsecond_hypertension2nd_DiagDatesecond_diag_dater   r   r   z5CREATE OR REPLACE VIEW v_ncd_base_join AS
SELECT
    z,
    zC
FROM ncd_pt_registers r
LEFT JOIN patients p ON p.`Pid` = r.`Pid`;r\   rw   )r   r   r   select_partsr!   r   r"   build_base_join_view_sql   sF    


























r   followup_colsc           	         s|  dt dt dt f fdd}d}||d}| v rd}d	}nd
}d}d}| v r-||d}nd v r7|dd}nd}g |dd|dd|dd|dd|dd|dd|dd|dd|d d!|d"d#|d$d%|d&d'|d(d)|d*d+|d,d-|d.d/|d0d1|d2d3||d4d5|d6d7|d8d9|d:d;|d<d=|d>d?|d@dA|dBdC|dDdE|dFdG|dHdI|dJdK|dLdM|dNdO|dPdQ|dRdS|dTdU|dVdW|dXdY|dZd[|d\d]|d^d_|d`da|dbdc|ddde|dfdg|dhdi|djdk|dldm|dndo|dpdq|drds|dtdu|dvdw|dxdy|dzd{|d|d}|d~d|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|dd|||}dd| d S )Nr   r   r   c                    s"   |  v rd|  d| S d| S )Nzf.`r   r   r!   )r   r   r   r!   r"   r      s   
z,build_followups_view_sql.<locals>.select_colown_clinic_Bpbp_rawzCASE
            WHEN f.`own_clinic_Bp` REGEXP '^[[:space:]]*[0-9]{2,3}[[:space:]]*/[[:space:]]*[0-9]{2,3}[[:space:]]*$'
                THEN CAST(SUBSTRING_INDEX(TRIM(f.`own_clinic_Bp`), '/', 1) AS UNSIGNED)
            ELSE NULL
        END AS sbp_rawzCASE
            WHEN f.`own_clinic_Bp` REGEXP '^[[:space:]]*[0-9]{2,3}[[:space:]]*/[[:space:]]*[0-9]{2,3}[[:space:]]*$'
                THEN CAST(SUBSTRING_INDEX(TRIM(f.`own_clinic_Bp`), '/', -1) AS UNSIGNED)
            ELSE NULL
        END AS dbp_rawzNULL AS sbp_rawzNULL AS dbp_rawz
RBS result
rbs_result
RBS_resultzNULL AS rbs_resultidfollowup_idr   r   r   r   r   r   
Visit_date
visit_dater   r   Ageyr   r   r   r   r   r   r   NCD_Diagnosisncd_diagnosisType_cur_visittype_cur_visit
Late_visit
late_visitLate_durationlate_durationLate_duration_unitlate_duration_unitLate_followlate_followLate_fol_durationlate_fol_durationTime
visit_timeown_Bp_Stagebp_stagencdV_1st_tot_Diabetesdm_1st_totalFBSfbsFBS_test_datefbs_test_dateLoaction_testfbs_test_locationncdV_2nd_tot_Diabetesdm_2nd_total2HPPt2hpp2HPP_test_datet2hpp_test_dateLoaction_Test2t2hpp_test_locationLab_res_Datelab_res_dateAltaltHBA1Chba1cUring_AC_ratiouring_ac_ratioGlucose_resglucose_resProtein_resprotein_res
Creatinine
creatinine
Creat_unit
creat_unitCRCLcrclTotal_cholesteroltotal_cholesterolTotal_cho_Unittotal_cholesterol_unitCVD_Riskcvd_riskHDLhdlHDL_unithdl_unitLDLldlLDL_unitldl_unitTriglyceridetriglycerideTriglyceride_unittriglyceride_unitPulsepulse
Pulse_rate
pulse_rateDiabetic_footdiabetic_footDiabetic_Neuropathydiabetic_neuropathyzLifestyle advicelifestyle_advicezMedication changedmedication_changedzPatient_adhe medicpatient_adherenceDrug_Supplydrug_supplyF_Amlodipine_dosef_amlodipine_doseF_Enalapril_dosef_enalapril_doseF_Atorvastain_dosef_atorvastain_doseF_Hydrochlorothiazide_dosef_hydrochlorothiazide_doseF_Aspirin_dosef_aspirin_dosezF_Metformin(500)_dosef_metformin_500_dosezF_Metformin(1000)_dosef_metformin_1000_dosezF_Gliclazide(500)_dosef_gliclazide_500_dosezF_Gliclazide(1000)_dosef_gliclazide_1000_dosezSymptom hypoglycemiasymptom_hypoglycemia	Foth_mediother_medicationFoth_medi_specother_medication_specOut_comeoutcomeTout_mam_clinictout_mam_clinic
death_dateTout_physician_datatout_physician_dataNcd_Tout_icmv_locationncd_tout_icmv_locationCause_of_deathcause_of_deathFup_doc_initialfup_doc_initialNext_Appointmentnext_appointment
visit_typea  CREATE OR REPLACE VIEW v_ncd_followups_clean AS
SELECT
    f_raw.*,
    CASE
        WHEN f_raw.sbp_raw BETWEEN 50 AND 300 AND f_raw.dbp_raw BETWEEN 30 AND 200 THEN f_raw.sbp_raw
        ELSE NULL
    END AS sbp,
    CASE
        WHEN f_raw.sbp_raw BETWEEN 50 AND 300 AND f_raw.dbp_raw BETWEEN 30 AND 200 THEN f_raw.dbp_raw
        ELSE NULL
    END AS dbp
FROM (
    SELECT
        z
,
        z%
    FROM ncd_followups f
) AS f_raw;r   )	r   r   bp_colbp_raw_exprsbp_raw_exprdbp_raw_exprrbs_colrbs_exprr   r!   r   r"   build_followups_view_sql   sf  
	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNRrO  enginesql_pathc           	      C   s   t |}t|}|  @}t|d}t|d}t|d}|tt|| |tt| |D ]}d|v s;d|v r<q1|t| q1W d    d S 1 sOw   Y  d S )Nr   r   r   zVIEW v_ncd_base_joinzVIEW v_ncd_followups_clean)ro   r|   beginr   r   r   r   rO  )	rP  rQ  rp   rx   r   r   r   r   	statementr!   r!   r"   ensure_viewsE  s   



"rT  	view_namec                 C   s   t d| | S )NzSELECT * FROM )pdread_sql_query)rP  rU  r!   r!   r"   	read_viewV  s   rX  seriesc                 C      t j| ddS Ncoerceerrors)rV  to_datetimerY  r!   r!   r"   parse_date_seriesZ     ra  report_start
report_endc           
      C   s   |  di }| d}| dp| d}|rtj|ddnd }|d ur+t|r+d }|r4tj|ddn|}t|r=|}|d urE| nd }| }	||	fS )Ndata_qualitymin_valid_datemax_valid_datemax_allowed_dater\  r]  )rJ   rV  r_  isna	normalize)
r/   rc  rd  dq_cfgmin_date_rawmax_date_rawmin_datemax_datemin_normmax_normr!   r!   r"   resolve_date_bounds^  s   

rr  df	date_colsrn  ro  id_colsdrop_row_colsc                    s   j r fdd|D g d } tj|dddddfS     fdd|D }tj|dd	}|d ur<tj|dd	nd }t|rV|g d } tj|dddddfS | }|d uret|red }|d urm| nd }t|psg }	g }
tjd
 j	d}d}d}|D ]}| j
vrqt |  |<  | }tjd
 j	d}tjd
 j	d}|d ur| |j |k @ }|d ur| |j |k@ }||B }| sq|t| 7 }|t| 7 } j||f  }||d< ||d< tjt|dd j	d}|j| j|d< |j| dd |d< |
| ||	v r!||O }qtj j||f< qt| }|r; j|    |
rFtj|
dd}n
tj|g d d} ||||dfS )Nc                       g | ]	}| j v r|qS r!   columnsrD   colrs  r!   r"   rF   ~      z+flag_out_of_range_dates.<locals>.<listcomp>)tablefieldissue	bad_valuerx  r   )before_min_date_valuesafter_max_date_valuesrows_droppedc                    rw  r!   rx  rz  r|  r!   r"   rF     r}  r\  r]  Findexr~  r  before_min_dateafter_max_dater  c                 S   s   t | s	|  S d S N)rV  ri  	isoformatvr!   r!   r"   <lambda>      z)flag_out_of_range_dates.<locals>.<lambda>r  Tignore_index)emptyrV  	DataFramecopyr_  ri  rj  setSeriesr  ry  ra  notnadtanyrj   sumlocnpwherevaluesapplyru   NaTconcat)rs  rt  rn  ro  r}   ru  rv  
empty_colsavailable_id_cols	drop_colsbad_rows	drop_maskbefore_valuesafter_valuesr{  rY  before_mask
after_maskmaskrecordissuesr  bad_dfr!   r|  r"   flag_out_of_range_datest  s   	








r  c                 C   rZ  r[  )rV  
to_numericr`  r!   r!   r"   r    rb  r  rS   c                 C   s:   | d u rdS t | ttfr| dkS t|   }|dv S )NF   >   1yonyeslatetrue)
isinstancerj   floatr\   rC   r]   )rS   text_valr!   r!   r"   is_yes  s   r  dobref_datedivisorc                 C   s   t | trtj| dd} t |trtj|dd}t | tjr%tj| dd} t |tjr2tj|dd}| d u sDt| sD|d u sDt|rFd S ||  j}|dk rQd S tt	|| S )Nr\  r]  r   )
r  r\   rV  r_  r  
datetime64ri  daysrj   floor)r  r  r  
delta_daysr!   r!   r"   compute_age_years  s   

$
r  agebandsc                 C   sR   | d u s	t | rd S |D ]\}}||   kr|kr&n q| d|   S qd S )N-)rV  ri  )r  r  lowhighr!   r!   r"   assign_age_band  s   r  saltc                 C   s6   t | }|d u r
d S | d| d}t| S )N:r'   )canonicalize_id_valueencodehashlibsha256	hexdigest)rS   r  	canonicalr^   r!   r!   r"   hash_id  s
   r  c                 C   s   | d u rd S t | rd S t| ttjfrtt| S t| ttjfr<t	| r*d S t| 
 r6tt| S t| dS t|  }|dksL| dv rNd S zt|}|| krdt|tddW S W |S  typ   Y |S w )Nz.15grW   >   nannonenullr  f)rV  ri  r  rj   r  integerr\   r  floatingisnan
is_integerrC   r]   r   to_integral_valuer   quantizer   )rS   r   numberr!   r!   r"   r    s2   

r  fieldsc                    s:   |   }|D ]}||jv r||  fdd||< q|S )Nc                    
   t |  S r  )r  r  r  r!   r"   r       
 zmask_ids.<locals>.<lambda>)r  ry  r  )rs  r  r  maskedr  r!   r  r"   mask_ids  s   
r  c                 C   s   | d u rd S t | tjtjtjfrt| rd S |  S t | tj	tj
fr0t| r,d S |  S t | trAt| s?t| rAd S t| rHd S | S r  )r  rV  	Timestampr  datetimedateri  r  r  r  r  itemr  isinf)rS   r!   r!   r"   make_json_safe   s   


r     r   fields_to_decryptrA   
batch_sizec              	   C   s`  | r|s| dfS t j|dd}t j|s td| | dfS g }d}tdt| |D ]}| |||  }dd |D }	t	|	|d}
zt
jd|g|
d	t
jt
jd
d}W n tyl   td | df Y   S w |jdkrtd|jjd	dd | df  S t|jd	}|dg }|D ]}t|tr|dr|d7 }q|| q,||fS )Nr   toolszlaravel_decryptor.phpzDecryptor not found at %sc                 S   s,   g | ]}t |trd d | D n|qS )c                 S   s   i | ]	\}}|t |qS r!   )r  )rD   keyvalr!   r!   r"   
<dictcomp>F  r}  z7decrypt_rows_via_laravel.<locals>.<listcomp>.<dictcomp>)r  rI   rK   r   r!   r!   r"   rF   E  s    z,decrypt_rows_via_laravel.<locals>.<listcomp>)r   r  phpr'   F)inputstdoutstderrcheckz)PHP binary not found; skipping decryptionzDecryptor failed: %signorer]  r   _decrypt_errorsr  )rL   r$   rw   existsr   warningrangelenjsondumps
subprocessrunr  PIPEFileNotFoundErrorerror
returncoder  decodeloadsr  rJ   r  rI   extend)r   r  rA   r  decryptor_pathdecrypted_rowserror_countibatch
safe_batchpayloadresultoutput
batch_rowsr   r!   r!   r"   decrypt_rows_via_laravel2  sJ   


r  	table_keyencrypted_columns_cfgcolumn_alias_mapc                 C   s   | |g }|r| jr| dfS | |i }g }|D ]}|| jv r&|| q||v r8|| | jv r8|||  q|s?| dfS | jdd}	t|	||\}
}t|
}||fS )Nr   records)orient)rJ   r  ry  ru   to_dictr  rV  r  )rs  r  r  rA   r  encrypted_fields	alias_mapr  r  r   r  r  decrypted_dfr!   r!   r"   decrypt_dataframee  s"   


r  r   
thresholdsc                 C   s   |  d}|  d}|  d}|  d}t|r)||d k dt||  dd fS t|rA||d k dt||  d	|  d
fS t|rY||d k dt||  d|  dfS t|rw| dd}||k dt||  dpt|  dd fS dS )Nr   r   r   r   HbA1cr   twoppr   r   r   r   r   r   rbsr  RBSr   )NNNNN)rJ   rV  r  r  )r   r  r   r   r   r  rbs_thresholdr!   r!   r"   compute_dm_control  sD   










r!  c                 C   s   | j r| S |  } g d}|D ]}|| jv rt| | | |< qg d}|D ]}|| jv r4t| | | |< q%| d jdj | d< | S )N)r   r   r   r   r   rG  r=  )sbpdbpsbp_rawdbp_rawr   r   r   r   r   r  r  r  r   Mvisit_month)r  r  ry  ra  r  r  	to_periodto_timestamp)rs  rt  r{  numeric_colsr!   r!   r"   prepare_followups  s   	

r+  c                 C   sB   | j r| S |  } g d}|D ]}|| jv rt| | | |< q| S )Nr   r   r   r   r   r   r   r   )r  r  ry  ra  )rs  rt  r{  r!   r!   r"   prepare_registers  s   

r-  c                 C   s.   |   } | d | d  | d| d< | S )Nr   r   
patient_id)r  r  r  rJ   r|  r!   r!   r"   compute_patient_ids  s   "r/  	followupsc                 C   s0   | j r| S | jg dg dd}|jdgddS )N)r   r   r   )TFF)	ascendingr   first)subsetkeep)r  sort_valuesdrop_duplicates)r0  	sorted_dfr!   r!   r"   compute_latest_followups  s   r8  seed_df
start_dateend_datec                 C   s\   | j r| S t| d | d< | jdddddg  }|d |k|d |k@ }|j| dS )Nmonth_startFas_indexnew_regsr0  )r  ra  groupbyr  r  r5  )r9  r:  r;  monthlyr  r!   r!   r"   compute_monthly_summary  s   rB  c                 C   s   | j r| S | ddgjddgddd}|d  |d  @ |d	< |d	 |d |d
 k @ |d |d k @ |d< |jdddjdddd}|d |d	 dtj |d< |S )Nr   r   r'  Fr=  r  r"  r#  bp_with_valuesbp_control_sbpbp_control_dbpbp_controlledr   countrC  r  rF  r  )visitsrC  rF  r   bp_control_rate)	r  r5  r@  tailr  aggreplacer  r  )r0  r  	per_monthtrendr!   r!   r"   compute_bp_control_trend  s"   "rR  c                    s   | j r| S | ddgjddgddd}|j fdddd	d
}||g d< |d  |d< |jdddjdddd}|d |d dtj	 |d< |S )Nr   r   r'  Fr=  r  c                    r  r  r!  r   r  r!   r"   r    r  z"compute_dm_trend.<locals>.<lambda>expandaxisresult_typedm_controlleddm_test_useddm_test_valuedm_test_datedm_test_locationr[  dm_with_valuesrG  r`  r  r[  r  )rK  r`  r[  r   dm_control_rate)
r  r5  r@  rM  r  r  rN  rO  r  r  )r0  r  rP  
dm_resultsrQ  r!   rU  r"   compute_dm_trend  s   "re  c              	      s  | j rt S |tjt|d d |tjt|d d |tjt|d d  dtjdtdtjdtfd	d
| d	 fdd}tg d|d 
 |d 
 |d 
 |d 
 |d 
 g|jd gd d}|d |d dtj |d< |S )Nhba1c_lookback_months)monthskidney_lookback_monthscvd_risk_lookback_monthsgroupr{  cutoffr   c                 S   s(   | j | d |k| |  @  jd dkS )Nr   r   )r  r  shape)rj  r{  rk  r!   r!   r"   
has_recent+  s   (z+compute_quality_metrics.<locals>.has_recentr   c              
      s>   t | d| d| d| d| d dS )Nr   r  r  r   r  hba1c_recentcreatinine_recentcrcl_recenturing_ac_ratio_recentcvd_risk_recent)rV  r  )g
cvd_cutoffrm  	hb_cutoffkidney_cutoffr!   r"   r  /  s    




z)compute_quality_metrics.<locals>.<lambda>rn  ro  rp  rq  rr  rs  r      metricrH  total_patientsrH  r|  rate)r  rV  r  
DateOffsetrj   r\   r  boolr@  r  r  rl  rO  r  r  )r0  rd  r  qualitysummaryr!   ru  r"   compute_quality_metrics  s,   





r  	registersdecrypt_failuresfuture_date_countsc                 C   s8  g }| j s9|dt| d |dd| jv r| d   ndd |dd| jv r4| d   ndd |j sp|dt|d |d	|d
dg  jdd d |d|d   |d
    d |	 D ]\}}|d| |d qt|r|	 D ]\}}|||d qt
|S )Nregister_rowsr{  rS   register_missing_dobr   r   register_duplicate_pidr   followup_rowsfollowup_missing_bpr"  r#  r  rX  followup_invalid_bpr$  decrypt_failures_)r  ru   r  ry  ri  r  
duplicatedr  r  rK   rV  r  )r  r0  r  r  metricsr  rS   r!   r!   r"   compute_data_qualityP  sN   


r  r   aliases	stopwordsc           	      C   s   |    }|dv rd S tdd|}td|}dd |D }d }dd |D }|| D ]}||v r4q-t|d	krDtd
d |D sDq-|} |sKd S |||S )N>   rY   r  nilr  z\b([a-z])\s+(\d+)\bz\1\2z	[a-z]+\d*c                 S   s   h | ]}|  qS r!   )r]   )rD   wr!   r!   r"   	<setcomp>  r   z-normalize_medication_label.<locals>.<setcomp>c                 S   s"   g | ]}t d d |D r|qS )c                 s       | ]}|  V  qd S r  isdigitrD   chr!   r!   r"   	<genexpr>      z8normalize_medication_label.<locals>.<listcomp>.<genexpr>)r  )rD   wordr!   r!   r"   rF     s   " z.normalize_medication_label.<locals>.<listcomp>r  c                 s   r  r  r  r  r!   r!   r"   r    r  z-normalize_medication_label.<locals>.<genexpr>)r]   rC   resubfindallr  r  rJ   )	r   r  r  loweredwordsstop_set	candidatedigit_tokensr  r!   r!   r"   normalize_medication_label  s$   r  c                 C   s   | d u s	t | rg S t| }|dddd}|dd}tdd|}tdd|}tdd|}d	d
 |dD }g }|D ]}|sGqBt|||}|rT|| qB|S )NrG   rr   * zw\b\d+\s*(od|bd|tid|qid|qd|qod|hs|qhs|bid|daily|day|days|wk|wks|week|weeks|weks|mo|mos|month|months|yr|yrs|year|years)\bz%\b\d+\s*(mg|g|mcg|ml|iu|unit|units)\bz[;/|]+c                 S   s   g | ]}t d d| qS )z\s+r  )r  r  rC   )rD   partr!   r!   r"   rF     s    z-extract_medication_tokens.<locals>.<listcomp>)	rV  ri  r\   rO  r  r  rN   r  ru   )rS   r  r  r   partstokensr  labelr!   r!   r"   extract_medication_tokens  s,   
r  other_med_cfgc                 C   s   | j sd| jvrtjg ddS dd |dpi  D }|dp$g }g }| d  D ]}|t||| q-|sCtjg ddS t	|
  }dd	g|_|d	 |d	   |d
< |S )Nr8  )
medicationrH  share_of_mentionsrx  c                 S   s.   i | ]\}}|r|rt | t | qS r!   )r\   r]   )rD   kr  r!   r!   r"   r    s    z2compute_other_med_distribution.<locals>.<dictcomp>r  r  r  rH  r  )r  ry  rV  r  rJ   rK   dropnar  r  r  value_countsreset_indexr  )r0  r  r  r  r  rS   countsr!   r!   r"   compute_other_med_distribution  s   
r  latest_followupsmonthly_seedc                    sr  |d |d  t | } t |}t |}d| jv r"t| d | d< ntj| d< | j fdddd| d	< t| d  d
d | d< | d | j	| d	 
 | d  @ d	f< d| j	| d	 
 | d  @ df< | d | j	| d	 
 | d  @ d	f< d| j	| d	 
 | d  @ df< |j| g d dddd}|j fdddd|d< |d |j	|d 
 |d  @ df< |j fdddd|d< |d	 |j	|d 
 |d	  @ df< |d  fdd|d< |d  |d  @ |d< |d |d d k @ |d d k @ |d< |jfdddd d!}||g d"< |d#  |d$< t||}t|}	t|}
tjtd% d& }|d'  |d' |k@ |d(< tjtd) d& }|d' 
 |d' |k B |d*< |d+  |d+ k @ |d,< |d- t|d. tB |d/< tg d0|d(  |d*  |d,  |d/  g|jd1 gd2 d3}|d4 |d5 d1tj |d6< t|}d7td8tffd9d:}|d; ||d<< |d=  |d= d> k@ |d?  |d? d@ k @ B |dA  |dA dB k@ B |dC< |dD t|dE< |dF t|dG< |dH t|dI< tg dJ|d<  |dC  |dE  |dG  |dI  g|jd1 gdK d3}|d4 |d5 d1tj |d6< |dL t|dM< tdLg|dM  g|jd1 gd3}|d4 |d5 d1tj |d6< |dN jdOdP }dQd4g|_dN|dR< |dS jdOdP }dQd4g|_dS|dR< tj||gdTdU}|dVi }t||}|jg dWdOdPj dXdYdZd[ }|d |d d1tj |d\< |jg dWdOdPj dXd]d^d_ }|d# |d$ d1tj |d`< |j	|d df ! }|j	|d$ d#f ! }tg da|jd1 |d(  |||d* ! gdb}| ||||	|
|||||||||dcS )dNr  r  r   date_of_birthc                       t | d| d d S )Nr  r   year_divisorr  rJ   r&   age_cfgr!   r"   r        z,compute_metrics_for_clinic.<locals>.<lambda>r  r  
age_at_regr  
age_sourcer   r   )r   r  r   r   r   r   r  r   left)rW   _reg)r  howsuffixesc                    r  )Nr  r   r  r  r  r  r!   r"   r    r  age_at_visitc                    s   t | d d S )Nr  r  r  r  )r  rd  r!   r"   r    r  age_at_report_endc                    s   t |  d S )Nr  )r  )ar  r!   r"   r    s    age_bandr"  r#  rC  rD  rE  rF  c                    r  r  rS  rT  rU  r!   r"   r    r  rV  rW  rZ  r[  r`  active_days)r  r   active_patient	ltfu_daysltfurG  missed_appointmentr   r   late_visit_flag)active_caseloadr  missed_appointmentsr   r      rz  rH  r|  r}  r  r   c              	      sL   t | rdS zt| t d kW S  ttfy%   dt|  v  Y S w )NFcvd_risk_highr  )rV  ri  r  	TypeError
ValueErrorr\   r]   )r  rU  r!   r"   cvd_high1  s   
z,compute_metrics_for_clinic.<locals>.cvd_highr  r  r  creatinine_highr  crcl_lowr   uring_ac_ratio_high
ckd_markerr  diabetic_foot_flagr   neuropathy_flagr4  hypoglycemia_flag)r  r  r  
neuropathyhypoglycemiary  r"  med_changed_flagr#  F)r  categoryr{  r%  Tr  r6  )r   r  r   rG  rJ  rI  )r   rF  rC  rL  rb  ra  )r   r[  r`  rc  )r   r  rL  rc  	ltfu_rater  )r  r0  r  monthly_summarybp_trenddm_trend
continuityr  risk
operationsoperations_distother_med_dist	equity_bp	equity_dmkpi_summary)"r/  ry  ra  rV  r  r  r  r  r  r  ri  mergerB  rR  re  	Timedeltarj   r  r  r  rl  rO  r  r  r   r  r  r  r  rJ   r  r@  rN  mean)r  r0  r  r  r/   rc  rd  rd  r  r  r  active_cutoffltfu_cutoffr  r  r  r  r  adherence_distdrug_supply_distr  r  r  r  r  bp_ratedm_rater  r!   )r  rd  r  r"   compute_metrics_for_clinic  s*  	


&"&"

&
&



 














r  c                 C   s   t j|}|r!t j|dd zt |d W n	 ty    Y nw z	| j|dd W n tyE   t j|r<t 	| | j|dd Y nw z	t |d W d S  tyY   Y d S w )NTexist_oki  Fr  i  )
rL   r$   dirnamemakedirschmodOSErrorto_csvPermissionErrorr  remove)rs  r$   parentr!   r!   r"   
export_csv  s(   
r  c            4      C   s  t jdd} | jdddd | jddd	d
 |  }td tt|j}t	|
di 
dd t|d d }t|d d }t|||\}}|}|d urX||krX|}|
di 
dd}tj|dd |
di }	|d }
g }g }g }g }g }g }g }g }g }|d D ]}td| t|d |}t||j tt|d}tt|d}t|d}dddd}t|d|	|
t\}|d< t|d|	|
t\}|d< t|}t|}g d }g d!}t||||d"g d#d$gd%\}}}t||||dg d&d'gd%\}}}t|} t| } ||d(< ||d(< || d(< tj||gdd)}!|!js,||!d(< ||! t||| ||||}"t ||||d urE|! " nd |! " |d* |d+ |d, |d* |d+ |d, d-}#tj#$||}$tj|$dd |
d.i 
d/d}%|
d.i 
d0d1}&|%rt%|"d2 g d3|&n|"d2 }'|%rt%|"d4 g d3|&n|"d4 }(|%rt%|"d5 g d3|&n|"d5 })t&|'tj#$|$d6 t&|(tj#$|$d7 t&|)tj#$|$d8 t&|"d9 tj#$|$d: t&|"d; tj#$|$d< t&|"d= tj#$|$d> t&|"d? tj#$|$d@ t&|"dA tj#$|$dB t&|"dC tj#$|$dD t&|"dE tj#$|$dF t&|"dG tj#$|$dH t&|"dI tj#$|$dJ t&|"dK tj#$|$dL t&|"dM tj#$|$dN t&|"dO tj#$|$dP t&|#tj#$|$dQ |!jstj#$|$dR}*tj|*dd t&|!tj#$|*dS tj#$|$dT}+tj|+dd |
d.i 
dUd},|,r|"d5 n|)}-|-j'|-dV |-dW B g dXf }.t&|.tj#$|+dY ||"d2  ||"d4  ||"d5  ||"d9 j(|dZ ||"dA j(|dZ ||"dK j(|dZ ||"dM j(|dZ ||"dO j(|dZ q|st)d[ d\S tj|dd)}/tj|dd)}0tj|dd)}1t* rht|/|0|1tj|dd)|||}2tj#$|d]}3tj|3dd |
d.i 
d/d}%|
d.i 
d0d1}&|%rrt%|2d2 g d3|&n|2d2 }'|%rt%|2d4 g d3|&n|2d4 }(|%rt%|2d5 g d3|&n|2d5 })t&|'tj#$|3d6 t&|(tj#$|3d7 t&|)tj#$|3d8 t&|2d9 tj#$|3d: t&|2d; tj#$|3d< t&|2d= tj#$|3d> t&|2d? tj#$|3d@ t&|2dA tj#$|3dB t&|2dC tj#$|3dD t&|2dE tj#$|3dF t&|2dG tj#$|3dH t&|2dI tj#$|3dJ t&|2dK tj#$|3dL t&|2dM tj#$|3dN t&|2dO tj#$|3dP t&tj|dd)tj#$|3d^ |rgt&tj|dd)tj#$|3dS ntd_ td`| dS )aNzNCD analytics pipeline)descriptionz--configTzPath to config.yaml)requiredhelpz--viewszmetrics_views.sqlzPath to metrics_views.sql)defaultr  r   r   r   r    
date_ranger:  r;  outputsrootr  encrypted_columnsrA   rH   zProcessing %sr0   r   r   r   r   r   r   r,  )r   r   r   r   r   r=  r   )r   r   r.  r   r   )rv  )r   r   r.  r   r   r   	source_dbr  r  r  r  )rf  rg  register_before_min_valuesregister_after_max_valuesregister_rows_droppedfollowup_before_min_valuesfollowup_after_max_valuesfollowup_rows_droppedprivacyr  id_saltrW   r  )r   r.  r   r0  r  zregisters_clean.csvzfollowups_clean.csvzpatient_latest.csvr  zmonthly_summary.csvr  zbp_control_trend.csvr  zdm_control_trend.csvr  zcontinuity_metrics.csvr  zquality_metrics.csvr  zrisk_metrics.csvr  zoperations_metrics.csvr  zoperations_distributions.csvr  zother_medications.csvr  zequity_bp_control.csvr  zequity_dm_control.csvr  zkpi_summary.csvzdata_quality_report.csvre  zinvalid_date_ranges.csvdoctorkeep_full_ids_in_doctor_listsr  r  )r   r.  r   r   r   r   r   rG  r  r  rE  zdoctor_action_lists.csv)r  zANo data processed. Check configuration and database connectivity.r  overallzkpi_summary_by_clinic.csvz.Skipping outputs/overall refresh for this run.zDone. Outputs in %s)+argparseArgumentParseradd_argument
parse_argsrL   umaskrU   r.   r/   r#   rJ   rV  r_  rr  r  r   inform   rT  viewsr-  rX  r+  r  VIEW_ENCRYPTED_ALIASr/  r  r8  r  r  ru   r  r  r  r  r$   rw   r  r  r  assignr  r_   )4parserargsr/   rc  rd  rf  rg  effective_report_startoutputs_rootencrypted_cfgrA   all_registersall_followups
all_latestall_monthlyall_qualityall_equity_bpall_equity_dmall_kpisall_invalid_datesrE   rP  r  r0  r  r  register_date_colsfollowup_date_colsbad_registersreg_date_countsbad_followupsfollowup_date_countsr  invalid_datesr  quality_report
clinic_dirmask_ids_enabledr  stake_registersstake_followupsstake_latestdq_dir
doctor_dirkeep_full_idsdoctor_latestdoctor_listscombined_registerscombined_followupscombined_latestcombined_metricsoverall_dirr!   r!   r"   main  s  


			


"""

"""
rH  __main__r  )r  )Vr  r  r  r  r  r   rL   r  r  sysdecimalr   r   dataclassesr   typingr   r   r   r   r	   r
   numpyr  pandasrV  r*   
sqlalchemyr   sqlalchemy.enginer   sqlalchemy.engine.urlr   r   r!  REQUIRED_VIEWSr\   r#   r.   rU   r  r_   rm   ro   r|   r   r   rO  rT  r  rX  r  ra  r  rr  rj   r  r  r  r  r  r  r  r  r  r  r  r  r!  r+  r-  r/  r8  rB  rR  re  r  r  r  r  r  r  r  rH  __name__exitr!   r!   r!   r"   <module>   sd   "$/ 



V*	&	"
3
D*'""	"

5

4







 M  
