| | |
| | import yaml |
| | import pickle |
| | import numpy as np |
| | import pandas as pd |
| |
|
| | from lifelines import LogLogisticAFTFitter |
| | from KaplanMeierEstimator import KaplanMeierEstimator |
| |
|
def load_config(config_path):
    '''Read a YAML configuration file and return its parsed content.

    Args:
        config_path: Path of the YAML file; it is opened read-only in text mode.

    Returns:
        The object produced by ``yaml.safe_load`` for the file content.
    '''
    with open(config_path, 'r') as stream:
        # safe_load is used, so only plain YAML tags are constructed.
        return yaml.safe_load(stream)
| |
|
def save_object_by_pickle(path, saved_obj):
    '''Serialize ``saved_obj`` with pickle and write the bytes to ``path``.

    Args:
        path: Destination file. It is opened in binary write mode, so an
            existing file at this location is overwritten.
        saved_obj: The object to serialize; it must be picklable.

    Raises:
        Any exception raised by ``pickle.dumps`` (object not picklable) or by
        opening / writing the file (``OSError``) is propagated to the caller.
    '''
    # Serialize first: if pickling fails, the destination file is never
    # opened, so a previous file is not truncated and no empty file is left.
    payload = pickle.dumps(saved_obj)

    # The context manager closes the handle even when the write raises,
    # which the manual open()/close() pair did not guarantee.
    with open(path, 'wb') as fd:
        fd.write(payload)
| |
|
def example_how_to_train_survival_models(config):
    '''Show, on a toy dataset, how survival models are trained and stored.

    This is only a template: train YOUR own models and then REMOVE this
    function.

    Three Kaplan-Meier estimators (males, females, everyone) and one
    log-logistic AFT model are fitted on ten hard-coded patients; every
    resulting object is then pickled to a path taken from ``config``.

    Args:
        config: Mapping providing the output paths under the keys
            'path_kaplan_meier_males', 'path_kaplan_meier_females',
            'path_kaplan_meier_both' and 'path_aft'.
    '''
    # Toy data: one tuple per patient, in the order given by `columns`.
    columns = [
        'id', 'sex', 'age', 'has_cancer',
        'visit_num', 'has_D00_D48', 'diagnosis_prop',
    ]
    patients = [
        (0, 'M', 48, 0, 12, 0, 0.72),
        (1, 'M', 59, 1, 31, 0, 0.46),
        (2, 'M', 64, 0, 22, 1, 0.53),
        (3, 'M', 67, 1, 25, 1, 0.58),
        (4, 'M', 72, 0, 18, 0, 0.63),
        (5, 'F', 52, 0, 27, 0, 0.68),
        (6, 'F', 61, 0, 32, 1, 0.62),
        (7, 'F', 66, 1, 38, 0, 0.44),
        (8, 'F', 69, 1, 35, 1, 0.38),
        (9, 'F', 75, 0, 33, 1, 0.63),
    ]
    dataset = pd.DataFrame(patients, columns=columns).set_index('id')

    def fit_kaplan_meier(frame):
        # Age is used as the duration T; C is the complement of the cancer
        # flag (presumably the censoring indicator - verify against the
        # KaplanMeierEstimator implementation).
        return KaplanMeierEstimator(T=frame['age'], C=1 - frame['has_cancer'])

    # Kaplan-Meier estimators: one per sex and one for the whole sample.
    km_males = fit_kaplan_meier(dataset.loc[dataset['sex'] == 'M'])
    km_females = fit_kaplan_meier(dataset.loc[dataset['sex'] == 'F'])
    km_both = fit_kaplan_meier(dataset)

    # Training frame for the AFT model: 'sex' becomes 1 for 'M' and 0
    # otherwise, and 'has_cancer' is replaced by its complement.
    # NOTE(review): the complemented column is passed as `event_col` below;
    # lifelines documents event_col as 1 = event observed, so this may be
    # inverted - confirm the intended convention before reusing this code.
    train = dataset.assign(
        has_cancer=1 - dataset['has_cancer'],
        sex=dataset['sex'].eq('M').astype('int64'),
    )

    fitter = LogLogisticAFTFitter(alpha=0.05, fit_intercept=True)
    aft = fitter.fit(train, duration_col='age', event_col='has_cancer')

    # The fitted model is stored together with the covariate names.
    aft_obj = {
        'model': aft,
        'covariates': ['sex', 'visit_num', 'has_D00_D48', 'diagnosis_prop'],
    }

    # Persist every trained object at the location configured for it.
    outputs = (
        ('path_kaplan_meier_males', km_males),
        ('path_kaplan_meier_females', km_females),
        ('path_kaplan_meier_both', km_both),
        ('path_aft', aft_obj),
    )
    for config_key, trained in outputs:
        save_object_by_pickle(config[config_key], trained)
| |
|
| | |
if __name__ == '__main__':
    # The configuration path is relative to the current working directory.
    config = load_config('./CONFIG_CanSave.yaml')

    # Train the example models and write them to the configured paths.
    example_how_to_train_survival_models(config)
| |
|