PrCore Documentation
AI Assistant GitHub Toggle Dark/Light/Auto mode Toggle Dark/Light/Auto mode Toggle Dark/Light/Auto mode Back to homepage

Ongoing Dataset Example

Here you can find a Python script that automatically handles the uploading of training data and test data files, defining the project, and getting the results.

flowchart TB
    upload(Upload the event log file and test log) --> set(Set the columns definition)
    set --> create(Create the project)
    create --> get(Get the result)

Prerequisites

Before you start, make sure you have the following packages installed:

python3 -m venv ./venv
./venv/bin/pip install requests

Example script

You can also download the script from here.

The script expects the uploaded event log to be this one and the test event log to be this one, unless you manually modify the columns definition, etc. The latter contains only ongoing cases.

Please change EVENT_LOG_FILE (and TEST_FILE) so that they point to the correct paths of your local event log files.

Warning
The outcome and treatment of the project defined in the example script are only for demonstration purposes. They don’t represent the actual outcome and treatment of the domain, so you can modify them to fit your needs.
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import json
import pprint
import requests
from requests import Response
from time import sleep

# Local paths of the training event log and of the test log (ongoing cases);
# change these to point at your own files
EVENT_LOG_FILE = "./bpic2012-CSV.zip"
TEST_FILE = "./bpic2012-ongoing-CSV.zip"

BASE_URL = "http***********"  # Please change this to your local instance address
# Bearer token placed in the Authorization header of every request
API_TOKEN = "UaJW0QvkMA1cVnOXB89E0NbLf3JRRoHwv2wWmaY5v=QYpaxr1UD9/FupeZ85sa2r"

# Headers for requests that do not declare a JSON content type (upload, result polling)
HEADERS = dict(Authorization=f"Bearer {API_TOKEN}")
# Same authorization header plus an explicit JSON content type
REQUEST_HEADERS = {**HEADERS, "Content-Type": "application/json"}


def upload_file() -> Response:
    """Upload the event log and the test log to the server in one request.

    Both archives are sent as multipart form fields (``file`` and ``test``)
    together with a ``separator`` form value of ``","``.

    Returns:
        The response to the POST on ``{BASE_URL}/event_log``.
    """
    url = f"{BASE_URL}/event_log"
    # Open both archives in a ``with`` block so the file handles are closed
    # once the request has been sent, even if the request raises.
    with open(EVENT_LOG_FILE, "rb") as event_log, open(TEST_FILE, "rb") as test_log:
        files = [
            ("file", ("bpic2012-CSV.zip", event_log, "application/zip")),
            ("test", ("bpic2012-ongoing-CSV.zip", test_log, "application/zip")),
        ]
        return requests.post(url, files=files, headers=HEADERS, data={"separator": ","})


def set_columns_definition(event_log_id) -> Response:
    """Send the columns definition for an uploaded event log.

    Args:
        event_log_id: Identifier of the event log, used in the request URL.

    Returns:
        The response to the PUT on ``{BASE_URL}/event_log/{event_log_id}``.
    """
    # Maps each column name of the log to the column type reported to the server
    column_types = {
        "Case ID": "CASE_ID",
        "start_time": "START_TIMESTAMP",
        "end_time": "END_TIMESTAMP",
        "AMOUNT_REQ": "NUMBER",
        "REG_DATE": "DATETIME",
        "Activity": "ACTIVITY",
        "Resource": "RESOURCE",
    }
    return requests.put(
        f"{BASE_URL}/event_log/{event_log_id}",
        json={"columns_definition": column_types},
        headers=REQUEST_HEADERS,
    )


def create_project(event_log_id) -> Response:
    """Create a project for an event log with a fixed outcome and treatment.

    The positive outcome is "Activity EQUAL A_APPROVED" and the treatment is
    "Activity EQUAL O_SENT_BACK".

    Args:
        event_log_id: Identifier of the event log the project is built on.

    Returns:
        The response to the POST on ``{BASE_URL}/project``.
    """

    def activity_equals(activity):
        # One condition group holding a single "Activity EQUAL <activity>" condition
        return [[{"column": "Activity", "operator": "EQUAL", "value": activity}]]

    payload = {
        "event_log_id": event_log_id,
        "positive_outcome": activity_equals("A_APPROVED"),
        "treatment": activity_equals("O_SENT_BACK"),
    }
    return requests.post(f"{BASE_URL}/project", json=payload, headers=REQUEST_HEADERS)


def get_result(project_id, result_key) -> Response:
    """Fetch the current result of a project.

    Args:
        project_id: Identifier of the project, used in the request URL.
        result_key: Result key of the project, used in the request URL.

    Returns:
        The response to the GET on
        ``{BASE_URL}/project/{project_id}/result/{result_key}``.
    """
    return requests.get(
        f"{BASE_URL}/project/{project_id}/result/{result_key}",
        headers=HEADERS,
    )


def main():
    """Run the whole workflow and print progress to stdout.

    Steps: upload the event log and test log, set the columns definition,
    create the project, then poll the result endpoint once per second until
    the response contains a non-empty ``cases`` entry, and pretty-print the
    first case.

    Any HTTP error status (via ``raise_for_status``) or other exception is
    reported as ``Error: ...``; Ctrl-C is reported as an interruption. In all
    cases the function finishes by printing ``Done!``.
    """
    print("\nStaring the client...\n")

    try:
        # Upload the event log file
        print("Uploading the event log file...")
        response = upload_file()
        response.raise_for_status()
        event_log_id = response.json()["event_log_id"]
        print(f"Event log {event_log_id} has been uploaded!\n")

        # Set the columns definition
        print("Setting the columns definition...")
        response = set_columns_definition(event_log_id)
        response.raise_for_status()
        print("The columns definition has been set!\n")

        # Create the project
        print("Creating the project...")
        response = create_project(event_log_id)
        response.raise_for_status()
        created = response.json()  # parse the body once, read both fields from it
        project_id = created["project"]["id"]
        result_key = created["result_key"]
        print(f"Project {project_id} has been created!\n")

        # Poll the result until cases are available
        print("Getting the project status...\n")
        i = 1
        while True:
            response = get_result(project_id, result_key)
            response.raise_for_status()
            result = response.json()  # parse the body once per poll
            cases = result["cases"]

            if cases:
                break

            project_status = result["project_status"]
            expected_plugins = result["expected_plugins"]
            finished_plugins = result["finished_plugins"]

            if not finished_plugins:
                print(f"[{i:03d}] - Now the project status is {project_status}")
            else:
                # Sort the pending plugins so the message is stable between polls
                # (a bare set difference has no defined order).
                pending_plugins = sorted(set(expected_plugins) - set(finished_plugins))
                print(f"[{i:03d}] - We have got results from {', '.join(finished_plugins)}, "
                      f"and we are still waiting for {', '.join(pending_plugins)}.")

            sleep(1)
            i += 1

        print("\nWe have got all results!\n")

        print("Here is the first case:\n")
        # ``cases`` is non-empty here (the loop only exits on a truthy value),
        # so taking the first value cannot raise StopIteration.
        pprint.pprint(next(iter(cases.values())))
    except KeyboardInterrupt:
        print("Interrupted by user\n")
    except Exception as e:
        print(f"Error: {e}\n")

    print("\nDone!\n")


# Run the workflow only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()

Running the script

To run the script, simply execute the following command:

./venv/bin/python workflow-example.py

Example output

Here is a snippet of the output of the script:

Staring the client...

Uploading the event log file...
Event log 34 has been uploaded!

Setting the columns definition...
The columns definition has been set!

Creating the project...
Project 27 has been created!

Getting the project status...

[001] - Now the project status is PREPROCESSING
[002] - Now the project status is PREPROCESSING
[003] - Now the project status is PREPROCESSING
[004] - Now the project status is PREPROCESSING
[005] - Now the project status is PREPROCESSING
[006] - Now the project status is PREPROCESSING
[007] - Now the project status is PREPROCESSING
[008] - Now the project status is PREPROCESSING
[009] - Now the project status is PREPROCESSING
[010] - Now the project status is PREPROCESSING
[011] - Now the project status is PREPROCESSING
[012] - Now the project status is PREPROCESSING
[013] - Now the project status is PREPROCESSING
[014] - Now the project status is PREPROCESSING
[015] - Now the project status is PREPROCESSING
[016] - Now the project status is WAITING
[017] - Now the project status is PREPROCESSING
[018] - Now the project status is PREPROCESSING
[019] - Now the project status is PREPROCESSING
[020] - Now the project status is PREPROCESSING
[021] - Now the project status is PREPROCESSING
[022] - Now the project status is PREPROCESSING
[023] - Now the project status is PREPROCESSING
[024] - Now the project status is PREPROCESSING
[025] - Now the project status is PREPROCESSING
[026] - Now the project status is PREPROCESSING
[027] - Now the project status is PREPROCESSING
[028] - Now the project status is PREPROCESSING
[029] - Now the project status is PREPROCESSING
[030] - Now the project status is PREPROCESSING
[031] - Now the project status is PREPROCESSING
[032] - Now the project status is PREPROCESSING
[033] - Now the project status is PREPROCESSING
[034] - Now the project status is PREPROCESSING
[035] - Now the project status is PREPROCESSING
[036] - Now the project status is TRAINING
[037] - Now the project status is TRAINING
[038] - Now the project status is TRAINING
[039] - Now the project status is TRAINING
[040] - Now the project status is TRAINING
[041] - Now the project status is TRAINING
[042] - Now the project status is TRAINING
[043] - Now the project status is TRAINED
[044] - Now the project status is TRAINED
[045] - Now the project status is TRAINED
[046] - Now the project status is TRAINED
[047] - We have got results from plugin-random-forest-alarm, 
and we are still waiting for plugin-causallift-treatment-effect, plugin-knn-next-activity.
[048] - We have got results from plugin-random-forest-alarm, plugin-knn-next-activity, 
and we are still waiting for plugin-causallift-treatment-effect.

We have got all results!

Here is the first case:

{'events': [['173688',
             '2011-09-30 22:38:44.546',
             '2011-09-30 22:38:44.546',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'A_SUBMITTED',
             '112'],
            ['173688',
             '2011-09-30 22:38:44.880',
             '2011-09-30 22:38:44.880',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'A_PARTLYSUBMITTED',
             '112'],
            ['173688',
             '2011-09-30 22:39:37.906',
             '2011-09-30 22:39:37.906',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'A_PREACCEPTED',
             '112'],
            ['173688',
             '2011-10-01 09:36:46.437',
             '2011-10-01 09:45:13.917',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'W_Completeren aanvraag',
             'nan'],
            ['173688',
             '2011-10-01 09:42:43.308',
             '2011-10-01 09:42:43.308',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'A_ACCEPTED',
             '10862'],
            ['173688',
             '2011-10-01 09:45:09.243',
             '2011-10-01 09:45:09.243',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'O_SELECTED',
             '10862'],
            ['173688',
             '2011-10-01 09:45:09.243',
             '2011-10-01 09:45:09.243',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'A_FINALIZED',
             '10862'],
            ['173688',
             '2011-10-01 09:45:11.197',
             '2011-10-01 09:45:11.197',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'O_CREATED',
             '10862'],
            ['173688',
             '2011-10-01 09:45:11.380',
             '2011-10-01 09:45:11.380',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'O_SENT',
             '10862'],
            ['173688',
             '2011-10-01 10:15:41.290',
             '2011-10-01 10:17:08.924',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'W_Nabellen offertes',
             'nan'],
            ['173688',
             '2011-10-08 14:26:57.720',
             '2011-10-08 14:32:00.886',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'W_Nabellen offertes',
             '10913'],
            ['173688',
             '2011-10-10 09:32:22.495',
             '2011-10-10 09:33:05.791',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'W_Nabellen offertes',
             '11049'],
            ['173688',
             '2011-10-10 09:33:03.668',
             '2011-10-10 09:33:03.668',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'O_SENT_BACK',
             '11049'],
            ['173688',
             '2011-10-13 08:05:26.925',
             '2011-10-13 08:37:37.026',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'W_Valideren aanvraag',
             '10629'],
            ['173688',
             '2011-10-13 08:37:29.226',
             '2011-10-13 08:37:29.226',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'A_REGISTERED',
             '10629'],
            ['173688',
             '2011-10-13 08:37:29.226',
             '2011-10-13 08:37:29.226',
             '20000',
             '2011-09-30T22:38:44.546Z',
             'A_APPROVED',
             '10629']],
 'prescriptions': [{'date': '2023-03-04T17:10:48.137653',
                    'output': 0.5606,
                    'plugin': {'accuracy': 0.5409,
                               'f1_score': 0.5284,
                               'model': 'count-encoding',
                               'name': 'Random forest negative outcome '
                                       'probability',
                               'precision': 0.5241,
                               'recall': 0.5409},
                    'type': 'ALARM'},
                   {'date': '2023-03-04T17:10:48.391516',
                    'output': 'A_ACTIVATED',
                    'plugin': {'accuracy': 0.8495,
                               'f1_score': 0.8461,
                               'model': 'count-encoding',
                               'name': 'KNN next activity prediction',
                               'precision': 0.8534,
                               'recall': 0.8495},
                    'type': 'NEXT_ACTIVITY'},
                   {'date': '2023-03-04T17:10:50.202392',
                    'output': {'cate': 0.6826,
                               'proba_if_treated': 0.6827,
                               'proba_if_untreated': 0.0001,
                               'treatment': [[{'column': 'Activity',
                                               'operator': 'EQUAL',
                                               'value': 'O_SENT_BACK'}]]},
                    'plugin': {'model': 'count-encoding',
                               'name': 'CasualLift treatment effect'},
                    'type': 'TREATMENT_EFFECT'}]}
Done!