Machine Learning Functions in Expression Language: Difference between revisions
Jump to navigation
Jump to search
(Created page with "This pages describes functions and properties that are related to the machine learning functionality in the QPR ProcessAnalyzer expression language. == MLModel == {| class="w...") |
No edit summary |
||
Line 35: | Line 35: | ||
Returns the trained MLModel object itself. | Returns the trained MLModel object itself. | ||
|- | |||
||Transform | |||
|| | |||
|| | |||
Transforms the given input data using the machine learning model, thus generating predictions/classifications. | |||
Parameters: | |||
* '''input data''': Two-dimensional array of data where: | |||
** The first dimension (rows) specifies different data points. | |||
** The second dimension (columns) specifies the feature values. | |||
Returns an array of predictions/classifications. The prediction for each row of the input data is found at the same index in the returned array. | |||
|} | |} | ||
== Examples == | |||
<pre> | |||
Example #1: Train a model using an event log and test its performance by replaying training data itself. | |||
Def("GetOneHotColumnInformation", ( | |||
Let("el", _), | |||
ToDictionary([ | |||
"et": OrderByValue(el.EventTypes), | |||
"at": ToDictionary(ConcatTop(OrderByTop(el.CaseAttributes, Name).[_: Values])) | |||
]) | |||
)); | |||
Def("GenerateOneHot", "cases", ( | |||
Let("columnInformation", _), | |||
cases.( | |||
Let("cas", _), | |||
Flatten( | |||
[ | |||
columnInformation.Get("et").(Let("et", _), If(Count(cas.EventsByType(et)) > 0, 1, 0)), | |||
( | |||
Let("atColumns", columnInformation.Get("at")), | |||
OrderByValue(atColumns.Keys).( | |||
Let("key", _), | |||
Let("values", atColumns.Get(key)), | |||
Let("caseValue", cas.Attribute(key)), | |||
values.(If(_ == caseValue, 1, 0)) | |||
) | |||
) | |||
] | |||
) | |||
) | |||
)); | |||
Let("el", EventLogById(1)); | |||
Let("columnInformation", el.GetOneHotColumnInformation()); | |||
Let("allCases", el.Cases); | |||
Let("allCasesOH", columnInformation.GenerateOneHot(el.Cases)); | |||
Let("trainDataOH", allCasesOH); | |||
Let("outcomes", allCases.(Duration > TimeSpan(24))); | |||
Let("testDataOH", allCasesOH); | |||
Let("predictions", | |||
MLModel("randomforest") | |||
.Train(trainDataOH, outcomes) | |||
.Transform(trainDataOH)); | |||
Sum(Zip(outcomes, predictions).(_[0] == _[1] != 0)) / Count(outcomes) | |||
Example #2: Train a model using a 75% sample of an event log and test its performance using the remaining 25% of the event log. | |||
Def("GetOneHotColumnInformation", ( | |||
Let("el", _), | |||
ToDictionary([ | |||
"et": OrderByValue(el.EventTypes), | |||
"at": ToDictionary(ConcatTop(OrderByTop(el.CaseAttributes, Name).[_: Values])) | |||
]) | |||
)); | |||
Def("GenerateOneHot", "cases", ( | |||
Let("columnInformation", _), | |||
cases.( | |||
Let("cas", _), | |||
Flatten( | |||
[ | |||
columnInformation.Get("et").(Let("et", _), If(Count(cas.EventsByType(et)) > 0, 1, 0)), | |||
( | |||
Let("atColumns", columnInformation.Get("at")), | |||
OrderByValue(atColumns.Keys).( | |||
Let("key", _), | |||
Let("values", atColumns.Get(key)), | |||
Let("caseValue", cas.Attribute(key)), | |||
values.(If(_ == caseValue, 1, 0)) | |||
) | |||
) | |||
] | |||
) | |||
) | |||
)); | |||
Let("el", EventLogById(1)); | |||
Let("columnInformation", el.GetOneHotColumnInformation()); | |||
Let("allCases", Shuffle(el.Cases)); | |||
Let("lastTrainCaseIndex", 0.75 * CountTop(el.Cases)); | |||
Let("trainCases", allCases[NumberRange(0, lastTrainCaseIndex)]); | |||
Let("testCases", allCases[NumberRange(lastTrainCaseIndex + 1, CountTop(el.Cases) - 1)]); | |||
Let("trainDataOH", columnInformation.GenerateOneHot(trainCases)); | |||
Let("testDataOH", columnInformation.GenerateOneHot(testCases)); | |||
Let("trainOutcomes", trainCases.(Duration > TimeSpan(24))); | |||
Let("testOutcomes", testCases.(Duration > TimeSpan(24))); | |||
Let("predictions", | |||
MLModel("randomforest") | |||
.Train(trainDataOH, trainOutcomes) | |||
.Transform(testDataOH)); | |||
Sum(Zip(testOutcomes, predictions).(_[0] == _[1] != 0)) / Count(testOutcomes) | |||
Example #3: Three sets of cases: training cases, target cases (subset of training cases) and test cases (independent set of cases). Try to predict which cases in the test set will eventually end up becoming a case in target cases. | |||
Def("GetOneHotColumnInformation", ( | |||
Let("el", _), | |||
ToDictionary([ | |||
"et": OrderByValue(el.EventTypes), | |||
"at": ToDictionary(ConcatTop(OrderByTop(el.CaseAttributes, Name).[_: Values])) | |||
]) | |||
)); | |||
Def("GenerateOneHot", "cases", ( | |||
Let("columnInformation", _), | |||
cases.( | |||
Let("cas", _), | |||
Flatten( | |||
[ | |||
columnInformation.Get("et").(Let("et", _), If(Count(cas.EventsByType(et)) > 0, 1, 0)), | |||
( | |||
Let("atColumns", columnInformation.Get("at")), | |||
OrderByValue(atColumns.Keys).( | |||
Let("key", _), | |||
Let("values", atColumns.Get(key)), | |||
Let("caseValue", cas.Attribute(key)), | |||
values.(If(_ == caseValue, 1, 0)) | |||
) | |||
) | |||
] | |||
) | |||
) | |||
)); | |||
Let("el", <event log to use>); | |||
Let("trainCases", <cases to use for training>); | |||
Let("targetCases", <cases representing the properties we want to try to predict (subset of traincases)>); | |||
Let("testCases", <cases to use for testing>); | |||
Let("targetCasesDict", ToDictionary(targetCases:true)); | |||
Let("outcomes", traincases.(Let("c", _), targetCasesDict.ContainsKey(c) ? 1 : 0)); | |||
Let("columnInformation", el.GetOneHotColumnInformation()); | |||
Let("mlModel", MLModel("randomforest")); | |||
mlModel.Train(columnInformation.GenerateOneHot(trainCases), outcomes); | |||
mlModel.Transform(columnInformation.GenerateOneHot(testCases)); | |||
</pre> |
Revision as of 12:47, 6 May 2019
This page describes functions and properties that are related to the machine learning functionality in the QPR ProcessAnalyzer expression language.
MLModel
MLModel properties | Description |
---|---|
Type | Returns the exact type of the MLModel. |
MLModel functions | Parameters | Description |
---|---|---|
Train () |
|
Trains given MLModel using given input data and expected outcomes. Parameters
Returns the trained MLModel object itself. |
Transform |
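Transforms the given input data using the machine learning model, thus generating predictions/classifications. Parameters: input data — a two-dimensional array of data in which the first dimension (rows) specifies different data points and the second dimension (columns) specifies the feature values.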
Returns an array of predictions/classifications. The prediction for each row of the input data is found at the same index in the returned array. |
Examples
Example #1: Train a model using an event log and test its performance by replaying training data itself. Def("GetOneHotColumnInformation", ( Let("el", _), ToDictionary([ "et": OrderByValue(el.EventTypes), "at": ToDictionary(ConcatTop(OrderByTop(el.CaseAttributes, Name).[_: Values])) ]) )); Def("GenerateOneHot", "cases", ( Let("columnInformation", _), cases.( Let("cas", _), Flatten( [ columnInformation.Get("et").(Let("et", _), If(Count(cas.EventsByType(et)) > 0, 1, 0)), ( Let("atColumns", columnInformation.Get("at")), OrderByValue(atColumns.Keys).( Let("key", _), Let("values", atColumns.Get(key)), Let("caseValue", cas.Attribute(key)), values.(If(_ == caseValue, 1, 0)) ) ) ] ) ) )); Let("el", EventLogById(1)); Let("columnInformation", el.GetOneHotColumnInformation()); Let("allCases", el.Cases); Let("allCasesOH", columnInformation.GenerateOneHot(el.Cases)); Let("trainDataOH", allCasesOH); Let("outcomes", allCases.(Duration > TimeSpan(24))); Let("testDataOH", allCasesOH); Let("predictions", MLModel("randomforest") .Train(trainDataOH, outcomes) .Transform(trainDataOH)); Sum(Zip(outcomes, predictions).(_[0] == _[1] != 0)) / Count(outcomes) Example #2: Train a model using an a 75% sample of an event log and test its performance by using the rest 25% of the event log. 
Def("GetOneHotColumnInformation", ( Let("el", _), ToDictionary([ "et": OrderByValue(el.EventTypes), "at": ToDictionary(ConcatTop(OrderByTop(el.CaseAttributes, Name).[_: Values])) ]) )); Def("GenerateOneHot", "cases", ( Let("columnInformation", _), cases.( Let("cas", _), Flatten( [ columnInformation.Get("et").(Let("et", _), If(Count(cas.EventsByType(et)) > 0, 1, 0)), ( Let("atColumns", columnInformation.Get("at")), OrderByValue(atColumns.Keys).( Let("key", _), Let("values", atColumns.Get(key)), Let("caseValue", cas.Attribute(key)), values.(If(_ == caseValue, 1, 0)) ) ) ] ) ) )); Let("el", EventLogById(1)); Let("columnInformation", el.GetOneHotColumnInformation()); Let("allCases", Shuffle(el.Cases)); Let("lastTrainCaseIndex", 0.75 * CountTop(el.Cases)); Let("trainCases", allCases[NumberRange(0, lastTrainCaseIndex)]); Let("testCases", allCases[NumberRange(lastTrainCaseIndex + 1, CountTop(el.Cases) - 1)]); Let("trainDataOH", columnInformation.GenerateOneHot(trainCases)); Let("testDataOH", columnInformation.GenerateOneHot(testCases)); Let("trainOutcomes", trainCases.(Duration > TimeSpan(24))); Let("testOutcomes", testCases.(Duration > TimeSpan(24))); Let("predictions", MLModel("randomforest") .Train(trainDataOH, trainOutcomes) .Transform(testDataOH)); Sum(Zip(testOutcomes, predictions).(_[0] == _[1] != 0)) / Count(testOutcomes) Example #3: Three sets of cases: training cases, target cases (subset of training cases) and test cases (independent set of cases). Try to predict which cases in the test set will eventually end up becoming a case in target cases. 
Def("GetOneHotColumnInformation", ( Let("el", _), ToDictionary([ "et": OrderByValue(el.EventTypes), "at": ToDictionary(ConcatTop(OrderByTop(el.CaseAttributes, Name).[_: Values])) ]) )); Def("GenerateOneHot", "cases", ( Let("columnInformation", _), cases.( Let("cas", _), Flatten( [ columnInformation.Get("et").(Let("et", _), If(Count(cas.EventsByType(et)) > 0, 1, 0)), ( Let("atColumns", columnInformation.Get("at")), OrderByValue(atColumns.Keys).( Let("key", _), Let("values", atColumns.Get(key)), Let("caseValue", cas.Attribute(key)), values.(If(_ == caseValue, 1, 0)) ) ) ] ) ) )); Let("el", <event log to use>); Let("trainCases", <cases to use for training>); Let("targetCases", <cases representing the properties we want to try to predict (subset of traincases)>); Let("testCases", <cases to use for testing>); Let("targetCasesDict", ToDictionary(targetCases:true)); Let("outcomes", traincases.(Let("c", _), targetCasesDict.ContainsKey(c) ? 1 : 0)); Let("columnInformation", el.GetOneHotColumnInformation()); Let("mlModel", MLModel("randomforest")); mlModel.Train(columnInformation.GenerateOneHot(trainCases), outcomes); mlModel.Transform(columnInformation.GenerateOneHot(testCases));