Validate Relational Datamodel: Difference between revisions

From QPR ProcessAnalyzer Wiki
Jump to navigation Jump to search
(Created page with "This page contains a script that validates a relational model. The script checks all datatables in the project where the script is run. The following validation checks are performed: * Primary key columns should not contain null values. * Rows should be unique for the primary key columns. * Foreign keys refer to existing data, i.e., the target table should contain a row the source table foreign key is referring to. <syntaxhighlight lang="typescript" line> function pre...")
 
No edit summary
 
(12 intermediate revisions by the same user not shown)
Line 1: Line 1:
This page contains a script that validates a relational model. The script checks all datatables in the project where the script is run.
This page features a script designed to validate a relational model stored in QPR ProcessAnalyzer datatables. The script checks all datatables in the project where the script is run.


The following validation checks are performed:
The validation includes the following checks:
* Primary key columns should not contain null values.
- Primary key columns must not contain null values.
* Rows should be unique for the primary key columns.
- Each row associated with the primary key columns must be unique.
* Foreign keys refer to existing data, i.e., the target table should contain a row the source table foreign key is referring to.  
- For foreign keys, the target table must have a corresponding row that matches the reference from the source table's foreign key.
 
Create a new expression script to the project and run the script. When the script run is completed, open the script run log. There is either is message that the data model is valid, or there is a list of found validation errors.


<syntaxhighlight lang="typescript" line>
<syntaxhighlight lang="typescript" line>
let MAX_ROWS_TO_REPORT = 100;
function presentDataset(dataset) {
function presentDataset(dataset) {
   return dataset.Rows.{
   return dataset.Rows.{
Line 40: Line 44:
     .Where(Column("DuplicateCount") > 1)
     .Where(Column("DuplicateCount") > 1)
     .OrderByColumns(["DuplicateCount"], ["False"])
     .OrderByColumns(["DuplicateCount"], ["False"])
     .Head(1000)
     .Head(MAX_ROWS_TO_REPORT)
     .Select(columns)
     .Select(columns)
     .Collect();
     .Collect();
Line 47: Line 51:


function getInvalidRowsForForeignKey(datatable, foreignKey) {
function getInvalidRowsForForeignKey(datatable, foreignKey) {
  let targetDatatableName = foreignKey.TargetDatatable;
  if (IsNull(targetDatatableName) || targetDatatableName == "") {
    errorsOccurred = true;
    WriteLog("In datatable \"" + datatable.Name + "\" target table is incorrectly empty in foreign key \"" + ToJson(foreignKey) + ".");
    return [];
  }
  let targetDatatable = Project.DatatableByName(targetDatatableName);
  if (IsNull(targetDatatable)) {
    errorsOccurred = true;
    WriteLog("In datatable \"" + datatable.Name + "\" foreign key target datatable \"" + targetDatatableName + "\" is not found.");
    return [];
  }
   let TARGET_COLUMN_PREFIX = "Target";
   let TARGET_COLUMN_PREFIX = "Target";
   let foreignKeyColumns = foreignKey.Columns;
   let foreignKeyColumns = foreignKey.Columns;
   let targetDataFrame = Project.DatatableByName(foreignKey.TargetDatatable)
   let targetDataFrame = targetDatatable
     .SqlDataFrame
     .SqlDataFrame
     .Select(foreignKeyColumns.{ let col = _;[(TARGET_COLUMN_PREFIX + col.Target): col.Target] }[0]);
     .Select(foreignKeyColumns.{ let col = _;[(TARGET_COLUMN_PREFIX + col.Target): col.Target] }[0]);
Line 56: Line 74:
     .Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter")
     .Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter")
     .Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))})
     .Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))})
     .Head(1000)
     .Head(MAX_ROWS_TO_REPORT)
     .Select(foreignKeyColumns.Source)
     .Select(foreignKeyColumns.Source)
     .Collect();
     .Collect();
Line 79: Line 97:
   if (Count(duplicatePrimaryKeys) > 0) {
   if (Count(duplicatePrimaryKeys) > 0) {
     errorsOccurred = true;
     errorsOccurred = true;
     WriteLog("Datatable \"" + datatable.Name + "\" has following duplicate primary keys: " + StringJoin(", ", duplicatePrimaryKeys));
     WriteLog("Datatable \"" + datatable.Name + "\" has following duplicate rows for the primary key: " + StringJoin(", ", duplicatePrimaryKeys) + (Count(duplicatePrimaryKeys) >= MAX_ROWS_TO_REPORT ? " (+ more)": ""));
   }
   }


Line 87: Line 105:
     if (Count(invalidForeignKeyRows) > 0) {
     if (Count(invalidForeignKeyRows) > 0) {
       errorsOccurred = true;
       errorsOccurred = true;
       WriteLog("Datatable \"" + datatable.Name + "\" has in the foreign key " + ToJson(foreignKeys[index]) + " following invalid rows: " + StringJoin(", ", invalidForeignKeyRows));
       WriteLog("Datatable \"" + datatable.Name + "\" has in the foreign key " + ToJson(foreignKeys[index]) + " following invalid rows: " + StringJoin(", ", invalidForeignKeyRows) + (Count(invalidForeignKeyRows) >= MAX_ROWS_TO_REPORT ? " (+ more)": ""));
     }
     }
   }
   }
Line 95: Line 113:
   WriteLog("Data model is valid.");
   WriteLog("Data model is valid.");
}
}
</syntaxhighlight>
This script prints a report of the datatable's primary and foreign keys in the project to the script log:
<syntaxhighlight lang="typescript" line>
function appendToArray(arr, line, level) {
  return Concat(arr, StringJoin("", Repeat(level, "\u00A0\u00A0\u00A0\u00A0")) + line);
}
let reportContent = [];
OrderBy(Project.Datatables, Name).{
  let datatable = _;
  reportContent = appendToArray(reportContent, "\nDatatable: " + datatable.Name, 0);
  let primaryKeys = datatable.Columns.Where(PrimaryKey).Name;
  reportContent = appendToArray(reportContent, "Primary key: " + (Count(primaryKeys) == 0 ? "(none)" : StringJoin(", ", primaryKeys)), 1);
  let foreignKeys = datatable.ForeignKeys;
  for (let index = 0; index < Count(foreignKeys); index++) {
    reportContent = appendToArray(reportContent, "Foreign key: " + foreignKeys[index].TargetDatatable, 1);
    for (let index2 = 0; index2 < Count(foreignKeys[index].Columns); index2++) {
      reportContent = appendToArray(reportContent, foreignKeys[index].Columns[index2].Source + " ⭢ " + foreignKeys[index].Columns[index2].Target, 2);
    }
  }
}
WriteLog("\n" + StringJoin("\n", reportContent));
</syntaxhighlight>
</syntaxhighlight>

Latest revision as of 11:52, 15 January 2026

This page features a script designed to validate a relational model stored in QPR ProcessAnalyzer datatables. The script checks all datatables in the project where the script is run.

The validation includes the following checks: - Primary key columns must not contain null values. - Each row associated with the primary key columns must be unique. - For foreign keys, the target table must have a corresponding row that matches the reference from the source table's foreign key.

Create a new expression script to the project and run the script. When the script run is completed, open the script run log. There is either is message that the data model is valid, or there is a list of found validation errors.

let MAX_ROWS_TO_REPORT = 100;

function presentDataset(dataset) {
  return dataset.Rows.{
    let row = _;
    if (Count(dataset.Rows[0]) == 1) {
      row[0];
    } else {
      ToJson(row);
    }
  };
}

function getNullValueCountInColumn(datatable, columnName) {
  let nullRowCountData = datatable
    .SqlDataFrame
    .Where(Column(#expr{ columnName }) == null)
    .GroupBy([])
    .Aggregate(["count"], ["count"])
    .Select(["count"])
    .Collect();
  return nullRowCountData.Rows[0][0];
}

function getDuplicateRowsByColumns(datatable, columns) {
  if (Count(columns) == 0) {
    return [];
  }

  let duplicatePrimaryKeys = datatable
    .SqlDataFrame
    .GroupBy(columns)
    .Aggregate(["DuplicateCount"], ["count"])
    .Where(Column("DuplicateCount") > 1)
    .OrderByColumns(["DuplicateCount"], ["False"])
    .Head(MAX_ROWS_TO_REPORT)
    .Select(columns)
    .Collect();
  return presentDataset(duplicatePrimaryKeys);
}

function getInvalidRowsForForeignKey(datatable, foreignKey) {
   let targetDatatableName = foreignKey.TargetDatatable;
   if (IsNull(targetDatatableName) || targetDatatableName == "") {
    errorsOccurred = true;
    WriteLog("In datatable \"" + datatable.Name + "\" target table is incorrectly empty in foreign key \"" + ToJson(foreignKey) + ".");
    return [];
  }

  let targetDatatable = Project.DatatableByName(targetDatatableName);
  if (IsNull(targetDatatable)) {
    errorsOccurred = true;
    WriteLog("In datatable \"" + datatable.Name + "\" foreign key target datatable \"" + targetDatatableName + "\" is not found.");
    return [];
  }

  let TARGET_COLUMN_PREFIX = "Target";
  let foreignKeyColumns = foreignKey.Columns;
  let targetDataFrame = targetDatatable
    .SqlDataFrame
    .Select(foreignKeyColumns.{ let col = _;[(TARGET_COLUMN_PREFIX + col.Target): col.Target] }[0]);
  let invalidRows = datatable.SqlDataFrame
    .Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Source.("Column(\"" + _ + "\") != null")))})
    .Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter")
    .Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))})
    .Head(MAX_ROWS_TO_REPORT)
    .Select(foreignKeyColumns.Source)
    .Collect();
  return presentDataset(invalidRows);
}

let errorsOccurred = false;

Project.Datatables.{
  let datatable = _;

  let primaryKeyColumns = datatable.Columns.Where(PrimaryKey).Name;
  for (let index = 0; index < Count(primaryKeyColumns); index++) {
    let nullRowCount = getNullValueCountInColumn(datatable, primaryKeyColumns[index]);
    if (nullRowCount > 0) {
      errorsOccurred = true;
      WriteLog("Datatable \"" + datatable.Name + "\" has " + nullRowCount + " rows with null values in primary key column \"" + primaryKeyColumns[index] + "\".");
    }
  }

  let duplicatePrimaryKeys = getDuplicateRowsByColumns(datatable, primaryKeyColumns);
  if (Count(duplicatePrimaryKeys) > 0) {
    errorsOccurred = true;
    WriteLog("Datatable \"" + datatable.Name + "\" has following duplicate rows for the primary key: " + StringJoin(", ", duplicatePrimaryKeys) + (Count(duplicatePrimaryKeys) >= MAX_ROWS_TO_REPORT ? " (+ more)": ""));
  }

  let foreignKeys = datatable.ForeignKeys;
  for (let index = 0; index < Count(foreignKeys); index++) {
    let invalidForeignKeyRows = getInvalidRowsForForeignKey(datatable, foreignKeys[index]);
    if (Count(invalidForeignKeyRows) > 0) {
      errorsOccurred = true;
      WriteLog("Datatable \"" + datatable.Name + "\" has in the foreign key " + ToJson(foreignKeys[index]) + " following invalid rows: " + StringJoin(", ", invalidForeignKeyRows) + (Count(invalidForeignKeyRows) >= MAX_ROWS_TO_REPORT ? " (+ more)": ""));
    }
  }
}

if (!errorsOccurred) {
  WriteLog("Data model is valid.");
}

This script prints a report of the datatable's primary and foreign keys in the project to the script log:

function appendToArray(arr, line, level) {
  return Concat(arr, StringJoin("", Repeat(level, "\u00A0\u00A0\u00A0\u00A0")) + line);
}

let reportContent = [];
OrderBy(Project.Datatables, Name).{
  let datatable = _;
  reportContent = appendToArray(reportContent, "\nDatatable: " + datatable.Name, 0);
  let primaryKeys = datatable.Columns.Where(PrimaryKey).Name;
  reportContent = appendToArray(reportContent, "Primary key: " + (Count(primaryKeys) == 0 ? "(none)" : StringJoin(", ", primaryKeys)), 1);
  let foreignKeys = datatable.ForeignKeys;
  for (let index = 0; index < Count(foreignKeys); index++) {
    reportContent = appendToArray(reportContent, "Foreign key: " + foreignKeys[index].TargetDatatable, 1);
    for (let index2 = 0; index2 < Count(foreignKeys[index].Columns); index2++) {
      reportContent = appendToArray(reportContent, foreignKeys[index].Columns[index2].Source + " ⭢ " + foreignKeys[index].Columns[index2].Target, 2);
    }
  }
}
WriteLog("\n" + StringJoin("\n", reportContent));