Validate Relational Datamodel

From QPR ProcessAnalyzer Wiki
Revision as of 12:47, 11 December 2025 by Ollvihe (talk | contribs) (Created page with "This page contains a script that validates a relational model. The script checks all datatables in the project where the script is run. The following validation checks are performed: * Primary key columns should not contain null values. * Rows should be unique for the primary key columns. * Foreign keys refer to existing data, i.e., the target table should contain a row the source table foreign key is referring to. <syntaxhighlight lang="typescript" line> function pre...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

This page contains a script that validates a relational model. The script checks all datatables in the project where the script is run.

The following validation checks are performed:

  • Primary key columns should not contain null values.
  • Rows should be unique for the primary key columns.
  • Foreign keys refer to existing data, i.e., the target table should contain a row the source table foreign key is referring to.
function presentDataset(dataset) {
  return dataset.Rows.{
    let row = _;
    if (Count(dataset.Rows[0]) == 1) {
      row[0];
    } else {
      ToJson(row);
    }
  };
}

function getNullValueCountInColumn(datatable, columnName) {
  let nullRowCountData = datatable
    .SqlDataFrame
    .Where(Column(#expr{ columnName }) == null)
    .GroupBy([])
    .Aggregate(["count"], ["count"])
    .Select(["count"])
    .Collect();
  return nullRowCountData.Rows[0][0];
}

function getDuplicateRowsByColumns(datatable, columns) {
  if (Count(columns) == 0) {
    return [];
  }

  let duplicatePrimaryKeys = datatable
    .SqlDataFrame
    .GroupBy(columns)
    .Aggregate(["DuplicateCount"], ["count"])
    .Where(Column("DuplicateCount") > 1)
    .OrderByColumns(["DuplicateCount"], ["False"])
    .Head(1000)
    .Select(columns)
    .Collect();
  return presentDataset(duplicatePrimaryKeys);
}

function getInvalidRowsForForeignKey(datatable, foreignKey) {
  let TARGET_COLUMN_PREFIX = "Target";
  let foreignKeyColumns = foreignKey.Columns;
  let targetDataFrame = Project.DatatableByName(foreignKey.TargetDatatable)
    .SqlDataFrame
    .Select(foreignKeyColumns.{ let col = _;[(TARGET_COLUMN_PREFIX + col.Target): col.Target] }[0]);
  let invalidRows = datatable.SqlDataFrame
    .Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Source.("Column(\"" + _ + "\") != null")))})
    .Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter")
    .Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))})
    .Head(1000)
    .Select(foreignKeyColumns.Source)
    .Collect();
  return presentDataset(invalidRows);
}

let errorsOccurred = false;

Project.Datatables.{
  let datatable = _;

  let primaryKeyColumns = datatable.Columns.Where(PrimaryKey).Name;
  for (let index = 0; index < Count(primaryKeyColumns); index++) {
    let nullRowCount = getNullValueCountInColumn(datatable, primaryKeyColumns[index]);
    if (nullRowCount > 0) {
      errorsOccurred = true;
      WriteLog("Datatable \"" + datatable.Name + "\" has " + nullRowCount + " rows with null values in primary key column \"" + primaryKeyColumns[index] + "\".");
    }
  }

  let duplicatePrimaryKeys = getDuplicateRowsByColumns(datatable, primaryKeyColumns);
  if (Count(duplicatePrimaryKeys) > 0) {
    errorsOccurred = true;
    WriteLog("Datatable \"" + datatable.Name + "\" has following duplicate primary keys: " + StringJoin(", ", duplicatePrimaryKeys));
  }

  let foreignKeys = datatable.ForeignKeys;
  for (let index = 0; index < Count(foreignKeys); index++) {
    let invalidForeignKeyRows = getInvalidRowsForForeignKey(datatable, foreignKeys[index]);
    if (Count(invalidForeignKeyRows) > 0) {
      errorsOccurred = true;
      WriteLog("Datatable \"" + datatable.Name + "\" has in the foreign key " + ToJson(foreignKeys[index]) + " following invalid rows: " + StringJoin(", ", invalidForeignKeyRows));
    }
  }
}

if (!errorsOccurred) {
  WriteLog("Data model is valid.");
}