Validate Relational Datamodel: Difference between revisions
Jump to navigation
Jump to search
(Created page with "This page contains a script that validates a relational model. The script checks all datatables in the project where the script is run. The following validation checks are performed: * Primary key columns should not contain null values. * Rows should be unique for the primary key columns. * Foreign keys refer to existing data, i.e., the target table should contain a row the source table foreign key is referring to. <syntaxhighlight lang="typescript" line> function pre...") |
No edit summary |
||
| Line 7: | Line 7: | ||
<syntaxhighlight lang="typescript" line> | <syntaxhighlight lang="typescript" line> | ||
MAX_ROWS_TO_REPORT = 100; | |||
function presentDataset(dataset) { | function presentDataset(dataset) { | ||
return dataset.Rows.{ | return dataset.Rows.{ | ||
| Line 40: | Line 42: | ||
.Where(Column("DuplicateCount") > 1) | .Where(Column("DuplicateCount") > 1) | ||
.OrderByColumns(["DuplicateCount"], ["False"]) | .OrderByColumns(["DuplicateCount"], ["False"]) | ||
.Head( | .Head(MAX_ROWS_TO_REPORT) | ||
.Select(columns) | .Select(columns) | ||
.Collect(); | .Collect(); | ||
| Line 56: | Line 58: | ||
.Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter") | .Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter") | ||
.Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))}) | .Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))}) | ||
.Head( | .Head(MAX_ROWS_TO_REPORT) | ||
.Select(foreignKeyColumns.Source) | .Select(foreignKeyColumns.Source) | ||
.Collect(); | .Collect(); | ||
Revision as of 12:56, 11 December 2025
This page contains a script that validates a relational model. The script checks all datatables in the project where the script is run.
The following validation checks are performed:
- Primary key columns should not contain null values.
- Rows should be unique for the primary key columns.
- Foreign keys refer to existing data, i.e., the target table should contain a row the source table foreign key is referring to.
MAX_ROWS_TO_REPORT = 100;
function presentDataset(dataset) {
return dataset.Rows.{
let row = _;
if (Count(dataset.Rows[0]) == 1) {
row[0];
} else {
ToJson(row);
}
};
}
function getNullValueCountInColumn(datatable, columnName) {
let nullRowCountData = datatable
.SqlDataFrame
.Where(Column(#expr{ columnName }) == null)
.GroupBy([])
.Aggregate(["count"], ["count"])
.Select(["count"])
.Collect();
return nullRowCountData.Rows[0][0];
}
function getDuplicateRowsByColumns(datatable, columns) {
if (Count(columns) == 0) {
return [];
}
let duplicatePrimaryKeys = datatable
.SqlDataFrame
.GroupBy(columns)
.Aggregate(["DuplicateCount"], ["count"])
.Where(Column("DuplicateCount") > 1)
.OrderByColumns(["DuplicateCount"], ["False"])
.Head(MAX_ROWS_TO_REPORT)
.Select(columns)
.Collect();
return presentDataset(duplicatePrimaryKeys);
}
function getInvalidRowsForForeignKey(datatable, foreignKey) {
let TARGET_COLUMN_PREFIX = "Target";
let foreignKeyColumns = foreignKey.Columns;
let targetDataFrame = Project.DatatableByName(foreignKey.TargetDatatable)
.SqlDataFrame
.Select(foreignKeyColumns.{ let col = _;[(TARGET_COLUMN_PREFIX + col.Target): col.Target] }[0]);
let invalidRows = datatable.SqlDataFrame
.Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Source.("Column(\"" + _ + "\") != null")))})
.Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter")
.Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))})
.Head(MAX_ROWS_TO_REPORT)
.Select(foreignKeyColumns.Source)
.Collect();
return presentDataset(invalidRows);
}
let errorsOccurred = false;
Project.Datatables.{
let datatable = _;
let primaryKeyColumns = datatable.Columns.Where(PrimaryKey).Name;
for (let index = 0; index < Count(primaryKeyColumns); index++) {
let nullRowCount = getNullValueCountInColumn(datatable, primaryKeyColumns[index]);
if (nullRowCount > 0) {
errorsOccurred = true;
WriteLog("Datatable \"" + datatable.Name + "\" has " + nullRowCount + " rows with null values in primary key column \"" + primaryKeyColumns[index] + "\".");
}
}
let duplicatePrimaryKeys = getDuplicateRowsByColumns(datatable, primaryKeyColumns);
if (Count(duplicatePrimaryKeys) > 0) {
errorsOccurred = true;
WriteLog("Datatable \"" + datatable.Name + "\" has following duplicate primary keys: " + StringJoin(", ", duplicatePrimaryKeys));
}
let foreignKeys = datatable.ForeignKeys;
for (let index = 0; index < Count(foreignKeys); index++) {
let invalidForeignKeyRows = getInvalidRowsForForeignKey(datatable, foreignKeys[index]);
if (Count(invalidForeignKeyRows) > 0) {
errorsOccurred = true;
WriteLog("Datatable \"" + datatable.Name + "\" has in the foreign key " + ToJson(foreignKeys[index]) + " following invalid rows: " + StringJoin(", ", invalidForeignKeyRows));
}
}
}
if (!errorsOccurred) {
WriteLog("Data model is valid.");
}