Validate Relational Datamodel
Jump to navigation
Jump to search
This page contains a script that validates a relational model. The script checks all datatables in the project where the script is run.
The following validation checks are performed:
- Primary key columns should not contain null values.
- Rows should be unique for the primary key columns.
- Foreign keys refer to existing data, i.e., the target table should contain a row the source table foreign key is referring to.
function presentDataset(dataset) {
return dataset.Rows.{
let row = _;
if (Count(dataset.Rows[0]) == 1) {
row[0];
} else {
ToJson(row);
}
};
}
function getNullValueCountInColumn(datatable, columnName) {
let nullRowCountData = datatable
.SqlDataFrame
.Where(Column(#expr{ columnName }) == null)
.GroupBy([])
.Aggregate(["count"], ["count"])
.Select(["count"])
.Collect();
return nullRowCountData.Rows[0][0];
}
function getDuplicateRowsByColumns(datatable, columns) {
if (Count(columns) == 0) {
return [];
}
let duplicatePrimaryKeys = datatable
.SqlDataFrame
.GroupBy(columns)
.Aggregate(["DuplicateCount"], ["count"])
.Where(Column("DuplicateCount") > 1)
.OrderByColumns(["DuplicateCount"], ["False"])
.Head(1000)
.Select(columns)
.Collect();
return presentDataset(duplicatePrimaryKeys);
}
function getInvalidRowsForForeignKey(datatable, foreignKey) {
let TARGET_COLUMN_PREFIX = "Target";
let foreignKeyColumns = foreignKey.Columns;
let targetDataFrame = Project.DatatableByName(foreignKey.TargetDatatable)
.SqlDataFrame
.Select(foreignKeyColumns.{ let col = _;[(TARGET_COLUMN_PREFIX + col.Target): col.Target] }[0]);
let invalidRows = datatable.SqlDataFrame
.Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Source.("Column(\"" + _ + "\") != null")))})
.Join(targetDataFrame, foreignKeyColumns.{ let col = _;[col.Source: (TARGET_COLUMN_PREFIX + col.Target)] }[0], "leftouter")
.Where(#expr{ ToSqlExpression(StringJoin(" || ", foreignKeyColumns.Target.("Column(\"" + TARGET_COLUMN_PREFIX + _ + "\") == null")))})
.Head(1000)
.Select(foreignKeyColumns.Source)
.Collect();
return presentDataset(invalidRows);
}
let errorsOccurred = false;
Project.Datatables.{
let datatable = _;
let primaryKeyColumns = datatable.Columns.Where(PrimaryKey).Name;
for (let index = 0; index < Count(primaryKeyColumns); index++) {
let nullRowCount = getNullValueCountInColumn(datatable, primaryKeyColumns[index]);
if (nullRowCount > 0) {
errorsOccurred = true;
WriteLog("Datatable \"" + datatable.Name + "\" has " + nullRowCount + " rows with null values in primary key column \"" + primaryKeyColumns[index] + "\".");
}
}
let duplicatePrimaryKeys = getDuplicateRowsByColumns(datatable, primaryKeyColumns);
if (Count(duplicatePrimaryKeys) > 0) {
errorsOccurred = true;
WriteLog("Datatable \"" + datatable.Name + "\" has following duplicate primary keys: " + StringJoin(", ", duplicatePrimaryKeys));
}
let foreignKeys = datatable.ForeignKeys;
for (let index = 0; index < Count(foreignKeys); index++) {
let invalidForeignKeyRows = getInvalidRowsForForeignKey(datatable, foreignKeys[index]);
if (Count(invalidForeignKeyRows) > 0) {
errorsOccurred = true;
WriteLog("Datatable \"" + datatable.Name + "\" has in the foreign key " + ToJson(foreignKeys[index]) + " following invalid rows: " + StringJoin(", ", invalidForeignKeyRows));
}
}
}
if (!errorsOccurred) {
WriteLog("Data model is valid.");
}