Anonymize data: Difference between revisions
Jump to navigation
Jump to search
(Created page with "Data can be anonymized using scripts with the following methods: * When data is extracted from a source system, the data is immediately anonymized and the anonymized data is s...") |
No edit summary |
||
Line 18: | Line 18: | ||
} | } | ||
if (!dict.ContainsKey(oldValue)) { | if (!dict.ContainsKey(oldValue)) { | ||
dict.Set(oldValue, ` | dict.Set(oldValue, `${dict.Count}`); | ||
} | } | ||
return dict[oldValue]; | return dict[oldValue]; | ||
Line 27: | Line 27: | ||
let col = cols[i]; | let col = cols[i]; | ||
df = df.SetColumns([ | df = df.SetColumns([ | ||
`${col} | `${col}`: () => AnonymizeColumn(col, Column(col)) | ||
]); | ]); | ||
} | } | ||
} | } | ||
let | let sourceDatatable = DataTableById(1); | ||
Anonymize(sourceDatatable.SqlDataFrame.Collect(), ["Case Name","Customer Business Team","Customer Name","Customer Number"]) | |||
.Persist(sourceDatatable.Name + "_anonymized", #{"ProjectId": sourceDatatable.Project.Id}) | |||
</pre> | </pre> |
Revision as of 22:45, 13 December 2022
Data can be anonymized using scripts with the following methods:
- When data is extracted from a source system, the data is immediately anonymized and the anonymized data is stored to datatables.
- Data in the datatables is anonymized and stored to other datatables, from which the anonymized data can be exported or visualized in dashboards.
If the original data is stored in the system, make sure that users who are only allowed to see the anonymized data don't have access to the original data.
The following example anonymizes selected columns in a datatable and writes the result to another datatable.
// Per-column lookup tables: column name -> dictionary (original value -> replacement).
// Kept at script level so the same original value always maps to the same replacement
// for the whole run.
let anonymizationDict = #{};

// Returns the anonymized replacement for oldValue in the column columnName.
// The first time a value is seen in a column it is assigned the number of distinct
// values already registered for that column, formatted as a string ("0", "1", ...);
// later occurrences of the same value get the same replacement.
function AnonymizeColumn(columnName, oldValue) {
  let dict;
  if (!anonymizationDict.ContainsKey(columnName)) {
    // First value for this column: create its lookup table.
    dict = #{};
    anonymizationDict.Set(columnName, dict);
  }
  else {
    dict = anonymizationDict[columnName];
  }
  if (!dict.ContainsKey(oldValue)) {
    dict.Set(oldValue, `${dict.Count}`);
  }
  return dict[oldValue];
}

// Replaces every column listed in cols with its anonymized values and returns the
// resulting dataframe.
//   df   - dataframe to anonymize
//   cols - array of names of the columns to anonymize
function Anonymize(df, cols) {
  for (let i = 0; i < CountTop(cols); ++i) {
    let col = cols[i];
    df = df.SetColumns([
      `${col}`: () => AnonymizeColumn(col, Column(col))
    ]);
  }
  // Return the result explicitly: the caller chains .Persist() on it.
  return df;
}

// Source datatable is looked up by a hard-coded id (1); change it to match your environment.
let sourceDatatable = DataTableById(1);

// Anonymize the selected columns and store the result in the same project as a new
// datatable named "<source name>_anonymized".
Anonymize(sourceDatatable.SqlDataFrame.Collect(), ["Case Name","Customer Business Team","Customer Name","Customer Number"])
  .Persist(sourceDatatable.Name + "_anonymized", #{"ProjectId": sourceDatatable.Project.Id})