Protect two tables with common cells

protectLinkedTables() can be used to protect tables that have common cells. It is of course required that after the anonymization process has finished, all common cells have the same anonymization state in both tables.

protectLinkedTables(objectA, objectB, commonCells, method, ...)

Arguments

objectA	an sdcProblem object
objectB	an sdcProblem object
commonCells	a list object defining common cells in `objectA` and `objectB`. For each variable that has one or more common codes in both tables, a list element needs to be specified. List-elements of length `3`: the variable has exactly the same levels and structure in both input tables. `first element`: scalar character vector specifying the variable name in argument `objectA`; `second element`: scalar character vector specifying the variable name in argument `objectB`; `third element`: scalar character vector containing the keyword `"ALL"`. List-elements of length `4`: the variable has different codes and levels in tables `objectA` and `objectB`. `first element`: scalar character vector specifying the variable name in argument `objectA`; `second element`: scalar character vector specifying the variable name in argument `objectB`; `third element`: character vector defining codes within `objectA`; `fourth element`: character vector whose length equals the length of the third list-element. This vector defines the codes of the dimensional variable in `objectB` that match the codes given in the third list-element for `objectA`.
method	scalar character vector defining the algorithm that should be used to protect the primary sensitive table cells. The possible values are `"HITAS"`, `"SIMPLEHEURISTIC"` and `"OPT"`. For details, please see `protectTable()`.
...	additional arguments to control the secondary cell suppression algorithm. For details, see `protectTable()`.

Value

a list of length 2 with each list-element being a safeObj object

Examples

if (FALSE) {
# Worked example: protect two linked tables that share common cells.
# The whole example is wrapped in `if (FALSE)` so it is never evaluated
# automatically.

# load micro data for further processing
# The file is looked up through the installed package rather than by
# grepping the search path: that approach breaks when sdcTable is not
# attached or when several search-path entries contain "sdcTable".
# `mustWork = TRUE` turns a missing file into an immediate, clear error.
fn <- system.file(
  "data", "microData2.RData",
  package = "sdcTable",
  mustWork = TRUE)
microData <- get(load(fn))

# table1: defined by variables 'gender' and 'ecoOld'
# (columns 2 and 3; column 5 is used as the numeric variable below)
microData1 <- microData[, c(2, 3, 5)]

# table2: defined by variables 'region', 'gender' and 'ecoNew'
# (columns 1, 2 and 4; column 5 is used as the numeric variable below)
microData2 <- microData[, c(1, 2, 4, 5)]

# we need to create information on the hierarchies
# variable 'region': exists only in microData2
d_region <- hier_create(root = "Tot", nodes = c("R1", "R2"))

# variable 'gender': exists in both datasets
d_gender <- hier_create(root = "Tot", nodes = c("m", "f"))

# variable 'ecoOld': exists only in microData1
d_eco1 <- hier_create(root = "Tot", nodes = c("A", "B"))
d_eco1 <- hier_add(d_eco1, root = "A", nodes = c("Aa", "Ab"))
d_eco1 <- hier_add(d_eco1, root = "B", nodes = c("Ba", "Bb"))

# variable 'ecoNew': exists only in microData2
d_eco2 <- hier_create(root = "Tot", nodes = c("C", "D"))
d_eco2 <- hier_add(d_eco2, root = "C", nodes = c("Ca", "Cb", "Cc"))
d_eco2 <- hier_add(d_eco2, root = "D", nodes = c("Da", "Db", "Dc"))

# creating objects holding information on dimensions
# (list names are the names of the dimensional variables of each table)
dl1 <- list(gender = d_gender, ecoOld = d_eco1)
dl2 <- list(region = d_region, gender = d_gender, ecoNew = d_eco2)

# creating input objects for further processing.
# For details, see ?makeProblem.
# `dimVarInd` / `numVarInd` are column positions within the subsetted
# data sets `microData1` / `microData2`, not within `microData`.
p1 <- makeProblem(
  data = microData1,
  dimList = dl1,
  dimVarInd = 1:2,
  numVarInd = 3)

p2 <- makeProblem(
  data = microData2,
  dimList = dl2,
  dimVarInd = 1:3,
  numVarInd = 4)

# the cell specified by gender == "Tot" and ecoOld == "A"
# is one of the common cells! -> we mark it as primary suppression
p1 <- changeCellStatus(
  object = p1,
  characteristics = c("Tot", "A"),
  varNames = c("gender", "ecoOld"),
  rule = "u",
  verbose = FALSE)

# the cell specified by region == "Tot" and gender == "f" and ecoNew == "C"
# is one of the common cells! -> we mark it as primary suppression
p2 <- changeCellStatus(
  object = p2,
  characteristics = c("Tot", "f", "C"),
  varNames = c("region", "gender", "ecoNew"),
  rule = "u",
  verbose = FALSE)

# specifying input to define common cells
common_cells <- list()

# variable "gender"
common_cells$v.gender <- list()
common_cells$v.gender[[1]] <- "gender" # variable name in "p1"
common_cells$v.gender[[2]] <- "gender" # variable name in "p2"

# "gender" has equal characteristics on both datasets -> keyword "ALL"
common_cells$v.gender[[3]] <- "ALL"

# variables: "ecoOld" and "ecoNew"
common_cells$v.eco <- list()
common_cells$v.eco[[1]] <- "ecoOld" # variable name in "p1"
common_cells$v.eco[[2]] <- "ecoNew" # variable name in "p2"

# vector of common characteristics:
# "A" and "B" in variable "ecoOld" in "p1"
common_cells$v.eco[[3]] <- c("A", "B")

# correspond to codes "C" and "D" in variable "ecoNew" in "p2"
# (same length and order as the codes given for "p1")
common_cells$v.eco[[4]] <- c("C", "D")

# protect the linked data
result <- protectLinkedTables(
  objectA = p1,
  objectB = p2,
  commonCells = common_cells,
  method = "HITAS",
  verbose = TRUE)

# having a look at the results
# (a list of length 2: first element belongs to "p1", second to "p2")
result_tab1 <- result[[1]]
result_tab2 <- result[[2]]
summary(result_tab1)
summary(result_tab2)
}

Arguments

Value

See also

Examples