# Workshop script: synthesise the bike1 data with synthpop at low fidelity
# (method = "sample") and at higher fidelity (default CART, parametric), then
# plot the trips from each version side by side.

library(synthpop)

# Directory holding bike1.Rdata; high_fidel.Rdata is written there as well.
# Paths are built with file.path() so the session's working directory is
# left untouched.
data_dir <- "C:/users/graab/documents/syncs/rds/workshops2"

###-------------------- repeat the first bit to get syn1 ---------------------
load(file.path(data_dir, "bike1.Rdata"))
dim(bike1)
summary(bike1)
barplot(table(bike1$start_hour))
barplot(table(bike1$start_wday))

# codebook.syn(bike1)$tab
system.time(syn1 <- syn(bike1, method = "sample", seed = 5678))
## under 2 seconds
# compare(syn1, bike1)
# names(syn1)

###------------------- now try some high fidelity methods --------------------
## Synthesising every column gave an error message because the places had
## too many categories. The attempt is kept for the record, but the error is
## caught so the rest of the script still runs when the file is sourced.
tryCatch(
  system.time(syn1.cart <- syn(bike1, seed = 5678)),
  error = function(e) {
    message("syn() on all columns failed: ", conditionMessage(e))
    invisible(NULL)
  }
)
## One option would be to group the places - good for disclosure too -
## but for now I will just drop them.
system.time(syn1.cart <- syn(bike1[, 1:9], seed = 5678))
## took 307 seconds
save.image(file.path(data_dir, "high_fidel.Rdata"))
utility.tables(syn1.cart, bike1)

###------------------------- some plots --------------------------------------
# Draw one panel: an empty frame spanning the start coordinates of the
# original bike1 data (so every panel shares the same axes), then one
# segment per row of `trips` from its start point to its end point.
plot_trips <- function(trips, main) {
  plot(bike1$START.LAT, bike1$START.LONG, type = "n", main = main,
       xlab = "Latitude", ylab = "Longitude")
  with(trips, segments(START.LAT, START.LONG, END.LAT, END.LONG))
  invisible(NULL)
}

# png(file = file.path(data_dir, "latlongplot2.PNG"), height = 300, width = 800)
old_par <- par(mfrow = c(1, 3))
plot_trips(syn1$syn, main = "Low fidelity")
## now Original
plot_trips(bike1, main = "?Original or High fidelity")
## now High fidelity
plot_trips(syn1.cart$syn, main = "?Original or High fidelity")
par(old_par)  # put the plotting layout back as it was

###------------------------- now try parametric ------------------------------
## Moved factors with lots of levels to the end of visit.sequence,
## but not far enough: my attempt ran out of time.
system.time(
  syn1.para <- syn(bike1, method = "parametric",
                   visit.sequence = c(7:10, 1:6), seed = 5678)
)