.Rhistory

# Build the Combined Authority -> Local Authority District lookup table.
# The history held one incomplete (unparseable) assignment and a dozen identical
# re-runs of the same read/match pair; a single pass yields the same result.
cauth_lut <- read.csv(paste0(getwd(), "/CombinedAuth/cauth.csv"), sep = "|")

# Attach the LAD code by matching on the 2011 LAD name. Names with no match
# come back as NA (written as empty fields below).
# NOTE(review): column 1 of lookup_lad is assumed to hold the LAD code - confirm.
cauth_lut$ladid <- lookup_lad[match(cauth_lut$lad, lookup_lad$LAD11NM), 1]

# Inspect the result.
cauth_lut

# Persist as a pipe-delimited file for the database import.
write.table(
  cauth_lut,
  file = paste0(getwd(), "/CombinedAuth/cauth_lut.csv"),
  row.names = FALSE, quote = FALSE, na = "", sep = "|"
)
# Load the Combined Authority lookup into the database, dissolve the member
# LAD geometries into one geometry per authority, and export a shapefile.
con <- odbcConnect("cdrcdb", uid = "kd", pwd = pwd)

# Two LAD codes are remapped before joining against the `lads` table:
# E08000020 -> E08000037 and E06000048 -> E06000057. An earlier attempt in the
# history used "E08000057" for the second mapping; that was a typo and the
# tables built from it (and from the un-remapped `ladid`) were rebuilt, so only
# the corrected sequence is kept here.
cauth_lut$n_ladid <- ifelse(
  cauth_lut$ladid == "E08000020", "E08000037",
  ifelse(cauth_lut$ladid == "E06000048", "E06000057", as.character(cauth_lut$ladid))
)
write.table(
  cauth_lut,
  file = paste0(getwd(), "/CombinedAuth/cauth_lut.csv"),
  row.names = FALSE, quote = FALSE, na = "", sep = "|"
)

# (Re)create the lookup table in the database from the CSV just written.
csv_types <- "cauthid varchar, cauth varchar, lad varchar, ladid varchar, n_ladid varchar"
odbcQuery(con, "DROP TABLE IF EXISTS cauth_lut;")
import_csv(con, paste0(getwd(), "/CombinedAuth"), "cauth_lut", "cauth_lut.csv", csv_types, "|")

# RIGHT JOIN keeps every lookup row even when no `lads` row matches its code.
odbcQuery(con, "DROP TABLE IF EXISTS lad_cauth;")
odbcQuery(con, "CREATE TABLE lad_cauth AS SELECT * FROM lads RIGHT JOIN cauth_lut ON (cauth_lut.n_ladid = lads.code);")

# One unioned geometry per (cauth, cauthid) group.
odbcQuery(con, "DROP TABLE IF EXISTS cauth_fin;")
odbcQuery(con, "CREATE TABLE cauth_fin AS SELECT ST_UnaryUnion(ST_Collect(geom)), lad_cauth.cauthid, lad_cauth.cauth FROM lad_cauth  GROUP BY lad_cauth.cauth, lad_cauth.cauthid;")

# Export the dissolved boundaries. Arguments are shell-quoted so that spaces or
# shell metacharacters in the path, user name or password cannot break (or
# inject into) the command line.
system(paste0(
  "pgsql2shp -f ", shQuote(paste0(getwd(), "/CombinedAuth/data/CombinedAuth.shp")),
  " -h localhost -u ", shQuote(username),
  " -P ", shQuote(pwd),
  " ", shQuote(db_name),
  ' "SELECT * FROM cauth_fin"'
))
# Load the variable catalogue and derive (a) the distinct dataset ids and
# (b) a "<column code> <SQL type>" string for every variable.
var_desc <- read.csv("m_code/ckan/var_desc.csv")

# One-column data frame (column name: pg_tables) of distinct dataset ids.
pg_tables <- data.frame(pg_tables = unique(var_desc$DatasetId))

# Measurement units listed here are typed as decimal; any other unit is typed
# as integer. A missing unit stays NA, exactly as with a nested ifelse().
unit <- var_desc$ColumnVariableMeasurementUnit
is_decimal <- unit == "Sum" |
  unit == "Ratio" |
  unit == "Percentage" |
  unit == "Years" |
  unit == "Average"
pg_cols <- paste(
  var_desc$ColumnVariableCode,
  ifelse(is_decimal, "decimal", "integer"),
  sep = " "
)
# Create the CombAuth<id> folder tree: first every top-level folder, then every
# tables/ subfolder, then every shapefiles/ subfolder (one full pass over the
# ids per level, so parents exist before their children are created).
# NOTE(review): this pass iterates `leps_ids` although it creates CombinedAuth
# folders; the history repeats the same steps with `cauthids` right after -
# confirm which ids were intended.
for (sub_dir in c("", "/tables/", "/shapefiles/")) {
  for (i in leps_ids[, 1]) {
    print(i)
    lep_tb_path <- paste0(
      "/media/kd/Data/temp/CombinedAuth/CombAuth", as.character(i), sub_dir
    )
    dir.create(lep_tb_path)
  }
}
# Distinct Combined Authority ids and names.
cauthids <- unique(cauth_lut[, c("cauthid", "cauth")])
View(cauthids)

# Create CombinedAuth directories: CombAuth<id>/tables and CombAuth<id>/shapefiles.
# recursive = TRUE creates the CombAuth<id> parent in the same call, and
# showWarnings = FALSE keeps re-runs quiet when the folders already exist, so a
# single pass over the ids replaces the three separate loops.
for (i in cauthids[, 1]) {
  print(i)
  cauth_tb_path <- paste0("/media/kd/Data/temp/CombinedAuth/CombAuth", as.character(i))
  for (sub_dir in c("tables", "shapefiles")) {
    dir.create(file.path(cauth_tb_path, sub_dir), recursive = TRUE, showWarnings = FALSE)
  }
}
# Interactive check: list the LAD codes belonging to Combined Authority "1".
# The history first ran this with whatever value `i` still held from the
# previous loop and then threw the result away; only the explicit run is kept.
i <- "1"

# Column 4 of cauth_lut is `ladid` (column order as listed in csv_types).
v_lads <- cauth_lut[cauth_lut$cauthid == i, 4]

# Drop unmatched (NA) codes and wrap the rest in a one-column data frame whose
# column is named v_lads.
v_lads <- data.frame(v_lads = v_lads[!is.na(v_lads)])
View(v_lads)
# Extract Census tables
# For every Combined Authority, write one CSV per dataset and geography level
# (OA, LSOA, MSOA) into CombAuth<id>/tables.
for (i in cauthids[, 1]) {
  print(i)
  cauth_tb_path <- paste0("/media/kd/Data/temp/CombinedAuth/CombAuth", as.character(i), "/tables")

  # LAD codes of this authority (column 4 = ladid), without unmatched NAs.
  v_lads <- data.frame(cauth_lut[cauth_lut$cauthid == i, 4])
  v_lads <- v_lads[complete.cases(v_lads), ]
  v_lads <- data.frame(v_lads)

  # Rows of the geography lookup that fall inside those LADs; computed once and
  # reused for all three levels.
  # NOTE(review): columns 1, 2 and 4 of `lookup` are taken to be the OA, LSOA
  # and MSOA codes respectively - confirm against the lookup file.
  in_area <- which(lookup$LAD11CD %in% v_lads$v_lads)
  field_value_oa <- as.character(unique(lookup[in_area, 1]))
  field_value_lsoa <- as.character(unique(lookup[in_area, 2]))
  field_value_msoa <- as.character(unique(lookup[in_area, 4]))

  # seq_len() makes the loop a no-op when pg_tables is empty (1:0 would not).
  for (x in seq_len(nrow(pg_tables))) {
    tb <- as.character(tolower(pg_tables$pg_tables[x]))
    get_subset_table(paste0(tb, "_oa"), "geographycode", field_value_oa, paste0(cauth_tb_path, "/", toupper(tb), "_oa11.csv"), con)
    get_subset_table(paste0(tb, "_lsoa"), "geographycode", field_value_lsoa, paste0(cauth_tb_path, "/", toupper(tb), "_lsoa11.csv"), con)
    get_subset_table(paste0(tb, "_msoa"), "geographycode", field_value_msoa, paste0(cauth_tb_path, "/", toupper(tb), "_msoa11.csv"), con)
  }
}
# Extract Combined Authority shapefiles for the OA, LSOA & MSOA geographies.
# One call to get_subset_geo() per level, written to CombAuth<id>/shapefiles.
for (i in cauthids[, 1]) {
  print(i)
  cauth_tb_path <- paste0(
    "/media/kd/Data/temp/CombinedAuth/CombAuth", as.character(i), "/shapefiles"
  )

  # LAD codes of this authority (column 4 of cauth_lut), NAs removed.
  v_lads <- data.frame(cauth_lut[cauth_lut$cauthid == i, 4])
  v_lads <- data.frame(v_lads = v_lads[complete.cases(v_lads), ])

  # Area codes of every lookup row that falls inside those LADs.
  member_rows <- which(lookup$LAD11CD %in% v_lads$v_lads)
  field_value_oa <- as.character(unique(lookup[member_rows, 1]))
  field_value_lsoa <- as.character(unique(lookup[member_rows, 2]))
  field_value_msoa <- as.character(unique(lookup[member_rows, 4]))

  # File names use the upper-cased pack code, e.g. COMBAUTH<id>_oa11.shp.
  tb <- tolower(paste0("CombAuth", as.character(i)))
  shp_stem <- paste0(cauth_tb_path, "/", toupper(tb))
  get_subset_geo("oa11", "oa11cd", field_value_oa, paste0(shp_stem, "_oa11.shp"), con, pwd)
  get_subset_geo("lsoa11", "lsoa11cd", field_value_lsoa, paste0(shp_stem, "_lsoa11.shp"), con, pwd)
  get_subset_geo("msoa11", "msoa11cd", field_value_msoa, paste0(shp_stem, "_msoa11.shp"), con, pwd)
}
# Helper functions for the database import/export steps.
source("/media/kd/Data/Dropbox/Repos/r_source/postgis_functions.r")

# Build the Northern Powerhouse -> LAD lookup. The history ran this identical
# read / match / recode / write sequence three times; once is sufficient.
np_lut <- read.csv(paste0(getwd(), "/NPowerhouse/np.csv"), sep = "|")

# LAD code by 2011 LAD name; unmatched names yield NA.
np_lut$ladid <- lookup_lad[match(np_lut$lad, lookup_lad$LAD11NM), 1]

# Remap the two LAD codes that differ in the `lads` table
# (E08000020 -> E08000037, E06000048 -> E06000057); all others pass through.
np_lut$n_ladid <- ifelse(
  np_lut$ladid == "E08000020", "E08000037",
  ifelse(np_lut$ladid == "E06000048", "E06000057", as.character(np_lut$ladid))
)
write.table(
  np_lut,
  file = paste0(getwd(), "/NPowerhouse/np_lut.csv"),
  row.names = FALSE, quote = FALSE, na = "", sep = "|"
)

# Distinct region ids and names.
npids <- unique(np_lut[, c("npid", "np")])
View(np_lut)
View(npids)
# Load the Northern Powerhouse lookup into the database, dissolve the member
# LAD geometries per region, and export the result as a shapefile.
con <- odbcConnect("cdrcdb", uid = "kd", pwd = pwd)
csv_types <- "npid varchar, np varchar, lad varchar, ladid varchar, n_ladid varchar"
odbcQuery(con, "DROP TABLE IF EXISTS np_lut;")
import_csv(con, paste0(getwd(), "/NPowerhouse"), "np_lut", "np_lut.csv", csv_types, "|")

# RIGHT JOIN keeps every lookup row even when no `lads` row matches its code.
odbcQuery(con, "DROP TABLE IF EXISTS lad_np;")
odbcQuery(con, "CREATE TABLE lad_np AS SELECT * FROM lads RIGHT JOIN np_lut ON (np_lut.n_ladid = lads.code);")

# One unioned geometry per (np, npid) group.
odbcQuery(con, "DROP TABLE IF EXISTS np_fin;")
odbcQuery(con, "CREATE TABLE np_fin AS SELECT ST_UnaryUnion(ST_Collect(geom)), lad_np.npid, lad_np.np FROM lad_np  GROUP BY lad_np.np, lad_np.npid;")

# Shell-quote every interpolated argument so spaces or metacharacters in the
# path, user name or password cannot break (or inject into) the command.
system(paste0(
  "pgsql2shp -f ", shQuote(paste0(getwd(), "/NPowerhouse/data/NPowerhouse.shp")),
  " -h localhost -u ", shQuote(username),
  " -P ", shQuote(pwd),
  " ", shQuote(db_name),
  ' "SELECT * FROM np_fin"'
))
# Create the Northern Powerhouse folder tree: NP<id>/tables and NP<id>/shapefiles.
# The history ran three separate loops (parent, tables, shapefiles) and then
# the same three again. recursive = TRUE creates the NP<id> parent in the same
# call and showWarnings = FALSE keeps re-runs quiet, so one pass is enough.
for (i in npids[, 1]) {
  print(i)
  np_tb_path <- paste0("/media/kd/Data/temp/NPowerhouse/NP", as.character(i))
  for (sub_dir in c("tables", "shapefiles")) {
    dir.create(file.path(np_tb_path, sub_dir), recursive = TRUE, showWarnings = FALSE)
  }
}
# Extract Northern Powerhouse shapefiles for the OA, LSOA & MSOA geographies
# into NP<id>/shapefiles.
for (i in npids[, 1]) {
  print(i)
  np_tb_path <- paste0("/media/kd/Data/temp/NPowerhouse/NP", as.character(i), "/shapefiles")

  # LAD codes of this region (column 4 of np_lut = ladid), NAs removed.
  v_lads <- data.frame(np_lut[np_lut$npid == i, 4])
  v_lads <- v_lads[complete.cases(v_lads), ]
  v_lads <- data.frame(v_lads)

  # Lookup rows inside those LADs, computed once for all three levels.
  in_area <- which(lookup$LAD11CD %in% v_lads$v_lads)
  field_value_oa <- as.character(unique(lookup[in_area, 1]))
  field_value_lsoa <- as.character(unique(lookup[in_area, 2]))
  field_value_msoa <- as.character(unique(lookup[in_area, 4]))

  # Fix: the file prefix was "CombAuth" (copied from the Combined Authority
  # loop), which named Northern Powerhouse files COMBAUTH<id>_*.shp.
  tb <- tolower(paste0("NP", as.character(i)))
  get_subset_geo("oa11", "oa11cd", field_value_oa, paste0(np_tb_path, "/", toupper(tb), "_oa11.shp"), con, pwd)
  get_subset_geo("lsoa11", "lsoa11cd", field_value_lsoa, paste0(np_tb_path, "/", toupper(tb), "_lsoa11.shp"), con, pwd)
  get_subset_geo("msoa11", "msoa11cd", field_value_msoa, paste0(np_tb_path, "/", toupper(tb), "_msoa11.shp"), con, pwd)
}
# Extract Census tables for every Northern Powerhouse region: one CSV per
# dataset and geography level (OA, LSOA, MSOA) into NP<id>/tables.
for (i in npids[, 1]) {
  print(i)
  np_tb_path <- paste0("/media/kd/Data/temp/NPowerhouse/NP", as.character(i), "/tables")

  # LAD codes of this region (column 4 of np_lut = ladid), NAs removed.
  v_lads <- data.frame(np_lut[np_lut$npid == i, 4])
  v_lads <- v_lads[complete.cases(v_lads), ]
  v_lads <- data.frame(v_lads)

  # Lookup rows inside those LADs, computed once for all three levels.
  in_area <- which(lookup$LAD11CD %in% v_lads$v_lads)
  field_value_oa <- as.character(unique(lookup[in_area, 1]))
  field_value_lsoa <- as.character(unique(lookup[in_area, 2]))
  field_value_msoa <- as.character(unique(lookup[in_area, 4]))

  # seq_len() makes the loop a no-op when pg_tables is empty (1:0 would not).
  for (x in seq_len(nrow(pg_tables))) {
    tb <- as.character(tolower(pg_tables$pg_tables[x]))
    get_subset_table(paste0(tb, "_oa"), "geographycode", field_value_oa, paste0(np_tb_path, "/", toupper(tb), "_oa11.csv"), con)
    get_subset_table(paste0(tb, "_lsoa"), "geographycode", field_value_lsoa, paste0(np_tb_path, "/", toupper(tb), "_lsoa11.csv"), con)
    get_subset_table(paste0(tb, "_msoa"), "geographycode", field_value_msoa, paste0(np_tb_path, "/", toupper(tb), "_msoa11.csv"), con)
  }
}
library(whisker)
# Metadata fields passed to whisker.render() together with the XML template.
# The empty fields (titl, keyword, abstract, nation, geogCover) are filled in
# per data pack inside the generation loop.
data <- list(
  grantNo = "ES/L011840/1",
  titl = "",
  AuthEnty = c(
    "Alex Singleton, University of Liverpool",
    "Konstantinos Daras, University of Liverpool"
  ),
  depositr = "Alex Singleton, University of Liverpool",
  fundAg = "Economic and Social Research Council",
  producer = "ESRC Consumer Data Research Centre",
  # Three lines joined by newlines, identical to a single multi-line literal.
  copyright = paste(
    "The following attribution statements must be used to acknowledge ONS and OS copyright and source in use of these datasets:",
    "Contains National Statistics data Crown copyright and database right 2016;",
    "Contains Ordnance Survey data Crown copyright and database right 2016",
    sep = "\n"
  ),
  dataCollector = c("Office for National Statistics", "Ordnance Survey"),
  dataSrc = "Census 2011",
  topcClas = c("Transport", "Health", "Ethnicity", "Energy", "Demographics", "Housing"),
  keyword = "",
  abstract = "",
  timePrd = "2011",
  collDate = "27/03/11",
  nation = "",
  geogCover = "",
  geogUnit = c("Output Area", "Lower Super Output Area", "Middle Super Output Area"),
  anlyUnit = c("Area", "Household", "Person"),
  dataKind = c("Count", "Sum", "Ratio", "Percent"),
  accsPlac = c("Consumer Data Research Centre", "UK Data Service"),
  contact = "data@cdrc.ac.uk",
  relPubl = "",
  serName = "CDRC 2011 Census Data Packs"
)
# Inputs for the Combined Authority metadata/readme generation.
# The history first tried to read cauth_lut.csv from the /LEPs/ folder and then
# repeated the whole setup with the /CombinedAuth/ path; only the corrected
# sequence is kept.
setwd("/media/kd/Data/Dropbox/Repos/rds")
template <- readLines("/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/xml_doc.xml")
cauth_lut <- read.csv(paste0(getwd(), "/CombinedAuth/cauth_lut.csv"), sep = "|")
cauth_ids <- unique(cauth_lut[, c("cauthid", "cauth")])

# The generation loop builds its output paths from getwd(), so switch to the
# folder that holds the CombAuth<id> packs.
setwd("/media/kd/Data/temp/CombinedAuth")

# Description files copied into every pack.
fromfile1 <- "/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/datasets_description.csv"
fromfile2 <- "/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/var_desc.csv"
# Write the documentation for every Combined Authority pack under the working
# directory: readme.txt, metadata.xml and the two description CSVs.
# seq_len() keeps the loop from running on rows 1 and 0 when cauth_ids is empty.
for (i in seq_len(nrow(cauth_ids))) {
  code <- paste0("CombAuth", cauth_ids[i, "cauthid"])
  # as.character() guarantees a plain string even if read.csv() produced a
  # factor column, so the template receives the label rather than a factor.
  name <- as.character(cauth_ids[i, "cauth"])

  # Per-pack metadata fields.
  data$keyword <- c("Census", "2011", code)
  data$titl <- paste("CDRC 2011 Census Data Packs for Combined Authority: ", name,  " (", code, ")", sep="")
  data$abstract <- paste("This census data pack provides 2011 Census estimates for the 'Key Statistic' and 'Quick Statistic' tables within the Combined Authority: ",
    name, " (", code, ")",
    " at the Output Area, Lower Super Output Area and Middle Super Output Area scale. The estimates are as at census day, 27 March 2011.",sep="")
  data$nation <- "England"
  data$geogCover <- name

  # Passing the path lets writeLines() open and close the file itself, so no
  # connection is left open if an error occurs while writing.
  readme_path <- paste(getwd(), code, "readme.txt", sep="/")
  writeLines(c(paste("CDRC 2011 Census Geodata pack - ", name, " (", code, ")", ":\n", sep=""),
    "+ Abstract",
    data$abstract,
    "\n",
    "+ Contents",
    "\t - readme.txt: Information about the CDRC Geodata pack",
    "\t - metadata.xml: Metadata",
    "\t - datasets_description.csv: Description of the 2011 Census Key Statistic and Quick Statistic datasets",
    "\t - variables_description.csv: Description of the 2011 Census Key Statistic and Quick Statistic variables",
    paste("\t - tables: Folder containing the OA, LSOA and MSOA 2011 Census data for the Combined Authority: ", name, " (", code, ")", sep=""),
    paste("\t - shapefiles: Folder containing the OA, LSOA and MSOA digital boundaries as shapefiles for the Combined Authority: ", name, " (", code, ")", sep=""),
    "\n",
    "+ Statistical Disclosure Control",
    "In order to protect against disclosure of personal information from the 2011 Census, there has been swapping of records in the Census database between different geographic areas and so some counts will be affected.",
    "In the main, the greatest effects will be at the lowest geographies, since the record swapping is targeted towards those households with unusual characteristics in small areas.",
    "More details on the ONS Census disclosure control strategy may be found on the Statistical Disclosure Control page on the ONS website:",
    "http://www.ons.gov.uk/ons/guide-method/census/2011/census-data/2011-census-prospectus/new-developments-for-2011-census-results/statistical-disclosure-control/index.html","\n",
    "+ Citation and Copyright",
    "The following attribution statements must be used to acknowledge ONS and OS copyright and source in use of these datasets:",
    "Contains National Statistics data Crown copyright and database right 2016",
    "Contains Ordnance Survey data Crown copyright and database right 2016\n",
    "+ Funding",
    paste("Funded by: Economic and Social Research Council", data$grantNo)),
    readme_path)

  # Render the XML metadata from the template and the fields set above.
  new_xml <- whisker.render(template, data)
  writeLines(new_xml, paste(getwd(), "/", code, "/metadata.xml", sep=""))

  # Copy the shared description files into the pack.
  tofile1 <- paste(getwd(), "/", code, "/datasets_description.csv", sep="")
  tofile2 <- paste(getwd(), "/", code, "/variables_description.csv", sep="")
  file.copy(fromfile1, tofile1, overwrite = TRUE, recursive = FALSE, copy.mode = TRUE, copy.date = FALSE)
  file.copy(fromfile2, tofile2, overwrite = TRUE, recursive = FALSE, copy.mode = TRUE, copy.date = FALSE)
}
library(whisker)
# Metadata fields passed to whisker.render() together with the XML template.
# The empty fields (titl, keyword, abstract, nation, geogCover) are filled in
# per data pack inside the generation loop.
data <- list(
  grantNo = "ES/L011840/1",
  titl = "",
  AuthEnty = c(
    "Alex Singleton, University of Liverpool",
    "Konstantinos Daras, University of Liverpool"
  ),
  depositr = "Alex Singleton, University of Liverpool",
  fundAg = "Economic and Social Research Council",
  producer = "ESRC Consumer Data Research Centre",
  # Three lines joined by newlines, identical to a single multi-line literal.
  copyright = paste(
    "The following attribution statements must be used to acknowledge ONS and OS copyright and source in use of these datasets:",
    "Contains National Statistics data Crown copyright and database right 2016;",
    "Contains Ordnance Survey data Crown copyright and database right 2016",
    sep = "\n"
  ),
  dataCollector = c("Office for National Statistics", "Ordnance Survey"),
  dataSrc = "Census 2011",
  topcClas = c("Transport", "Health", "Ethnicity", "Energy", "Demographics", "Housing"),
  keyword = "",
  abstract = "",
  timePrd = "2011",
  collDate = "27/03/11",
  nation = "",
  geogCover = "",
  geogUnit = c("Output Area", "Lower Super Output Area", "Middle Super Output Area"),
  anlyUnit = c("Area", "Household", "Person"),
  dataKind = c("Count", "Sum", "Ratio", "Percent"),
  accsPlac = c("Consumer Data Research Centre", "UK Data Service"),
  contact = "data@cdrc.ac.uk",
  relPubl = "",
  serName = "CDRC 2011 Census Data Packs"
)
# Inputs for the Northern Powerhouse metadata/readme generation.
setwd("/media/kd/Data/Dropbox/Repos/rds")
template <- readLines("/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/xml_doc.xml")

# Lookup written earlier; keep one row per region id/name pair.
np_lut <- read.csv(paste0(getwd(), "/NPowerhouse/np_lut.csv"), sep = "|")
np_ids <- unique(np_lut[, c("npid", "np")])

# Output paths in the generation loop are built from getwd().
setwd("/media/kd/Data/temp/NPowerhouse")

# Description files copied into every pack.
fromfile1 <- "/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/datasets_description.csv"
fromfile2 <- "/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/var_desc.csv"
# Write the documentation for every Northern Powerhouse pack under the working
# directory: readme.txt, metadata.xml and the two description CSVs.
# seq_len() keeps the loop from running on rows 1 and 0 when np_ids is empty.
for (i in seq_len(nrow(np_ids))) {
  # Fix: the id column is "npid"; "cnpid" does not exist in np_ids and made the
  # lookup fail on the first iteration.
  code <- paste0("NP", np_ids[i, "npid"])
  # as.character() guarantees a plain string even if read.csv() produced a
  # factor column, so the template receives the label rather than a factor.
  name <- as.character(np_ids[i, "np"])

  # Per-pack metadata fields.
  data$keyword <- c("Census", "2011", code)
  data$titl <- paste("CDRC 2011 Census Data Packs for Northern Powerhouse Region: ", name,  " (", code, ")", sep="")
  data$abstract <- paste("This census data pack provides 2011 Census estimates for the 'Key Statistic' and 'Quick Statistic' tables within the Northern Powerhouse Region: ",
    name, " (", code, ")",
    " at the Output Area, Lower Super Output Area and Middle Super Output Area scale. The estimates are as at census day, 27 March 2011.",sep="")
  data$nation <- "England"
  data$geogCover <- name

  # Passing the path lets writeLines() open and close the file itself, so no
  # connection is left open if an error occurs while writing.
  readme_path <- paste(getwd(), code, "readme.txt", sep="/")
  writeLines(c(paste("CDRC 2011 Census Geodata pack - ", name, " (", code, ")", ":\n", sep=""),
    "+ Abstract",
    data$abstract,
    "\n",
    "+ Contents",
    "\t - readme.txt: Information about the CDRC Geodata pack",
    "\t - metadata.xml: Metadata",
    "\t - datasets_description.csv: Description of the 2011 Census Key Statistic and Quick Statistic datasets",
    "\t - variables_description.csv: Description of the 2011 Census Key Statistic and Quick Statistic variables",
    paste("\t - tables: Folder containing the OA, LSOA and MSOA 2011 Census data for the Northern Powerhouse Region: ", name, " (", code, ")", sep=""),
    paste("\t - shapefiles: Folder containing the OA, LSOA and MSOA digital boundaries as shapefiles for the Northern Powerhouse Region: ", name, " (", code, ")", sep=""),
    "\n",
    "+ Statistical Disclosure Control",
    "In order to protect against disclosure of personal information from the 2011 Census, there has been swapping of records in the Census database between different geographic areas and so some counts will be affected.",
    "In the main, the greatest effects will be at the lowest geographies, since the record swapping is targeted towards those households with unusual characteristics in small areas.",
    "More details on the ONS Census disclosure control strategy may be found on the Statistical Disclosure Control page on the ONS website:",
    "http://www.ons.gov.uk/ons/guide-method/census/2011/census-data/2011-census-prospectus/new-developments-for-2011-census-results/statistical-disclosure-control/index.html","\n",
    "+ Citation and Copyright",
    "The following attribution statements must be used to acknowledge ONS and OS copyright and source in use of these datasets:",
    "Contains National Statistics data Crown copyright and database right 2016",
    "Contains Ordnance Survey data Crown copyright and database right 2016\n",
    "+ Funding",
    paste("Funded by: Economic and Social Research Council", data$grantNo)),
    readme_path)

  # Render the XML metadata from the template and the fields set above.
  new_xml <- whisker.render(template, data)
  writeLines(new_xml, paste(getwd(), "/", code, "/metadata.xml", sep=""))

  # Copy the shared description files into the pack.
  tofile1 <- paste(getwd(), "/", code, "/datasets_description.csv", sep="")
  tofile2 <- paste(getwd(), "/", code, "/variables_description.csv", sep="")
  file.copy(fromfile1, tofile1, overwrite = TRUE, recursive = FALSE, copy.mode = TRUE, copy.date = FALSE)
  file.copy(fromfile2, tofile2, overwrite = TRUE, recursive = FALSE, copy.mode = TRUE, copy.date = FALSE)
}
View(npids)
library(whisker)
# Metadata fields passed to whisker.render() together with the XML template.
# The empty fields (titl, keyword, abstract, nation, geogCover) are filled in
# per data pack inside the generation loop.
data <- list(
  grantNo = "ES/L011840/1",
  titl = "",
  AuthEnty = c(
    "Alex Singleton, University of Liverpool",
    "Konstantinos Daras, University of Liverpool"
  ),
  depositr = "Alex Singleton, University of Liverpool",
  fundAg = "Economic and Social Research Council",
  producer = "ESRC Consumer Data Research Centre",
  # Three lines joined by newlines, identical to a single multi-line literal.
  copyright = paste(
    "The following attribution statements must be used to acknowledge ONS and OS copyright and source in use of these datasets:",
    "Contains National Statistics data Crown copyright and database right 2016;",
    "Contains Ordnance Survey data Crown copyright and database right 2016",
    sep = "\n"
  ),
  dataCollector = c("Office for National Statistics", "Ordnance Survey"),
  dataSrc = "Census 2011",
  topcClas = c("Transport", "Health", "Ethnicity", "Energy", "Demographics", "Housing"),
  keyword = "",
  abstract = "",
  timePrd = "2011",
  collDate = "27/03/11",
  nation = "",
  geogCover = "",
  geogUnit = c("Output Area", "Lower Super Output Area", "Middle Super Output Area"),
  anlyUnit = c("Area", "Household", "Person"),
  dataKind = c("Count", "Sum", "Ratio", "Percent"),
  accsPlac = c("Consumer Data Research Centre", "UK Data Service"),
  contact = "data@cdrc.ac.uk",
  relPubl = "",
  serName = "CDRC 2011 Census Data Packs"
)
# Inputs for the Northern Powerhouse metadata/readme generation.
setwd("/media/kd/Data/Dropbox/Repos/rds")
template <- readLines("/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/xml_doc.xml")

# Lookup written earlier; keep one row per region id/name pair.
np_lut <- read.csv(paste0(getwd(), "/NPowerhouse/np_lut.csv"), sep = "|")
np_ids <- unique(np_lut[, c("npid", "np")])

# Output paths in the generation loop are built from getwd().
setwd("/media/kd/Data/temp/NPowerhouse")

# Description files copied into every pack.
fromfile1 <- "/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/datasets_description.csv"
fromfile2 <- "/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/var_desc.csv"
# Write the documentation for every Northern Powerhouse pack under the working
# directory: readme.txt, metadata.xml and the two description CSVs.
# seq_len() keeps the loop from running on rows 1 and 0 when np_ids is empty.
for (i in seq_len(nrow(np_ids))) {
  code <- paste0("NP", np_ids[i, "npid"])
  # as.character() guarantees a plain string even if read.csv() produced a
  # factor column, so the template receives the label rather than a factor.
  name <- as.character(np_ids[i, "np"])

  # Per-pack metadata fields.
  data$keyword <- c("Census", "2011", code)
  data$titl <- paste("CDRC 2011 Census Data Packs for Northern Powerhouse Region: ", name,  " (", code, ")", sep="")
  data$abstract <- paste("This census data pack provides 2011 Census estimates for the 'Key Statistic' and 'Quick Statistic' tables within the Northern Powerhouse Region: ",
    name, " (", code, ")",
    " at the Output Area, Lower Super Output Area and Middle Super Output Area scale. The estimates are as at census day, 27 March 2011.",sep="")
  data$nation <- "England"
  data$geogCover <- name

  # Passing the path lets writeLines() open and close the file itself, so no
  # connection is left open if an error occurs while writing.
  readme_path <- paste(getwd(), code, "readme.txt", sep="/")
  writeLines(c(paste("CDRC 2011 Census Geodata pack - ", name, " (", code, ")", ":\n", sep=""),
    "+ Abstract",
    data$abstract,
    "\n",
    "+ Contents",
    "\t - readme.txt: Information about the CDRC Geodata pack",
    "\t - metadata.xml: Metadata",
    "\t - datasets_description.csv: Description of the 2011 Census Key Statistic and Quick Statistic datasets",
    "\t - variables_description.csv: Description of the 2011 Census Key Statistic and Quick Statistic variables",
    paste("\t - tables: Folder containing the OA, LSOA and MSOA 2011 Census data for the Northern Powerhouse Region: ", name, " (", code, ")", sep=""),
    paste("\t - shapefiles: Folder containing the OA, LSOA and MSOA digital boundaries as shapefiles for the Northern Powerhouse Region: ", name, " (", code, ")", sep=""),
    "\n",
    "+ Statistical Disclosure Control",
    "In order to protect against disclosure of personal information from the 2011 Census, there has been swapping of records in the Census database between different geographic areas and so some counts will be affected.",
    "In the main, the greatest effects will be at the lowest geographies, since the record swapping is targeted towards those households with unusual characteristics in small areas.",
    "More details on the ONS Census disclosure control strategy may be found on the Statistical Disclosure Control page on the ONS website:",
    "http://www.ons.gov.uk/ons/guide-method/census/2011/census-data/2011-census-prospectus/new-developments-for-2011-census-results/statistical-disclosure-control/index.html","\n",
    "+ Citation and Copyright",
    "The following attribution statements must be used to acknowledge ONS and OS copyright and source in use of these datasets:",
    "Contains National Statistics data Crown copyright and database right 2016",
    "Contains Ordnance Survey data Crown copyright and database right 2016\n",
    "+ Funding",
    paste("Funded by: Economic and Social Research Council", data$grantNo)),
    readme_path)

  # Render the XML metadata from the template and the fields set above.
  new_xml <- whisker.render(template, data)
  writeLines(new_xml, paste(getwd(), "/", code, "/metadata.xml", sep=""))

  # Copy the shared description files into the pack.
  tofile1 <- paste(getwd(), "/", code, "/datasets_description.csv", sep="")
  tofile2 <- paste(getwd(), "/", code, "/variables_description.csv", sep="")
  file.copy(fromfile1, tofile1, overwrite = TRUE, recursive = FALSE, copy.mode = TRUE, copy.date = FALSE)
  file.copy(fromfile2, tofile2, overwrite = TRUE, recursive = FALSE, copy.mode = TRUE, copy.date = FALSE)
}
# Extract Northern Powerhouse shapefiles for the OA, LSOA & MSOA geographies.
# One call to get_subset_geo() per level, written to NP<id>/shapefiles.
for (i in npids[, 1]) {
  print(i)
  np_tb_path <- paste0(
    "/media/kd/Data/temp/NPowerhouse/NP", as.character(i), "/shapefiles"
  )

  # LAD codes of this region (column 4 of np_lut), NAs removed.
  v_lads <- data.frame(np_lut[np_lut$npid == i, 4])
  v_lads <- data.frame(v_lads = v_lads[complete.cases(v_lads), ])

  # Area codes of every lookup row that falls inside those LADs.
  member_rows <- which(lookup$LAD11CD %in% v_lads$v_lads)
  field_value_oa <- as.character(unique(lookup[member_rows, 1]))
  field_value_lsoa <- as.character(unique(lookup[member_rows, 2]))
  field_value_msoa <- as.character(unique(lookup[member_rows, 4]))

  # File names use the upper-cased pack code, e.g. NP<id>_oa11.shp.
  tb <- tolower(paste0("NP", as.character(i)))
  shp_stem <- paste0(np_tb_path, "/", toupper(tb))
  get_subset_geo("oa11", "oa11cd", field_value_oa, paste0(shp_stem, "_oa11.shp"), con, pwd)
  get_subset_geo("lsoa11", "lsoa11cd", field_value_lsoa, paste0(shp_stem, "_lsoa11.shp"), con, pwd)
  get_subset_geo("msoa11", "msoa11cd", field_value_msoa, paste0(shp_stem, "_msoa11.shp"), con, pwd)
}
# Zip the Northern Powerhouse packs found in the working directory.
# Single-pack trial run, kept from the history.
zip(paste0(getwd(), "/NP1.zip"), files = "NP1")

# One archive per region: NP<id>.zip from folder NP<id>.
# The history's first version of this loop passed paste0("NP", lad) as the
# folder, but `lad` is not defined in the loop; only the corrected loop is kept.
for (np in npids$npid) {
  zip(paste0(getwd(), "/NP", np, ".zip"), files = paste0("NP", np))
}
# Archives are created relative to the folder holding the CombAuth<id> packs.
setwd("/media/kd/Data/temp/CombinedAuth")

# ---- zip folders ----
# One archive per Combined Authority: CombAuth<id>.zip from folder CombAuth<id>.
for (cauth in cauthids$cauthid) {
  archive <- paste0(getwd(), "/CombAuth", cauth, ".zip")
  zip(archive, files = paste0("CombAuth", cauth))
}
# Rebuild the LEP id/name lookup from the repository CSVs, then zip every LEP
# pack in the temp folder.
# The history first switched to the temp folder and read the CSVs relative to
# it (i.e. from <temp>/LEPs/LEPs/), then redid the reads from the repository;
# only the repository-rooted sequence is kept.
setwd("/media/kd/Data/Dropbox/Repos/rds")
template <- readLines("/media/kd/Data/Dropbox/Repos/rds/m_code/ckan/xml_doc.xml")
leps_lut <- read.csv(paste0(getwd(), "/LEPs/leps.csv"), sep = "|")
leps_ids <- read.csv(
  paste0(getwd(), "/LEPs/lepids.csv"),
  sep = "|", colClasses = c("character", "character")
)

# Attach the LEP id by matching on the LEP name; column 1 of leps_ids is used
# as the id.
leps_lut$lepid <- leps_ids[match(leps_lut$lep, leps_ids$lepname), 1]

# Distinct id/name pairs, forced to character.
lookup_lep <- unique(leps_lut[, c("lepid", "lep")])
lookup_lep[, 1] <- as.character(lookup_lep[, 1])
lookup_lep[, 2] <- as.character(lookup_lep[, 2])

# Archives are created relative to the folder holding the LEP<id> packs.
setwd("/media/kd/Data/temp/LEPs")

# ---- zip folders ----
# One archive per LEP: LEP<id>.zip from folder LEP<id>.
for (lep in lookup_lep$lepid) {
  zip(paste0(getwd(), "/LEP", lep, ".zip"), files = paste0("LEP", lep))
}