Commit 3d35d711 authored by Markus Mößler's avatar Markus Mößler
Browse files

updated data handling to new approach and switched to expenditure view

parent a9f09f86
Loading
Loading
Loading
Loading
+21.5 KiB

File added.

No diff preview for this file type.

+97.4 KiB

File added.

No diff preview for this file type.

+4 −40
Original line number Diff line number Diff line
readsdmx__read_sdmx <- function (path, destfile = tempfile(fileext = ".xml"), quiet = TRUE, 
                                 method = "libcurl", mode = "w", ...) {
  
  url <- "http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/QNA/all?format=SDMX-ML"
  # check: https://data-explorer.oecd.org/vis?df[ds]=DisseminateFinalDMZ&df[id]=DSD_NAMAIN1%40DF_QNA_EXPENDITURE_INDICES&df[ag]=OECD.SDD.NAD&df[vs]=1.1&dq=Q............&lom=LASTNPERIODS&lo=5&to[TIME_PERIOD]=false
  url <- "https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_INDICES,1.1/Q............"
  path <- url
  destfile <- tempfile(fileext = ".xml")
  quiet <- TRUE
  method <- "libcurl"
@@ -19,44 +21,6 @@ readsdmx__read_sdmx <- function (path, destfile = tempfile(fileext = ".xml"), qu
  stopifnot(file.exists(path))
  path <- normalizePath(path)
  d <- readsdmx:::read_sdmx_(path)
  as.data.frame(d, stringsAsFactors = FALSE)
  data <- as.data.frame(d, stringsAsFactors = FALSE)

}



# Exploratory download of the QNA data-structure definition from the
# stats.oecd.org SDMX endpoint; the URL requests the SDMX-ML format.
url <- "http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/QNA/all?format=SDMX-ML"
destfile <- tempfile(fileext = ".xml")
method <- "libcurl"

# download.file() returns an integer status code; anything other than 0 aborts
# the script here. NOTE(review): the name `df` holds that status code, not a
# data frame.
df <- download.file(url, destfile, method = method, quiet = TRUE)
stopifnot(df == 0L)

# Read and inspect the first few lines of the downloaded file
file_content <- readLines(destfile, n = 10)
print(file_content)



# Install and load jsonlite package if necessary
# install.packages("jsonlite")
library(jsonlite)

# NOTE(review): this is the same SDMX-ML URL as above, but the response is
# stored with a .json extension and parsed as JSON below. If the endpoint
# returns XML as requested, fromJSON() will presumably fail -- confirm whether
# a JSON endpoint was intended.
url <- "http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/QNA/all?format=SDMX-ML"
destfile <- tempfile(fileext = ".json")  # Change file extension to .json

# Download the file
df <- download.file(url, destfile, method = "libcurl", quiet = TRUE)
stopifnot(df == 0L)

# Read and parse the JSON file
json_data <- fromJSON(destfile)
print(json_data)  # Inspect the data structure

# Show the structure of the `data` element of the parsed result
str(json_data$data)



# URL on the sdmx.oecd.org endpoint with format=jsondata.
# NOTE(review): assigned but not used by any visible statement after it.
url <- "https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_GROWTH_OECD/?format=jsondata"

+87 −0
Original line number Diff line number Diff line

# -> 0) set up
# NOTE: the workspace is deliberately not wiped with rm(list = ls()). That call
# deletes the caller's objects but leaves attached packages and options as they
# are, so it does not produce a clean session. Restart R to get a fresh state.

# packages used by this script
library(OECD)
library(dplyr)
library(zoo)
library(ggplot2)
library(plotly)

# # -> 0) load data structure for qna
# dataset <- "QNA"
# url <- paste0("https://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/", dataset)
# data_structure <- readsdmx::read_sdmx(url)
# data_structure %>%
#   filter(id == c("CL_QNA_SUBJECT"),
#          value %in% c("B1G", "B1GVA", "B1GVB_E", "B1GVF", "B1GVG_I", "B1GVJ", "B1GVK", "B1GVL", "B1GVM_N", "B1GVO_Q", "B1GVR_U")) %>%
#   select(value, en_description) %>%
#   kable(col.names = c("Code", "Description"))
# 
# # -> 1) load data
# 
# dataset <- "QNA"
# start_time <- as.Date("2010-01-01")
# end_time <- as.Date("2023-10-01")
# # Note: Q1: 2020-01-01; Q2: 2020-04-01; Q3: 2020-07-01; Q4: 2020-10-01
# filter = list(LOCATION = c("DEU", "FRA"),
#               SUBJECT = c("B1G", "B1GVA", "B1GVB_E", "B1GVF", "B1GVG_I", "B1GVJ", "B1GVK", "B1GVL", "B1GVM_N", "B1GVO_Q", "B1GVR_U"),
#               MEASURE = c("LNBQRSA"),
#               FREQUENCY = c("Q"))
# 
# qna_dat <- get_dataset(dataset, filter, start_time, end_time)



# -> 1) load data ----
# Download the DF_QNA_EXPENDITURE_INDICES dataflow from the OECD SDMX REST API
# and keep only the series needed below.
library(readsdmx)
library(dplyr)

# The key "Q............" sets the first key position to "Q" and leaves all
# remaining positions empty.
url <- "https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_INDICES,1.1/Q............"
qna_dat <- read_sdmx(path = url)

# restrict to the two reference areas, the selected transactions and price
# base "LR"
qna_dat <- qna_dat %>%
  filter(
    REF_AREA %in% c("DEU", "FRA"),
    TRANSACTION %in% c("B1GQ", "P3", "P51G", "P6", "P7"),
    PRICE_BASE %in% c("LR")
  )

# Fail early: an empty selection would otherwise be saved and only show up
# later as empty results.
if (nrow(qna_dat) == 0L) {
  stop(
    "No observations left after filtering the OECD QNA download.",
    call. = FALSE
  )
}

# check
head(qna_dat)
tail(qna_dat)

# save original data (create the output directory first; save() does not)
dir.create("./01_data", showWarnings = FALSE, recursive = TRUE)
save(qna_dat, file = "./01_data/qna_dat_00_updated.RData")

# inspect the classes of the raw value and time columns before converting
class(qna_dat[["ObsValue"]])
class(qna_dat[["ObsDimension"]])

# observation values -> numeric
qna_dat[["ObsValue"]] <- as.numeric(qna_dat[["ObsValue"]])

# time periods, parsed with format "%Y-Q%q" -> zoo yearqtr
qna_dat[["ObsDimension"]] <- as.yearqtr(qna_dat[["ObsDimension"]], "%Y-Q%q")

# check
head(qna_dat)
tail(qna_dat)

# transform data ----
# Adds, per REF_AREA and TRANSACTION:
#   B1GQ_lev  level of the B1GQ series for the same REF_AREA and quarter
#   lev       level (copy of ObsValue)
#   gro       quarter-on-quarter growth of lev, in percent
#   sha       lev as a percentage of B1GQ_lev
#   gro_con   gro weighted by the previous quarter's lev / B1GQ_lev,
#             i.e. (lev - previous lev) / previous B1GQ_lev * 100

# The join and the lags below assume one observation per REF_AREA,
# TRANSACTION and quarter. Warn instead of silently duplicating rows or
# mixing series if that does not hold.
if (anyDuplicated(qna_dat[c("REF_AREA", "TRANSACTION", "ObsDimension")]) > 0L) {
  warning(
    "Multiple observations per REF_AREA, TRANSACTION and quarter: ",
    "the B1GQ join may duplicate rows and lagged values may mix series.",
    call. = FALSE
  )
}

qna_dat <- qna_dat %>%
  left_join(
    qna_dat %>%
      filter(TRANSACTION == "B1GQ") %>%
      select(REF_AREA, ObsDimension, B1GQ_lev = ObsValue),
    by = c("REF_AREA", "ObsDimension")
  ) %>%
  group_by(REF_AREA, TRANSACTION) %>%
  mutate(
    lev = ObsValue,
    # Lag along time explicitly: the row order delivered by the API is not
    # guaranteed to be chronological. order_by leaves the row order untouched.
    # dplyr::lag is spelled out because stats::lag does not shift plain vectors.
    lev_lag = dplyr::lag(lev, order_by = as.numeric(ObsDimension)),
    B1GQ_lev_lag = dplyr::lag(B1GQ_lev, order_by = as.numeric(ObsDimension)),
    gro = (lev - lev_lag) / lev_lag * 100,
    sha = lev / B1GQ_lev * 100,
    gro_con = gro * lev_lag / B1GQ_lev_lag
  ) %>%
  # drop the helper columns so the result has the same columns as before
  select(-lev_lag, -B1GQ_lev_lag)
# NOTE(review): the result stays grouped by REF_AREA and TRANSACTION, as
# before, in case later scripts rely on it; call ungroup() after loading
# if the grouping is not wanted.

# check
head(qna_dat)
tail(qna_dat)

# save transformed data ----
# create the output directory first; save() does not create it
dir.create("./01_data", showWarnings = FALSE, recursive = TRUE)
save(qna_dat, file = "./01_data/qna_dat_01_updated.RData")