# --- Diff-viewer residue (not part of the script; kept as a comment) ---
# Loading 221207_JB_SQL_assignment.pdf 0 → 100644 +212 KiB File added.
# No diff preview for this file type. View file
# CODE/220112_JB_Solution_Code_CLEAN.R 0 → 100644 +337 −0
# Original line number Diff line number Diff line
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# PREAMBLE ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## a. CLEAN WORKSPACE AND LOAD LIBRARIES ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# NOTE: the former `rm(list = ls())` was removed. It deletes the caller's
# objects without giving a clean session (attached packages and options
# survive); restart R for a clean run instead.
options(stringsAsFactors = FALSE)
library("RPostgreSQL")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# TASKS ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Connection settings. Every value can be overridden with an environment
# variable; the defaults are the values that used to be hard-coded here.
dsn_database <- Sys.getenv("AIDAHO_DB_NAME", unset = "aidaho") # database name
dsn_hostname <- Sys.getenv("AIDAHO_DB_HOST", unset = "193.196.53.49") # localhost = 127.0.0.1
dsn_port <- Sys.getenv("AIDAHO_DB_PORT", unset = "8001") # port number
dsn_uid <- Sys.getenv("AIDAHO_DB_USER", unset = "student") # user name
dsn_pwd <- Sys.getenv("AIDAHO_DB_PASSWORD", unset = "aidaho") # password

# Open the connection. A failure aborts the script right here: swallowing the
# error would only postpone it to the first query, which would then fail with
# the unrelated message "object 'connect' not found".
tryCatch(
  {
    drv <- dbDriver("PostgreSQL")
    print("Connecting to Database…")
    connect <- dbConnect(
      drv,
      dbname = dsn_database,
      host = dsn_hostname,
      port = dsn_port,
      user = dsn_uid,
      password = dsn_pwd
    )
    print("Database Connected!")
  },
  error = function(cond) {
    stop(
      "Unable to connect to Database: ", conditionMessage(cond),
      call. = FALSE
    )
  }
)

# Check Connection
# dbGetQuery() sends the statement, fetches all rows and clears the result.
dbGetQuery(connect, "SELECT version();")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 2.
## Get an overview over the database ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dbGetQuery(connect, "SELECT * FROM iex.trade_reports LIMIT 10;")

dbGetQuery(connect, "
  SELECT column_name, data_type
  FROM information_schema.columns
  WHERE table_schema = 'iex';")

#+ What do the above queries return?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ The first query returns 10 observations from the table iex.trade_reports.
#+ The second query returns the name and data type of every column in the
#+ schema iex.

#+ What other tables does the information_schema contain?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ Well let's see: list the tables/views of the information_schema itself ...
dbGetQuery(connect, "
  SELECT table_name
  FROM information_schema.tables
  WHERE table_schema = 'information_schema'
  ORDER BY table_name;")

#+ ... and show everything information_schema.columns knows about schema iex.
dbGetQuery(connect, "
  SELECT *
  FROM information_schema.columns
  WHERE table_schema = 'iex';")

#+ What information do the columns of iex.trade_reports contain?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ ordinal   - ordinal number that IDs the timestamp
#+ timestamp - the timestamp of the trade up to 6 digit precision
#+ flags     - the trade flag as used by the IEX
#+ symbol    - the stock ticker
#+ size      - the size of the transaction (how many shares have been transacted)
#+ price     - the price of the trade
#+ trade_id  - id number that identifies the transaction

#+ Does a primary key exist in the table?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ NOTE(review): the earlier answer read the key off is_nullable = NO in
#+ information_schema.columns. That only shows NOT NULL columns; a NOT NULL
#+ column need not belong to a primary key. The query below lists the columns
#+ of the primary-key index, if one exists (no rows = no primary key).
dbGetQuery(connect, "
  SELECT a.attname AS column_name
  FROM pg_index AS i
  JOIN pg_attribute AS a
    ON a.attrelid = i.indrelid
   AND a.attnum = ANY(i.indkey)
  WHERE i.indrelid = 'iex.trade_reports'::regclass
    AND i.indisprimary;")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 3. Short Queries ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+ How many distinct symbols does the table contain?
dbGetQuery(connect, "SELECT COUNT(DISTINCT symbol) FROM iex.trade_reports;")

#+ How many different financial instruments (symbols) have been traded
#+ BETWEEN 2022-01-24 10:00:00-05 AND 2022-01-24 11:00:00-05?
# (the stray blank inside the second timestamp literal was removed)
dbGetQuery(connect, "
  SELECT COUNT(DISTINCT symbol)
  FROM iex.trade_reports
  WHERE timestamp BETWEEN '2022-01-24 10:00:00-05'
                      AND '2022-01-24 11:00:00-05';")

#+ How many trades of AAPL have taken place within the trading hours 10h00
#+ and 11h00?
# Same window as above, written with the same UTC-offset notation.
dbGetQuery(connect, "
  SELECT COUNT(DISTINCT trade_id)
  FROM iex.trade_reports
  WHERE timestamp BETWEEN '2022-01-24 10:00:00-05'
                      AND '2022-01-24 11:00:00-05'
    AND symbol = 'AAPL';")

#+ Calculate the average price for each symbol in the sample?
dbGetQuery(connect, "
  SELECT symbol, AVG(price)
  FROM iex.trade_reports
  GROUP BY symbol;")

#+ Which symbol has the highest average price?
dbGetQuery(connect, "
  SELECT symbol, AVG(price)
  FROM iex.trade_reports
  GROUP BY symbol
  ORDER BY AVG(price) DESC
  LIMIT 1;")

#+ How many symbols have an average price above 1000 USD?
# First list the symbols, then answer the actual question with a count.
dbGetQuery(connect, "
  SELECT symbol, AVG(price)
  FROM iex.trade_reports
  GROUP BY symbol
  HAVING AVG(price) > 1000;")
dbGetQuery(connect, "
  SELECT COUNT(*) AS n_symbols
  FROM (
    SELECT symbol
    FROM iex.trade_reports
    GROUP BY symbol
    HAVING AVG(price) > 1000
  ) AS expensive;")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 4.
## Last price within a 5 minute interval ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Build the innermost query: every trade of `ticker`, tagged with the start of
# the `interval`-minute bucket it falls into (column time_interval).
#   interval: bucket width in minutes (single positive number or numeric string)
#   ticker:   stock symbol (single string)
# Returns the SQL text (no trailing semicolon, so it can be nested).
build_innerquery <- function(interval, ticker) {
  minutes <- suppressWarnings(as.numeric(interval))
  if (length(minutes) != 1L || is.na(minutes) || minutes <= 0) {
    stop("`interval` must be a single positive number of minutes.",
      call. = FALSE
    )
  }
  if (!is.character(ticker) || length(ticker) != 1L || is.na(ticker)) {
    stop("`ticker` must be a single string.", call. = FALSE)
  }
  # Double embedded single quotes so the ticker cannot terminate the literal.
  ticker_sql <- gsub("'", "''", ticker, fixed = TRUE)
  bucket_sec <- paste0("EXTRACT(epoch FROM INTERVAL '", interval, " min')")
  paste0(
    "SELECT TO_TIMESTAMP( FLOOR( EXTRACT(epoch FROM timestamp) / ",
    bucket_sec, " ) * ", bucket_sec, " ) as time_interval, * ",
    "FROM iex.trade_reports WHERE symbol = '", ticker_sql,
    "' ORDER BY timestamp"
  )
}

# Number the trades inside each bucket, newest first (rownumber = 1 is the
# last trade of the bucket).
build_mezzaninequery <- function(innerquery) {
  paste0(
    "SELECT ",
    "row_number() OVER (PARTITION BY time_interval ORDER BY timestamp DESC) as rownumber, ",
    "* ",
    "FROM ",
    "(", innerquery, ") as iq"
  )
}

# Construct a function
# Returns the SQL text selecting the last trade of `ticker` in every
# `interval`-minute bucket. The result is unordered and has no trailing
# semicolon, so callers can nest it or append ORDER BY.
get_outerquery <- function(interval, ticker) {
  mezzaninequery <- build_mezzaninequery(build_innerquery(interval, ticker))
  paste0(
    "SELECT * ",
    "FROM ",
    "(", mezzaninequery, ") as mq ",
    "WHERE rownumber=1"
  )
}

# Step-by-step exploration of the three nesting levels for one example.
ticker <- "MSFT"
interval <- "5"

innerquery <- build_innerquery(interval, ticker)
# test innerquery
dbGetQuery(connect, paste0(innerquery, " LIMIT 10"))

mezzaninequery <- build_mezzaninequery(innerquery)
# test mezzaninequery
dbGetQuery(connect, paste0(mezzaninequery, " LIMIT 10"))

outerquery <- paste0(get_outerquery(interval, ticker), " ORDER BY time_interval")
# test outerquery (no limit)
dbGetQuery(connect, outerquery)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 5. MERGING ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
### i.
### INNER JOIN
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
oq.AAPL <- get_outerquery(interval = "5", ticker = "AAPL")
oq.MSFT <- get_outerquery(interval = "5", ticker = "MSFT")

# This section is about the INNER JOIN; the statement previously said
# LEFT JOIN, which keeps AAPL buckets that have no MSFT trade.
join_statement <- paste0(
  "SELECT a.time_interval as ati, b.time_interval as bti, ",
  "a.symbol as symbol_a, b.symbol as symbol_b, ",
  "a.price as price_a, b.price as price_b FROM ",
  "(", oq.AAPL, ") as a ",
  " INNER JOIN ",
  "(", oq.MSFT, ") as b ",
  "ON a.time_interval = b.time_interval;"
)
# test join statement (no limit)
dbGetQuery(connect, join_statement)

### ii. LEFT JOIN
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Build a query producing one row per `interval`-minute step between the two
# bounds in `minmax_time` (a one-row result with the minimum in column 1 and
# the maximum in column 2).
# Both bounds are rendered in UTC and cast the same way
# ('...'::TIMESTAMP AT TIME ZONE 'UTC'). Previously the upper bound used
# ::TIMESTAMPTZ AT TIME ZONE 'UTC', which yields a zone-less timestamp that is
# re-interpreted in the session time zone and shifts the end of the series
# whenever that zone is not UTC.
# Assumes the driver returns the bounds as date-time values that format() can
# render in UTC — TODO confirm.
build_timeseriesquery <- function(minmax_time, interval) {
  lower <- minmax_time[[1]]
  upper <- minmax_time[[2]]
  if (length(lower) != 1L || length(upper) != 1L ||
    is.na(lower) || is.na(upper)) {
    stop("No trades found: cannot build a time grid without bounds.",
      call. = FALSE
    )
  }
  paste0(
    "SELECT generate_series('",
    format(lower, tz = "UTC"), "'::TIMESTAMP AT TIME ZONE 'UTC','",
    format(upper, tz = "UTC"), "'::TIMESTAMP AT TIME ZONE 'UTC','",
    interval, "m') as time_interval"
  )
}

interval <- "5"
# Get the minimum and maximum time_interval
minmax_time_str <- paste0(
  "SELECT min(time_interval),max(time_interval) from (", oq.AAPL, ") as a;"
)
minmax_time <- dbGetQuery(connect, minmax_time_str)

timeseriesquery <- build_timeseriesquery(minmax_time, interval)
dbGetQuery(connect, timeseriesquery)

left_join_statement <- paste0(
  "SELECT a.time_interval, b.symbol, b.price FROM ",
  "(", timeseriesquery, ") as a ",
  " LEFT JOIN ",
  "(", oq.MSFT, ") as b ",
  "ON a.time_interval = b.time_interval;"
)
dbGetQuery(connect, left_join_statement)

# Returns the SQL text giving, for every `interval`-minute step between the
# first and last trade of `ticker`, the last traded price (NULL symbol/price
# for steps without a trade). No trailing semicolon, so it can be nested.
#   interval: bucket width in minutes
#   ticker:   stock symbol
#   con:      DBI connection used to look up the first/last bucket; defaults
#             to the script-level `connect`
get_Xmin_prices <- function(interval, ticker, con = connect) {
  oq <- get_outerquery(interval = interval, ticker = ticker)
  # Get the minimum and maximum time_interval
  minmax_time_str <- paste0(
    "SELECT min(time_interval),max(time_interval) from (", oq, ") as a;"
  )
  minmax_time <- dbGetQuery(con, minmax_time_str)
  timeseriesquery <- build_timeseriesquery(minmax_time, interval)
  paste0(
    "SELECT a.time_interval, b.symbol, b.price FROM ",
    "(", timeseriesquery, ") as a ",
    " LEFT JOIN ",
    "(", oq, ") as b ",
    "ON a.time_interval = b.time_interval"
  )
}

test1 <- get_Xmin_prices(interval = "1", ticker = "GME")
dbGetQuery(connect, test1)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 6. WRITE INFORMATION FORWARD ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
query0 <- get_Xmin_prices(interval = "1", ticker = "GME")

# count(price) skips NULLs, so the running count only increases on rows that
# have a price: each priced row and the empty rows after it share one value.
query1 <- paste0(
  "SELECT count(price) OVER (PARTITION BY 1 ORDER BY time_interval) AS count_prices, * FROM (",
  query0, " ) as q0"
)

# Within each such group the first row carries the price to write forward.
query2 <- paste0(
  "SELECT count_prices,time_interval,symbol,price, ",
  "first_value(price) OVER part_window AS price_filled ",
  "FROM (", query1, ") as foo WINDOW part_window AS (PARTITION BY count_prices ORDER BY time_interval)"
)
dbGetQuery(connect, query2)

# Returns the SQL text of the `interval`-minute price series of `ticker` with
# gaps filled by the most recent earlier price (symbol is filled the same way).
get_Xmin_prices_no_gaps <- function(interval, ticker) {
  gappy_query <- get_Xmin_prices(interval = interval, ticker = ticker)
  counted_query <- paste0(
    "SELECT count(price) OVER (PARTITION BY 1 ORDER BY time_interval) AS count_prices, * FROM (",
    gappy_query, " ) as px"
  )
  paste0(
    "SELECT count_prices,time_interval,",
    "first_value(symbol) OVER part_window AS symbol, ",
    "first_value(price) OVER part_window AS price ",
    "FROM (", counted_query, ") as foo WINDOW part_window AS (PARTITION BY count_prices ORDER BY time_interval)"
  )
}

query <- get_Xmin_prices_no_gaps(interval = "1", ticker = "GME")
dbGetQuery(connect, query)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 7.
# CALCULATE LOGARITHMIC FIRST DIFFERENCES (Log-returns) ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# PostgreSQL's log() is the base-10 logarithm; log-returns are defined with
# the natural logarithm, which is ln().
clean_query <- get_Xmin_prices_no_gaps(interval = "1", ticker = "GME")
lagged <- paste0(
  "SELECT *, ln(price) - lag(ln(price),1) OVER (ORDER BY time_interval) as log_return FROM ",
  "(", clean_query, ") as cq;"
)
dbGetQuery(connect, lagged)

# Returns the SQL text computing the log-return series of `ticker` on an
# `interval`-minute grid (columns time_interval, symbol, log_return). The
# first row has no predecessor, so its log_return is NULL.
get_first_differences <- function(interval, ticker) {
  clean_query <- get_Xmin_prices_no_gaps(interval = interval, ticker = ticker)
  paste0(
    "SELECT time_interval,symbol, ln(price) - lag(ln(price),1) OVER (ORDER BY time_interval) as log_return FROM ",
    "(", clean_query, ") as cq;"
  )
}

test_lagged <- get_first_differences(interval = "10", ticker = "TSLA")
dbGetQuery(connect, test_lagged)
# Loading  (diff-viewer residue, kept as a comment)
# 221207_JB_SQL_assignment.pdf 0 → 100644 +212 KiB File added. No diff preview for this file type. View file
# --- Diff-viewer residue (not part of the script; kept as a comment) ---
# CODE/220112_JB_Solution_Code_CLEAN.R 0 → 100644 +337 −0
# Original line number Diff line number Diff line
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# PREAMBLE ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## a. CLEAN WORKSPACE AND LOAD LIBRARIES ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# NOTE: the former `rm(list = ls())` was removed. It deletes the caller's
# objects without giving a clean session (attached packages and options
# survive); restart R for a clean run instead.
options(stringsAsFactors = FALSE)
library("RPostgreSQL")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# TASKS ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Connection settings. Every value can be overridden with an environment
# variable; the defaults are the values that used to be hard-coded here.
dsn_database <- Sys.getenv("AIDAHO_DB_NAME", unset = "aidaho") # database name
dsn_hostname <- Sys.getenv("AIDAHO_DB_HOST", unset = "193.196.53.49") # localhost = 127.0.0.1
dsn_port <- Sys.getenv("AIDAHO_DB_PORT", unset = "8001") # port number
dsn_uid <- Sys.getenv("AIDAHO_DB_USER", unset = "student") # user name
dsn_pwd <- Sys.getenv("AIDAHO_DB_PASSWORD", unset = "aidaho") # password

# Open the connection. A failure aborts the script right here: swallowing the
# error would only postpone it to the first query, which would then fail with
# the unrelated message "object 'connect' not found".
tryCatch(
  {
    drv <- dbDriver("PostgreSQL")
    print("Connecting to Database…")
    connect <- dbConnect(
      drv,
      dbname = dsn_database,
      host = dsn_hostname,
      port = dsn_port,
      user = dsn_uid,
      password = dsn_pwd
    )
    print("Database Connected!")
  },
  error = function(cond) {
    stop(
      "Unable to connect to Database: ", conditionMessage(cond),
      call. = FALSE
    )
  }
)

# Check Connection
# dbGetQuery() sends the statement, fetches all rows and clears the result.
dbGetQuery(connect, "SELECT version();")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 2. Get an overview over the database ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dbGetQuery(connect, "SELECT * FROM iex.trade_reports LIMIT 10;")

dbGetQuery(connect, "
  SELECT column_name, data_type
  FROM information_schema.columns
  WHERE table_schema = 'iex';")

#+ What do the above queries return?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ The first query returns 10 observations from the table iex.trade_reports.
#+ The second query returns the name and data type of every column in the
#+ schema iex.

#+ What other tables does the information_schema contain?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ Well let's see: list the tables/views of the information_schema itself ...
dbGetQuery(connect, "
  SELECT table_name
  FROM information_schema.tables
  WHERE table_schema = 'information_schema'
  ORDER BY table_name;")

#+ ... and show everything information_schema.columns knows about schema iex.
dbGetQuery(connect, "
  SELECT *
  FROM information_schema.columns
  WHERE table_schema = 'iex';")

#+ What information do the columns of iex.trade_reports contain?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ ordinal   - ordinal number that IDs the timestamp
#+ timestamp - the timestamp of the trade up to 6 digit precision
#+ flags     - the trade flag as used by the IEX
#+ symbol    - the stock ticker
#+ size      - the size of the transaction (how many shares have been transacted)
#+ price     - the price of the trade
#+ trade_id  - id number that identifies the transaction

#+ Does a primary key exist in the table?
#+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#+ NOTE(review): the earlier answer read the key off is_nullable = NO in
#+ information_schema.columns. That only shows NOT NULL columns; a NOT NULL
#+ column need not belong to a primary key. The query below lists the columns
#+ of the primary-key index, if one exists (no rows = no primary key).
dbGetQuery(connect, "
  SELECT a.attname AS column_name
  FROM pg_index AS i
  JOIN pg_attribute AS a
    ON a.attrelid = i.indrelid
   AND a.attnum = ANY(i.indkey)
  WHERE i.indrelid = 'iex.trade_reports'::regclass
    AND i.indisprimary;")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 3. Short Queries ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+ How many distinct symbols does the table contain?
dbGetQuery(connect, "SELECT COUNT(DISTINCT symbol) FROM iex.trade_reports;")

#+ How many different financial instruments (symbols) have been traded
#+ BETWEEN 2022-01-24 10:00:00-05 AND 2022-01-24 11:00:00-05?
# (the stray blank inside the second timestamp literal was removed)
dbGetQuery(connect, "
  SELECT COUNT(DISTINCT symbol)
  FROM iex.trade_reports
  WHERE timestamp BETWEEN '2022-01-24 10:00:00-05'
                      AND '2022-01-24 11:00:00-05';")

#+ How many trades of AAPL have taken place within the trading hours 10h00
#+ and 11h00?
# Same window as the previous query, written with the same UTC-offset notation.
dbGetQuery(connect, "
  SELECT COUNT(DISTINCT trade_id)
  FROM iex.trade_reports
  WHERE timestamp BETWEEN '2022-01-24 10:00:00-05'
                      AND '2022-01-24 11:00:00-05'
    AND symbol = 'AAPL';")

#+ Calculate the average price for each symbol in the sample?
dbGetQuery(connect, "
  SELECT symbol, AVG(price)
  FROM iex.trade_reports
  GROUP BY symbol;")

#+ Which symbol has the highest average price?
dbGetQuery(connect, "
  SELECT symbol, AVG(price)
  FROM iex.trade_reports
  GROUP BY symbol
  ORDER BY AVG(price) DESC
  LIMIT 1;")

#+ How many symbols have an average price above 1000 USD?
# First list the symbols, then answer the actual question with a count.
dbGetQuery(connect, "
  SELECT symbol, AVG(price)
  FROM iex.trade_reports
  GROUP BY symbol
  HAVING AVG(price) > 1000;")
dbGetQuery(connect, "
  SELECT COUNT(*) AS n_symbols
  FROM (
    SELECT symbol
    FROM iex.trade_reports
    GROUP BY symbol
    HAVING AVG(price) > 1000
  ) AS expensive;")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 4. Last price within a 5 minute interval ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Build the innermost query: every trade of `ticker`, tagged with the start of
# the `interval`-minute bucket it falls into (column time_interval).
#   interval: bucket width in minutes (single positive number or numeric string)
#   ticker:   stock symbol (single string)
# Returns the SQL text (no trailing semicolon, so it can be nested).
build_innerquery <- function(interval, ticker) {
  minutes <- suppressWarnings(as.numeric(interval))
  if (length(minutes) != 1L || is.na(minutes) || minutes <= 0) {
    stop("`interval` must be a single positive number of minutes.",
      call. = FALSE
    )
  }
  if (!is.character(ticker) || length(ticker) != 1L || is.na(ticker)) {
    stop("`ticker` must be a single string.", call. = FALSE)
  }
  # Double embedded single quotes so the ticker cannot terminate the literal.
  ticker_sql <- gsub("'", "''", ticker, fixed = TRUE)
  bucket_sec <- paste0("EXTRACT(epoch FROM INTERVAL '", interval, " min')")
  paste0(
    "SELECT TO_TIMESTAMP( FLOOR( EXTRACT(epoch FROM timestamp) / ",
    bucket_sec, " ) * ", bucket_sec, " ) as time_interval, * ",
    "FROM iex.trade_reports WHERE symbol = '", ticker_sql,
    "' ORDER BY timestamp"
  )
}

# Number the trades inside each bucket, newest first (rownumber = 1 is the
# last trade of the bucket).
build_mezzaninequery <- function(innerquery) {
  paste0(
    "SELECT ",
    "row_number() OVER (PARTITION BY time_interval ORDER BY timestamp DESC) as rownumber, ",
    "* ",
    "FROM ",
    "(", innerquery, ") as iq"
  )
}

# Construct a function
# Returns the SQL text selecting the last trade of `ticker` in every
# `interval`-minute bucket. The result is unordered and has no trailing
# semicolon, so callers can nest it or append ORDER BY.
get_outerquery <- function(interval, ticker) {
  mezzaninequery <- build_mezzaninequery(build_innerquery(interval, ticker))
  paste0(
    "SELECT * ",
    "FROM ",
    "(", mezzaninequery, ") as mq ",
    "WHERE rownumber=1"
  )
}

# Step-by-step exploration of the three nesting levels for one example.
ticker <- "MSFT"
interval <- "5"

innerquery <- build_innerquery(interval, ticker)
# test innerquery
dbGetQuery(connect, paste0(innerquery, " LIMIT 10"))

mezzaninequery <- build_mezzaninequery(innerquery)
# test mezzaninequery
dbGetQuery(connect, paste0(mezzaninequery, " LIMIT 10"))

outerquery <- paste0(get_outerquery(interval, ticker), " ORDER BY time_interval")
# test outerquery (no limit)
dbGetQuery(connect, outerquery)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 5. MERGING ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
### i. INNER JOIN
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
oq.AAPL <- get_outerquery(interval = "5", ticker = "AAPL")
oq.MSFT <- get_outerquery(interval = "5", ticker = "MSFT")

# This section is about the INNER JOIN; the statement previously said
# LEFT JOIN, which keeps AAPL buckets that have no MSFT trade.
join_statement <- paste0(
  "SELECT a.time_interval as ati, b.time_interval as bti, ",
  "a.symbol as symbol_a, b.symbol as symbol_b, ",
  "a.price as price_a, b.price as price_b FROM ",
  "(", oq.AAPL, ") as a ",
  " INNER JOIN ",
  "(", oq.MSFT, ") as b ",
  "ON a.time_interval = b.time_interval;"
)
# test join statement (no limit)
dbGetQuery(connect, join_statement)

### ii.
### LEFT JOIN
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Build a query producing one row per `interval`-minute step between the two
# bounds in `minmax_time` (a one-row result with the minimum in column 1 and
# the maximum in column 2).
# Both bounds are rendered in UTC and cast the same way
# ('...'::TIMESTAMP AT TIME ZONE 'UTC'). Previously the upper bound used
# ::TIMESTAMPTZ AT TIME ZONE 'UTC', which yields a zone-less timestamp that is
# re-interpreted in the session time zone and shifts the end of the series
# whenever that zone is not UTC.
# Assumes the driver returns the bounds as date-time values that format() can
# render in UTC — TODO confirm.
build_timeseriesquery <- function(minmax_time, interval) {
  lower <- minmax_time[[1]]
  upper <- minmax_time[[2]]
  if (length(lower) != 1L || length(upper) != 1L ||
    is.na(lower) || is.na(upper)) {
    stop("No trades found: cannot build a time grid without bounds.",
      call. = FALSE
    )
  }
  paste0(
    "SELECT generate_series('",
    format(lower, tz = "UTC"), "'::TIMESTAMP AT TIME ZONE 'UTC','",
    format(upper, tz = "UTC"), "'::TIMESTAMP AT TIME ZONE 'UTC','",
    interval, "m') as time_interval"
  )
}

interval <- "5"
# Get the minimum and maximum time_interval
minmax_time_str <- paste0(
  "SELECT min(time_interval),max(time_interval) from (", oq.AAPL, ") as a;"
)
minmax_time <- dbGetQuery(connect, minmax_time_str)

timeseriesquery <- build_timeseriesquery(minmax_time, interval)
dbGetQuery(connect, timeseriesquery)

left_join_statement <- paste0(
  "SELECT a.time_interval, b.symbol, b.price FROM ",
  "(", timeseriesquery, ") as a ",
  " LEFT JOIN ",
  "(", oq.MSFT, ") as b ",
  "ON a.time_interval = b.time_interval;"
)
dbGetQuery(connect, left_join_statement)

# Returns the SQL text giving, for every `interval`-minute step between the
# first and last trade of `ticker`, the last traded price (NULL symbol/price
# for steps without a trade). No trailing semicolon, so it can be nested.
#   interval: bucket width in minutes
#   ticker:   stock symbol
#   con:      DBI connection used to look up the first/last bucket; defaults
#             to the script-level `connect`
get_Xmin_prices <- function(interval, ticker, con = connect) {
  oq <- get_outerquery(interval = interval, ticker = ticker)
  # Get the minimum and maximum time_interval
  minmax_time_str <- paste0(
    "SELECT min(time_interval),max(time_interval) from (", oq, ") as a;"
  )
  minmax_time <- dbGetQuery(con, minmax_time_str)
  timeseriesquery <- build_timeseriesquery(minmax_time, interval)
  paste0(
    "SELECT a.time_interval, b.symbol, b.price FROM ",
    "(", timeseriesquery, ") as a ",
    " LEFT JOIN ",
    "(", oq, ") as b ",
    "ON a.time_interval = b.time_interval"
  )
}

test1 <- get_Xmin_prices(interval = "1", ticker = "GME")
dbGetQuery(connect, test1)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## 6.
## WRITE INFORMATION FORWARD ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
query0 <- get_Xmin_prices(interval = "1", ticker = "GME")

# count(price) skips NULLs, so the running count only increases on rows that
# have a price: each priced row and the empty rows after it share one value.
query1 <- paste0(
  "SELECT count(price) OVER (PARTITION BY 1 ORDER BY time_interval) AS count_prices, * FROM (",
  query0, " ) as q0"
)

# Within each such group the first row carries the price to write forward.
query2 <- paste0(
  "SELECT count_prices,time_interval,symbol,price, ",
  "first_value(price) OVER part_window AS price_filled ",
  "FROM (", query1, ") as foo WINDOW part_window AS (PARTITION BY count_prices ORDER BY time_interval)"
)
# dbGetQuery() sends the statement, fetches all rows and clears the result.
dbGetQuery(connect, query2)

# Returns the SQL text of the `interval`-minute price series of `ticker` with
# gaps filled by the most recent earlier price (symbol is filled the same way).
# The subquery alias is neutral now; it used to be the hard-coded name "GME"
# regardless of the ticker requested.
get_Xmin_prices_no_gaps <- function(interval, ticker) {
  gappy_query <- get_Xmin_prices(interval = interval, ticker = ticker)
  counted_query <- paste0(
    "SELECT count(price) OVER (PARTITION BY 1 ORDER BY time_interval) AS count_prices, * FROM (",
    gappy_query, " ) as px"
  )
  paste0(
    "SELECT count_prices,time_interval,",
    "first_value(symbol) OVER part_window AS symbol, ",
    "first_value(price) OVER part_window AS price ",
    "FROM (", counted_query, ") as foo WINDOW part_window AS (PARTITION BY count_prices ORDER BY time_interval)"
  )
}

query <- get_Xmin_prices_no_gaps(interval = "1", ticker = "GME")
dbGetQuery(connect, query)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 7.
# CALCULATE LOGARITHMIC FIRST DIFFERENCES (Log-returns) ----
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# PostgreSQL's log() is the base-10 logarithm; log-returns are defined with
# the natural logarithm, which is ln().
clean_query <- get_Xmin_prices_no_gaps(interval = "1", ticker = "GME")
lagged <- paste0(
  "SELECT *, ln(price) - lag(ln(price),1) OVER (ORDER BY time_interval) as log_return FROM ",
  "(", clean_query, ") as cq;"
)
dbGetQuery(connect, lagged)

# Returns the SQL text computing the log-return series of `ticker` on an
# `interval`-minute grid (columns time_interval, symbol, log_return). The
# first row has no predecessor, so its log_return is NULL.
get_first_differences <- function(interval, ticker) {
  clean_query <- get_Xmin_prices_no_gaps(interval = interval, ticker = ticker)
  paste0(
    "SELECT time_interval,symbol, ln(price) - lag(ln(price),1) OVER (ORDER BY time_interval) as log_return FROM ",
    "(", clean_query, ") as cq;"
  )
}

test_lagged <- get_first_differences(interval = "10", ticker = "TSLA")
dbGetQuery(connect, test_lagged)

# End of script: release the database connection.
dbDisconnect(connect)