R & SQL Server for real-time predictions

Steph Locke @theStephLocke Locke Data

2018-10-26

Steph Locke

SQL Server + ML Services

Overview

  • Use sp_execute_external_script to call R from SQL
  • Store model objects in SQL Server
  • Use certain models in a native PREDICT function

Editions

  • Express (w/ Advanced tools) - R Open or base R
  • Standard - R Open or base R
  • Enterprise - ML Server

SQL Server feature comparison

Key things to manage

  • RAM & CPU
  • Licenses vs Server resources
  • Permissions
  • Code

Connecting

library(DBI)
library(odbc)

driver   = "ODBC Driver 13 for SQL Server"
server   = "lockedata.westcentralus.cloudapp.azure.com"
database = "datasci"
uid      = "lockedata"
pwd      = "ITDevconnections!"

dbConn<-dbConnect(odbc(),
          driver=driver, server=server,
          database=database, uid=uid,
          pwd=pwd)

library(RODBCext)
## Loading required package: RODBC
## 
## Attaching package: 'RODBCext'
## The following objects are masked from 'package:RODBC':
## 
##     odbcFetchRows, sqlFetchMore
dbstring <- glue::glue('Driver={driver};Server={server};Database={database};Uid={uid};Pwd={pwd}')
 
dbconn <- RODBC::odbcDriverConnect(dbstring)

A basic execution

EXECUTE sp_execute_external_script
        @language = N'R'
        ,@script = N'OutputDataSet <- InputDataSet'
        ,@input_data_1 = N'SELECT 1 as Col'
 WITH RESULT SETS ((col varchar(50) not null))        
1 records
col
1

Model storage area

CREATE TABLE [companyModels]    (  
  [id] int NOT NULL PRIMARY KEY IDENTITY (1,1)   
, [name] varchar(200) NOT NULL      
, [modelObj] varbinary(max)    
, [ValidFrom] datetime2 (2) GENERATED ALWAYS AS ROW START  
, [ValidTo] datetime2 (2) GENERATED ALWAYS AS ROW END  
, PERIOD FOR SYSTEM_TIME (ValidFrom, ValidTo)  
, CONSTRAINT unique_modelname UNIQUE ([name]))
WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.companyModelsHistory));  

Model UPSERT stored procedure

CREATE PROCEDURE modelUpsert
@modelname  varchar(200) , 
@modelobj varbinary(max) 
AS
WITH MySource as (
    select @modelname as [name], @modelobj as [modelObj]
)
MERGE companymodels AS MyTarget
USING MySource
     ON MySource.[name] = MyTarget.[name]
WHEN MATCHED THEN UPDATE SET 
    modelObj = MySource.[modelObj]
WHEN NOT MATCHED THEN INSERT
    (
        [name], 
        modelObj
    )
    VALUES (
        MySource.[name], 
        MySource.modelObj
    );

Add some data

dbWriteTable(dbConn, "flights", nycflights13::flights, overwrite=TRUE)

From R

Produce a basic model

library(nycflights13)
flightLM<- lm(arr_delay ~ month + day +  hour, data=flights, model = FALSE)

flightLM
## 
## Call:
## lm(formula = arr_delay ~ month + day + hour, data = flights, 
##     model = FALSE)
## 
## Coefficients:
## (Intercept)        month          day         hour  
##  -13.466402    -0.218229    -0.001232     1.659978

Use model in R

sample_flights<- flights[1:5,]
predict(flightLM, sample_flights)
##         1         2         3         4         5 
## -5.385970 -5.385970 -5.385970 -5.385970 -3.725991

Publish model to Microsoft ML Server

mrsdeploy

publishService(
  "basicFlightsLM",
  code = NULL,
  # --- `model` is required for web service with serviceType `Realtime` --- #
  model = "model = flightsLM",
  # --- `serviceType` is required for this web service --- #
  serviceType = "Realtime"
)

Store a model

RODBCext::sqlExecute(dbconn, 
  "exec modelUpsert @modelname=? , @modelobj=?",
   data = data.frame("modelFromR", 
          paste0( serialize(flightLM,NULL)
                ,collapse = "")))

SQL Demo - any R code

Produce a basic model

CREATE PROCEDURE generate_flightlm
AS
BEGIN
CREATE TABLE #varcha
([name] varchar(200),
    [modelobj] VARCHAR(MAX)
)

INSERT INTO #varcha
EXECUTE sp_execute_external_script
        @language = N'R'
        ,@script = N'
        flightLM<-lm(arr_delay ~ month + day +  hour, data=InputDataSet, model=FALSE)
        OutputDataSet<-data.frame(modelname="modelFromInSQL",
        modelobj=paste0(                              serialize(flightLM,NULL)
       ,collapse = "") )
        '
        ,@input_data_1 = N'SELECT * FROM flights'
         ;
INSERT INTO companyModels(name, modelObj) 
SELECT [name], CONVERT(VARBINARY(MAX), modelObj, 2) 
FROM #varcha

END

Produce a basic model

EXEC generate_flightlm

Use model in SQL

DECLARE @mymodel VARBINARY(MAX)=(SELECT modelobj 
                FROM companymodels 
                WHERE [name]='modelFromInSQL'
                );
EXEC sp_execute_external_script
@language = N'R',  
@script = N'
OutputDataSet<-data.frame( predict(unserialize(as.raw(model)), InputDataSet),
InputDataSet[,"arr_delay"]
    )
',
@input_data_1 = N'SELECT TOP 5 * from flights',  
@params = N'@model varbinary(max)',  
@model =  @mymodel 
WITH RESULT SETS ((
    [arr_delay.Pred]  FLOAT (53)    NULL,
    [arr_delay]  FLOAT (53)    NULL))
5 records
arr_delay.Pred arr_delay
-5.385970 11
-5.385970 20
-5.385970 33
-5.385970 -18
-3.725991 -25

In-SQL - use native PREDICT

Produce a basic model

CREATE PROCEDURE generate_flightlm2
AS
BEGIN
DECLARE @model varbinary(max);
EXECUTE sp_execute_external_script
  @language = N'R'
  , @script = N'
        flightLM<-rxLinMod(arr_delay ~ month + day +  hour, data=InputDataSet)
    model <- rxSerializeModel(flightLM, realtimeScoringOnly = TRUE)
    '
  ,@input_data_1 = N'SELECT * FROM flights'
  , @params = N'@model varbinary(max) OUTPUT'
  , @model = @model OUTPUT
  INSERT [companyModels] ([name], [modelObj])
  VALUES('modelFromRevo', @model) ;

END

Produce a basic model

EXEC generate_flightlm2

Use model in SQL

DECLARE @model varbinary(max) = (
  SELECT modelobj
  FROM companyModels
  WHERE [name] = 'modelFromRevo');
SELECT TOP 10 d.*, p.*
  FROM PREDICT(MODEL = @model, DATA = flights as d)
  WITH("arr_delay_Pred" float) as p;
Displaying records 1 - 10
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum origin dest air_time distance hour minute time_hour arr_delay_Pred
2013 1 1 517 515 2 830 819 11 UA 1545 N14228 EWR IAH 227 1400 5 15 2013-01-01 10:00:00 -5.385970
2013 1 1 533 529 4 850 830 20 UA 1714 N24211 LGA IAH 227 1416 5 29 2013-01-01 10:00:00 -5.385970
2013 1 1 542 540 2 923 850 33 AA 1141 N619AA JFK MIA 160 1089 5 40 2013-01-01 10:00:00 -5.385970
2013 1 1 544 545 -1 1004 1022 -18 B6 725 N804JB JFK BQN 183 1576 5 45 2013-01-01 10:00:00 -5.385970
2013 1 1 554 600 -6 812 837 -25 DL 461 N668DN LGA ATL 116 762 6 0 2013-01-01 11:00:00 -3.725991
2013 1 1 554 558 -4 740 728 12 UA 1696 N39463 EWR ORD 150 719 5 58 2013-01-01 10:00:00 -5.385970
2013 1 1 555 600 -5 913 854 19 B6 507 N516JB EWR FLL 158 1065 6 0 2013-01-01 11:00:00 -3.725991
2013 1 1 557 600 -3 709 723 -14 EV 5708 N829AS LGA IAD 53 229 6 0 2013-01-01 11:00:00 -3.725991
2013 1 1 557 600 -3 838 846 -8 B6 79 N593JB JFK MCO 140 944 6 0 2013-01-01 11:00:00 -3.725991
2013 1 1 558 600 -2 753 745 8 AA 301 N3ALAA LGA ORD 138 733 6 0 2013-01-01 11:00:00 -3.725991

Deploying SQL Server & R Services

Azure is your friend

  • Automatable
  • Disposable
  • Scalable

ARM templates

  • JSON specification of resources
  • Paramaterised for reuse
  • Downloadable
  • Deploy in many different languages

PowerShell

  • Azure modules
  • Use ARM templates
  • Used in Azure Automation

Demo

Github

Next steps

Other Microsoft & R things

Azure ML

  • GUI data science
  • R & Python
  • Notebooks
  • Code deployment potential
  • Easy webservices

R in PowerBI

  • Interactive reports
  • R data source
  • R graphics

Wrapup

Microsoft resources

Wrapup