Request 100 servers or optimize the code? Deciding where the comma goes

A number of basic algorithms underlie almost every line of a program written in a high-level language. It is good to have Donald Knuth's classic multivolume work "The Art of Computer Programming" at hand, where many of these basic algorithms are analyzed in detail. But reading and absorbing all of it takes a lot of effort and time, and that effort needs some motivation.







Many assume that knowing such nuances mattered 50 years ago, but that today you can rely on ready-made packages and functions without diving into the details. That is not the case. Nor has it become any less important to understand how data is stored in memory and processed by the CPU.







. . .







.









, . , , .







, , :







  • case_id



    — /;
  • record



    — ;
  • start



    — .








library(tidyverse)
library(data.table)
library(rTRNG)
      
      





. . "". . 10^5-10^n



, .







# Number of cases (distinct case_id values) to simulate.
nn <- 100
# Record labels; every case receives the full set, in this order.
records <- c(
  "first", "one and a half", "second", "third", "fourth",
  "fifth", "sixth"
)

# Long-format tibble: one row per (case_id, record label) pair.
df <- unnest(tibble(case_id = seq_len(nn), recs = list(records)), recs)

# data.table copy of the same data with case_id stored as double.
dt <- as.data.table(df)
dt[, case_id := as.numeric(case_id)]
# Key by case_id, which also physically sorts the table by that column.
setkey(dt, case_id)

head(df, n = 10)
      
      





  # A tibble: 10 x 2
     case_id recs          
       <int> <chr>         
   1       1 first         
   2       1 one and a half
   3       1 second        
   4       1 third         
   5       1 fourth        
   6       1 fifth         
   7       1 sixth         
   8       2 first         
   9       2 one and a half
  10       2 second  
      
      





— . [0; 1]



. unixtimestamp , . . , , .









1.



. , .







# Variant 1 (dplyr): for every case_id draw one uniform [0, 1] value per row
# and store the draws in ascending order in a new column `t_idx`.
#
# df: data frame / tibble with a `case_id` column.
# Returns the ungrouped data with the additional `t_idx` column.
f1 <- function(df) {
  by_case <- group_by(df, case_id)
  # n() is the size of the current group; sorting happens within the group.
  stamped <- mutate(by_case, t_idx = sort(runif(n(), min = 0, max = 1)))
  ungroup(stamped)
}
      
      





. , . , 100 .







  median `itr/sec` mem_alloc
 15.38ms      63.2   284.9KB
      
      





, ?







1+1/2. +



rTRNG



. , , . :







# Variant 1.5 (dplyr + rTRNG): same as the plain dplyr variant, but the
# uniform [0, 1] draws come from rTRNG::runif_trng() instead of stats::runif().
#
# df: data frame / tibble with a `case_id` column.
# Returns the ungrouped data with the additional, per-case sorted `t_idx`.
f1_5 <- function(df) {
  by_case <- group_by(df, case_id)
  stamped <- mutate(
    by_case,
    t_idx = sort(runif_trng(n(), min = 0, max = 1))
  )
  ungroup(stamped)
}
      
      





  median `itr/sec` mem_alloc
 29.34ms      29.5   284.9KB
      
      





. ? . , tidyverse



data.table



, .







2. , data.table



— , .







# Variant 2 (data.table): one random draw for the whole table, one global
# ordering, no per-group work.
#
# dt: data.table with a `case_id` column; modified by reference.
# NOTE(review): writing the ordered values back positionally is only correct
# when `dt` is already ordered by case_id (the setup keys it with setkey()).
f2 <- function(dt) {
  # Step 1: fill t_idx with unsorted uniform [0, 1] draws, one per row.
  dt[, t_idx := runif_trng(.N, 0, 1)]
  # Step 2: read the draws back ordered by (case_id, t_idx), i.e. ascending
  # inside every case.
  ordered_draws <- dt[order(case_id, t_idx), t_idx]
  # Step 3: overwrite the column with the ordered values.
  dt[, t_idx := ordered_draws]
}
      
      





, 15-20 .







  median `itr/sec` mem_alloc 
  1.69ms     554.      109KB 
      
      





? ?







3. ,



, , , by



, . . — , . . [0; 1]



, . case_id



, — . case_id



,







# Variant 3 (data.table, single sort): use case_id as the integer part and a
# uniform [0, 1] draw as the fractional part of one number. Sorting those
# numbers once orders the draws inside every case; subtracting case_id again
# leaves only the sorted fractional parts.
#
# dt: data.table with a numeric `case_id` column; modified by reference.
# NOTE(review): the positional subtraction assumes `dt` is ordered by case_id
# (the setup keys it with setkey()).
f3 <- function(dt) {
  dt[, t_idx := {
    fraction <- runif_trng(.N, 0, 1, parallelGrain = 10000L)
    sort(case_id + fraction) - case_id
  }]
}
      
      





2 , , .







  median `itr/sec` mem_alloc
 826.7us    1013.     54.3KB
      
      





3+1/2. , , set





? , . , NSE . , .







# Variant 3.5 (data.table::set): same "case_id + fraction, sort, subtract"
# trick as the single-sort variant, but the column is written with set()
# instead of `[.data.table`, and the draws come from stats::runif().
#
# dt: data.table with a numeric `case_id` column; modified by reference.
# NOTE(review): assumes `dt` is ordered by case_id (see setkey() in the setup).
f3_5 <- function(dt) {
  ids <- dt$case_id
  shifted <- sort(ids + runif(nrow(dt), 0, 1))
  set(dt, j = "t_idx", value = shifted - ids)
}
      
      





5 , 4







  median `itr/sec` mem_alloc
 161.5us    5519.     16.3KB
      
      







.







# Benchmark all five variants of the t_idx generation.
# check = FALSE switches off bench::mark()'s comparison of the expressions'
# results; they cannot match here because every call draws fresh random
# numbers.
# Note: f2, f3 and f3_5 write `t_idx` into `dt` by reference, so `dt` carries
# that column after this call.
bench::mark(
  f1(df),
  f1_5(df),
  f2(dt),
  f3(dt),
  f3_5(dt),
  check = FALSE
)
      
      





  expression      min   median `itr/sec` mem_alloc
  <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>
1 f1(df)       14.3ms  15.38ms      63.2   284.9KB
2 f1_5(df)    24.43ms  29.34ms      29.5   284.9KB
3 f2(dt)       1.55ms   1.69ms     554.      109KB
4 f3(dt)        722us  826.7us    1013.     54.3KB
5 f3_5(dt)    142.5us  161.5us    5519.     16.3KB
      
      





90 , 18 . , .









. (" "). .







, . .







1.



— , , , . , , , , , . . :







# Build an event log with the fields case_id, record, start, finish
# (presumably recs, ts_idx and tf_idx in the code that follows).
# Within every record the finish comes after the start: finish > start.
# For consecutive records 1, 2: start_2 > finish_1.

dt[, t_idx := NULL] # drop the t_idx column written by the earlier f2/f3/f3_5 runs

# Variant 1 (dplyr): generate a start (`ts_idx`) and a finish (`tf_idx`) time
# for every record so that, inside each case,
#   start_1 < finish_1 < start_2 < finish_2 < ... < 1.
#
# df: data frame / tibble with a `case_id` column.
# Returns the ungrouped data with the additional `ts_idx` and `tf_idx` columns.
f1 <- function(df) {
  df %>%
    group_by(case_id) %>%
    mutate(
      # Start times: sorted uniform [0, 1] draws within the case.
      ts_idx = sort(runif(n(), 0, 1)),
      # Finish time: uniform between this record's start and the next start
      # of the SAME case. lead() is evaluated per group, so the last record of
      # a case gets the upper bound 1 instead of borrowing the first start of
      # the following case (which needed an if_else() patch and made the bound
      # depend on an unrelated case). runif() is vectorised over min/max, so
      # no row-by-row map2_dbl() is required.
      tf_idx = runif(n(), ts_idx, lead(ts_idx, default = 1))
    ) %>%
    ungroup()
}
      
      





, , , .







  median `itr/sec` mem_alloc 
 28.16ms      30.7    2.06MB 
      
      





2. ,



. 2 , . ! — , . .







# Variant 2 (data.table, single sort): draw two uniform [0, 1] values per row,
# shift them by case_id, sort everything once and read consecutive pairs as
# (start, finish).
#
# dt: data.table with a numeric `case_id` column; modified by reference
#     (columns `ts_idx` and `tf_idx` are added or overwritten).
# NOTE(review): assumes `dt` is ordered by case_id (see setkey() in the setup).
f2 <- function(dt) {
  dt[, c("ts_idx", "tf_idx") := {
    # case_id (length .N) is recycled against the 2 * .N draws, so every row
    # contributes two shifted values for its case.
    shifted <- sort(case_id + runif(2 * .N, 0, 1))
    # Row k of the matrix holds the (2k-1)-th and 2k-th sorted values;
    # subtracting case_id (recycled down the columns) removes the shift.
    pairs <- matrix(shifted, ncol = 2, byrow = TRUE) - case_id
    list(pairs[, 1], pairs[, 2])
  }]
}
      
      





30 ! .







  median `itr/sec` mem_alloc 
  1.04ms     733.    74.38KB 
      
      





2+1/2. , , set





# Variant 2.5 (data.table::set): same pairing trick as the `[.data.table`
# variant (two draws per row, one global sort, consecutive pairs become
# start/finish), but the columns are written with set().
#
# dt: data.table with a numeric `case_id` column; modified by reference
#     (columns `ts_idx` and `tf_idx` are added or overwritten).
# NOTE(review): assumes `dt` is ordered by case_id (see setkey() in the setup).
f2_5 <- function(dt) {
  ids <- dt$case_id
  # ids is recycled against the 2 * nrow(dt) draws.
  shifted <- sort(ids + runif(2 * nrow(dt), 0, 1))
  pairs <- matrix(shifted, ncol = 2, byrow = TRUE) - ids
  set(dt, j = "ts_idx", value = pairs[, 1])
  set(dt, j = "tf_idx", value = pairs[, 2])
}
      
      





. 4 .







  median `itr/sec` mem_alloc 
 278.1us    2781.    57.55KB 
      
      







.







# Benchmark the three start/finish generators.
# check = FALSE switches off bench::mark()'s comparison of the expressions'
# results; they cannot match here because every call draws fresh random
# numbers.
# Note: f2 and f2_5 write `ts_idx`/`tf_idx` into `dt` by reference, so `dt`
# carries those columns after this call.
bench::mark(
  f1(df),
  f2(dt),
  f2_5(dt),
  check = FALSE
)
      
      





  median `itr/sec` mem_alloc 
 28.16ms      30.7    2.06MB 
  1.04ms     733.    74.38KB 
 278.1us    2781.    57.55KB 
      
      





90 , 35 .









, . , , , . , , . — , . . , .







, BigData , , - .







P.S.

, . -



— . .

- .







— « R».







ruvds







ruvds








All Articles