-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_predicate.h
12452 lines (11237 loc) · 543 KB
/
eval_predicate.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
==============================================
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2021, 2023
==============================================
*/
/*
============================================================
First created on: Mar/05/2021
Last modified on: Oct/11/2023
Author(s): Senthil Nathan ([email protected])
This toolkit's public GitHub URL:
https://github.com/IBMStreams/streamsx.eval_predicate
This file contains C++ native function(s) provided in the
eval_predicate toolkit. It has very elaborate logic to
evaluate a user given expression a.k.a user defined rule.
It differs from the IBM Streams built-in function
evalPredicate in the following ways.
1) This new eval_predicate function allows the user
given expression (rule) to access map elements.
2) This new eval_predicate function allows the user
given expression (rule) to access nested tuple attributes.
3) This new eval_predicate function allows the user
given expression (rule) to have operational verbs such as
contains, startsWith, endsWith, notContains, notStartsWith,
notEndsWith, in. For case insensitive (CI) string operations, these
operational verbs can be used: containsCI, startsWithCI,
endsWithCI, inCI, equalsCI, notContainsCI, notStartsWithCI,
notEndsWithCI, notEqualsCI.
For checking the size of the set, list and map, these
operational verbs can be used: sizeEQ, sizeNE, sizeLT,
sizeLE, sizeGT, sizeGE
4) This new eval_predicate function supports the following operations.
--> It supports these relational operations: ==, !=, <, <=, >, >=
--> It supports these logical operations: ||, &&
--> It supports these arithmetic operations: +, -, *, /, %
--> It supports these special operations for rstring, set, list and map:
contains, startsWith, endsWith, notContains, notStartsWith,
notEndsWith, in, containsCI, startsWithCI, endsWithCI,
inCI, equalsCI, notContainsCI, notStartsWithCI,
notEndsWithCI, notEqualsCI,
sizeEQ, sizeNE, sizeLT, sizeLE, sizeGT, sizeGE
--> No bitwise operations are supported at this time.
5) Following are the data types currently allowed in an expression (rule).
If you need more data types to be allowed, please create an
issue in the IBMStreams GitHub or contact the author of this toolkit.
boolean, int32, uint32, int64, uint64, float32, float64, rstring,
set<int32>, set<int64>, set<float32>, set<float64>, set<rstring>,
list<int32>, list<int64>, list<float32>, list<float64>, list<rstring>, list<TUPLE>,
map<rstring,int32>, map<int32,rstring>, map<rstring,int64>, map<int64,rstring>,
map<rstring,float32>, map<float32,rstring>, map<rstring,float64>,
map<float64,rstring>, map<rstring,rstring>, map<int32,int32>, map<int32,int64>,
map<int64,int32>, map<int64,int64>, map<int32,float32>, map<int32,float64>,
map<int64,float32>, map<int64,float64>, map<float32,int32>, map<float32,int64>,
map<float64,int32>, map<float64,int64>, map<float32,float32>, map<float32,float64>,
map<float64,float32>, map<float64,float64> and nested tuple references
pointing to any of the attributes made using the types shown above.
Following are three tuple examples with varying degree of complexity.
1) tuple<rstring symbol,float32 price,uint32 quantity,boolean buyOrSell>
2) tuple<tuple<rstring name,rstring title,int32 id,rstring gender,set<rstring> skills> employee,tuple<rstring name,rstring id,rstring manager,int32 employeeCnt> department>
3) tuple<rstring name,tuple<tuple<tuple<float32 latitude,float32 longitude> geo,tuple<rstring state,rstring zipCode,map<rstring,rstring> officials,list<rstring> businesses> info> location,tuple<float32 temperature,float32 humidity> weather> details,tuple<int32 population,int32 numberOfSchools,int32 numberOfHospitals> stats,int32 rank,list<int32> roadwayNumbers,map<rstring,int32> housingNumbers>
Following are the examples of expressions that can be sent for evaluation.
We support zero, single-level or multi-level (nested) parenthesis combinations.
Within a given subexpression, you must use the same logical operators.
Zero parenthesis is used in this expression:
a == "hi" && b contains "xyz" && g[4] > 6.7 && id % 8 == 3
Single level parenthesis is used within each subexpression:
(a == "hi") && (b contains "xyz" || g[4] > 6.7 || id % 8 == 3)
(a == "hi") && (b contains "xyz") && (g[4] > 6.7) && (id % 8 == 3)
Following expressions use nested parenthesis.
(a == "hi") && ((b contains "xyz" || g[4] > 6.7) && id % 8 == 3)
(a == "hi") && (b contains "xyz" && (g[4] > 6.7 || id % 8 == 3))
(a == "hi") && ((b contains "xyz") || (g[4] > 6.7) || (id % 8 == 3))
(a == "hi") && ((b contains "xyz") || (g[4] > 6.7 || id % 8 == 3))
((a == "hi" || c endsWith 'pqr') && (b contains "xyz")) || (g[4] > 6.7 || id % 8 == 3)
(((a == 'hi') || (x <= 5) || (t == 3.14) || (p > 7)) && ((j == 3) && (y < 1) && (r == 9)) && s endsWith 'Nation')
In addition to the expressions shown above, there is a whole different category of
deeply nested ones. To see them, you can search in this file for
"multi-level nested subexpression examples".
Author's Note
-------------
The evalPredicate built-in function that is already available in
the IBM Streams product is implemented using C++ logic that is
tightly interwoven into the SPL programming model. It uses the
C++ boost spirit classic library for parsing the expressions and
then mapping them into the tuple attributes again via other modules
that are already part of the SPL runtime code. That one was designed
and built by at least 4 former colleagues over a period of 8 years.
To make changes in that existing implementation with thousands of
lines of complex C++ logic with no commentary proved to be very hard for me.
So, I took a fresh approach to write this new eval_predicate function
with my own ideas along with sufficient commentary to explain the logic.
That obviously led to implementation and feature differences between
the built-in evalPredicate and my new eval_predicate in this file.
As this toolkit evolved, very good features got added to make it a
great rule processing engine within the IBM Streams product.
If positioned properly, this can be a difference maker for IBM Streams
in the competitive marketplace. It will also help customers and partners
in creating useful and powerful custom rule processors for many
use cases in different business domains.
What is available for diagnosing problems that may occur during rule evaluation?
--------------------------------------------------------------------------------
No one has written a completely bug-free software asset yet. My code below is
very much subject to that reality as well. Any complexly constructed rule string
that is sent to the code below for processing can potentially trigger anomalies
and errors during rule validation and during rule evaluation.
If that happens, my sincere apologies.
If a runtime error is encountered or the rule evaluation returns an unexpected
error code or the rule evaluation returns a wrong boolean result, the caller of
the eval_predicate API can set the final method argument of that API to true for
enabling the internal tracing. That will create plenty of log tracing which
will be displayed on the stdout (e-g: screen or a log file). That trace
information will provide us details about the code path it took while
executing the API and it will help us to get more insights about why and where it
went wrong.
Such trace point ids will give us an idea about how the entire rule expression
was parsed, what subexpression ids were assigned, how the preparation was
done for each subexpression, what was stored in a few very important
internal data structures, how each SE was evaluated and how the individual
SE results were combined to produce the final result etc.
In the trace output, one can specifically search for these trace point ids:
i) 1, 2a, 3a, 4a, 5a will give details about cache hit, tuple schema forming and tuple attribute parsing.
ii) 6a, 7a, 8a, 9a will give parsing details for a subexpression made of LHS, OpVerb, RHS, LogicalOp.
iii) 10a will give a full summary about the validation results of the entire expression.
iv) 11a will give details about adding a fully validated expression to cache.
v) 4b will give details about each subexpression that is about to be evaluated.
vi) 4c will give details about how a given subexpression (non-nested,
single-level nested, multi-level nested) goes through a step by step evaluation.
vii) 4d will give details about the final step of combining all the inter subexpression eval results.
In addition, one can also search for _GGGGG_ and _HHHHH_ in the trace output
which will show the key steps performed during rule processing. All of that will
aid in understanding the code path that led to the rule processing errors.
You can do self-diagnosis of the trace output or you can email it to this
toolkit author's email address shown above to get it resolved. After the
problem is resolved, caller has to remember to set the final method argument of
the eval_predicate API to false for disabling the internal tracing.
============================================================
*/
#ifndef FUNCTIONS_H_
#define FUNCTIONS_H_
// Include this SPL file so that we can use the SPL functions and types in this C++ code.
#include <SPL/Runtime/Function/SPLFunctions.h>
#include <SPL/Runtime/Type/SPLType.h>
#include <SPL/Runtime/Type/Tuple.h>
#include <vector>
#include <sstream>
#include <iostream>
#include <stack>
#include <cstdio>
#include <cstdlib>
#include <dirent.h>
#include <string>
#include <tr1/unordered_map>
// ====================================================================
// All the constants are defined here. They are the error codes that the
// eval_predicate function reports to its caller.
//
// ALL_CLEAR (0) is the only non-error value in this table. Every other
// code is a distinct positive integer and the values run consecutively
// from 1 to 156 with no gaps.
//
// NOTE(review): several names differ only by a numeric suffix
// (e.g. UNMATCHED_CLOSE_PARENTHESIS_IN_EXPRESSION1 and ...EXPRESSION2).
// Presumably they mark different detection points for the same kind of
// problem -- confirm against the code that assigns them.
#define ALL_CLEAR 0
#define EMPTY_EXPRESSION 1
#define MISSING_OPEN_TUPLE_TAG 2
#define MISSING_CLOSE_ANGLE_BRACKET_FOR_NESTED_TUPLE1 3
#define MISSING_COMMA_OR_CLOSE_ANGLE_BRACKET_AFTER_TUPLE_ATTRIBUTE_NAME 4
#define MISSING_COMMA_OR_CLOSE_ANGLE_BRACKET_FOR_NESTED_TUPLE2 5
#define MISSING_SPACE_BEFORE_TUPLE_ATTRIBUTE_NAME 6
#define MISSING_COMMA_OR_CLOSE_ANGLE_BRACKET_AFTER_TUPLE_ATTRIBUTE_NAME2 7
#define INVALID_CHARACTER_FOUND_IN_EXPRESSION 8
#define UNMATCHED_CLOSE_PARENTHESIS_IN_EXPRESSION1 9
#define UNMATCHED_CLOSE_PARENTHESIS_IN_EXPRESSION2 10
#define UNMATCHED_CLOSE_BRACKET_IN_EXPRESSION1 11
#define UNMATCHED_CLOSE_BRACKET_IN_EXPRESSION2 12
#define UNMATCHED_OPEN_PARENTHESIS_OR_SQUARE_BRACKET_IN_EXPRESSION 13
#define PERIOD_CHARACTER_FOUND_OUTSIDE_OF_LHS_AND_RHS 14
#define EQUAL_CHARACTER_WITHOUT_AN_LHS 15
#define LHS_NOT_MATCHING_WITH_ANY_TUPLE_ATTRIBUTE 16
#define OPEN_SQUARE_BRACKET_NOT_FOUND_AFTER_LIST 17
#define CLOSE_SQUARE_BRACKET_NOT_FOUND_AFTER_LIST 18
#define ALL_NUMERALS_NOT_FOUND_AS_LIST_INDEX 19
#define OPEN_SQUARE_BRACKET_NOT_FOUND_AFTER_MAP 20
#define UNSUPPORTED_KEY_TYPE_FOUND_IN_MAP 21
#define SPACE_MIXED_WITH_NUMERALS_IN_LIST_INDEX 22
#define ALL_NUMERALS_NOT_FOUND_IN_INT_MAP_KEY 23
#define CLOSE_SQUARE_BRACKET_NOT_FOUND_IN_INT_MAP_KEY 24
#define SPACE_MIXED_WITH_NUMERALS_IN_INT_MAP_KEY 25
#define MISSING_DECIMAL_POINT_IN_FLOAT_MAP_KEY 26
#define MORE_THAN_ONE_DECIMAL_POINT_IN_FLOAT_MAP_KEY 27
#define SPACE_MIXED_WITH_NUMERALS_IN_FLOAT_MAP_KEY 28
#define ALL_NUMERALS_NOT_FOUND_IN_FLOAT_MAP_KEY 29
#define CLOSE_SQUARE_BRACKET_NOT_FOUND_IN_FLOAT_MAP_KEY 30
#define MISSING_OPEN_QUOTE_IN_STRING_MAP_KEY 31
#define MISSING_CLOSE_QUOTE_IN_STRING_MAP_KEY 32
#define INVALID_CHAR_FOUND_IN_STRING_MAP_KEY 33
#define CHAR_FOUND_AFTER_CLOSE_QUOTE_IN_STRING_MAP_KEY 34
#define CLOSE_SQUARE_BRACKET_NOT_FOUND_IN_STRING_MAP_KEY 35
#define EMPTY_STRING_MAP_KEY_FOUND 36
#define INVALID_OPERATION_VERB_FOUND_IN_EXPRESSION 37
#define INCOMPATIBLE_DOUBLE_EQUALS_OPERATION_FOR_LHS_ATTRIB_TYPE 38
#define INCOMPATIBLE_NOT_EQUALS_OPERATION_FOR_LHS_ATTRIB_TYPE 39
#define INCOMPATIBLE_LESS_THAN_OPERATION_FOR_LHS_ATTRIB_TYPE 40
#define INCOMPATIBLE_LESS_THAN_OR_EQUALS_OPERATION_FOR_LHS_ATTRIB_TYPE 41
#define INCOMPATIBLE_GREATER_THAN_OPERATION_FOR_LHS_ATTRIB_TYPE 42
#define INCOMPATIBLE_GREATER_THAN_OR_EQUALS_OPERATION_FOR_LHS_ATTRIB_TYPE 43
#define INCOMPATIBLE_ADD_OPERATION_FOR_LHS_ATTRIB_TYPE 44
#define INCOMPATIBLE_SUBTRACT_OPERATION_FOR_LHS_ATTRIB_TYPE 45
#define INCOMPATIBLE_MULTIPLY_OPERATION_FOR_LHS_ATTRIB_TYPE 46
#define INCOMPATIBLE_DIVIDE_OPERATION_FOR_LHS_ATTRIB_TYPE 47
#define INCOMPATIBLE_MOD_OPERATION_FOR_LHS_ATTRIB_TYPE 48
#define INCOMPATIBLE_CONTAINS_OPERATION_FOR_LHS_ATTRIB_TYPE 49
#define INCOMPATIBLE_STARTS_WITH_OPERATION_FOR_LHS_ATTRIB_TYPE 50
#define INCOMPATIBLE_ENDS_WITH_OPERATION_FOR_LHS_ATTRIB_TYPE 51
#define INCOMPATIBLE_NOT_CONTAINS_OPERATION_FOR_LHS_ATTRIB_TYPE 52
#define INCOMPATIBLE_NOT_STARTS_WITH_OPERATION_FOR_LHS_ATTRIB_TYPE 53
#define INCOMPATIBLE_NOT_ENDS_WITH_OPERATION_FOR_LHS_ATTRIB_TYPE 54
#define ALL_NUMERALS_NOT_FOUND_IN_ARITHMETIC_OPERAND 55
#define NO_DECIMAL_POINT_IN_FLOAT_ARITHMETIC_OPERAND 56
#define MORE_THAN_ONE_DECIMAL_POINT_IN_ARITHMETIC_OPERAND 57
#define NO_OPERATION_VERB_FOUND_AFTER_ARITHMETIC_OPERAND 58
#define INVALID_OPERATION_VERB_FOUND_AFTER_ARITHMETIC_OPERAND 59
#define DECIMAL_POINT_FOUND_IN_NON_FLOAT_ARITHMETIC_OPERAND 60
#define UNPROCESSED_PARENTHESIS_FOUND_IN_EXPRESSION 61
#define UNPROCESSED_LHS_FOUND_IN_EXPRESSION 62
#define UNPROCESSED_OPERATION_VERB_FOUND_IN_EXPRESSION 63
#define UNPROCESSED_RHS_FOUND_IN_EXPRESSION 64
#define CLOSE_PARENTHESIS_FOUND_WITH_ZERO_PENDING_OPEN_PARENTHESIS 65
#define RHS_VALUE_NO_MATCH_FOR_BOOLEAN_LHS_TYPE 66
#define RHS_VALUE_NO_MATCH_FOR_INTEGER_LHS_TYPE 67
#define NO_DECIMAL_POINT_IN_RHS_VALUE 68
#define MORE_THAN_ONE_DECIMAL_POINT_IN_RHS_VALUE 69
#define RHS_VALUE_NO_MATCH_FOR_FLOAT_LHS_TYPE 70
#define RHS_VALUE_WITH_MISSING_OPEN_QUOTE_NO_MATCH_FOR_STRING_LHS_TYPE 71
#define RHS_VALUE_WITH_MISSING_CLOSE_QUOTE_NO_MATCH_FOR_STRING_LHS_TYPE 72
#define NEGATIVE_SIGN_AT_WRONG_POSITION_OF_AN_RHS_INTEGER 73
#define MORE_THAN_ONE_NEGATIVE_SIGN_IN_AN_RHS_INTEGER 74
#define NEGATIVE_SIGN_AT_WRONG_POSITION_OF_AN_RHS_FLOAT 75
#define MORE_THAN_ONE_NEGATIVE_SIGN_IN_AN_RHS_FLOAT 76
#define NEGATIVE_SIGN_FOUND_IN_NON_INTEGER_NON_FLOAT_ARITHMETIC_OPERAND 77
#define NEGATIVE_SIGN_AT_WRONG_POSITION_IN_ARITHMETIC_OPERAND 78
#define NEGATIVE_SIGN_FOUND_IN_UNSIGNED_INTEGER_ARITHMETIC_OPERAND 79
#define EXPRESSION_WITH_NO_LHS_AND_OPERATION_VERB_AND_RHS 80
#define INCOMPLETE_EXPRESSION_ENDING_WITH_LOGICAL_OPERATOR 81
#define INVALID_LOGICAL_OPERATOR_FOUND_IN_EXPRESSION 82
#define OPEN_PARENTHESIS_FOUND_NOT_RIGHT_BEFORE_LHS 83
#define CLOSE_PARENTHESIS_FOUND_NOT_RIGHT_AFTER_RHS 84
#define NO_SPACE_OR_ANOTHER_OPEN_PARENTHESIS_BEFORE_NEW_OPEN_PARENTHESIS 85
#define NO_SPACE_OR_ANOTHER_CLOSE_PARENTHESIS_AFTER_NEW_CLOSE_PARENTHESIS 86
#define NO_SPACE_RIGHT_BEFORE_LOGICAL_OPERATOR 87
#define NO_SPACE_RIGHT_AFTER_LOGICAL_OPERATOR 88
#define NESTED_OPEN_PARENTHESIS_FOUND 89
#define NESTED_CLOSE_PARENTHESIS_FOUND 90
#define MIXED_LOGICAL_OPERATORS_FOUND_IN_SUBEXPRESSION 91
#define MIXED_LOGICAL_OPERATORS_FOUND_IN_INTER_SUBEXPRESSIONS 92
#define FIRST_OPEN_PARENTHESIS_OCCURS_AFTER_A_COMPLETED_SUBEXPRESSION 93
#define PARENTHESIS_NOT_USED_CONSISTENTLY_THROUGHOUT_THE_EXPRESSION 94
#define TUPLE_SCHEMA_MISMATCH_FOUND_IN_EXP_EVAL_PLAN_CACHE 95
#define TUPLE_LITERAL_SCHEMA_GENERATION_ERROR 96
#define EXP_EVAL_CACHE_OBJECT_CREATION_ERROR 97
#define EXP_EVAL_PLAN_OBJECT_CREATION_ERROR 98
#define ERROR_INSERTING_EVAL_PLAN_PTR_IN_CACHE 99
#define INVALID_RSTRING_OPERATION_VERB_FOUND_DURING_EXP_EVAL 100
#define INVALID_INDEX_FOR_LHS_LIST_ATTRIBUTE 101
#define INVALID_KEY_FOR_LHS_MAP_ATTRIBUTE 102
#define THREE_TOKENS_NOT_FOUND_IN_ARITHMETIC_OPERATION_VERB 103
#define EMPTY_VALUE_FOUND_FOR_ARITHMETIC_OPERAND 104
#define EMPTY_VALUE_FOUND_FOR_POST_ARITHMETIC_OPERATION_VERB 105
#define INVALID_POST_ARITHMETIC_OPERATION_VERB_FOUND_DURING_EXP_EVAL 106
#define DIVIDE_BY_ZERO_ARITHMETIC_FOUND_DURING_EXP_EVAL 107
#define COLLECTION_ITEM_EXISTENCE_INVALID_OPERATION_VERB_FOUND_DURING_EXP_EVAL 108
#define RELATIONAL_OR_ARITHMETIC_INVALID_OPERATION_VERB_FOUND_DURING_EXP_EVAL 109
#define INCORRECT_NUMBER_OF_INTER_SUBEXPRESSION_LOGICAL_OPERATORS 110
#define ZERO_SUBEXPRESSIONS_MAP_KEYS_FOUND_DURING_EVAL 111
#define KEY_NOT_FOUND_IN_SUB_EXP_MAP_DURING_EVAL 112
#define EMPTY_SUB_EXP_LAYOUT_LIST_DURING_EVAL 113
#define LHS_ATTRIB_NAME_STOPS_ABRUPTLY_AT_THE_END_OF_THE_EXPRESSION 114
#define MIXED_LOGICAL_OPERATORS_FOUND_IN_NESTED_SUBEXPRESSIONS 115
#define MISSING_TWO_CLOSE_ANGLE_BRACKETS_AFTER_LIST_OF_TUPLE 116
#define OPEN_SQUARE_BRACKET_NOT_FOUND_AFTER_LIST_OF_TUPLE 117
#define ATTRIBUTE_PARSING_ERROR_IN_LIST_OF_TUPLE_VALIDATION 118
#define NO_PERIOD_FOUND_AFTER_LIST_OF_TUPLE 119
#define ATTRIBUTE_PARSING_ERROR_IN_LIST_OF_TUPLE_EVALUATION 120
#define EXP_EVAL_PLAN_OBJECT_CREATION_ERROR_FOR_LIST_OF_TUPLE 121
#define SPACE_NOT_FOUND_AFTER_SPECIAL_OPERATION_VERB 122
#define INCOMPATIBLE_SIZE_EQ_OPERATION_FOR_LHS_ATTRIB_TYPE 123
#define INCOMPATIBLE_SIZE_NE_OPERATION_FOR_LHS_ATTRIB_TYPE 124
#define INCOMPATIBLE_SIZE_LT_OPERATION_FOR_LHS_ATTRIB_TYPE 125
#define INCOMPATIBLE_SIZE_LE_OPERATION_FOR_LHS_ATTRIB_TYPE 126
#define INCOMPATIBLE_SIZE_GT_OPERATION_FOR_LHS_ATTRIB_TYPE 127
#define INCOMPATIBLE_SIZE_GE_OPERATION_FOR_LHS_ATTRIB_TYPE 128
#define RHS_VALUE_NO_MATCH_FOR_SIZEXX_OPERATION_VERB 129
#define INVALID_COLLECTION_SIZE_CHECK_OPERATION_VERB_FOUND_DURING_EXP_EVAL 130
#define EMPTY_ATTRIBUTE_NAME_GIVEN_FOR_VALUE_FETCHING 131
#define NON_SPACE_CHARACTER_FOUND_AFTER_A_VALID_ATTRIBUTE_NAME 132
#define ATTRIBUTE_NAME_WITH_NO_VALID_CHARACTERS 133
#define ATTRIBUTE_NAME_NOT_GOOD_FOR_VALIDATION 134
#define EMPTY_ATTRIBUTE_NAME_LAYOUT_LIST_DURING_VALUE_FETCH 135
#define WRONG_TYPE_OF_ATTRIBUTE_PASSED_AS_FUNCTION_ARGUMENT_BY_CALLER 136
#define ATTRIBUTE_PARSING_ERROR_IN_LIST_OF_TUPLE_VALUE_FETCH 137
#define UNSUPPORTED_EVAL_CONDITION_DETECTED 138
#define UNSUPPORTED_FETCH_ATTRIBUTE_VALUE_CONDITION_DETECTED 139
#define INCOMPATIBLE_CONTAINS_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 140
#define INCOMPATIBLE_STARTS_WITH_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 141
#define INCOMPATIBLE_ENDS_WITH_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 142
#define INCOMPATIBLE_NOT_CONTAINS_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 143
#define INCOMPATIBLE_NOT_STARTS_WITH_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 144
#define INCOMPATIBLE_NOT_ENDS_WITH_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 145
#define INCOMPATIBLE_IN_OPERATION_FOR_LHS_ATTRIB_TYPE 146
#define INCOMPATIBLE_IN_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 147
#define INCOMPATIBLE_EQUALS_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 148
#define INCOMPATIBLE_NOT_EQUALS_CI_OPERATION_FOR_LHS_ATTRIB_TYPE 149
#define UNABLE_TO_PARSE_RHS_VALUE 150
#define RHS_VALUE_WITH_MISSING_OPEN_BRACKET_NO_MATCH_FOR_IN_OR_IN_CI_OPVERB 151
#define RHS_VALUE_WITH_MISSING_CLOSE_BRACKET_NO_MATCH_FOR_IN_OR_IN_CI_OPVERB 152
#define INVALID_RHS_LIST_LITERAL_STRING_FOUND_FOR_IN_OR_IN_CI_OPVERB 153
#define INVALID_ATTRIBUTE_FOUND_DURING_COMPARISON_OF_TUPLES 154
#define SE_ID_NOT_FOUND_IN_INTRA_NESTED_SE_LOGICAL_OP_MAP 155
#define SE_ID_NOT_FOUND_IN_INTRA_MULTI_LEVEL_NESTED_SE_LOGICAL_OP_MAP 156
// ====================================================================
// Define a C++ namespace that will contain our native function code.
namespace eval_predicate_functions {
using namespace std;
// By including this line, we will have access to the SPL namespace and anything defined within that.
using namespace SPL;
// ====================================================================
// This class represents the evaluation plan for one user given
// expression string. It is the blueprint that the expression
// evaluation plan cache stores so that a repeatedly used expression
// can be evaluated by executing ready-made steps instead of being
// parsed and validated again.
//
// An instance is a plain value holder: it stores the expression text,
// the tuple schema literal that the expression was validated against
// and the data structures describing every subexpression found in
// that expression. It performs no parsing or evaluation by itself.
//
// All getters are const-qualified and return a const reference to the
// stored member, so a plan can be inspected through a const pointer
// or a const reference. A returned reference stays valid for as long
// as the plan object is alive and the matching setter is not called.
// All setters store a copy of the argument they are given.
//
class ExpressionEvaluationPlan {
public:
    // Destructor. Every member is held by value, so there is
    // nothing to release explicitly here.
    ~ExpressionEvaluationPlan() {
    }

    // Public getter methods of this class.

    // Returns the entire user given expression.
    rstring const & getExpression() const {
        return expression;
    }

    // Returns the schema literal of the tuple associated with
    // the expression.
    rstring const & getTupleSchema() const {
        return tupleSchema;
    }

    // Returns the map holding the layout of every subexpression.
    // Key is the subexpression id.
    // Note: The space between > > is a must for pre-C++11 compilers.
    SPL::map<rstring, SPL::list<rstring> > const & getSubexpressionsMap() const {
        return subexpressionsMap;
    }

    // Returns the subexpression map keys in sorted order.
    SPL::list<rstring> const & getSubexpressionsMapKeys() const {
        return subexpressionsMapKeys;
    }

    // Returns the map of logical operators used within a subexpression.
    // Key is the subexpression id and the value is the logical operator.
    SPL::map<rstring, rstring> const & getIntraNestedSubexpressionLogicalOperatorsMap() const {
        return intraNestedSubexpressionLogicalOperatorsMap;
    }

    // Returns the logical operators used in between the
    // different subexpressions.
    SPL::list<rstring> const & getInterSubexpressionLogicalOperatorsList() const {
        return interSubexpressionLogicalOperatorsList;
    }

    // Senthil added this on Sep/20/2023.
    // Returns the map from a subexpression id to its level
    // (1, 2, 3 and so on) inside a multi-level nested SE.
    SPL::map<rstring, int32> const & getMultiLevelNestedSubExpressionIdMap() const {
        return multiLevelNestedSubExpressionIdMap;
    }

    // Senthil added this on Sep/20/2023.
    // Returns the map from a subexpression id to the intra logical
    // operator within its multi-level nested subexpression.
    SPL::map<rstring, rstring> const & getIntraMultiLevelNestedSubexpressionLogicalOperatorsMap() const {
        return intraMultiLevelNestedSubexpressionLogicalOperatorsMap;
    }

    // Public setter methods of this class.
    // Each one replaces the stored value with a copy of its argument.

    void setExpression(rstring const & expr) {
        expression = expr;
    }

    void setTupleSchema(rstring const & mySchema) {
        tupleSchema = mySchema;
    }

    void setSubexpressionsMap(SPL::map<rstring, SPL::list<rstring> > const & myMap) {
        subexpressionsMap = myMap;
    }

    void setSubexpressionsMapKeys(SPL::list<rstring> const & mapKeys) {
        subexpressionsMapKeys = mapKeys;
    }

    void setIntraNestedSubexpressionLogicalOperatorsMap(SPL::map<rstring, rstring> const & myMap) {
        intraNestedSubexpressionLogicalOperatorsMap = myMap;
    }

    void setInterSubexpressionLogicalOperatorsList(SPL::list<rstring> const & opsList) {
        interSubexpressionLogicalOperatorsList = opsList;
    }

    // Senthil added this on Sep/20/2023.
    void setMultiLevelNestedSubExpressionIdMap(SPL::map<rstring, int32> const & myMap) {
        multiLevelNestedSubExpressionIdMap = myMap;
    }

    // Senthil added this on Sep/20/2023.
    void setIntraMultiLevelNestedSubexpressionLogicalOperatorsMap(SPL::map<rstring, rstring> const & myMap) {
        intraMultiLevelNestedSubexpressionLogicalOperatorsMap = myMap;
    }

private:
    // Private member variables of this class.

    // The entire user given expression is stored in this variable.
    rstring expression;

    // The schema literal for the tuple associated with a fully
    // validated expression is stored in this variable.
    rstring tupleSchema;

    // This map contains the details about the different
    // subexpressions present in a fully validated expression.
    // It is important to understand the structure of this map which
    // is explained in great detail throughout this file.
    // Such an explanation can be found by searching for the following phrase:
    // "Subexpression id will go something like this"
    SPL::map<rstring, SPL::list<rstring> > subexpressionsMap;

    // This list provides the subexpression map keys in sorted order.
    SPL::list<rstring> subexpressionsMapKeys;

    // This map contains the logical operators used within a subexpression.
    // Key for this map is the subexpression id and the value is the logical operator.
    SPL::map<rstring, rstring> intraNestedSubexpressionLogicalOperatorsMap;

    // This list contains the logical operators used in between
    // different subexpressions present in a user given expression string.
    SPL::list<rstring> interSubexpressionLogicalOperatorsList;

    // Senthil added this on Sep/20/2023.
    // This map contains the details about the multi-level nested subexpression ids.
    // It helps in identifying the related subexpression ids after which a given
    // logical operator appears within the multi-level nested subexpression hierarchy.
    // Such information is useful to validate the proper use of logical operators in a
    // multi-level nested subexpression.
    // Key for this map is subexpression id and the value is an
    // integer value that indicates the level (1, 2, 3 and so on) of the SE id inside
    // the given multi-level nested SE.
    SPL::map<rstring, int32> multiLevelNestedSubExpressionIdMap;

    // Senthil added this on Sep/20/2023.
    // This map contains the details about the multi-level nested subexpression logical operators.
    // It helps in identifying the related subexpression ids that form a nested level in a
    // multi-level nested subexpression. Such information is useful to perform the
    // evaluation at a given nested level.
    // Key for this map is subexpression id and the value is the
    // intra logical operator within the nested subexpression.
    // Any SE id that is at the very beginning and at the very end within a
    // nested SE will be assigned a logical operator of an empty string to indicate
    // that it is the first or last item for a given multi-level nested SE. All the
    // other SE ids from the one after the beginning to one less than the final SE id
    // within a given multi-level nested SE group will carry the logical operator that
    // appears after that SE. This map will help us a lot later in the evaluation method to
    // correctly evaluate and combine the results within a multi-level nested SE.
    SPL::map<rstring, rstring> intraMultiLevelNestedSubexpressionLogicalOperatorsMap;
};
// This is the data type for the expression evaluation plan cache.
// It maps an rstring key to a raw pointer to an ExpressionEvaluationPlan.
// (NOTE(review): the key is presumably the user given expression
// string -- confirm at the place where entries are inserted.)
// We assume that a common use of this function is to evaluate the
// same expression on each tuple that comes to an operator.
// We cache the results returned by the validateExpression function here,
// because the difference in performance is close to 30x for
// what we assume is a common use.
// NOTE(review): the map stores raw pointers; which code creates and
// releases the plan objects is not visible in this part of the file.
typedef std::tr1::unordered_map<SPL::rstring, ExpressionEvaluationPlan*> ExpEvalCache;
// This will give us a TLS (Thread Local Storage) for this pointer based
// data structure to be available all the time within a PE's thread. A PE can
// contain a single operator or multiple operators in case of operator fusion.
// So, this static (global) variable is only applicable within a given thread that
// is accessible either by one or more operators.
// The pointer starts out as NULL in every thread. Because the variable
// is declared static in a header file, every translation unit that
// includes this header gets its own separate per-thread pointer.
// __thread is a compiler extension (GCC and compatible compilers).
static __thread ExpEvalCache* expEvalCache = NULL;
// ====================================================================
// Prototypes for our native functions are declared here.
//
/// Evaluate a given SPL expression.
/// @tparam T1 type of the tuple passed by the caller.
/// @param expr the user given expression (rule) to be evaluated.
/// @param myTuple the tuple whose attributes are referred to in the expression.
/// @param error output argument that receives one of the error codes
///        defined at the top of this file (presumably ALL_CLEAR when
///        there is no error -- confirm in the function body).
/// @param trace set this to true for enabling the internal tracing that
///        gets displayed on stdout; set it to false for normal use.
/// @return the result of the evaluation
template<class T1>
boolean eval_predicate(rstring const & expr,
T1 const & myTuple, int32 & error, boolean trace);
// ====================================================================
// Prototype for other functions used only within this
// C++ header file are declared here.
//
// Trace (display) the inside information for a given tuple.
void traceTupleAtttributeNamesAndValues(Tuple const & myTuple,
SPL::map<rstring, rstring> & tupleAttributesMap, boolean trace);
// Get the SPL literal string for a given tuple.
// @return the SPL type name
rstring getSPLTypeName(ConstValueHandle const & handle, boolean trace);
// Get the parsed attribute names and their types for a given tuple literal string.
boolean parseTupleAttributes(rstring const & myTupleSchema,
SPL::map<rstring, rstring> & tupleAttributesMap,
int32 & error, boolean trace);
// Validate the expression.
// Note: The space below between > > is a must. Otherwise, compiler will give an error.
// Senthil made changes to this method signature on Sep/20/2023.
boolean validateExpression(rstring const & expr,
SPL::map<rstring, rstring> const & tupleAttributesMap,
SPL::map<rstring, SPL::list<rstring> > & subexpressionsMap,
SPL::map<rstring, rstring> & intraNestedSubexpressionLogicalOperatorsMap,
SPL::list<rstring> & interSubexpressionLogicalOperatorsList,
SPL::map<rstring, int32> & multiLevelNestedSubExpressionIdMap,
SPL::map<rstring, rstring> & intraMultiLevelNestedSubexpressionLogicalOperatorsMap,
int32 & error, int32 & validationStartIdx, boolean trace);
// Evaluate the expression according to the predefined plan.
boolean evaluateExpression(ExpressionEvaluationPlan *evalPlanPtr,
Tuple const & myTuple, int32 & error, boolean trace);
// Check if a given quote character marks the end of a map key string.
// NOTE(review): idx is presumably the position of that quote character
// inside myBlob -- confirm against the definition.
boolean isQuoteCharacterAtEndOfMapKeyString(blob const & myBlob, int32 const & idx);
// Check if a given quote character marks the end of an RHS string.
boolean isQuoteCharacterAtEndOfRhsString(blob const & myBlob, int32 const & idx);
// Check if a given ] character marks the end of an RHS list string literal.
boolean isCloseBracketAtEndOfRhsString(blob const & myBlob, int32 const & idx);
// Get the constant value handle for a given attribute name in a given tuple.
// The function returns void; the handle is presumably handed back
// through the cvh reference argument.
void getConstValueHandleForTupleAttribute(Tuple const & myTuple,
rstring attributeName, ConstValueHandle & cvh);
// Perform eval operations for an rstring based LHS attribute.
// The function returns void; the outcome is presumably reported through the
// two non-const reference arguments (subexpressionEvalResult and error).
void performRStringEvalOperations(rstring const & lhsValue,
rstring const & rhsValue, rstring const & operationVerb,
boolean & subexpressionEvalResult, int32 & error);
// Check if a given string represents a number (integer or float).
boolean isNumber(rstring const & str);
// Perform existence check eval operations for a collection based LHS attribute.
// The caller passes in whether the item exists (itemExists) along with
// the operation verb to be applied.
void performCollectionItemExistenceEvalOperations(boolean const & itemExists,
rstring const & operationVerb,
boolean & subexpressionEvalResult, int32 & error);
// Perform size check eval operations for a collection based LHS attribute.
// The caller passes in the collection size (lhsSize), the int32 value to
// check it against (rhsInt32) and the operation verb to be applied.
void performCollectionSizeCheckEvalOperations(int32 const & lhsSize,
int32 const & rhsInt32, rstring const & operationVerb,
boolean & subexpressionEvalResult, int32 & error);
// Perform relational or arithmetic eval operations.
// The LHS value, the RHS value and the arithmetic operand value all
// share the single template type T1.
template<class T1>
void performRelationalOrArithmeticEvalOperations(T1 const & lhsValue,
T1 const & rhsValue, rstring const & operationVerb,
T1 const & arithmeticOperandValue,
rstring const & postArithmeticOperationVerb,
boolean & subexpressionEvalResult, int32 & error);
// Perform modulus arithmetic via overloaded functions.
// The built-in % operator in C++ is not defined for floating point
// operands. So, we have to take this approach of using one overloaded
// function per type. Every overload returns void and takes a non-const
// reference argument named result to carry the answer.
void calculateModulus(int32 const & lhsValue,
int32 const & arithmeticOperandValue, int32 & result);
void calculateModulus(uint32 const & lhsValue,
uint32 const & arithmeticOperandValue, uint32 & result);
void calculateModulus(int64 const & lhsValue,
int64 const & arithmeticOperandValue, int64 & result);
void calculateModulus(uint64 const & lhsValue,
uint64 const & arithmeticOperandValue, uint64 & result);
void calculateModulus(float32 const & lhsValue,
float32 const & arithmeticOperandValue, float32 & result);
void calculateModulus(float64 const & lhsValue,
float64 const & arithmeticOperandValue, float64 & result);
void calculateModulus(boolean const & lhsValue,
boolean const & arithmeticOperandValue, boolean & result);
// Perform post arithmetic eval operations.
// NOTE(review): This presumably compares an already computed arithmetic
// result with the RHS value using the post arithmetic operation verb
// (e.g. the == in "id % 8 == 3") -- confirm against the definition.
template<class T1>
void performPostArithmeticEvalOperations(T1 const & arithmeticResult,
T1 const & rhsValue, rstring const & postArithmeticOperationVerb,
boolean & subexpressionEvalResult, int32 & error);
// Create the next subexpression id.
// Subexpression ids look like 1.1, 1.2, 2.1 and so on. The function
// returns void; the new id is presumably handed back through the
// subexpressionId reference argument.
void getNextSubexpressionId(char const & callerId,
int32 const & currentNestedSubexpressionLevel,
rstring & subexpressionId,
int32 const & currentDepthOfNestedSubexpression, boolean trace);
// Check if the next non-space character is an open parenthesis.
// NOTE(review): The search presumably starts from idx and stays
// within stringLength -- confirm against the definition.
boolean isNextNonSpaceCharacterOpenParenthesis(blob const & myBlob,
int32 const & idx, int32 const & stringLength);
// Find if the current single subexpression is enclosed within a parenthesis.
boolean isThisAnEnclosedSingleSubexpression(rstring const & expr,
int32 const & idx);
// Check if the next non-space character is a close parenthesis.
boolean isNextNonSpaceCharacterCloseParenthesis(blob const & myBlob,
int32 const & idx, int32 const & stringLength);
// This method gets the relevant details about the
// nested subexpression group.
// The first four arguments are read-only inputs. The function returns
// void and the last four arguments are non-const references, so the
// details are presumably reported through them.
void getNestedSubexpressionGroupInfo(rstring const & subexpressionId,
SPL::list<rstring> const & subexpressionIdsList,
SPL::map<rstring, rstring> const & intraNestedSubexpressionLogicalOperatorsMap,
SPL::map<rstring, rstring> const & intraMultiLevelNestedSELogicalOpMap,
int32 & subexpressionCntInCurrentNestedGroup,
rstring & intraNestedSubexpressionLogicalOperator,
SPL::boolean & multiLevelNestedSubexpressionsPresent,
SPL::list<rstring> & multiLevelNestedSubexpressionIdsList);
// This method fetches the value of a user given
// attribute present in a user given tuple.
// The function returns void; the fetched value is presumably handed back
// through the value reference argument whose type is T2.
template<class T1, class T2>
void get_tuple_attribute_value(rstring const & attributeName,
T1 const & myTuple, T2 & value, int32 & error, boolean const & trace);
// This method validates the user given attribute name for
// its syntax correctness.
// NOTE(review): attributeNameLayoutList is presumably filled here with
// the parsed layout of the attribute name for use in
// fetchTupleAttributeValue -- confirm against the definition.
boolean validateTupleAttributeName(rstring const & attributeName,
SPL::map<rstring, rstring> const & tupleAttributesMap,
SPL::list<rstring> & attributeNameLayoutList,
int32 & error, int32 & validationStartIdx, boolean trace);
// This method fetches the value of a given tuple attribute name.
// It takes the attribute name layout list as a read-only input.
template<class T1, class T2>
void fetchTupleAttributeValue(rstring const & attributeName,
SPL::map<rstring, rstring> const & tupleAttributesMap,
SPL::list<rstring> const & attributeNameLayoutList,
T1 const & myTuple, T2 & value, int32 & error, boolean trace);
// This method compares the attribute values of two tuples that are
// made of the same schema. It takes two list reference arguments:
// matchingAttributes and differingAttributes. They presumably receive the
// attribute names that have the same values in both tuples and the
// attribute names that have differing values respectively.
template<class T1>
void compare_tuple_attributes(T1 const & myTuple1, T1 const & myTuple2,
SPL::list<rstring> & matchingAttributes,
SPL::list<rstring> & differingAttributes,
int32 & error, boolean trace);
// This method fetches the tuple schema literal string and the
// tuple attribute information map with fully qualified tuple
// attribute names and values as key/value pairs.
// The function returns void and takes schema and attributeInfo as
// non-const references to carry these two results.
template<class T1>
void get_tuple_schema_and_attribute_info(T1 const & myTuple,
rstring & schema, SPL::map<rstring, rstring> & attributeInfo,
int32 & error, boolean trace);
// This method inserts the multi-level nested SE id and a logical operator
// for a given SE id into the following two maps.
//
// 1) Multi-level nested SE id map
// 2) Intra multi-level nested SE logical operators map
//
// NOTE(review): Going by their types, the abbreviated argument names
// presumably stand for the following -- confirm against the definition.
// insloMap = intra nested subexpression logical operators map (read-only)
// mlnsidMap = multi-level nested subexpression id map
// imlnsidMap = intra multi-level nested subexpression logical operators map
// opCnt/cpCnt = presumably open/close parenthesis counts
void insertMultiLevelNestedSeIdAndLogicalOperatorIntoMaps(char const & callerId,
rstring const & seId, rstring const & logicalOpFromCaller,
int32 const & opCnt, int32 const & cpCnt,
SPL::map<rstring, rstring> const & insloMap,
SPL::map<rstring, int32> & mlnsidMap,
SPL::map<rstring, rstring> & imlnsidMap, boolean trace);
// ====================================================================
// Evaluate a given expression.
// Example expressions:
//   a == "hi" && b contains "xyz" && g[4] > 6.7 && id % 8 == 3
//   (a == "hi") && (b contains "xyz" || g[4] > 6.7 || id % 8 == 3)
// Allowed operators: logical, relational, arithmetic and special operators.
//
// Arg1: Expression
// Arg2: Your tuple
// Arg3: A mutable int32 variable to receive non-zero eval error code if any.
//       It is reset to ALL_CLEAR at the very beginning of this function.
// Arg4: A boolean value to enable debug tracing inside this function.
//       Trace output is written to stdout.
//
// It returns the result given by evaluateExpression for the given
// expression and tuple. It returns false when a problem is detected before
// the evaluation gets to run. In that case, the error code is set either
// here (empty expression, empty tuple schema literal, cache or plan
// object creation failure, tuple schema mismatch, cache insertion failure)
// or by the parser/validator function that reported the failure.
//
// A validated expression is stored as an evaluation plan in the
// per-thread expEvalCache with the expression string as its key. So, the
// parsing and validation work is done only on the first call made on a
// given thread for a given expression. A cached plan is reused only when
// the schema of the tuple passed by the caller is the same as the
// schema stored in that plan.
template<class T1>
inline boolean eval_predicate(rstring const & expr,
    T1 const & myTuple, int32 & error, boolean trace=false) {
    boolean result = false;
    error = ALL_CLEAR;

    // Check if there is some content in the given expression.
    if(Functions::String::length(expr) == 0) {
        error = EMPTY_EXPRESSION;
        return(false);
    }

    // Get the schema literal string of a given tuple.
    // Example of myTuple's schema:
    // myTuple=tuple<rstring name,tuple<tuple<tuple<float32 latitude,float32 longitude> geo,tuple<rstring state,rstring zipCode,map<rstring,rstring> officials,list<rstring> businesses> info> location,tuple<float32 temperature,float32 humidity> weather> details,tuple<int32 population,int32 numberOfSchools,int32 numberOfHospitals> stats,int32 rank,list<int32> roadwayNumbers,map<rstring,int32> housingNumbers>
    //
    SPLAPPTRC(L_TRACE, "Begin timing measurement 1", "TupleSchemaConstructor");
    rstring myTupleSchema = getSPLTypeName(myTuple, trace);
    SPLAPPTRC(L_TRACE, "End timing measurement 1", "TupleSchemaConstructor");

    if(myTupleSchema == "") {
        // This should never occur. If it happens in
        // extremely rare cases, we have to investigate the
        // tuple literal schema generation function.
        error = TUPLE_LITERAL_SCHEMA_GENERATION_ERROR;
        return(false);
    }

    if (expEvalCache == NULL) {
        // Create this only once per operator thread.
        expEvalCache = new ExpEvalCache;

        // Note: A plain new expression reports an allocation failure by
        // throwing an exception instead of returning NULL. So, this
        // check is purely a defensive one.
        if(expEvalCache == NULL) {
            // If we can't create the cache, then that is troublesome.
            error = EXP_EVAL_CACHE_OBJECT_CREATION_ERROR;
            return(false);
        }
    }

    // We can now check if the given expression is already in the eval plan cache.
    ExpEvalCache::iterator it = expEvalCache->find(expr);

    if (it != expEvalCache->end()) {
        // We found this expression in the cache.
        // Let us see if the tuple schema we originally stored in the
        // cache matches with the schema for the tuple that the
        // caller passed in this call to this native function.
        if(it->second->getTupleSchema() != myTupleSchema) {
            if(trace == true) {
                cout << "==== BEGIN eval_predicate trace 2b ====" << endl;
                cout << "Full expression=" << expr << endl;
                cout << "Tuple schema mismatch found inside the expression evaluation plan cache." << endl;
                cout << "Tuple schema stored in the cache=" <<
                    it->second->getTupleSchema() << endl;
                cout << "Schema for the tuple passed in this call=" << myTupleSchema << endl;
                cout << "Total number of expressions in the cache=" <<
                    expEvalCache->size() << endl;
                cout << "==== END eval_predicate trace 2b ====" << endl;
            }

            error = TUPLE_SCHEMA_MISMATCH_FOUND_IN_EXP_EVAL_PLAN_CACHE;
            return(false);
        } else {
            if(trace == true) {
                cout << "==== BEGIN eval_predicate trace 3b ====" << endl;
                cout << "Full expression=" << expr << endl;
                cout << "Matching tuple schema is found inside the expression evaluation plan cache." << endl;
                cout << "Total number of expressions in the cache=" <<
                    expEvalCache->size() << endl;
                cout << "==== END eval_predicate trace 3b ====" << endl;
            }
        }

        // We will continue evaluating this expression outside of this if block.
    } else {
        if(trace == true) {
            cout << "==== BEGIN eval_predicate trace 2a ====" << endl;
            cout << "Full expression=" << expr << endl;
            cout << "Expression is not found inside the evaluation plan cache." << endl;
            cout << "Starting the preparation for adding it to the eval plan cache." << endl;
            cout << "Total number of expressions in the cache=" <<
                expEvalCache->size() << endl;
            cout << "==== END eval_predicate trace 2a ====" << endl;
        }

        // This expression is not in the eval plan cache. So, we will do the
        // preparation necessary for adding it to the eval plan cache.
        // Let us parse the individual attributes of the given tuple and store them in a map.
        SPL::map<rstring, rstring> tupleAttributesMap;
        SPLAPPTRC(L_TRACE, "Begin timing measurement 2", "TupleAttributeParser");
        result = parseTupleAttributes(myTupleSchema,
            tupleAttributesMap, error, trace);
        SPLAPPTRC(L_TRACE, "End timing measurement 2", "TupleAttributeParser");

        if(result == false) {
            // The error argument was passed to the parser by reference.
            // We return without touching it here.
            return(false);
        }

        // If trace is enabled let us do the introspection of the
        // user provided tuple and display its attribute names and values.
        traceTupleAtttributeNamesAndValues(myTuple, tupleAttributesMap, trace);

        // SE map's key is a subexpression id.
        // Subexpression id will go something like this:
        // 1.1, 1.2, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 4.1, 4.2, 4.3, 5.1
        // Subexpression id is made of level 1 and level 2.
        // We support either zero parenthesis or a single level or
        // multilevel (nested) parenthesis.
        // Logical operators used within a subexpression must be of the same kind.
        //
        // A few examples of zero, single and nested parenthesis.
        // (Every id is written above the start of the subexpression it refers to.)
        //
        // 1.1          2.1                 3.1           4.1
        // a == "hi" && b contains "xyz" && g[4] > 6.7 && id % 8 == 3
        //
        // 1.1            2.1
        // (a == "hi") && (b contains "xyz" || g[4] > 6.7 || id % 8 == 3)
        //
        // 1.1            2.1                   3.1             4.1
        // (a == "hi") && (b contains "xyz") && (g[4] > 6.7) && (id % 8 == 3)
        //
        // 1.1             2.1                                 2.2
        // (a == "hi") && ((b contains "xyz" || g[4] > 6.7) && id % 8 == 3)
        //
        // 1.1             2.1                 2.2
        // (a == "hi") && (b contains "xyz" && (g[4] > 6.7 || id % 8 == 3))
        //
        // 1.1            2.1
        // (a == "hi") && ((b contains "xyz") || (g[4] > 6.7) || (id % 8 == 3))
        //
        // 1.1             2.1                   2.2
        // (a == "hi") && ((b contains "xyz") || (g[4] > 6.7 || id % 8 == 3))
        //
        //  1.1                                1.2                    2.1
        // ((a == "hi" || c endsWith 'pqr') && (b contains "xyz")) || (g[4] > 6.7 || id % 8 == 3)
        //
        //  1.1                                                    1.2                                  1.3
        // (((a == 'hi') || (x <= 5) || (t == 3.14) || (p > 7)) && ((j == 3) && (y < 1) && (r == 9)) && s endsWith 'Nation')
        //
        // In addition to the expressions shown above, there is a whole different category of
        // deeply nested ones. To see them, you can search in this file for
        // "multi-level nested subexpression examples".
        //
        // This map's value is a list that describes the composition of a given subexpression.
        // Structure of such a list will go something like this:
        // This list will have a sequence of rstring items as shown below.
        //   LHSAttribName
        //   LHSAttribType
        //   ListIndexOrMapKeyValue - When N/A, it will have an empty string.
        //   OperationVerb - For arithmetic verbs, it will have extra stuff. e.g.: % 8 ==
        //   RHSValue
        //   Intra subexpression logical operator - When N/A, it will have an empty string.
        //   ... - The sequence above repeats for this subexpression.
        //
        // Note: The space below between > > is a must. Otherwise, compiler will give an error.
        SPL::map<rstring, SPL::list<rstring> > subexpressionsMap;
        // Store the logical operators within the nested sub-expressions.
        // Key for this map will be id of the nested subexpression that
        // is preceding the logical operator. Value will be the logical
        // operator.
        SPL::map<rstring, rstring> intraNestedSubexpressionLogicalOperatorsMap;
        // Store the logical operators between different sub-expressions.
        // This list will have N-1 items where N is the total number of
        // subexpressions stored in the map above.
        SPL::list<rstring> interSubexpressionLogicalOperatorsList;
        // Senthil added this on Sep/20/2023.
        SPL::map<rstring, int32> multiLevelNestedSubExpressionIdMap;
        SPL::map<rstring, rstring> intraMultiLevelNestedSubexpressionLogicalOperatorsMap;

        // Let us validate the expression for correctness in its use of
        // the correct tuple attributes and correct operation verbs.
        // In addition to validating the expression, let us also
        // get back a reusable map structure of how the expression is made,
        // how it is tied to the tuple attributes and what operations
        // need to be performed later while evaluating the expression.
        SPLAPPTRC(L_TRACE, "Begin timing measurement 3", "ExpressionValidator");
        // Perform the validation from the beginning of
        // the expression starting at index 0.
        int32 validationStartIdx = 0;
        // Senthil added a new method argument on Sep/20/2023.
        result = validateExpression(expr, tupleAttributesMap,
            subexpressionsMap,
            intraNestedSubexpressionLogicalOperatorsMap,
            interSubexpressionLogicalOperatorsList,
            multiLevelNestedSubExpressionIdMap,
            intraMultiLevelNestedSubexpressionLogicalOperatorsMap,
            error, validationStartIdx, trace);
        SPLAPPTRC(L_TRACE, "End timing measurement 3", "ExpressionValidator");

        if(result == false) {
            // The error argument was passed to the validator by reference.
            // We return without touching it here.
            return(false);
        }

        // We have done a successful expression validation.
        // We can prepare to store the results from the
        // validation in a cache for reuse later if the
        // same expression is sent repeatedly for evaluation.
        //
        // Let us sort the subexpressions map keys so that
        // we can process them in the correct order.
        SPL::list<rstring> subexpressionsMapKeys =
            Functions::Collections::keys(subexpressionsMap);
        Functions::Collections::sortM(subexpressionsMapKeys);

        // We can now create a new eval plan cache entry for this expression.
        ExpressionEvaluationPlan *evalPlanPtr = new ExpressionEvaluationPlan();

        // Just like the cache object creation above, this check is
        // purely a defensive one.
        if(evalPlanPtr == NULL) {
            error = EXP_EVAL_PLAN_OBJECT_CREATION_ERROR;
            return(false);
        }

        // Let us take a copy of various data structures related to this
        // fully validated expression in our eval plan cache for
        // prolonged use by calling the setter methods of the cache object.
        evalPlanPtr->setExpression(expr);
        evalPlanPtr->setTupleSchema(myTupleSchema);
        evalPlanPtr->setSubexpressionsMap(subexpressionsMap);
        evalPlanPtr->setSubexpressionsMapKeys(subexpressionsMapKeys);
        evalPlanPtr->setIntraNestedSubexpressionLogicalOperatorsMap(
            intraNestedSubexpressionLogicalOperatorsMap);
        evalPlanPtr->setInterSubexpressionLogicalOperatorsList(
            interSubexpressionLogicalOperatorsList);
        // Senthil added this on Sep/20/2023.
        evalPlanPtr->setMultiLevelNestedSubExpressionIdMap(
            multiLevelNestedSubExpressionIdMap);
        evalPlanPtr->setIntraMultiLevelNestedSubexpressionLogicalOperatorsMap(
            intraMultiLevelNestedSubexpressionLogicalOperatorsMap);

        // Let us store it as a K/V pair in the map now.
        std::pair<ExpEvalCache::iterator, bool> cacheInsertResult =
            expEvalCache->insert(std::make_pair(expr, evalPlanPtr));

        if(cacheInsertResult.second == false) {
            // The cache didn't take our plan pointer. Nothing else is
            // referring to this plan object. So, we must release it
            // here. Otherwise, it will be leaked.
            delete evalPlanPtr;
            error = ERROR_INSERTING_EVAL_PLAN_PTR_IN_CACHE;
            return(false);
        }

        if(trace == true) {
            cout << "==== BEGIN eval_predicate trace 11a ====" << endl;
            cout << "Full expression=" << expr << endl;
            cout << "Inserted the validated expression in the eval plan cache." << endl;
            cout << "Total number of expressions in the cache=" <<
                expEvalCache->size() << endl;
            cout << "==== END eval_predicate trace 11a ====" << endl;
        }

        // From here on, the cache entry we just inserted is used exactly
        // like an entry that was found by the cache lookup above.
        it = cacheInsertResult.first;
    } // End of the else block.

    // We have a valid iterator from the eval plan cache for the given expression.
    // We can go ahead and execute the evaluation plan now.
    SPLAPPTRC(L_TRACE, "Begin timing measurement 4", "ExpressionEvaluation");
    // We are making a non-recursive call.
    result = evaluateExpression(it->second, myTuple, error, trace);
    SPLAPPTRC(L_TRACE, "End timing measurement 4", "ExpressionEvaluation");

    return(result);
} // End of eval_predicate
// ====================================================================
// ====================================================================
// This function receives a Tuple as input and returns a tuple schema literal string.
// We will later parse the tuple literal string to create a map of all the
// attributes in a user given tuple.
// Example of myTuple's schema that will be returned by this function:
// myTuple=tuple<rstring name,tuple<tuple<tuple<float32 latitude,float32 longitude> geo,tuple<rstring state,rstring zipCode,map<rstring,rstring> officials,list<rstring> businesses> info> location,tuple<float32 temperature,float32 humidity> weather> details,tuple<int32 population,int32 numberOfSchools,int32 numberOfHospitals> stats,int32 rank,list<int32> roadwayNumbers,map<rstring,int32> housingNumbers>
//
inline rstring getSPLTypeName(ConstValueHandle const & handle, boolean trace=false) {
SPL::Meta::Type mtype = handle.getMetaType();
// Go through the meta type of every single attribute and
// form a tuple literal. This will have recursive calls into
// this same function when the user given tuple is nested.
switch(mtype) {
case Meta::Type::INVALID:
assert(!"cannot happen");
return "";
case Meta::Type::BOOLEAN:
return "boolean";
case Meta::Type::ENUM: {
Enum const & data = handle;
string res = "enum<";
vector<string> const & enums = data.getValidValues();
for (size_t i=0, iu=enums.size(); i<iu; ++i) {
if (i>0) {
res += ",";
}
res += enums[i];
}
res += ">";
return res;
}
case Meta::Type::INT8:
return "int8";
case Meta::Type::INT16:
return "int16";
case Meta::Type::INT32:
return "int32";
case Meta::Type::INT64:
return "int64";
case Meta::Type::UINT8:
return "uint8";
case Meta::Type::UINT16:
return "uint16";
case Meta::Type::UINT32:
return "uint32";
case Meta::Type::UINT64:
return "uint64";
case Meta::Type::FLOAT32:
return "float32";
case Meta::Type::FLOAT64:
return "float64";
case Meta::Type::DECIMAL32:
return "decimal32";
case Meta::Type::DECIMAL64:
return "decimal64";
case Meta::Type::DECIMAL128:
return "decimal128";
case Meta::Type::COMPLEX32:
return "complex32";