-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtypecheck.jai
More file actions
1284 lines (1092 loc) · 60.6 KB
/
typecheck.jai
File metadata and controls
1284 lines (1092 loc) · 60.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
TODO: Constant evaluation
we could mark certain node types as constants and do constant evaluation where applicable
this would not be something we really want to do on initial pass, since we don't want to modify the AST that much
but it would certainly be a good idea to do before going to bytecode
*/
// Flags threaded through typecheck_node and the per-node-type routines to adjust how a node is checked.
Typechecking_Flags :: enum_flags {
NONE :: 0;
// The node must be usable as an lvalue. After a successful check, typecheck_node calls set_as_lvalue on the node.
EXPECT_LVALUE :: 1 << 0;
// The node is the root of a statement. Required for nodes flagged MUST_BE_STATEMENT_ROOT.
// typecheck_node clears this from check_flags before dispatching to the per-node-type routines.
IS_STATEMENT_ROOT :: 1 << 1;
}
// Typechecks `node` and, if the script has an error afterwards, returns null from the *calling* procedure.
// This is an #expand macro: the backticked return below is evaluated in the caller's scope,
// so it is only usable inside procedures for which `return null` is a valid return statement.
// Otherwise the resolved type of the node is produced as the macro's value.
// NOTE: before using this method widely, double check that it does not explode the stack too much, as the original author anticipated it may.
typecheck_node_or_return :: (
script: *Script,
node: *Node,
check_flags: Typechecking_Flags = .NONE,
hint_type: *Type_Info = null
) -> (
value_type: *Type_Info
) #expand {
result := typecheck_node(script, node, check_flags, hint_type);
// propagate the error by exiting the caller
if script.error.type != .NO_ERROR then `return null;
return result;
}
// Typechecks a single AST node and returns its resolved type (which is also stored in node.value_type).
// Returns null when an error has been set on the script, or when the node ended up without a value_type.
//   check_flags: see Typechecking_Flags. IS_STATEMENT_ROOT is consumed here and is not forwarded.
//   hint_type:   optional type suggested by the surrounding context; forwarded to the callback and the per-node-type routine.
typecheck_node :: (
using script: *Script,
node: *Node,
check_flags: Typechecking_Flags = .NONE,
hint_type: *Type_Info = null
) -> (
value_type: *Type_Info
) {
dprint("typechecking node % % (%)\n", node, node.node_type, get_location(node));
dprint("expecting type: %\n", as_type(hint_type));
// dprint("%\n", sprint_node(script, node));
dprint_push_indent();
// if already typechecked, just return resolved type
// this was not possible before, but will be now that multiple identifier nodes may reference same underlying procedure or variable node
// NOTE: this early return happens before the statement-root and lvalue handling below, so neither is re-applied to an already-checked node.
if node.flags & .TYPECHECKED {
assert(node.value_type != null);
return node.value_type;
}
// weird special case for assignment operators. needs to be done up top so we can remove the flag right away.
// TODO: print source code location on error as well. really, we need to support this in many places for better error reporting.
if node.flags & .MUST_BE_STATEMENT_ROOT {
if !(check_flags & .IS_STATEMENT_ROOT) {
set_typecheck_error(script, "This expression '%' can only be used as the root of a statement, not just as an arbitrary expression.", sprint_node(script, node,, temp), node = node);
return null;
}
}
// from here on, nothing downstream sees IS_STATEMENT_ROOT unless a routine explicitly passes it again
check_flags &= ~.IS_STATEMENT_ROOT;
// Optional user hook. If it sets an error we fail; if it returns a type, that type is returned directly
// and the built-in dispatch below (including the TYPECHECKED / lvalue marking) is skipped.
if script.typecheck_callback {
result := script.typecheck_callback(script, node, check_flags, hint_type);
if has_error(script) return null;
if result return result;
}
// Dispatch on node type. The per-node-type routines have no return value here;
// they communicate through node.value_type and the script's error state.
if node.node_type == {
case Node_Directive; typecheck_directive (script, xx node, check_flags, hint_type);
case Node_Literal; typecheck_literal (script, xx node, check_flags, hint_type);
case Node_Identifier; typecheck_identifier (script, xx node, check_flags, hint_type);
case Node_Operation; typecheck_operation (script, xx node, check_flags, hint_type);
case Node_Procedure_Call; typecheck_procedure_call (script, xx node, check_flags, hint_type);
case Node_Cast; typecheck_cast (script, xx node, check_flags, hint_type);
case Node_Dot; typecheck_dot (script, xx node, check_flags, hint_type);
case Node_Subscript; typecheck_subscript (script, xx node, check_flags, hint_type);
case Node_Declaration; typecheck_declaration (script, xx node, check_flags, hint_type);
case Node_Block; typecheck_block (script, xx node, check_flags, hint_type);
case Node_If_Statement; typecheck_if_statement (script, xx node, check_flags, hint_type);
case Node_While_Loop; typecheck_while_loop (script, xx node, check_flags, hint_type);
case Node_For_Loop; typecheck_for_loop (script, xx node, check_flags, hint_type);
case;
set_typecheck_error(script, "Unhandled node type % in typecheck_node.", node.node_type, node = node);
}
if has_error(script) return null;
// Only nodes that actually received a type are marked TYPECHECKED.
// If an lvalue was requested and the node does not qualify, set_as_lvalue sets the error and we return null.
if node.value_type != null {
if check_flags & .EXPECT_LVALUE {
if !set_as_lvalue(script, node) return null;
}
node.flags |= .TYPECHECKED;
}
return node.value_type;
}
// Typechecks every top-level statement of the script's AST, in order.
// Returns false if there is no AST, if the AST root is not a block, or as soon as any statement leaves an error on the script.
// On success the script itself is flagged as TYPECHECKED.
typecheck_script :: (script: *Script) -> bool {
    root := script.ast_root;
    if root == null || root.node_type != Node_Block  return false;

    for statement: root.statements {
        // each top-level statement is, by definition, a statement root
        typecheck_node(script, statement, .IS_STATEMENT_ROOT);
        if has_error(script)  return false;
    }

    script.flags |= .TYPECHECKED;
    return true;
}
// Finds the declaration that `identifier` refers to, searching outward from `scope_block`
// through each enclosing scope, and finally through the script's global declarations.
// Returns null if nothing matches.
// TODO: try to remove the need for 'need_lvalue' so that callers can instead check the returned node with is_valid_lvalue.
//       It appears the only reason it exists at the moment is to prevent using it_index as an lvalue.
// NOTE: the 'namespace' parameter is for resolving virtual member declarations. It's a bit wonky right now...
resolve_declaration :: (script: *Script, identifier: string, scope_block: *Node, namespace: *Node_Identifier = null, need_lvalue := false) -> *Node_Declaration {
    current := scope_block;
    while current != null {
        if current.node_type == Node_Block {
            found := find_declaration_in_block(script, xx current, identifier, namespace);
            if found  return found;
        } else if current.node_type == Node_For_Loop && namespace == null {
            // for loops only contribute their iterator declarations, and only for un-namespaced lookups
            found := find_iterator_declaration(script, xx current, identifier, need_lvalue);
            if found  return found;
        }
        current = current.scope;
    }

    for global: script.global_declarations {
        if match_declaration(script, xx global, identifier, namespace)  return global;
    }

    return null;
}
// Scans the direct statements of `block` (no recursion into nested blocks) and returns the first
// declaration node that matches `identifier` / `namespace`, or null if there is none.
find_declaration_in_block :: (script: *Script, block: *Node_Block, identifier: string, namespace: *Node_Identifier = null) -> *Node_Declaration {
    for statement: block.statements {
        if statement.node_type != Node_Declaration  continue;
        if match_declaration(script, xx statement, identifier, namespace)  return xx statement;
    }
    return null;
}
// Returns true when `declaration` has retrievable info, its name equals `identifier`,
// and its namespace resolves to the same thing as `namespace`.
match_declaration :: (script: *Script, declaration: *Node_Declaration, identifier: string, namespace: *Node_Identifier = null) -> bool {
    ok, name, decl_namespace := get_declaration_info(script, declaration);
    if !ok                 return false;
    if name != identifier  return false;
    return identifiers_resolve_to_the_same_thing(script, decl_namespace, namespace);
}
// Checks whether `identifier` names one of the two iterator declarations of `for_loop` (it_decl or it_index_decl).
// Returns a pointer to the matching declaration, or null if neither matches.
// When the index declaration matches but `need_lvalue` is set, an error is logged and null is returned.
find_iterator_declaration :: (script: *Script, for_loop: *Node_For_Loop, identifier: string, need_lvalue := false) -> *Node_Declaration {
    if get_declaration_name(script, *for_loop.it_decl) == identifier  return *for_loop.it_decl;

    if get_declaration_name(script, *for_loop.it_index_decl) != identifier  return null;

    if need_lvalue {
        // TODO: this is a bad error message since it's reported with the for loop's location and not the identifier's location.
        log("%: Error: it_index cannot be used as an lvalue.", get_location(for_loop));
        return null;
    }
    return *for_loop.it_index_decl;
}
// If some expression is fundamentally just a literal, this procedure gives you that terminal literal node.
// It primarily exists so that we have a consistent way to check whether some expression can be used as a malleable literal.
// Handled shapes: a literal node itself, or a dot expression whose right side is either a literal
// or an identifier of type LITERAL. Anything else yields null.
get_terminal_literal :: (node: *Node) -> *Node_Literal {
    if node.node_type == Node_Literal  return xx node;
    if node.node_type != Node_Dot      return null;

    right := node.(*Node_Dot).right;
    if right.node_type == Node_Literal  return xx right;

    if right.node_type == Node_Identifier {
        identifier := right.(*Node_Identifier);
        if identifier.identifier_type == .LITERAL  return identifier.literal;
    }

    return null;
}
// Convenience overload: same check as below, starting from a Type value.
is_aggr :: (t: Type) -> bool {
    info := t.(*Type_Info);
    return is_aggr(info);
}

// Returns false for the listed non-aggregate type tags (integer, float, enum, bool, pointer, procedure, type),
// and true for every other type tag.
is_aggr :: (ti: *Type_Info) -> bool {
    tag := ti.type;
    is_non_aggregate := tag == .INTEGER
                     || tag == .FLOAT
                     || tag == .ENUM
                     || tag == .BOOL
                     || tag == .POINTER
                     || tag == .PROCEDURE
                     || tag == .TYPE;
    return !is_non_aggregate;
}
/*
Coercions and implicit casts
Coercion here refers to altering the value_type of a Node_Literal in-place.
An implicit cast is just an extra cast node that is inserted into the AST, and the cast still has to be executed at runtime.
These two things are different, but also really similar, and we usually want to just do the correct thing for the given node, whether it be a literal or not.
I would like to make these procedures nicer to use,
but there are two different uses I need to keep in sync:
1. checking if an implicit cast or coercion can be done (and getting the 'distance' of that cast)
this needs to *not* modify the AST
2. actually doing the implicit cast or coercion, which will modify the AST
These two functions could be separate, but it's less likely that we'll get bugs if the actual doing of the cast
just calls the pre-checking function instead of duplicating logic.
But I still can't avoid duplicating some of the most high-level logic, which is the selection between a cast or coercion.
*/
// Allocates a new implicit Node_Cast that wraps `node` and converts it to `type`.
// The caller is responsible for splicing the returned node into the AST in place of `node`.
make_implicit_cast :: (script: *Script, node: *Node, type: *Type_Info) -> *Node {
    implicit_cast := alloc_node(script, Node_Cast);
    implicit_cast.value         = node;
    implicit_cast.implicit_type = type;
    implicit_cast.value_type    = type;
    // The cast is flagged TYPECHECKED unconditionally, i.e. regardless of whether `node` itself carries that flag.
    implicit_cast.flags |= .IMPLICIT | .TYPECHECKED;

    dprint("%: Info: created implicit cast from % to %", get_location(node), as_type(node.value_type), as_type(type));
    return implicit_cast;
}
// Returns a non-negative 'distance' for an implicit integer -> integer conversion, or -1 if the conversion is not allowed.
// Not allowed: signed -> unsigned, any narrowing, and unsigned -> signed of the same size.
get_integer_cast_distance :: (from: *Type_Info_Integer, to: *Type_Info_Integer) -> int {
    if from.signed && !to.signed                                          return -1;
    if from.runtime_size > to.runtime_size                                return -1;
    if !from.signed && to.signed && to.runtime_size == from.runtime_size  return -1;

    // NOTE: for now, we just add +1 for signed. maybe this should be weighted differently.
    from_weight := from.runtime_size + from.signed.(int);
    to_weight   := to.runtime_size   + to.signed.(int);
    return to_weight - from_weight;
}
// Computes how 'far' the type of `node` is from `to` for the purposes of implicit casting. Does not modify the AST.
// Returns 0 for identical types and for number literals between numeric types,
// a size-based difference for float -> float (which is negative when narrowing),
// the result of get_integer_cast_distance for integer -> integer, and -1 in every other case.
get_implicit_cast_distance :: (node: *Node, to: *Type_Info) -> int {
    from := node.value_type;
    if to == from  return 0;

    if !is_numeric_type(from) || !is_numeric_type(to)  return -1;

    // number literals can be re-typed in place, so they cost nothing
    if node.node_type == Node_Literal && node.(*Node_Literal).literal_type == .NUMBER  return 0;

    if to.type != from.type  return -1;

    if to.type == .FLOAT    return to.runtime_size - from.runtime_size;
    if to.type == .INTEGER  return get_integer_cast_distance(xx from, xx to);

    return -1;
}
// Tries to make `node` usable where a value of `type` is expected. In order of preference:
//   1. the types already match                   -> ok, no replacement
//   2. an implicit reference can be taken        -> ok, no replacement (node is flagged as an lvalue)
//   3. the node is a literal that can be coerced -> ok, no replacement (literal is re-typed in place)
//   4. an implicit cast is permitted             -> ok, replacement is the new cast node wrapping `node`
// When `replacement` is non-null, the caller must substitute it for `node` in the AST.
// On failure, returns false and ALWAYS sets a FAILED_IMPLICIT_CAST typecheck error on the script.
// Previously a failed literal coercion returned false without setting any error, so callers that
// simply do `if !ok return;` would fail silently.
try_implicit_cast :: (script: *Script, node: *Node, type: *Type_Info) -> (ok: bool, replacement: *Node) {
    if node.value_type == type  return true, null;

    if try_implicit_reference(script, node, type) {
        dprint("took implicit reference to node with value_type '%'\n", as_type(node.value_type));
        return true, null;
    }

    if node.node_type == Node_Literal {
        // Literals are only ever coerced in place; we never wrap them in a cast node.
        if coerce_literal_type(xx node, type)  return true, null;
    } else if get_implicit_cast_distance(node, type) >= 0 {
        return true, make_implicit_cast(script, node, type);
    }

    set_typecheck_error(script, "Unable to implicitly cast type '%' to '%'", as_type(node.value_type), as_type(type), node = node, type = .FAILED_IMPLICIT_CAST);
    return false, null;
}
// Re-types a literal in place so that it has type `type`. Returns true on success.
// Only NUMBER literals can be coerced, and only to numeric types.
// The stored number is converted and the literal's value_type is updated to match, so that
// node.value_type never disagrees with the stored value. (Previously value_type was left stale
// after a successful coercion, unlike the equivalent remap in typecheck_literal.)
coerce_literal_type :: (literal: *Node_Literal, type: *Type_Info) -> bool {
    if literal.value_type == type  return true;

    if literal.literal_type == {
        case .NUMBER;
            if !is_numeric_type(type)  return false;
            new_number := Any_Number.zero_of_type(type);
            Convert.any_to_any(to_any(*new_number), to_any(*literal.number));
            literal.number = new_number;
            // keep the node's type in sync with the re-typed value, same as typecheck_literal does
            literal.value_type = literal.number.type;
            return true;
    }

    log("%: Error: unable to coerce % literal of type '%' to '%'", get_location(literal), literal.literal_type, as_type(literal.value_type), as_type(type));
    return false;
}
// Returns the type a cast node converts to: the stored implicit_type for implicit casts,
// otherwise the type info referenced by the cast's type identifier.
get_cast_result_type :: (_cast: *Node_Cast) -> *Type_Info {
    if _cast.flags & .IMPLICIT  return _cast.implicit_type;
    return _cast.type_identifier.type_info_pointer;
}
// Returns true when `type` is a pointer type whose pointee is exactly the value type of `node`,
// i.e. when passing a pointer to `node` would satisfy `type`. Does not check lvalue-ness.
can_do_implicit_reference :: (node: *Node, type: *Type_Info) -> bool {
    if type.type != .POINTER  return false;

    pointee := type.(*Type_Info_Pointer).pointer_to;
    // NOTE: when implementing the below, need to check that #as only works on the first member
    // if is_using_as(node.value_type, pointee) return true; // TODO
    return pointee == node.value_type;
}
// Checks whether `node` can be implicitly referenced to satisfy the pointer type `type`.
// On success this only flags the node as an lvalue and returns true; no error is set on failure.
try_implicit_reference :: (script: *Script, node: *Node, type: *Type_Info) -> bool {
    lvalue_ok := is_valid_lvalue(script, node);
    if !lvalue_ok  return false;
    if !can_do_implicit_reference(node, type)  return false;

    // TODO: we may need to recurse in order to check or fix up child nodes that didn't previously know they are expected to be an lvalue?
    node.flags |= .IS_LVALUE;
    return true;
}
// Flags `node` as an lvalue if it qualifies as one.
// Otherwise sets a typecheck error carrying the message from is_valid_lvalue and returns false.
set_as_lvalue :: (script: *Script, node: *Node) -> bool {
    ok, message := is_valid_lvalue(script, node);
    if ok {
        node.flags |= .IS_LVALUE;
        return true;
    }

    // TODO: refactor is_valid_lvalue so that it sets the script error and attaches any dynamic info needed in each case.
    // if node.node_type == Node_Identifier {
    //     log("Identifier was '%'", get_identifier_name(script, xx node));
    // }
    set_typecheck_error(script, message, node = node);
    return false;
}
// Reports whether `node` can be used as an lvalue. Does not modify the node.
// The second return value is an error string (empty on success). In order to prevent spamming temp storage,
// tprint is not used to make the message more specific/situational, so the caller (probably just typecheck_node)
// may want to follow up with some additional information such as the identifier name.
// Maybe we could come up with some standard system for inserting info from a node so that we can return a format string here...
//
// Node_Directive is handled explicitly: typecheck_node dispatches directive nodes like any other node, so they can reach
// this procedure (via set_as_lvalue or try_implicit_reference). Without a case they would fall through to the assert below.
is_valid_lvalue :: (script: *Script, node: *Node) -> bool, string {
    if node.node_type == {
        case Node_Literal;
            literal := node.(*Node_Literal);
            if (literal.flags & .IS_MALLEABLE) || literal.literal_type == .ANY {
                return true, "";
            }
            return false, "A literal cannot be used as an lvalue, unless it is marked as being malleable with `?`.";

        case Node_Identifier;
            identifier := node.(*Node_Identifier);
            if #complete identifier.identifier_type == {
                case .UNRESOLVED;           return false, "An unresolved identifier cannot be used as an lvalue.";
                case .TYPE;                 return false, "A type identifier cannot be used as an lvalue.";
                case .EXTERNAL_PROCEDURE;   return false, "An external procedure cannot be used as an lvalue.";
                case .STRUCT_MEMBER;        return true, "";
                case .EXTERNAL_VARIABLE;
                    variable := *script.variables[identifier.index];
                    if variable.flags & .CONSTANT {
                        return false, "An external variable cannot be used as an lvalue when marked as constant.";
                    }
                    return true, "";
                case .LITERAL;
                    // defer to the literal the identifier stands for
                    ok, msg := is_valid_lvalue(script, identifier.literal);
                    return ok, msg;
                case .DECLARATION;
                    // TODO: update this logic for the new semantics around constant declarations and macros
                    // if (identifier.declaration.flags & .IS_CONSTANT)
                    // && !declaration_is_malleable_literal(identifier.declaration) {
                    //     return false, "Error: Attempting to use an identifier as an lvalue which refers to a constant declaration which is not malleable.";
                    // }
                    return true, "";
            }

        case Node_Dot;
            dot := node.(*Node_Dot);
            // when the left side is a struct, lvalue-ness is decided by the left side; otherwise by the right side
            if dot.left.value_type.type == .STRUCT {
                ok, msg := is_valid_lvalue(script, dot.left);
                return ok, msg;
            }
            ok, msg := is_valid_lvalue(script, dot.right);
            return ok, msg;

        case Node_Subscript;
            subscript := node.(*Node_Subscript);
            ok, msg := is_valid_lvalue(script, subscript.base_expression);
            return ok, msg;

        // NOTE: it is possible these nodes may be able to produce valid lvalues in the future
        case Node_Operation;        return false, "Using the result of an operation as an lvalue is not currently supported";
        case Node_Procedure_Call;   return false, "Using the result of a procedure call as an lvalue is not currently supported";
        case Node_Cast;             return false, "Using the result of a cast as an lvalue is not currently supported";
        case Node_Directive;        return false, "Using the result of a directive as an lvalue is not currently supported";

        // TODO: These cases really should not even remotely happen... so maybe we should not even bother with an error message?
        case Node_Declaration;      return false, "A declaration cannot be used as an lvalue;";
        case Node_Block;            return false, "A block cannot be used as an lvalue;";
        case Node_If_Statement;     return false, "An if statement cannot be used as an lvalue;";
        case Node_While_Loop;       return false, "A while loop cannot be used as an lvalue;";
        case Node_For_Loop;         return false, "A for loop cannot be used as an lvalue;";
    }

    assert(false, "Invalid node type '%' in is_valid_lvalue.", node.node_type);
    return false, "";
}
// Strips one level of pointer indirection from `type`; non-pointer types are returned unchanged.
// We use this for getting the proper type to hint to procedure arguments.
get_base_if_type_is_pointer :: (type: *Type_Info) -> *Type_Info {
    if type.type != .POINTER  return type;

    // TODO: do something about multiple indirections case?
    return type.(*Type_Info_Pointer).pointer_to;
}
/*
Separate sub-routines for typechecking specific node types.
These are broken out so that Jai does not allocate a ridiculous stack frame size for typecheck_node, limiting the maximum recursion depth to like, 10.
Since we are doing this, we may as well refactor the main typecheck_node procedure to be a bit clearer.
One thing that's always been a bit weird about it is that we need to not only set the value type on the node, but also return that same type info as the return value.
We could just get rid of that return value and return only an error object, which we can just declare at the top of the procedure, much like the result object we were using before.
Then if we still want to have the same semantics of getting a return type back on typecheck_node, we can do so only from that proc by explicitly returning the node.value_type,
which will prevent there ever being a mismatch between what is returned and what is set on the node.
*/
// Typechecks a directive node. The directive's value_type defaults to void; if evaluating the directive
// in the TYPECHECK phase leaves it with a runtime node, that node is typechecked (with the same flags and hint)
// and its type becomes the directive's type. Evaluation failure is reported as a typecheck error on the directive.
typecheck_directive :: (using script: *Script, directive: *Node_Directive, check_flags := Typechecking_Flags.NONE, hint_type: *Type_Info = null) {
    directive.value_type = type_info(void); // by default

    // we always call the directive a second time during typechecking,
    // even if the directive already returned some replacement node during parsing
    evaluated_ok := evaluate_directive(script, directive, .TYPECHECK);
    if !evaluated_ok {
        builder: String_Builder;
        print(*builder, "Failed to evaluate directive % in typechecking phase. Evaluation error:\n", directive.name);
        format_error(*builder, script, false, true);
        set_typecheck_error(script, builder_to_string(*builder,, temp), node = directive);
        return;
    }

    if !directive.runtime_node  return;
    directive.value_type = typecheck_node(script, directive.runtime_node, check_flags, hint_type);
}
// Typechecks a literal node, setting literal.value_type (or a typecheck error on the script).
//   ANY:     takes the type stored in the Any.
//   STRING:  typed as string and flagged constant.
//   BOOLEAN: typed from the stored number's type and flagged constant.
//   NUMBER:  re-typed to hint_type if that is numeric; otherwise float64 values are remapped to float32. Flagged constant.
//   STRUCT:  requires hint_type to be a struct type; each expression is typechecked against the member at the same index.
typecheck_literal :: (using script: *Script, literal: *Node_Literal, check_flags := Typechecking_Flags.NONE, hint_type: *Type_Info = null) {
if #complete literal.literal_type == {
case .ANY;
literal.value_type = literal.any.type;
case .STRING;
literal.value_type = type_info(string);
literal.flags |= .IS_CONSTANT;
case .BOOLEAN;
literal.value_type = literal.number.type;
literal.flags |= .IS_CONSTANT;
case .NUMBER;
if is_numeric_type(hint_type) {
// NOTE: we now remap to value_type just once here, so that we don't have to do it each time in execute
// TODO: think about rules around automatic casting here...
// maybe we don't want to automatically go float -> int,
// since we will now only get float here when the literal had a decimal part
new_number := Any_Number.zero_of_type(hint_type);
Convert.any_to_any(to_any(*new_number), to_any(*literal.number));
literal.number = new_number;
} else {
// NOTE: also doing a little remap here in default case,
// since we actually want the default float type to be float32
if literal.number.type == xx float64 {
new_number := Any_Number.zero_of_type(float32);
Convert.any_to_any(to_any(*new_number), to_any(*literal.number));
literal.number = new_number;
}
}
literal.value_type = literal.number.type;
literal.flags |= .IS_CONSTANT;
case .STRUCT;
// a struct literal has no type of its own; it takes its type entirely from the hint
ti_struct := hint_type.(*Type_Info_Struct);
if ti_struct == null {
set_typecheck_error(script,
"Unable to typecheck struct literal without a type hint.",
type = .MISSING_TYPE_HINT,
node = literal
);
return;
}
if ti_struct.type != .STRUCT {
set_typecheck_error(script, "Type hint provided to struct literal is not a struct type. (was '%')", as_type(ti_struct), node = literal);
return;
}
all_members_are_constant := true;
// Expressions are matched to struct members positionally.
// Only an excess of expressions is rejected here; providing fewer expressions than members raises no error in this loop.
for literal.aggr.expressions {
if it_index >= ti_struct.members.count {
set_typecheck_error(script, "Too many values provided in struct literal.", node = literal);
return;
}
member := ti_struct.members[it_index];
expr_ti := typecheck_node(script, it, hint_type = member.type);
if has_error(script) return;
// exact type match is required; no implicit cast is attempted for struct literal members
if expr_ti != member.type {
set_typecheck_error(script, "Value type provided in struct initializer for field '%' was incorrect. Expected '%', got '%'.", member.name, as_type(member.type), as_type(expr_ti), node = it);
return;
}
all_members_are_constant &&= (it.flags & .IS_CONSTANT).(bool);
}
// the literal as a whole is constant only if every provided expression is constant
if all_members_are_constant {
literal.flags |= .IS_CONSTANT;
}
// TODO: we could reduce space required for storing intermediate values
// if we pass a value_pointer alongside hint_type in typechecking
// that way if hint type is accepted, then it can also use value_pointer
// for its backing value instead of allocating its own space for value
literal.value_type = ti_struct;
// backing storage for the struct value is allocated from the script's pool allocator
literal.aggr.value_pointer = Dynamic_New(literal.value_type,, get_pool_allocator(*script.pool));
dprint("allocated space for literal of type %, node %\n", literal.value_type, literal);
}
assert(literal.value_type != null);
}
// Resolves an identifier node and sets its identifier_type and value_type.
// Lookup order: script declarations (via resolve_declaration), then the script's type table, then its external variables.
// If nothing matches, an UNRESOLVED_IDENTIFIER typecheck error with an empty message is set.
typecheck_identifier :: (using script: *Script, identifier: *Node_Identifier, check_flags := Typechecking_Flags.NONE, hint_type: *Type_Info = null) {
// NOTE: identifier may have already been typechecked e.g. in parameters of procedure call
// in this case we obviously don't want to check it again (not to mention accessing name will now be invalid!)
if identifier.identifier_type != .UNRESOLVED return;
// NOTE(review): the need_lvalue argument relies on `&` binding tighter than `!=` — confirm against Jai's operator precedence.
declaration := resolve_declaration(script, identifier.name, identifier.scope, need_lvalue = check_flags & .EXPECT_LVALUE != 0);
if declaration {
// a declaration without a value_type has not been typechecked yet, which is reported as use-before-declaration
if declaration.value_type == null {
set_typecheck_error(script, "Identifier '%' used before its declaration.", identifier.name, node = identifier);
return;
}
// TODO: we cannot mark the identifier as constant based on the declaration yet,
// since the declaration may later be referenced by another identifier as an lvalue, making the declaration non-constant
// This is fine since we don't really do any constant evaluation on the AST, but when we lower to bytecode,
// we will need to consider that a DECLARATION identifier may yet need to be checked for constancy.
// TODO: if the declaration is a macro though, and the init_expression of the macro is constant, then we can still go ahead and safely mark the identifier as constant
// this may be a good idea to go ahead and implement since it would allow us to use such an identifier in a type expression.
if declaration.flags & .IS_CONSTANT {
// using the identifier as an lvalue demotes the declaration from constant
if check_flags & .EXPECT_LVALUE {
dprint("removing IS_CONSTANT flag from declaration '%' because its identifier is being used as an lvalue\n", get_declaration_name(script, declaration));
declaration.flags &= ~.IS_CONSTANT;
}
// if declaration.flags & .MACRO {
// identifier.flags |= .CONSTANT;
// }
}
identifier.identifier_type = .DECLARATION;
identifier.declaration = declaration;
identifier.value_type = declaration.value_type;
return;
}
// TODO: maybe we should add some checks to make sure that all externally inserted identifiers don't conflict across categories
// for example, we should not have a type identifier which conflicts with an external variable or procedure
for type_table {
if it.name == identifier.name {
identifier.flags |= .IS_CONSTANT; // the type table is assumed to be constant during the script's lifetime
identifier.identifier_type = .TYPE;
identifier.index = it_index;
identifier.value_type = xx Type;
return;
}
}
for variables {
if it.name == identifier.name {
identifier.identifier_type = .EXTERNAL_VARIABLE;
identifier.index = it_index;
identifier.value_type = variables[it_index].type;
return;
}
}
// NOTE: we don't attach an error message here
set_typecheck_error(script, "", type = .UNRESOLVED_IDENTIFIER, node = identifier);
return;
}
// Typechecks an operation node (prefix, postfix, binary or assignment) and sets operation.value_type.
// Steps:
//   1. typecheck the operand(s), with a retry for binary operations where one side needs the other's type as a hint
//   2. plain `=` assignment is resolved immediately (with an implicit cast / coercion of the right side if needed)
//   3. otherwise try to resolve a builtin operation on the (enum-unwrapped) operand types
//   4. otherwise rewrite the operation as a call to a procedure named after the operator (operator overload)
typecheck_operation :: (using script: *Script, operation: *Node_Operation, check_flags := Typechecking_Flags.NONE, hint_type: *Type_Info = null) {
// NOTE(review): when the directive produced a runtime node we return without setting operation.value_type in this procedure —
// presumably the directive's runtime node stands in for the operation; confirm against the directive implementation.
if operation.directive {
typecheck_node(script, operation.directive, check_flags, hint_type);
if operation.directive.runtime_node return;
}
left_type, right_type: *Type_Info;
_operator := get_operator(script, operation);
if _operator.kind == {
case .PREFIX; #through;
case .POSTFIX;
// unary operations only have a left operand; right_type stays null
left_type = typecheck_node(script, operation.left, check_flags, hint_type);
if has_error(script) return;
case .BINARY;
// Binary operation checking is a bit more complicated, because either side may require a type hint from the other.
left_type = typecheck_node(script, operation.left, check_flags, hint_type);
right_type = typecheck_node(script, operation.right, check_flags, hint_type);
// If both come back null on first try, we return the upstream error.
if left_type == null && right_type == null {
assert(has_error(script));
return;
}
// at least one side succeeded, so discard the error from the failed side (if any) before retrying it
clear_error(script);
// Otherwise, try to typecheck whichever one previously failed with the type hint of the other side.
if left_type == null {
dprint("Info: Failed to typecheck left operand on first attempt, trying again with type hint: %", as_type(right_type));
left_type = typecheck_node(script, operation.left, check_flags, right_type);
if has_error(script) return;
}
if right_type == null {
dprint("Info: Failed to typecheck right operand on first attempt, trying again with type hint: %", as_type(left_type));
right_type = typecheck_node(script, operation.right, check_flags, left_type);
if has_error(script) return;
}
case .ASSIGNMENT;
// NOTE: we basically ignore hint_type in this case because assignment expressions must be used at statement-level anyhow.
// the left side must be an lvalue; its type is then used as the hint for the right side
left_type = typecheck_node(script, operation.left, check_flags | .EXPECT_LVALUE, null);
if has_error(script) return;
right_type = typecheck_node(script, operation.right, check_flags, left_type);
if has_error(script) return;
// special case: operator = is always a simple memcpy
if operation.name == "=" {
if left_type != right_type {
ok, replacement := try_implicit_cast(script, operation.right, left_type);
if !ok return;
if replacement replace_node(*operation.right, replacement);
}
operation.value_type = left_type;
return;
}
}
// TODO: We could still use some smarter type coercion here, probably.
// Will have to see what specific cases come up later and try to fix those.
// Enum operands are looked up by their internal type so that builtin operations can apply to them.
// NOTE(review): the backslashes in `left\_type` / `left\_underlying_type` appear to be there purely to align these two lines visually — confirm they name the same identifiers as `left_type` / `left_underlying_type` used below.
left\_underlying_type := ifx left\_type && left\_type.type == .ENUM then left\_type.(*Type_Info_Enum).internal_type.(*Type_Info) else left\_type;
right_underlying_type := ifx right_type && right_type.type == .ENUM then right_type.(*Type_Info_Enum).internal_type.(*Type_Info) else right_type;
builtin_operation_index, swap_args := resolve_builtin_operation(operation.name, _operator.kind, left_underlying_type, right_underlying_type);
if builtin_operation_index >= 0 {
operation.builtin_operation_index = builtin_operation_index;
// NOTE: because we are performing this swap on the AST like this, this technically makes the evaluation order of each side of an operation undefined
// TODO: ideally, we should just swizzle the operands on the stack when executing the operation
if swap_args { operation.left, operation.right = operation.right, operation.left; }
// NOTE: only uses the enum type if both left and right enums match,
// otherwise we set return type as the matching underlying type.
operation.value_type = BUILTIN_OPERATIONS[builtin_operation_index].ret.(*Type_Info);
if left_type.type == .ENUM && left_type == right_type {
if operation.value_type != left_underlying_type {
set_typecheck_error(script, "We tried to coerce the enum type '%' to its underlying int type '%' for the purposes of making this binary operation work, but the return type of the resolved binary operator '%' does not match the underlying int type of the enum. So unfortunately we're gonna have to fail typechecking here...", as_type(left_type), as_type(left_underlying_type), as_type(operation.value_type), node = operation);
return;
}
// dprint("Info: returning enum type from binary operation: %", as_type(left_type));
operation.value_type = left_type;
}
// results for which is_aggr returns true get storage allocated from the script's pool allocator
if is_aggr(operation.value_type) {
operation.return_ptr = Dynamic_New(operation.value_type,, get_pool_allocator(*script.pool));
// dprint("allocating variable of type % at % for node %\n", as_type(operation.value_type), operation.return_ptr, operation);
}
return;
}
// if operation was not a builtin, then we will try to resolve the operation as an overloaded procedure
// a synthetic procedure call node is built whose procedure expression is an identifier named after the operator
procedure_call := alloc_node(script, Node_Procedure_Call);
procedure_call.procedure_expression = make_identifier(script, operation.name);
if is_unary(_operator)
then procedure_call.arguments = array_copy((*Node).[ operation.left ],, get_pool_allocator(*script.pool));
else procedure_call.arguments = array_copy((*Node).[ operation.left, operation.right ],, get_pool_allocator(*script.pool));
operation.overload_procedure = procedure_call;
operation.flags |= .OVERLOAD;
// the operation takes the type of the overload call; any error from resolving the call is left on the script
operation.value_type = typecheck_node(script, procedure_call);
}
// Typechecks a procedure call node and, on success, assigns procedure_call.value_type.
//
// Two paths exist, chosen by how the procedure expression itself typechecks:
//   1. The expression is a bare identifier that failed with .UNRESOLVED_IDENTIFIER.
//      The error is cleared, the arguments are typechecked without hints, and
//      resolve_procedure_index() is asked for an index into `procedures` by name.
//      The arguments are then verified against the chosen procedure's argument types,
//      either by re-typechecking with a type hint or by inserting an implicit cast.
//   2. Otherwise the expression must already have a procedure type, and the arguments
//      are matched one-to-one against it, with each expected type passed as a hint.
//
// Every failure path returns early without assigning procedure_call.value_type.
// On success, value_type is the first return type (or void when there are none), and
// aggregate return values get storage allocated from the script's pool.
//
// check_flags and hint_type are accepted but not read by this procedure.
//
// TODO: we should probably be more rigorous about how we filter check flags as we traverse the AST.
//       Here, for instance, we could use some constant like PASSTHROUGH_FLAGS_FOR_PROCEDURE_EXPRESSION and | with that.
//       As it stands we don't have many check flags, and some of them could possibly be removed with a refactor.
typecheck_procedure_call :: (using script: *Script, procedure_call: *Node_Procedure_Call, check_flags := Typechecking_Flags.NONE, hint_type: *Type_Info = null) {
    proc_info := typecheck_node(script, procedure_call.procedure_expression).(*Type_Info_Procedure);

    if script.error.type == .UNRESOLVED_IDENTIFIER
    && procedure_call.procedure_expression.node_type == Node_Identifier {
        // Path 1: an unresolved bare identifier. Treat the name as a candidate for procedure lookup.
        clear_error(script);

        identifier := procedure_call.procedure_expression.(*Node_Identifier);
        assert(identifier.identifier_type == .UNRESOLVED); // so we can access name member validly

        // Procedure arguments are sort of typechecked twice, once before selecting an overload and once after.
        // In the first check, we do not provide any type hints, which means we may fail to typecheck arguments where the type hint is required.
        // But we ignore this error on the first pass, since we may be able to provide a type hint later after selecting an overload.
        for procedure_call.arguments {
            typecheck_node(script, it);
            if script.error.type == .MISSING_TYPE_HINT  clear_error(script);
            if has_error(script) return;
        }

        procedure_name  := identifier.name;
        procedure_index := resolve_procedure_index(script, procedure_name, procedure_call);
        // NOTE(review): presumably resolve_procedure_index reports its own error when it returns -1 -- confirm.
        if procedure_index == -1 return;

        // Bind the identifier to the selected procedure so it is not resolved again.
        identifier.identifier_type = .EXTERNAL_PROCEDURE;
        identifier.index           = procedure_index;
        identifier.flags          |= .TYPECHECKED;
        proc_info:, identifier.value_type = procedures[procedure_index].proc_info;

        // For the second pass over the arguments, we go back and verify the types for all arguments, re-typechecking those that may have failed due to a missing type hint.
        // We also iterate more manually here so that we can replace nodes if we need to insert any implicit casts, since we need a **Node for that.
        // NOTE(review): argument_types is indexed by argument position with no arity check here;
        //               this assumes the selected procedure has at least as many argument types as the call has arguments -- confirm.
        for 0..procedure_call.arguments.count-1 {
            node_ptr_ptr  := *procedure_call.arguments[it];
            node          := node_ptr_ptr.*;
            expected_type := proc_info.argument_types[it];

            if node.value_type == null {
                // The first pass could not type this argument (its value_type is still null), so retry with the hint.
                dprint("re-typechecking argument with hint_type %", as_type(expected_type));
                type := typecheck_node(script, node, hint_type = expected_type);
                if has_error(script) return;

                if type != expected_type {
                    builder: String_Builder;
                    print(*builder, "We deferred typechecking a procedure argument in the hopes that we could later hint the type and resolve it that way, but that failed.\n");
                    print(*builder, "\tProcedure identifier: '%'\n", procedure_name);
                    print(*builder, "\tArgument expression: '%'\n", sprint_node(script, node,, temp));
                    // Types are passed through as_type() like the other messages in this procedure,
                    // rather than printing the raw *Type_Info pointers.
                    print(*builder, "\tExpected type '%', got '%'\n", as_type(expected_type), as_type(type));
                    set_typecheck_error(script, builder_to_string(*builder,, temp), node = node, type = .MISSING_TYPE_HINT);
                    return;
                }
            } else {
                // The argument already has a type; accept it as-is or through an implicit cast.
                ok, replacement := try_implicit_cast(script, node, expected_type);
                if !ok {
                    builder: String_Builder;
                    print(*builder, "Failed while trying to implicitly cast a procedure argument.\n");
                    print(*builder, "\tProcedure identifier: '%'\n", procedure_name);
                    print(*builder, "\tArgument expression: '%'\n", sprint_node(script, node,, temp));
                    print(*builder, "\tExpected type '%', got '%'\n", as_type(expected_type), as_type(node.value_type));
                    // Only the message is overwritten here.
                    // NOTE(review): presumably try_implicit_cast has already set the error itself on failure -- confirm.
                    script.error.message = builder_to_string(*builder,, temp);
                    return;
                }
                if replacement  replace_node(node_ptr_ptr, replacement);
            }
        }

        procedure_call.value_type = ifx proc_info.return_types
            then proc_info.return_types[0]
            else type_info(void);
    }
    // procedure was the result of some larger expression, so it already has a strict type
    else {
        if proc_info == null {
            set_typecheck_error(script, "Failed to typecheck procedure_expression.", node = procedure_call);
            return;
        }
        if proc_info.type != .PROCEDURE {
            set_typecheck_error(script, "Attempt to call a non-procedure type as if it were a procedure.", node = procedure_call);
            return;
        }
        if proc_info.return_types.count > 1 {
            set_typecheck_error(script, "Attempt to call a procedure which returns more than one value. This is not currently supported.", node = procedure_call);
            return;
        }

        // typecheck arguments. we know what they must be, so we just straightforwardly match, and can hint type trivially
        if proc_info.argument_types.count != procedure_call.arguments.count {
            set_typecheck_error(script, "Incorrect number of arguments provided for procedure call. Expected %, got %.\n", proc_info.argument_types.count, procedure_call.arguments.count, node = procedure_call);
            return;
        }

        for procedure_call.arguments {
            expected_type := proc_info.argument_types[it_index];
            argument_type := typecheck_node(script, it, hint_type = expected_type);
            if has_error(script) return;

            // No implicit casts are attempted on this path; the types must match exactly.
            if argument_type != expected_type {
                set_typecheck_error(script, "Mismatched types on argument % for procedure call. Expected %, got %.\n", it_index, as_type(expected_type), as_type(argument_type), node = it);
                return;
            }
        }

        procedure_call.value_type = ifx proc_info.return_types
            then proc_info.return_types[0]
            else type_info(void);
    }

    // Aggregate return values get storage allocated from the script's pool allocator.
    if is_aggr(procedure_call.value_type) {
        procedure_call.return_ptr = Dynamic_New(procedure_call.value_type,, get_pool_allocator(*script.pool));
        // dprint("allocating variable of type % at % for node %\n", as_type(procedure_call.value_type), procedure_call.return_ptr, procedure_call);
    }

    assert(procedure_call.value_type != null);
}
// Typechecks a cast node and assigns node_cast.value_type.
//
// The value being cast is typechecked first. For casts flagged .IMPLICIT the result type is taken
// from node_cast.implicit_type. Otherwise the cast's type expression is typechecked (hinted as Type),
// required to be a constant expression of type Type, and evaluated to obtain the result type.
//
// Returns early, without assigning value_type, as soon as any step leaves an error on the script.
// check_flags and hint_type are accepted but not read by this procedure.
typecheck_cast :: (using script: *Script, node_cast: *Node_Cast, check_flags := Typechecking_Flags.NONE, hint_type: *Type_Info = null) {
    // The operand's own type is not inspected here; it only needs to typecheck cleanly.
    typecheck_node(script, node_cast.value);
    if has_error(script) return;

    // Implicit casts carry their target type on the node, so there is no type expression to resolve.
    if node_cast.flags & .IMPLICIT {
        node_cast.value_type = node_cast.implicit_type;
        return;
    }

    cast_target_expr      := node_cast.type_expression;
    cast_target_expr_type := typecheck_node(script, cast_target_expr, hint_type = type_info(Type));
    if has_error(script) return;

    if cast_target_expr_type.type != .TYPE {
        set_typecheck_error(script, "Type expression in cast did not resolve to a Type.", node = cast_target_expr);
        return;
    }

    // The type expression is evaluated right here during typechecking, so it has to be constant.
    if !(cast_target_expr.flags & .IS_CONSTANT) {
        set_typecheck_error(script, "Type expression in cast must be constant.", node = cast_target_expr);
        return;
    }

    cast_target_any := evaluate_node(script, cast_target_expr);
    if has_error(script) return;

    // The evaluated value is read as a *Type_Info, which becomes the type of the cast.
    node_cast.value_type = cast_target_any.value_pointer.(**Type_Info).*;

    // TODO: check if we can actually perform the cast here
    //       for now, we won't precheck this since we will just use Convert.any_to_any
}
typecheck_dot :: (using script: *Script, dot: *Node_Dot, check_flags := Typechecking_Flags.NONE, hint_type: *Type_Info = null) {
if dot.flags & .IS_ARROW {
// bad things will happen if we try to re-typecheck an arrow
if dot.value_type != null return;
assert(dot.left != null);
assert(dot.right != null);
assert(dot.left.node_type == Node_Identifier);
assert(dot.right.node_type == Node_Identifier);
// left ident typechecks in standard way
left_ident := dot.left.(*Node_Identifier);
// TODO: should we be passing check flags here?
typecheck_node(script, left_ident);
if has_error(script) return;
// right ident typechecking is all done here in non-standard way
right_ident := dot.right.(*Node_Identifier);
assert(right_ident.identifier_type == .UNRESOLVED);
right_ident_name := get_identifier_name(script, xx dot.right);
assert(right_ident_name != "");
declaration := resolve_declaration(script, right_ident_name, dot.scope, namespace = left_ident, need_lvalue = check_flags & .EXPECT_LVALUE != 0);
if !declaration {
set_typecheck_error(script, "Unresolved virtual member identifier '%'", right_ident_name, type = .UNRESOLVED_IDENTIFIER, node = dot);
return;
}
if declaration.value_type == null {
set_typecheck_error(script, "Virtual member identifier '%' used before its declaration.", right_ident_name, node = right_ident);
return;
}
// NOTE: it is actually important that we set right_ident as lvalue and flag as typechecked, since we execute right_ident to get the virtual member value
right_ident.identifier_type = .DECLARATION;
right_ident.declaration = declaration;
right_ident.value_type = declaration.value_type;
if (check_flags & .EXPECT_LVALUE) && !set_as_lvalue(script, right_ident) return;
right_ident.flags |= .TYPECHECKED;
dot.value_type = declaration.value_type;
return;
}
// Currently, the only valid case for dot.left being null is for unary dot enums.
// Setting left_type to 'Type' when dot.left is null gets us into the proper case down below.
left_type: *Type_Info;
if dot.left {
// TODO: determing if check_flags should have EXPECT_LVALUE flag passed as well. Looked like we had faulty logic here for that before...
left_type = typecheck_node(script, dot.left, check_flags);
if has_error(script) return;
} else {
left_type = xx Type;
}
if left_type.type == {
case .POINTER;
// if left type is a pointer to a struct, override left_type with the type info for struct being pointed at
// this is to allow implicit dereference on pointers to structs for member access
pointer_info := left_type.(*Type_Info_Pointer);
if pointer_info.pointer_to.type != .STRUCT {
set_typecheck_error(script, "If left side of struct member dot dereference expression is a pointer, then it must point to a struct type.", node = dot.left);
return;
}
struct_info := pointer_info.pointer_to.(*Type_Info_Struct);
left_type = struct_info; // override for below struct case
// NOTE: we also need to remove the EXPECT_LVALUE flag if it was set for the pointer iself.
// this is currently the only spot we ever do this, so its a bit weird!
dot.left.flags &= ~.IS_LVALUE;
#through;
case .STRUCT;
if dot.right.node_type != Node_Identifier {
set_typecheck_error(script, "Right side of struct member dot dereference expression must be an identifier.", node = dot.right);
return;
}
member_identifier := dot.right.(*Node_Identifier);
assert(member_identifier.node_type == Node_Identifier && member_identifier.identifier_type == .UNRESOLVED);
member_name := member_identifier.name;
dprint("typechecking struct member identifier at % with name '%'\n", member_identifier, member_name);
member_ti, member_offset := get_field(xx left_type, member_name);
if member_ti == null {
set_typecheck_error(script, "Attempt to access struct member which does not exist.", node = dot.right);
return;
}
// tricky case. left may be some complex expression where the result is an instance of a struct type.
// so we can't just set the left type to be a namespace.
// but we do still want to do this if left type is an identifier? or no, since it could be a type identifier? which maybe we want to leave as is?
if member_ti.flags & .CONSTANT {
set_typecheck_error(script, "Access of constant struct members is not yet allowed.", node = dot.right);
return;
// member_identifier.identifier_type = .LITERAL;
// member_identifier.literal = alloc_node(script, Node_Literal);
// TODO: need to be able to call some simple make_literal() proc here and actually init the literal value
} else {
member_identifier.identifier_type = .STRUCT_MEMBER;
member_identifier.member = member_ti;
}
member_identifier.value_type = member_ti.type;
member_identifier.flags |= .TYPECHECKED;
dot.value_type = member_ti.type;
return; // no error
// case .ENUM;
// TODO: access enum namespace through instance of enum type
case .TYPE;
namespace: *Type_Info;
if dot.left {
// TODO: make sure that type expression is constant before evaluating it
if !(dot.left.flags & .IS_CONSTANT) {
set_typecheck_error(script, "Type expression on left-hand side of dot operator must be constant.", node = dot);
return;
}
left_any := evaluate_node(script, dot.left);
if has_error(script) return;
namespace = any_isa(left_any, *Type_Info).*;
} else {
if hint_type == null || (hint_type.type != .ENUM && hint_type.type != .STRUCT) {
set_typecheck_error(script, "Unary dot can only be used for enums, but hint_type was %", as_type(hint_type), type = .MISSING_TYPE_HINT, node = dot);
return;
}
namespace = hint_type;
}
if namespace.type == {
case .STRUCT;
if dot.right.node_type == {
case Node_Literal;
right_type := typecheck_node(script, dot.right, hint_type = namespace);
if has_error(script) return;
if namespace != right_type {
set_typecheck_error(script, "Type returned by right side of struct literal expression did not match type specified on left side of dot.", node = dot.right);
return;
}
dot.value_type = namespace;
return; // no error
// case .Node_Identifier;
}
/*