aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/embryo/src/bin/embryo_cc_sc2.c
diff options
context:
space:
mode:
authorDavid Walter Seikel2012-01-04 18:41:13 +1000
committerDavid Walter Seikel2012-01-04 18:41:13 +1000
commitdd7595a3475407a7fa96a97393bae8c5220e8762 (patch)
treee341e911d7eb911a51684a7412ef7f7c7605d28e /libraries/embryo/src/bin/embryo_cc_sc2.c
parentAdd the skeleton. (diff)
downloadSledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.zip
SledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.tar.gz
SledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.tar.bz2
SledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.tar.xz
Add the base Enlightenment Foundation Libraries - eina, eet, evas, ecore, embryo, and edje.
Note that embryo wont be used, but I'm not sure yet if you can build edje without it.
Diffstat (limited to '')
-rw-r--r--libraries/embryo/src/bin/embryo_cc_sc2.c2779
1 files changed, 2779 insertions, 0 deletions
diff --git a/libraries/embryo/src/bin/embryo_cc_sc2.c b/libraries/embryo/src/bin/embryo_cc_sc2.c
new file mode 100644
index 0000000..b3f4fae
--- /dev/null
+++ b/libraries/embryo/src/bin/embryo_cc_sc2.c
@@ -0,0 +1,2779 @@
1/* Small compiler - File input, preprocessing and lexical analysis functions
2 *
3 * Copyright (c) ITB CompuPhase, 1997-2003
4 *
5 * This software is provided "as-is", without any express or implied warranty.
6 * In no event will the authors be held liable for any damages arising from
7 * the use of this software.
8 *
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
12 *
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software in
15 * a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
17 * 2. Altered source versions must be plainly marked as such, and must not be
18 * misrepresented as being the original software.
19 * 3. This notice may not be removed or altered from any source distribution.
20 *
21 * Version: $Id: embryo_cc_sc2.c 62382 2011-08-12 12:39:29Z billiob $
22 */
23
24
25#ifdef HAVE_CONFIG_H
26# include <config.h>
27#endif
28
29#include <assert.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <ctype.h>
34#include <math.h>
35#include "embryo_cc_sc.h"
36#include "Embryo.h"
37
38static int match(char *st, int end);
39static cell litchar(char **lptr, int rawmode);
40static int alpha(char c);
41
42static int icomment; /* currently in multiline comment? */
43static int iflevel; /* nesting level if #if/#else/#endif */
44static int skiplevel; /* level at which we started skipping */
45static int elsedone; /* level at which we have seen an #else */
46static char term_expr[] = "";
47static int listline = -1; /* "current line" for the list file */
48
49/* pushstk & popstk
50 *
51 * Uses a LIFO stack to store information. The stack is used by doinclude(),
52 * doswitch() (to hold the state of "swactive") and some other routines.
53 *
54 * Porting note: I made the bold assumption that an integer will not be
55 * larger than a pointer (it may be smaller). That is, the stack element
56 * is typedef'ed as a pointer type, but I also store integers on it. See
57 * SC.H for "stkitem"
58 *
59 * Global references: stack,stkidx (private to pushstk() and popstk())
60 */
61static stkitem stack[sSTKMAX];
62static int stkidx;
63void
64pushstk(stkitem val)
65{
66 if (stkidx >= sSTKMAX)
67 error(102, "parser stack"); /* stack overflow (recursive include?) */
68 stack[stkidx] = val;
69 stkidx += 1;
70}
71
72stkitem
73popstk(void)
74{
75 if (stkidx == 0)
76 return (stkitem) - 1; /* stack is empty */
77 stkidx -= 1;
78 return stack[stkidx];
79}
80
81int
82plungequalifiedfile(char *name)
83{
84 static char *extensions[] = { ".inc", ".sma", ".small" };
85 FILE *fp;
86 char *ext;
87 int ext_idx;
88
89 ext_idx = 0;
90 do
91 {
92 fp = (FILE *) sc_opensrc(name);
93 ext = strchr(name, '\0'); /* save position */
94 if (!fp)
95 {
96 /* try to append an extension */
97 strcpy(ext, extensions[ext_idx]);
98 fp = (FILE *) sc_opensrc(name);
99 if (!fp)
100 *ext = '\0'; /* on failure, restore filename */
101 } /* if */
102 ext_idx++;
103 }
104 while ((!fp) &&
105 (ext_idx < (int)(sizeof extensions / sizeof extensions[0])));
106 if (!fp)
107 {
108 *ext = '\0'; /* restore filename */
109 return FALSE;
110 } /* if */
111 pushstk((stkitem) inpf);
112 pushstk((stkitem) inpfname); /* pointer to current file name */
113 pushstk((stkitem) curlibrary);
114 pushstk((stkitem) iflevel);
115 assert(skiplevel == 0);
116 pushstk((stkitem) icomment);
117 pushstk((stkitem) fcurrent);
118 pushstk((stkitem) fline);
119 inpfname = strdup(name); /* set name of include file */
120 if (!inpfname)
121 error(103); /* insufficient memory */
122 inpf = fp; /* set input file pointer to include file */
123 fnumber++;
124 fline = 0; /* set current line number to 0 */
125 fcurrent = fnumber;
126 icomment = FALSE;
127 setfile(inpfname, fcurrent);
128 listline = -1; /* force a #line directive when changing the file */
129 setactivefile(fcurrent);
130 return TRUE;
131}
132
133int
134plungefile(char *name, int try_currentpath, int try_includepaths)
135{
136 int result = FALSE;
137 int i;
138 char *ptr;
139
140 if (try_currentpath)
141 result = plungequalifiedfile(name);
142
143 if (try_includepaths && name[0] != DIRSEP_CHAR)
144 {
145 for (i = 0; !result && (ptr = get_path(i)); i++)
146 {
147 char path[PATH_MAX];
148
149 strncpy(path, ptr, sizeof path);
150 path[sizeof path - 1] = '\0'; /* force '\0' termination */
151 strncat(path, name, sizeof(path) - strlen(path));
152 path[sizeof path - 1] = '\0';
153 result = plungequalifiedfile(path);
154 } /* while */
155 } /* if */
156 return result;
157}
158
159static void
160check_empty(char *lptr)
161{
162 /* verifies that the string contains only whitespace */
163 while (*lptr <= ' ' && *lptr != '\0')
164 lptr++;
165 if (*lptr != '\0')
166 error(38); /* extra characters on line */
167}
168
169/* doinclude
170 *
171 * Gets the name of an include file, pushes the old file on the stack and
172 * sets some options. This routine doesn't use lex(), since lex() doesn't
173 * recognize file names (and directories).
174 *
175 * Global references: inpf (altered)
176 * inpfname (altered)
177 * fline (altered)
178 * lptr (altered)
179 */
180static void
181doinclude(void)
182{
183 char name[PATH_MAX], c;
184 int i, result;
185
186 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
187 lptr++;
188 if (*lptr == '<' || *lptr == '\"')
189 {
190 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
191 lptr++;
192 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
193 lptr++;
194 }
195 else
196 {
197 c = '\0';
198 } /* if */
199
200 i = 0;
201 while ((*lptr != c) && (*lptr != '\0') && (i < (int)(sizeof(name) - 1))) /* find the end of the string */
202 name[i++] = *lptr++;
203 while (i > 0 && name[i - 1] <= ' ')
204 i--; /* strip trailing whitespace */
205 assert((i >= 0) && (i < (int)(sizeof(name))));
206 name[i] = '\0'; /* zero-terminate the string */
207
208 if (*lptr != c)
209 { /* verify correct string termination */
210 error(37); /* invalid string */
211 return;
212 } /* if */
213 if (c != '\0')
214 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
215
216 /* Include files between "..." or without quotes are read from the current
217 * directory, or from a list of "include directories". Include files
218 * between <...> are only read from the list of include directories.
219 */
220 result = plungefile(name, (c != '>'), TRUE);
221 if (!result)
222 error(100, name); /* cannot read from ... (fatal error) */
223}
224
225/* readline
226 *
227 * Reads in a new line from the input file pointed to by "inpf". readline()
228 * concatenates lines that end with a \ with the next line. If no more data
229 * can be read from the file, readline() attempts to pop off the previous file
230 * from the stack. If that fails too, it sets "freading" to 0.
231 *
232 * Global references: inpf,fline,inpfname,freading,icomment (altered)
233 */
234static void
235readline(char *line)
236{
237 int i, num, cont;
238 char *ptr;
239
240 if (lptr == term_expr)
241 return;
242 num = sLINEMAX;
243 cont = FALSE;
244 do
245 {
246 if (!inpf || sc_eofsrc(inpf))
247 {
248 if (cont)
249 error(49); /* invalid line continuation */
250 if (inpf && inpf != inpf_org)
251 sc_closesrc(inpf);
252 i = (int)(long)popstk();
253 if (i == -1)
254 { /* All's done; popstk() returns "stack is empty" */
255 freading = FALSE;
256 *line = '\0';
257 /* when there is nothing more to read, the #if/#else stack should
258 * be empty and we should not be in a comment
259 */
260 assert(iflevel >= 0);
261 if (iflevel > 0)
262 error(1, "#endif", "-end of file-");
263 else if (icomment)
264 error(1, "*/", "-end of file-");
265 return;
266 } /* if */
267 fline = i;
268 fcurrent = (int)(long)popstk();
269 icomment = (int)(long)popstk();
270 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
271 iflevel = (int)(long)popstk();
272 curlibrary = (constvalue *) popstk();
273 free(inpfname); /* return memory allocated for the include file name */
274 inpfname = (char *)popstk();
275 inpf = (FILE *) popstk();
276 setactivefile(fcurrent);
277 listline = -1; /* force a #line directive when changing the file */
278 elsedone = 0;
279 } /* if */
280
281 if (!sc_readsrc(inpf, line, num))
282 {
283 *line = '\0'; /* delete line */
284 cont = FALSE;
285 }
286 else
287 {
288 /* check whether to erase leading spaces */
289 if (cont)
290 {
291 char *ptr = line;
292
293 while (*ptr == ' ' || *ptr == '\t')
294 ptr++;
295 if (ptr != line)
296 memmove(line, ptr, strlen(ptr) + 1);
297 } /* if */
298 cont = FALSE;
299 /* check whether a full line was read */
300 if (!strchr(line, '\n') && !sc_eofsrc(inpf))
301 error(75); /* line too long */
302 /* check if the next line must be concatenated to this line */
303 if ((ptr = strchr(line, '\n')) && ptr > line)
304 {
305 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
306 while (ptr > line
307 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
308 ptr--; /* skip trailing whitespace */
309 if (*ptr == '\\')
310 {
311 cont = TRUE;
312 /* set '\a' at the position of '\\' to make it possible to check
313 * for a line continuation in a single line comment (error 49)
314 */
315 *ptr++ = '\a';
316 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
317 } /* if */
318 } /* if */
319 num -= strlen(line);
320 line += strlen(line);
321 } /* if */
322 fline += 1;
323 }
324 while (num >= 0 && cont);
325}
326
327/* stripcom
328 *
329 * Replaces all comments from the line by space characters. It updates
330 * a global variable ("icomment") for multiline comments.
331 *
332 * This routine also supports the C++ extension for single line comments.
333 * These comments are started with "//" and end at the end of the line.
334 *
335 * Global references: icomment (private to "stripcom")
336 */
337static void
338stripcom(char *line)
339{
340 char c;
341
342 while (*line)
343 {
344 if (icomment)
345 {
346 if (*line == '*' && *(line + 1) == '/')
347 {
348 icomment = FALSE; /* comment has ended */
349 *line = ' '; /* replace '*' and '/' characters by spaces */
350 *(line + 1) = ' ';
351 line += 2;
352 }
353 else
354 {
355 if (*line == '/' && *(line + 1) == '*')
356 error(216); /* nested comment */
357 *line = ' '; /* replace comments by spaces */
358 line += 1;
359 } /* if */
360 }
361 else
362 {
363 if (*line == '/' && *(line + 1) == '*')
364 {
365 icomment = TRUE; /* start comment */
366 *line = ' '; /* replace '/' and '*' characters by spaces */
367 *(line + 1) = ' ';
368 line += 2;
369 }
370 else if (*line == '/' && *(line + 1) == '/')
371 { /* comment to end of line */
372 if (strchr(line, '\a'))
373 error(49); /* invalid line continuation */
374 *line++ = '\n'; /* put "newline" at first slash */
375 *line = '\0'; /* put "zero-terminator" at second slash */
376 }
377 else
378 {
379 if (*line == '\"' || *line == '\'')
380 { /* leave literals unaltered */
381 c = *line; /* ending quote, single or double */
382 line += 1;
383 while ((*line != c || *(line - 1) == '\\')
384 && *line != '\0')
385 line += 1;
386 line += 1; /* skip final quote */
387 }
388 else
389 {
390 line += 1;
391 } /* if */
392 } /* if */
393 } /* if */
394 } /* while */
395}
396
397/* btoi
398 *
399 * Attempts to interpret a numeric symbol as a boolean value. On success
400 * it returns the number of characters processed (so the line pointer can be
401 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
402 * "val" is garbage.
403 *
404 * A boolean value must start with "0b"
405 */
406static int
407btoi(cell * val, char *curptr)
408{
409 char *ptr;
410
411 *val = 0;
412 ptr = curptr;
413 if (*ptr == '0' && *(ptr + 1) == 'b')
414 {
415 ptr += 2;
416 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
417 {
418 if (*ptr != '_')
419 *val = (*val << 1) | (*ptr - '0');
420 ptr++;
421 } /* while */
422 }
423 else
424 {
425 return 0;
426 } /* if */
427 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
428 return 0;
429 else
430 return (int)(ptr - curptr);
431}
432
433/* dtoi
434 *
435 * Attempts to interpret a numeric symbol as a decimal value. On success
436 * it returns the number of characters processed and the value is stored in
437 * "val". Otherwise it returns 0 and "val" is garbage.
438 */
439static int
440dtoi(cell * val, char *curptr)
441{
442 char *ptr;
443
444 *val = 0;
445 ptr = curptr;
446 if (!isdigit(*ptr)) /* should start with digit */
447 return 0;
448 while (isdigit(*ptr) || *ptr == '_')
449 {
450 if (*ptr != '_')
451 *val = (*val * 10) + (*ptr - '0');
452 ptr++;
453 } /* while */
454 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
455 return 0;
456 if (*ptr == '.' && isdigit(*(ptr + 1)))
457 return 0; /* but a fractional part must not be present */
458 return (int)(ptr - curptr);
459}
460
461/* htoi
462 *
463 * Attempts to interpret a numeric symbol as a hexadecimal value. On
464 * success it returns the number of characters processed and the value is
465 * stored in "val". Otherwise it return 0 and "val" is garbage.
466 */
467static int
468htoi(cell * val, char *curptr)
469{
470 char *ptr;
471
472 *val = 0;
473 ptr = curptr;
474 if (!isdigit(*ptr)) /* should start with digit */
475 return 0;
476 if (*ptr == '0' && *(ptr + 1) == 'x')
477 { /* C style hexadecimal notation */
478 ptr += 2;
479 while (isxdigit(*ptr) || *ptr == '_')
480 {
481 if (*ptr != '_')
482 {
483 assert(isxdigit(*ptr));
484 *val = *val << 4;
485 if (isdigit(*ptr))
486 *val += (*ptr - '0');
487 else
488 *val += (tolower(*ptr) - 'a' + 10);
489 } /* if */
490 ptr++;
491 } /* while */
492 }
493 else
494 {
495 return 0;
496 } /* if */
497 if (alphanum(*ptr))
498 return 0;
499 else
500 return (int)(ptr - curptr);
501}
502
503#if defined LINUX
504static double
505pow10(int value)
506{
507 double res = 1.0;
508
509 while (value >= 4)
510 {
511 res *= 10000.0;
512 value -= 5;
513 } /* while */
514 while (value >= 2)
515 {
516 res *= 100.0;
517 value -= 2;
518 } /* while */
519 while (value >= 1)
520 {
521 res *= 10.0;
522 value -= 1;
523 } /* while */
524 return res;
525}
526#endif
527
528/* ftoi
529 *
530 * Attempts to interpret a numeric symbol as a rational number, either as
531 * IEEE 754 single precision floating point or as a fixed point integer.
532 * On success it returns the number of characters processed and the value is
533 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
534 *
535 * Small has stricter definition for floating point numbers than most:
536 * o the value must start with a digit; ".5" is not a valid number, you
537 * should write "0.5"
538 * o a period must appear in the value, even if an exponent is given; "2e3"
539 * is not a valid number, you should write "2.0e3"
540 * o at least one digit must follow the period; "6." is not a valid number,
541 * you should write "6.0"
542 */
543static int
544ftoi(cell * val, char *curptr)
545{
546 char *ptr;
547 double fnum, ffrac, fmult;
548 unsigned long dnum, dbase;
549 int i, ignore;
550
551 assert(rational_digits >= 0 && rational_digits < 9);
552 for (i = 0, dbase = 1; i < rational_digits; i++)
553 dbase *= 10;
554 fnum = 0.0;
555 dnum = 0L;
556 ptr = curptr;
557 if (!isdigit(*ptr)) /* should start with digit */
558 return 0;
559 while (isdigit(*ptr) || *ptr == '_')
560 {
561 if (*ptr != '_')
562 {
563 fnum = (fnum * 10.0) + (*ptr - '0');
564 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
565 } /* if */
566 ptr++;
567 } /* while */
568 if (*ptr != '.')
569 return 0; /* there must be a period */
570 ptr++;
571 if (!isdigit(*ptr)) /* there must be at least one digit after the dot */
572 return 0;
573 ffrac = 0.0;
574 fmult = 1.0;
575 ignore = FALSE;
576 while (isdigit(*ptr) || *ptr == '_')
577 {
578 if (*ptr != '_')
579 {
580 ffrac = (ffrac * 10.0) + (*ptr - '0');
581 fmult = fmult / 10.0;
582 dbase /= 10L;
583 dnum += (*ptr - '0') * dbase;
584 if (dbase == 0L && sc_rationaltag && rational_digits > 0
585 && !ignore)
586 {
587 error(222); /* number of digits exceeds rational number precision */
588 ignore = TRUE;
589 } /* if */
590 } /* if */
591 ptr++;
592 } /* while */
593 fnum += ffrac * fmult; /* form the number so far */
594 if (*ptr == 'e')
595 { /* optional fractional part */
596 int exp, sign;
597
598 ptr++;
599 if (*ptr == '-')
600 {
601 sign = -1;
602 ptr++;
603 }
604 else
605 {
606 sign = 1;
607 } /* if */
608 if (!isdigit(*ptr)) /* 'e' should be followed by a digit */
609 return 0;
610 exp = 0;
611 while (isdigit(*ptr))
612 {
613 exp = (exp * 10) + (*ptr - '0');
614 ptr++;
615 } /* while */
616#if defined LINUX
617 fmult = pow10(exp * sign);
618#else
619 fmult = pow(10, exp * sign);
620#endif
621 fnum *= fmult;
622 dnum *= (unsigned long)(fmult + 0.5);
623 } /* if */
624
625 /* decide how to store the number */
626 if (sc_rationaltag == 0)
627 {
628 error(70); /* rational number support was not enabled */
629 *val = 0;
630 }
631 else if (rational_digits == 0)
632 {
633 float f = (float) fnum;
634 /* floating point */
635 *val = EMBRYO_FLOAT_TO_CELL(f);
636#if !defined NDEBUG
637 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
638 * format (as mandated in the ANSI standard). Test this assumption anyway.
639 */
640 {
641 float test1 = 0.0, test2 = 50.0;
642 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
643 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
644
645 if (c1 != 0x00000000L)
646 {
647 fprintf(stderr,
648 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
649 "point math as embryo expects. this could be bad.\n"
650 "\n"
651 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
652 "\n"
653 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
654 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
655 , c1);
656 }
657 else if (c2 != 0x42480000L)
658 {
659 fprintf(stderr,
660 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
661 "point math as embryo expects. This could be bad.\n"
662 "\n"
663 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
664 "\n"
665 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
666 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
667 , c2);
668 }
669 }
670#endif
671 }
672 else
673 {
674 /* fixed point */
675 *val = (cell) dnum;
676 } /* if */
677
678 return (int)(ptr - curptr);
679}
680
681/* number
682 *
683 * Reads in a number (binary, decimal or hexadecimal). It returns the number
684 * of characters processed or 0 if the symbol couldn't be interpreted as a
685 * number (in this case the argument "val" remains unchanged). This routine
686 * relies on the 'early dropout' implementation of the logical or (||)
687 * operator.
688 *
689 * Note: the routine doesn't check for a sign (+ or -). The - is checked
690 * for at "hier2()" (in fact, it is viewed as an operator, not as a
691 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
692 */
693static int
694number(cell * val, char *curptr)
695{
696 int i;
697 cell value;
698
699 if ((i = btoi(&value, curptr)) != 0 /* binary? */
700 || (i = htoi(&value, curptr)) != 0 /* hexadecimal? */
701 || (i = dtoi(&value, curptr)) != 0) /* decimal? */
702 {
703 *val = value;
704 return i;
705 }
706 else
707 {
708 return 0; /* else not a number */
709 } /* if */
710}
711
712static void
713chrcat(char *str, char chr)
714{
715 str = strchr(str, '\0');
716 *str++ = chr;
717 *str = '\0';
718}
719
720static int
721preproc_expr(cell * val, int *tag)
722{
723 int result;
724 int index;
725 cell code_index;
726 char *term;
727
728 /* Disable staging; it should be disabled already because
729 * expressions may not be cut off half-way between conditional
730 * compilations. Reset the staging index, but keep the code
731 * index.
732 */
733 if (stgget(&index, &code_index))
734 {
735 error(57); /* unfinished expression */
736 stgdel(0, code_index);
737 stgset(FALSE);
738 } /* if */
739 /* append a special symbol to the string, so the expression
740 * analyzer won't try to read a next line when it encounters
741 * an end-of-line
742 */
743 assert(strlen(pline) < sLINEMAX);
744 term = strchr(pline, '\0');
745 assert(term != NULL);
746 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
747 result = constexpr(val, tag); /* get value (or 0 on error) */
748 *term = '\0'; /* erase the token (if still present) */
749 lexclr(FALSE); /* clear any "pushed" tokens */
750 return result;
751}
752
753/* getstring
754 * Returns returns a pointer behind the closing quote or to the other
755 * character that caused the input to be ended.
756 */
757static char *
758getstring(char *dest, int max)
759{
760 assert(dest != NULL);
761 *dest = '\0';
762 while (*lptr <= ' ' && *lptr != '\0')
763 lptr++; /* skip whitespace */
764 if (*lptr != '"')
765 {
766 error(37); /* invalid string */
767 }
768 else
769 {
770 int len = 0;
771
772 lptr++; /* skip " */
773 while (*lptr != '"' && *lptr != '\0')
774 {
775 if (len < max - 1)
776 dest[len++] = *lptr;
777 lptr++;
778 } /* if */
779 dest[len] = '\0';
780 if (*lptr == '"')
781 lptr++; /* skip closing " */
782 else
783 error(37); /* invalid string */
784 } /* if */
785 return lptr;
786}
787
788enum
789{
790 CMD_NONE,
791 CMD_TERM,
792 CMD_EMPTYLINE,
793 CMD_CONDFALSE,
794 CMD_INCLUDE,
795 CMD_DEFINE,
796 CMD_IF,
797 CMD_DIRECTIVE,
798};
799
800/* command
801 *
802 * Recognizes the compiler directives. The function returns:
803 * CMD_NONE the line must be processed
804 * CMD_TERM a pending expression must be completed before processing further lines
805 * Other value: the line must be skipped, because:
806 * CMD_CONDFALSE false "#if.." code
807 * CMD_EMPTYLINE line is empty
808 * CMD_INCLUDE the line contains a #include directive
809 * CMD_DEFINE the line contains a #subst directive
810 * CMD_IF the line contains a #if/#else/#endif directive
811 * CMD_DIRECTIVE the line contains some other compiler directive
812 *
813 * Global variables: iflevel, skiplevel, elsedone (altered)
814 * lptr (altered)
815 */
816static int
817command(void)
818{
819 int tok, ret;
820 cell val;
821 char *str;
822 int index;
823 cell code_index;
824
825 while (*lptr <= ' ' && *lptr != '\0')
826 lptr += 1;
827 if (*lptr == '\0')
828 return CMD_EMPTYLINE; /* empty line */
829 if (*lptr != '#')
830 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
831 /* compiler directive found */
832 indent_nowarn = TRUE; /* allow loose indentation" */
833 lexclr(FALSE); /* clear any "pushed" tokens */
834 /* on a pending expression, force to return a silent ';' token and force to
835 * re-read the line
836 */
837 if (!sc_needsemicolon && stgget(&index, &code_index))
838 {
839 lptr = term_expr;
840 return CMD_TERM;
841 } /* if */
842 tok = lex(&val, &str);
843 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
844 switch (tok)
845 {
846 case tpIF: /* conditional compilation */
847 ret = CMD_IF;
848 iflevel += 1;
849 if (skiplevel)
850 break; /* break out of switch */
851 preproc_expr(&val, NULL); /* get value (or 0 on error) */
852 if (!val)
853 skiplevel = iflevel;
854 check_empty(lptr);
855 break;
856 case tpELSE:
857 ret = CMD_IF;
858 if (iflevel == 0 && skiplevel == 0)
859 {
860 error(26); /* no matching #if */
861 errorset(sRESET);
862 }
863 else
864 {
865 if (elsedone == iflevel)
866 error(60); /* multiple #else directives between #if ... #endif */
867 elsedone = iflevel;
868 if (skiplevel == iflevel)
869 skiplevel = 0;
870 else if (skiplevel == 0)
871 skiplevel = iflevel;
872 } /* if */
873 check_empty(lptr);
874 break;
875#if 0 /* ??? *really* need to use a stack here */
876 case tpELSEIF:
877 ret = CMD_IF;
878 if (iflevel == 0 && skiplevel == 0)
879 {
880 error(26); /* no matching #if */
881 errorset(sRESET);
882 }
883 else if (elsedone == iflevel)
884 {
885 error(61); /* #elseif directive may not follow an #else */
886 errorset(sRESET);
887 }
888 else
889 {
890 preproc_expr(&val, NULL); /* get value (or 0 on error) */
891 if (skiplevel == 0)
892 skiplevel = iflevel; /* we weren't skipping, start skipping now */
893 else if (val)
894 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
895 /* else: we were skipping and condition is invalid -> keep skipping */
896 check_empty(lptr);
897 } /* if */
898 break;
899#endif
900 case tpENDIF:
901 ret = CMD_IF;
902 if (iflevel == 0 && skiplevel == 0)
903 {
904 error(26);
905 errorset(sRESET);
906 }
907 else
908 {
909 if (skiplevel == iflevel)
910 skiplevel = 0;
911 if (elsedone == iflevel)
912 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
913 * the state whether an #else was seen per nesting level */
914 iflevel -= 1;
915 } /* if */
916 check_empty(lptr);
917 break;
918 case tINCLUDE: /* #include directive */
919 ret = CMD_INCLUDE;
920 if (skiplevel == 0)
921 doinclude();
922 break;
923 case tpFILE:
924 if (skiplevel == 0)
925 {
926 char pathname[PATH_MAX];
927
928 lptr = getstring(pathname, sizeof pathname);
929 if (pathname[0] != '\0')
930 {
931 free(inpfname);
932 inpfname = strdup(pathname);
933 if (!inpfname)
934 error(103); /* insufficient memory */
935 } /* if */
936 } /* if */
937 check_empty(lptr);
938 break;
939 case tpLINE:
940 if (skiplevel == 0)
941 {
942 if (lex(&val, &str) != tNUMBER)
943 error(8); /* invalid/non-constant expression */
944 fline = (int)val;
945
946 while (*lptr == ' ' && *lptr != '\0')
947 lptr++; /* skip whitespace */
948 if (*lptr == '"')
949 {
950 char pathname[PATH_MAX];
951
952 lptr = getstring(pathname, sizeof pathname);
953 if (pathname[0] != '\0')
954 {
955 free(inpfname);
956 inpfname = strdup(pathname);
957 if (!inpfname)
958 error(103); /* insufficient memory */
959 } /* if */
960 }
961 } /* if */
962 check_empty(lptr);
963 break;
964 case tpASSERT:
965 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
966 {
967 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
968 if (!val)
969 error(7); /* assertion failed */
970 check_empty(lptr);
971 } /* if */
972 break;
973 case tpPRAGMA:
974 if (skiplevel == 0)
975 {
976 if (lex(&val, &str) == tSYMBOL)
977 {
978 if (strcmp(str, "ctrlchar") == 0)
979 {
980 if (lex(&val, &str) != tNUMBER)
981 error(27); /* invalid character constant */
982 sc_ctrlchar = (char)val;
983 }
984 else if (strcmp(str, "compress") == 0)
985 {
986 cell val;
987
988 preproc_expr(&val, NULL);
989 sc_compress = (int)val; /* switch code packing on/off */
990 }
991 else if (strcmp(str, "dynamic") == 0)
992 {
993 preproc_expr(&sc_stksize, NULL);
994 }
995 else if (strcmp(str, "library") == 0)
996 {
997 char name[sNAMEMAX + 1];
998
999 while (*lptr <= ' ' && *lptr != '\0')
1000 lptr++;
1001 if (*lptr == '"')
1002 {
1003 lptr = getstring(name, sizeof name);
1004 }
1005 else
1006 {
1007 int i;
1008
1009 for (i = 0;
1010 (i < (int)(sizeof(name))) &&
1011 (alphanum(*lptr));
1012 i++, lptr++)
1013 name[i] = *lptr;
1014 name[i] = '\0';
1015 } /* if */
1016 if (name[0] == '\0')
1017 {
1018 curlibrary = NULL;
1019 }
1020 else
1021 {
1022 if (strlen(name) > sEXPMAX)
1023 error(220, name, sEXPMAX); /* exported symbol is truncated */
1024 /* add the name if it does not yet exist in the table */
1025 if (!find_constval(&libname_tab, name, 0))
1026 curlibrary =
1027 append_constval(&libname_tab, name, 0, 0);
1028 } /* if */
1029 }
1030 else if (strcmp(str, "pack") == 0)
1031 {
1032 cell val;
1033
1034 preproc_expr(&val, NULL); /* default = packed/unpacked */
1035 sc_packstr = (int)val;
1036 }
1037 else if (strcmp(str, "rational") == 0)
1038 {
1039 char name[sNAMEMAX + 1];
1040 cell digits = 0;
1041 int i;
1042
1043 /* first gather all information, start with the tag name */
1044 while ((*lptr <= ' ') && (*lptr != '\0'))
1045 lptr++;
1046 for (i = 0;
1047 (i < (int)(sizeof(name))) &&
1048 (alphanum(*lptr));
1049 i++, lptr++)
1050 name[i] = *lptr;
1051 name[i] = '\0';
1052 /* then the precision (for fixed point arithmetic) */
1053 while (*lptr <= ' ' && *lptr != '\0')
1054 lptr++;
1055 if (*lptr == '(')
1056 {
1057 preproc_expr(&digits, NULL);
1058 if (digits <= 0 || digits > 9)
1059 {
1060 error(68); /* invalid rational number precision */
1061 digits = 0;
1062 } /* if */
1063 if (*lptr == ')')
1064 lptr++;
1065 } /* if */
1066 /* add the tag (make it public) and check the values */
1067 i = sc_addtag(name);
1068 exporttag(i);
1069 if (sc_rationaltag == 0
1070 || (sc_rationaltag == i
1071 && rational_digits == (int)digits))
1072 {
1073 sc_rationaltag = i;
1074 rational_digits = (int)digits;
1075 }
1076 else
1077 {
1078 error(69); /* rational number format already set, can only be set once */
1079 } /* if */
1080 }
1081 else if (strcmp(str, "semicolon") == 0)
1082 {
1083 cell val;
1084
1085 preproc_expr(&val, NULL);
1086 sc_needsemicolon = (int)val;
1087 }
1088 else if (strcmp(str, "tabsize") == 0)
1089 {
1090 cell val;
1091
1092 preproc_expr(&val, NULL);
1093 sc_tabsize = (int)val;
1094 }
1095 else if (strcmp(str, "align") == 0)
1096 {
1097 sc_alignnext = TRUE;
1098 }
1099 else if (strcmp(str, "unused") == 0)
1100 {
1101 char name[sNAMEMAX + 1];
1102 int i, comma;
1103 symbol *sym;
1104
1105 do
1106 {
1107 /* get the name */
1108 while ((*lptr <= ' ') && (*lptr != '\0'))
1109 lptr++;
1110 for (i = 0;
1111 (i < (int)(sizeof(name))) &&
1112 (isalpha(*lptr));
1113 i++, lptr++)
1114 name[i] = *lptr;
1115 name[i] = '\0';
1116 /* get the symbol */
1117 sym = findloc(name);
1118 if (!sym)
1119 sym = findglb(name);
1120 if (sym)
1121 {
1122 sym->usage |= uREAD;
1123 if (sym->ident == iVARIABLE
1124 || sym->ident == iREFERENCE
1125 || sym->ident == iARRAY
1126 || sym->ident == iREFARRAY)
1127 sym->usage |= uWRITTEN;
1128 }
1129 else
1130 {
1131 error(17, name); /* undefined symbol */
1132 } /* if */
1133 /* see if a comma follows the name */
1134 while (*lptr <= ' ' && *lptr != '\0')
1135 lptr++;
1136 comma = (*lptr == ',');
1137 if (comma)
1138 lptr++;
1139 }
1140 while (comma);
1141 }
1142 else
1143 {
1144 error(207); /* unknown #pragma */
1145 } /* if */
1146 }
1147 else
1148 {
1149 error(207); /* unknown #pragma */
1150 } /* if */
1151 check_empty(lptr);
1152 } /* if */
1153 break;
1154 case tpENDINPUT:
1155 case tpENDSCRPT:
1156 if (skiplevel == 0)
1157 {
1158 check_empty(lptr);
1159 assert(inpf != NULL);
1160 if (inpf != inpf_org)
1161 sc_closesrc(inpf);
1162 inpf = NULL;
1163 } /* if */
1164 break;
1165#if !defined NOEMIT
1166 case tpEMIT:
1167 {
1168 /* write opcode to output file */
1169 char name[40];
1170 int i;
1171
1172 while (*lptr <= ' ' && *lptr != '\0')
1173 lptr++;
1174 for (i = 0; i < 40 && (isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1175 name[i] = (char)tolower(*lptr);
1176 name[i] = '\0';
1177 stgwrite("\t");
1178 stgwrite(name);
1179 stgwrite(" ");
1180 code_idx += opcodes(1);
1181 /* write parameter (if any) */
1182 while (*lptr <= ' ' && *lptr != '\0')
1183 lptr++;
1184 if (*lptr != '\0')
1185 {
1186 symbol *sym;
1187
1188 tok = lex(&val, &str);
1189 switch (tok)
1190 {
1191 case tNUMBER:
1192 case tRATIONAL:
1193 outval(val, FALSE);
1194 code_idx += opargs(1);
1195 break;
1196 case tSYMBOL:
1197 sym = findloc(str);
1198 if (!sym)
1199 sym = findglb(str);
1200 if (!sym || (sym->ident != iFUNCTN
1201 && sym->ident != iREFFUNC
1202 && (sym->usage & uDEFINE) == 0))
1203 {
1204 error(17, str); /* undefined symbol */
1205 }
1206 else
1207 {
1208 outval(sym->addr, FALSE);
1209 /* mark symbol as "used", unknown whether for read or write */
1210 markusage(sym, uREAD | uWRITTEN);
1211 code_idx += opargs(1);
1212 } /* if */
1213 break;
1214 default:
1215 {
1216 char s2[20];
1217 extern char *sc_tokens[]; /* forward declaration */
1218
1219 if (tok < 256)
1220 sprintf(s2, "%c", (char)tok);
1221 else
1222 strcpy(s2, sc_tokens[tok - tFIRST]);
1223 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1224 break;
1225 } /* case */
1226 } /* switch */
1227 } /* if */
1228 stgwrite("\n");
1229 check_empty(lptr);
1230 break;
1231 } /* case */
1232#endif
1233#if !defined NO_DEFINE
1234 case tpDEFINE:
1235 {
1236 ret = CMD_DEFINE;
1237 if (skiplevel == 0)
1238 {
1239 char *pattern, *substitution;
1240 char *start, *end;
1241 int count, prefixlen;
1242 stringpair *def;
1243
1244 /* find the pattern to match */
1245 while (*lptr <= ' ' && *lptr != '\0')
1246 lptr++;
1247 start = lptr; /* save starting point of the match pattern */
1248 count = 0;
1249 while (*lptr > ' ' && *lptr != '\0')
1250 {
1251 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1252 count++;
1253 } /* while */
1254 end = lptr;
1255 /* check pattern to match */
1256 if (!isalpha(*start) && *start != '_')
1257 {
1258 error(74); /* pattern must start with an alphabetic character */
1259 break;
1260 } /* if */
1261 /* store matched pattern */
1262 pattern = malloc(count + 1);
1263 if (!pattern)
1264 error(103); /* insufficient memory */
1265 lptr = start;
1266 count = 0;
1267 while (lptr != end)
1268 {
1269 assert(lptr < end);
1270 assert(*lptr != '\0');
1271 pattern[count++] = (char)litchar(&lptr, FALSE);
1272 } /* while */
1273 pattern[count] = '\0';
1274 /* special case, erase trailing variable, because it could match anything */
1275 if (count >= 2 && isdigit(pattern[count - 1])
1276 && pattern[count - 2] == '%')
1277 pattern[count - 2] = '\0';
1278 /* find substitution string */
1279 while (*lptr <= ' ' && *lptr != '\0')
1280 lptr++;
1281 start = lptr; /* save starting point of the match pattern */
1282 count = 0;
1283 end = NULL;
1284 while (*lptr != '\0')
1285 {
1286 /* keep position of the start of trailing whitespace */
1287 if (*lptr <= ' ')
1288 {
1289 if (!end)
1290 end = lptr;
1291 }
1292 else
1293 {
1294 end = NULL;
1295 } /* if */
1296 count++;
1297 lptr++;
1298 } /* while */
1299 if (!end)
1300 end = lptr;
1301 /* store matched substitution */
1302 substitution = malloc(count + 1); /* +1 for '\0' */
1303 if (!substitution)
1304 error(103); /* insufficient memory */
1305 lptr = start;
1306 count = 0;
1307 while (lptr != end)
1308 {
1309 assert(lptr < end);
1310 assert(*lptr != '\0');
1311 substitution[count++] = *lptr++;
1312 } /* while */
1313 substitution[count] = '\0';
1314 /* check whether the definition already exists */
1315 for (prefixlen = 0, start = pattern;
1316 isalpha(*start) || isdigit(*start) || *start == '_';
1317 prefixlen++, start++)
1318 /* nothing */ ;
1319 assert(prefixlen > 0);
1320 if ((def = find_subst(pattern, prefixlen)))
1321 {
1322 if (strcmp(def->first, pattern) != 0
1323 || strcmp(def->second, substitution) != 0)
1324 error(201, pattern); /* redefinition of macro (non-identical) */
1325 delete_subst(pattern, prefixlen);
1326 } /* if */
1327 /* add the pattern/substitution pair to the list */
1328 assert(pattern[0] != '\0');
1329 insert_subst(pattern, substitution, prefixlen);
1330 free(pattern);
1331 free(substitution);
1332 } /* if */
1333 break;
1334 } /* case */
1335 case tpUNDEF:
1336 if (skiplevel == 0)
1337 {
1338 if (lex(&val, &str) == tSYMBOL)
1339 {
1340 if (!delete_subst(str, strlen(str)))
1341 error(17, str); /* undefined symbol */
1342 }
1343 else
1344 {
1345 error(20, str); /* invalid symbol name */
1346 } /* if */
1347 check_empty(lptr);
1348 } /* if */
1349 break;
1350#endif
1351 default:
1352 error(31); /* unknown compiler directive */
1353 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1354 } /* switch */
1355 return ret;
1356}
1357
1358#if !defined NO_DEFINE
1359static int
1360is_startstring(char *string)
1361{
1362 if (*string == '\"' || *string == '\'')
1363 return TRUE; /* "..." */
1364
1365 if (*string == '!')
1366 {
1367 string++;
1368 if (*string == '\"' || *string == '\'')
1369 return TRUE; /* !"..." */
1370 if (*string == sc_ctrlchar)
1371 {
1372 string++;
1373 if (*string == '\"' || *string == '\'')
1374 return TRUE; /* !\"..." */
1375 } /* if */
1376 }
1377 else if (*string == sc_ctrlchar)
1378 {
1379 string++;
1380 if (*string == '\"' || *string == '\'')
1381 return TRUE; /* \"..." */
1382 if (*string == '!')
1383 {
1384 string++;
1385 if (*string == '\"' || *string == '\'')
1386 return TRUE; /* \!"..." */
1387 } /* if */
1388 } /* if */
1389
1390 return FALSE;
1391}
1392
1393static char *
1394skipstring(char *string)
1395{
1396 char endquote;
1397 int rawstring = FALSE;
1398
1399 while (*string == '!' || *string == sc_ctrlchar)
1400 {
1401 rawstring = (*string == sc_ctrlchar);
1402 string++;
1403 } /* while */
1404
1405 endquote = *string;
1406 assert(endquote == '\"' || endquote == '\'');
1407 string++; /* skip open quote */
1408 while (*string != endquote && *string != '\0')
1409 litchar(&string, rawstring);
1410 return string;
1411}
1412
1413static char *
1414skippgroup(char *string)
1415{
1416 int nest = 0;
1417 char open = *string;
1418 char close;
1419
1420 switch (open)
1421 {
1422 case '(':
1423 close = ')';
1424 break;
1425 case '{':
1426 close = '}';
1427 break;
1428 case '[':
1429 close = ']';
1430 break;
1431 case '<':
1432 close = '>';
1433 break;
1434 default:
1435 assert(0);
1436 close = '\0'; /* only to avoid a compiler warning */
1437 } /* switch */
1438
1439 string++;
1440 while (*string != close || nest > 0)
1441 {
1442 if (*string == open)
1443 nest++;
1444 else if (*string == close)
1445 nest--;
1446 else if (is_startstring(string))
1447 string = skipstring(string);
1448 if (*string == '\0')
1449 break;
1450 string++;
1451 } /* while */
1452 return string;
1453}
1454
1455static char *
1456strdel(char *str, size_t len)
1457{
1458 size_t length = strlen(str);
1459
1460 if (len > length)
1461 len = length;
1462 memmove(str, str + len, length - len + 1); /* include EOS byte */
1463 return str;
1464}
1465
1466static char *
1467strins(char *dest, char *src, size_t srclen)
1468{
1469 size_t destlen = strlen(dest);
1470
1471 assert(srclen <= strlen(src));
1472 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1473 memcpy(dest, src, srclen);
1474 return dest;
1475}
1476
1477static int
1478substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1479{
1480 int prefixlen;
1481 char *p, *s, *e, *args[10];
1482 int match, arg, len;
1483
1484 memset(args, 0, sizeof args);
1485
1486 /* check the length of the prefix */
1487 for (prefixlen = 0, s = pattern; isalpha(*s) || isdigit(*s) || *s == '_';
1488 prefixlen++, s++)
1489 /* nothing */ ;
1490 assert(prefixlen > 0);
1491 assert(strncmp(line, pattern, prefixlen) == 0);
1492
1493 /* pattern prefix matches; match the rest of the pattern, gather
1494 * the parameters
1495 */
1496 s = line + prefixlen;
1497 p = pattern + prefixlen;
1498 match = TRUE; /* so far, pattern matches */
1499 while (match && *s != '\0' && *p != '\0')
1500 {
1501 if (*p == '%')
1502 {
1503 p++; /* skip '%' */
1504 if (isdigit(*p))
1505 {
1506 arg = *p - '0';
1507 assert(arg >= 0 && arg <= 9);
1508 p++; /* skip parameter id */
1509 assert(*p != '\0');
1510 /* match the source string up to the character after the digit
1511 * (skipping strings in the process
1512 */
1513 e = s;
1514 while (*e != *p && *e != '\0' && *e != '\n')
1515 {
1516 if (is_startstring(e)) /* skip strings */
1517 e = skipstring(e);
1518 else if (strchr("({[", *e)) /* skip parenthized groups */
1519 e = skippgroup(e);
1520 if (*e != '\0')
1521 e++; /* skip non-alphapetic character (or closing quote of
1522 * a string, or the closing paranthese of a group) */
1523 } /* while */
1524 /* store the parameter (overrule any earlier) */
1525 if (args[arg])
1526 free(args[arg]);
1527 len = (int)(e - s);
1528 args[arg] = malloc(len + 1);
1529 if (!args[arg])
1530 error(103); /* insufficient memory */
1531 strncpy(args[arg], s, len);
1532 args[arg][len] = '\0';
1533 /* character behind the pattern was matched too */
1534 if (*e == *p)
1535 {
1536 s = e + 1;
1537 }
1538 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1539 && !sc_needsemicolon)
1540 {
1541 s = e; /* allow a trailing ; in the pattern match to end of line */
1542 }
1543 else
1544 {
1545 assert(*e == '\0' || *e == '\n');
1546 match = FALSE;
1547 s = e;
1548 } /* if */
1549 p++;
1550 }
1551 else
1552 {
1553 match = FALSE;
1554 } /* if */
1555 }
1556 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1557 {
1558 /* source may be ';' or end of the line */
1559 while (*s <= ' ' && *s != '\0')
1560 s++; /* skip white space */
1561 if (*s != ';' && *s != '\0')
1562 match = FALSE;
1563 p++; /* skip the semicolon in the pattern */
1564 }
1565 else
1566 {
1567 cell ch;
1568
1569 /* skip whitespace between two non-alphanumeric characters, except
1570 * for two identical symbols
1571 */
1572 assert(p > pattern);
1573 if (!alphanum(*p) && *(p - 1) != *p)
1574 while (*s <= ' ' && *s != '\0')
1575 s++; /* skip white space */
1576 ch = litchar(&p, FALSE); /* this increments "p" */
1577 if (*s != ch)
1578 match = FALSE;
1579 else
1580 s++; /* this character matches */
1581 } /* if */
1582 } /* while */
1583
1584 if (match && *p == '\0')
1585 {
1586 /* if the last character to match is an alphanumeric character, the
1587 * current character in the source may not be alphanumeric
1588 */
1589 assert(p > pattern);
1590 if (alphanum(*(p - 1)) && alphanum(*s))
1591 match = FALSE;
1592 } /* if */
1593
1594 if (match)
1595 {
1596 /* calculate the length of the substituted string */
1597 for (e = substitution, len = 0; *e != '\0'; e++)
1598 {
1599 if (*e == '%' && isdigit(*(e + 1)))
1600 {
1601 arg = *(e + 1) - '0';
1602 assert(arg >= 0 && arg <= 9);
1603 if (args[arg])
1604 len += strlen(args[arg]);
1605 e++; /* skip %, digit is skipped later */
1606 }
1607 else
1608 {
1609 len++;
1610 } /* if */
1611 } /* for */
1612 /* check length of the string after substitution */
1613 if (strlen(line) + len - (int)(s - line) > buffersize)
1614 {
1615 error(75); /* line too long */
1616 }
1617 else
1618 {
1619 /* substitute pattern */
1620 strdel(line, (int)(s - line));
1621 for (e = substitution, s = line; *e != '\0'; e++)
1622 {
1623 if (*e == '%' && isdigit(*(e + 1)))
1624 {
1625 arg = *(e + 1) - '0';
1626 assert(arg >= 0 && arg <= 9);
1627 if (args[arg])
1628 {
1629 strins(s, args[arg], strlen(args[arg]));
1630 s += strlen(args[arg]);
1631 } /* if */
1632 e++; /* skip %, digit is skipped later */
1633 }
1634 else
1635 {
1636 strins(s, e, 1);
1637 s++;
1638 } /* if */
1639 } /* for */
1640 } /* if */
1641 } /* if */
1642
1643 for (arg = 0; arg < 10; arg++)
1644 if (args[arg])
1645 free(args[arg]);
1646
1647 return match;
1648}
1649
1650static void
1651substallpatterns(char *line, int buffersize)
1652{
1653 char *start, *end;
1654 int prefixlen;
1655 stringpair *subst;
1656
1657 start = line;
1658 while (*start != '\0')
1659 {
1660 /* find the start of a prefix (skip all non-alphabetic characters),
1661 * also skip strings
1662 */
1663 while (!isalpha(*start) && *start != '_' && *start != '\0')
1664 {
1665 /* skip strings */
1666 if (is_startstring(start))
1667 {
1668 start = skipstring(start);
1669 if (*start == '\0')
1670 break; /* abort loop on error */
1671 } /* if */
1672 start++; /* skip non-alphapetic character (or closing quote of a string) */
1673 } /* while */
1674 if (*start == '\0')
1675 break; /* abort loop on error */
1676 /* get the prefix (length), look for a matching definition */
1677 prefixlen = 0;
1678 end = start;
1679 while (isalpha(*end) || isdigit(*end) || *end == '_')
1680 {
1681 prefixlen++;
1682 end++;
1683 } /* while */
1684 assert(prefixlen > 0);
1685 subst = find_subst(start, prefixlen);
1686 if (subst)
1687 {
1688 /* properly match the pattern and substitute */
1689 if (!substpattern
1690 (start, buffersize - (start - line), subst->first,
1691 subst->second))
1692 start = end; /* match failed, skip this prefix */
1693 /* match succeeded: do not update "start", because the substitution text
1694 * may be matched by other macros
1695 */
1696 }
1697 else
1698 {
1699 start = end; /* no macro with this prefix, skip this prefix */
1700 } /* if */
1701 } /* while */
1702}
1703#endif
1704
1705/* preprocess
1706 *
1707 * Reads a line by readline() into "pline" and performs basic preprocessing:
1708 * deleting comments, skipping lines with false "#if.." code and recognizing
1709 * other compiler directives. There is an indirect recursion: lex() calls
1710 * preprocess() if a new line must be read, preprocess() calls command(),
1711 * which at his turn calls lex() to identify the token.
1712 *
1713 * Global references: lptr (altered)
1714 * pline (altered)
1715 * freading (referred to only)
1716 */
1717void
1718preprocess(void)
1719{
1720 int iscommand;
1721
1722 if (!freading)
1723 return;
1724 do
1725 {
1726 readline(pline);
1727 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1728 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1729 iscommand = command();
1730 if (iscommand != CMD_NONE)
1731 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1732#if !defined NO_DEFINE
1733 if (iscommand == CMD_NONE)
1734 {
1735 assert(lptr != term_expr);
1736 substallpatterns(pline, sLINEMAX);
1737 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1738 } /* if */
1739#endif
1740 }
1741 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1742}
1743
1744static char *
1745unpackedstring(char *lptr, int rawstring)
1746{
1747 while (*lptr != '\0')
1748 {
1749 /* check for doublequotes indicating the end of the string */
1750 if (*lptr == '\"')
1751 {
1752 /* check whether there's another pair of quotes following.
1753 * If so, paste the two strings together, thus
1754 * "pants""off" becomes "pantsoff"
1755 */
1756 if (*(lptr + 1) == '\"')
1757 lptr += 2;
1758 else
1759 break;
1760 }
1761
1762 if (*lptr == '\a')
1763 { /* ignore '\a' (which was inserted at a line concatenation) */
1764 lptr++;
1765 continue;
1766 } /* if */
1767 stowlit(litchar(&lptr, rawstring)); /* litchar() alters "lptr" */
1768 } /* while */
1769 stowlit(0); /* terminate string */
1770 return lptr;
1771}
1772
1773static char *
1774packedstring(char *lptr, int rawstring)
1775{
1776 int i;
1777 ucell val, c;
1778
1779 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1780 val = 0;
1781 while (*lptr != '\0')
1782 {
1783 /* check for doublequotes indicating the end of the string */
1784 if (*lptr == '\"')
1785 {
1786 /* check whether there's another pair of quotes following.
1787 * If so, paste the two strings together, thus
1788 * "pants""off" becomes "pantsoff"
1789 */
1790 if (*(lptr + 1) == '\"')
1791 lptr += 2;
1792 else
1793 break;
1794 }
1795
1796 if (*lptr == '\a')
1797 { /* ignore '\a' (which was inserted at a line concatenation) */
1798 lptr++;
1799 continue;
1800 } /* if */
1801 c = litchar(&lptr, rawstring); /* litchar() alters "lptr" */
1802 if (c >= (ucell) (1 << charbits))
1803 error(43); /* character constant exceeds range */
1804 val |= (c << 8 * i);
1805 if (i == 0)
1806 {
1807 stowlit(val);
1808 val = 0;
1809 } /* if */
1810 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1811 } /* if */
1812 /* save last code; make sure there is at least one terminating zero character */
1813 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1814 stowlit(val); /* at least one zero character in "val" */
1815 else
1816 stowlit(0); /* add full cell of zeros */
1817 return lptr;
1818}
1819
1820/* lex(lexvalue,lexsym) Lexical Analysis
1821 *
1822 * lex() first deletes leading white space, then checks for multi-character
1823 * operators, keywords (including most compiler directives), numbers,
1824 * labels, symbols and literals (literal characters are converted to a number
1825 * and are returned as such). If every check fails, the line must contain
1826 * a single-character operator. So, lex() returns this character. In the other
1827 * case (something did match), lex() returns the number of the token. All
1828 * these tokens have been assigned numbers above 255.
1829 *
1830 * Some tokens have "attributes":
1831 * tNUMBER the value of the number is return in "lexvalue".
1832 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1833 * encoding in "lexvalue".
1834 * tSYMBOL the first sNAMEMAX characters of the symbol are
1835 * stored in a buffer, a pointer to this buffer is
1836 * returned in "lexsym".
1837 * tLABEL the first sNAMEMAX characters of the label are
1838 * stored in a buffer, a pointer to this buffer is
1839 * returned in "lexsym".
1840 * tSTRING the string is stored in the literal pool, the index
1841 * in the literal pool to this string is stored in
1842 * "lexvalue".
1843 *
1844 * lex() stores all information (the token found and possibly its attribute)
1845 * in global variables. This allows a token to be examined twice. If "_pushed"
1846 * is true, this information is returned.
1847 *
1848 * Global references: lptr (altered)
1849 * fline (referred to only)
1850 * litidx (referred to only)
1851 * _lextok, _lexval, _lexstr
1852 * _pushed
1853 */
1854
1855static int _pushed;
1856static int _lextok;
1857static cell _lexval;
1858static char _lexstr[sLINEMAX + 1];
1859static int _lexnewline;
1860
1861void
1862lexinit(void)
1863{
1864 stkidx = 0; /* index for pushstk() and popstk() */
1865 iflevel = 0; /* preprocessor: nesting of "#if" */
1866 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1867 icomment = FALSE; /* currently not in a multiline comment */
1868 _pushed = FALSE; /* no token pushed back into lex */
1869 _lexnewline = FALSE;
1870}
1871
1872char *sc_tokens[] = {
1873 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1874 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1875 "...", "..",
1876 "assert", "break", "case", "char", "const", "continue", "default",
1877 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1878 "if", "native", "new", "operator", "public", "return", "sizeof",
1879 "sleep", "static", "stock", "switch", "tagof", "while",
1880 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1881 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1882 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1883 "-label-", "-string-"
1884};
1885
1886int
1887lex(cell * lexvalue, char **lexsym)
1888{
1889 int i, toolong, newline, rawstring;
1890 char **tokptr;
1891
1892 if (_pushed)
1893 {
1894 _pushed = FALSE; /* reset "_pushed" flag */
1895 *lexvalue = _lexval;
1896 *lexsym = _lexstr;
1897 return _lextok;
1898 } /* if */
1899
1900 _lextok = 0; /* preset all values */
1901 _lexval = 0;
1902 _lexstr[0] = '\0';
1903 *lexvalue = _lexval;
1904 *lexsym = _lexstr;
1905 _lexnewline = FALSE;
1906 if (!freading)
1907 return 0;
1908
1909 newline = (lptr == pline); /* does lptr point to start of line buffer */
1910 while (*lptr <= ' ')
1911 { /* delete leading white space */
1912 if (*lptr == '\0')
1913 {
1914 preprocess(); /* preprocess resets "lptr" */
1915 if (!freading)
1916 return 0;
1917 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1918 return (_lextok = tENDEXPR);
1919 _lexnewline = TRUE; /* set this after preprocess(), because
1920 * preprocess() calls lex() recursively */
1921 newline = TRUE;
1922 }
1923 else
1924 {
1925 lptr += 1;
1926 } /* if */
1927 } /* while */
1928 if (newline)
1929 {
1930 stmtindent = 0;
1931 for (i = 0; i < (int)(lptr - pline); i++)
1932 if (pline[i] == '\t' && sc_tabsize > 0)
1933 stmtindent +=
1934 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1935 else
1936 stmtindent++;
1937 } /* if */
1938
1939 i = tFIRST;
1940 tokptr = sc_tokens;
1941 while (i <= tMIDDLE)
1942 { /* match multi-character operators */
1943 if (match(*tokptr, FALSE))
1944 {
1945 _lextok = i;
1946 return _lextok;
1947 } /* if */
1948 i += 1;
1949 tokptr += 1;
1950 } /* while */
1951 while (i <= tLAST)
1952 { /* match reserved words and compiler directives */
1953 if (match(*tokptr, TRUE))
1954 {
1955 _lextok = i;
1956 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1957 return _lextok;
1958 } /* if */
1959 i += 1;
1960 tokptr += 1;
1961 } /* while */
1962
1963 if ((i = number(&_lexval, lptr)) != 0)
1964 { /* number */
1965 _lextok = tNUMBER;
1966 *lexvalue = _lexval;
1967 lptr += i;
1968 }
1969 else if ((i = ftoi(&_lexval, lptr)) != 0)
1970 {
1971 _lextok = tRATIONAL;
1972 *lexvalue = _lexval;
1973 lptr += i;
1974 }
1975 else if (alpha(*lptr))
1976 { /* symbol or label */
1977 /* Note: only sNAMEMAX characters are significant. The compiler
1978 * generates a warning if a symbol exceeds this length.
1979 */
1980 _lextok = tSYMBOL;
1981 i = 0;
1982 toolong = 0;
1983 while (alphanum(*lptr))
1984 {
1985 _lexstr[i] = *lptr;
1986 lptr += 1;
1987 if (i < sNAMEMAX)
1988 i += 1;
1989 else
1990 toolong = 1;
1991 } /* while */
1992 _lexstr[i] = '\0';
1993 if (toolong)
1994 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1995 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
1996 {
1997 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
1998 }
1999 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
2000 {
2001 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
2002 } /* if */
2003 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
2004 {
2005 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
2006 lptr += 1; /* skip colon */
2007 } /* if */
2008 }
2009 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
2010 { /* unpacked string literal */
2011 _lextok = tSTRING;
2012 rawstring = (*lptr == sc_ctrlchar);
2013 *lexvalue = _lexval = litidx;
2014 lptr += 1; /* skip double quote */
2015 if (rawstring)
2016 lptr += 1; /* skip "escape" character too */
2017 lptr =
2018 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2019 rawstring);
2020 if (*lptr == '\"')
2021 lptr += 1; /* skip final quote */
2022 else
2023 error(37); /* invalid (non-terminated) string */
2024 }
2025 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2026 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2027 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2028 && *(lptr + 2) == '\"'))
2029 { /* packed string literal */
2030 _lextok = tSTRING;
2031 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2032 *lexvalue = _lexval = litidx;
2033 lptr += 2; /* skip exclamation point and double quote */
2034 if (rawstring)
2035 lptr += 1; /* skip "escape" character too */
2036 lptr =
2037 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2038 rawstring);
2039 if (*lptr == '\"')
2040 lptr += 1; /* skip final quote */
2041 else
2042 error(37); /* invalid (non-terminated) string */
2043 }
2044 else if (*lptr == '\'')
2045 { /* character literal */
2046 lptr += 1; /* skip quote */
2047 _lextok = tNUMBER;
2048 *lexvalue = _lexval = litchar(&lptr, FALSE);
2049 if (*lptr == '\'')
2050 lptr += 1; /* skip final quote */
2051 else
2052 error(27); /* invalid character constant (must be one character) */
2053 }
2054 else if (*lptr == ';')
2055 { /* semicolumn resets "error" flag */
2056 _lextok = ';';
2057 lptr += 1;
2058 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2059 }
2060 else
2061 {
2062 _lextok = *lptr; /* if every match fails, return the character */
2063 lptr += 1; /* increase the "lptr" pointer */
2064 } /* if */
2065 return _lextok;
2066}
2067
2068/* lexpush
2069 *
2070 * Pushes a token back, so the next call to lex() will return the token
2071 * last examined, instead of a new token.
2072 *
2073 * Only one token can be pushed back.
2074 *
2075 * In fact, lex() already stores the information it finds into global
2076 * variables, so all that is to be done is set a flag that informs lex()
2077 * to read and return the information from these variables, rather than
2078 * to read in a new token from the input file.
2079 */
2080void
2081lexpush(void)
2082{
2083 assert(_pushed == FALSE);
2084 _pushed = TRUE;
2085}
2086
2087/* lexclr
2088 *
2089 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2090 * symbol (a not continue with some old one). This is required upon return
2091 * from Assembler mode.
2092 */
2093void
2094lexclr(int clreol)
2095{
2096 _pushed = FALSE;
2097 if (clreol)
2098 {
2099 lptr = strchr(pline, '\0');
2100 assert(lptr != NULL);
2101 } /* if */
2102}
2103
2104/* matchtoken
2105 *
2106 * This routine is useful if only a simple check is needed. If the token
2107 * differs from the one expected, it is pushed back.
2108 */
2109int
2110matchtoken(int token)
2111{
2112 cell val;
2113 char *str;
2114 int tok;
2115
2116 tok = lex(&val, &str);
2117 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2118 {
2119 return 1;
2120 }
2121 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2122 {
2123 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2124 return 1;
2125 }
2126 else
2127 {
2128 lexpush();
2129 return 0;
2130 } /* if */
2131}
2132
2133/* tokeninfo
2134 *
2135 * Returns additional information of a token after using "matchtoken()"
2136 * or needtoken(). It does no harm using this routine after a call to
2137 * "lex()", but lex() already returns the same information.
2138 *
2139 * The token itself is the return value. Normally, this one is already known.
2140 */
2141int
2142tokeninfo(cell * val, char **str)
2143{
2144 /* if the token was pushed back, tokeninfo() returns the token and
2145 * parameters of the *next* token, not of the *current* token.
2146 */
2147 assert(!_pushed);
2148 *val = _lexval;
2149 *str = _lexstr;
2150 return _lextok;
2151}
2152
2153/* needtoken
2154 *
2155 * This routine checks for a required token and gives an error message if
2156 * it isn't there (and returns FALSE in that case).
2157 *
2158 * Global references: _lextok;
2159 */
2160int
2161needtoken(int token)
2162{
2163 char s1[20], s2[20];
2164
2165 if (matchtoken(token))
2166 {
2167 return TRUE;
2168 }
2169 else
2170 {
2171 /* token already pushed back */
2172 assert(_pushed);
2173 if (token < 256)
2174 sprintf(s1, "%c", (char)token); /* single character token */
2175 else
2176 strcpy(s1, sc_tokens[token - tFIRST]); /* multi-character symbol */
2177 if (!freading)
2178 strcpy(s2, "-end of file-");
2179 else if (_lextok < 256)
2180 sprintf(s2, "%c", (char)_lextok);
2181 else
2182 strcpy(s2, sc_tokens[_lextok - tFIRST]);
2183 error(1, s1, s2); /* expected ..., but found ... */
2184 return FALSE;
2185 } /* if */
2186}
2187
2188/* match
2189 *
2190 * Compares a series of characters from the input file with the characters
2191 * in "st" (that contains a token). If the token on the input file matches
2192 * "st", the input file pointer "lptr" is adjusted to point to the next
2193 * token, otherwise "lptr" remains unaltered.
2194 *
2195 * If the parameter "end: is true, match() requires that the first character
2196 * behind the recognized token is non-alphanumeric.
2197 *
2198 * Global references: lptr (altered)
2199 */
2200static int
2201match(char *st, int end)
2202{
2203 int k;
2204 char *ptr;
2205
2206 k = 0;
2207 ptr = lptr;
2208 while (st[k])
2209 {
2210 if (st[k] != *ptr)
2211 return 0;
2212 k += 1;
2213 ptr += 1;
2214 } /* while */
2215 if (end)
2216 { /* symbol must terminate with non-alphanumeric char */
2217 if (alphanum(*ptr))
2218 return 0;
2219 } /* if */
2220 lptr = ptr; /* match found, skip symbol */
2221 return 1;
2222}
2223
2224/* stowlit
2225 *
2226 * Stores a value into the literal queue. The literal queue is used for
2227 * literal strings used in functions and for initializing array variables.
2228 *
2229 * Global references: litidx (altered)
2230 * litq (altered)
2231 */
2232void
2233stowlit(cell value)
2234{
2235 if (litidx >= litmax)
2236 {
2237 cell *p;
2238
2239 litmax += sDEF_LITMAX;
2240 p = (cell *) realloc(litq, litmax * sizeof(cell));
2241 if (!p)
2242 error(102, "literal table"); /* literal table overflow (fatal error) */
2243 litq = p;
2244 } /* if */
2245 assert(litidx < litmax);
2246 litq[litidx++] = value;
2247}
2248
2249/* litchar
2250 *
2251 * Return current literal character and increase the pointer to point
2252 * just behind this literal character.
2253 *
2254 * Note: standard "escape sequences" are suported, but the backslash may be
2255 * replaced by another character; the syntax '\ddd' is supported,
2256 * but ddd must be decimal!
2257 */
2258static cell
2259litchar(char **lptr, int rawmode)
2260{
2261 cell c = 0;
2262 unsigned char *cptr;
2263
2264 cptr = (unsigned char *)*lptr;
2265 if (rawmode || *cptr != sc_ctrlchar)
2266 { /* no escape character */
2267 c = *cptr;
2268 cptr += 1;
2269 }
2270 else
2271 {
2272 cptr += 1;
2273 if (*cptr == sc_ctrlchar)
2274 {
2275 c = *cptr; /* \\ == \ (the escape character itself) */
2276 cptr += 1;
2277 }
2278 else
2279 {
2280 switch (*cptr)
2281 {
2282 case 'a': /* \a == audible alarm */
2283 c = 7;
2284 cptr += 1;
2285 break;
2286 case 'b': /* \b == backspace */
2287 c = 8;
2288 cptr += 1;
2289 break;
2290 case 'e': /* \e == escape */
2291 c = 27;
2292 cptr += 1;
2293 break;
2294 case 'f': /* \f == form feed */
2295 c = 12;
2296 cptr += 1;
2297 break;
2298 case 'n': /* \n == NewLine character */
2299 c = 10;
2300 cptr += 1;
2301 break;
2302 case 'r': /* \r == carriage return */
2303 c = 13;
2304 cptr += 1;
2305 break;
2306 case 't': /* \t == horizontal TAB */
2307 c = 9;
2308 cptr += 1;
2309 break;
2310 case 'v': /* \v == vertical TAB */
2311 c = 11;
2312 cptr += 1;
2313 break;
2314 case '\'': /* \' == ' (single quote) */
2315 case '"': /* \" == " (single quote) */
2316 case '%': /* \% == % (percent) */
2317 c = *cptr;
2318 cptr += 1;
2319 break;
2320 default:
2321 if (isdigit(*cptr))
2322 { /* \ddd */
2323 c = 0;
2324 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2325 c = c * 10 + *cptr++ - '0';
2326 if (*cptr == ';')
2327 cptr++; /* swallow a trailing ';' */
2328 }
2329 else
2330 {
2331 error(27); /* invalid character constant */
2332 } /* if */
2333 } /* switch */
2334 } /* if */
2335 } /* if */
2336 *lptr = (char *)cptr;
2337 assert(c >= 0 && c < 256);
2338 return c;
2339}
2340
2341/* alpha
2342 *
2343 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2344 * or an "at" sign ("@"). The "@" is an extension to standard C.
2345 */
2346static int
2347alpha(char c)
2348{
2349 return (isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2350}
2351
2352/* alphanum
2353 *
2354 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2355 */
2356int
2357alphanum(char c)
2358{
2359 return (alpha(c) || isdigit(c));
2360}
2361
2362/* The local variable table must be searched backwards, so that the deepest
2363 * nesting of local variables is searched first. The simplest way to do
2364 * this is to insert all new items at the head of the list.
2365 * In the global list, the symbols are kept in sorted order, so that the
2366 * public functions are written in sorted order.
2367 */
2368static symbol *
2369add_symbol(symbol * root, symbol * entry, int sort)
2370{
2371 symbol *newsym;
2372
2373 if (sort)
2374 while (root->next && strcmp(entry->name, root->next->name) > 0)
2375 root = root->next;
2376
2377 if (!(newsym = (symbol *)malloc(sizeof(symbol))))
2378 {
2379 error(103);
2380 return NULL;
2381 } /* if */
2382 memcpy(newsym, entry, sizeof(symbol));
2383 newsym->next = root->next;
2384 root->next = newsym;
2385 return newsym;
2386}
2387
2388static void
2389free_symbol(symbol * sym)
2390{
2391 arginfo *arg;
2392
2393 /* free all sub-symbol allocated memory blocks, depending on the
2394 * kind of the symbol
2395 */
2396 assert(sym != NULL);
2397 if (sym->ident == iFUNCTN)
2398 {
2399 /* run through the argument list; "default array" arguments
2400 * must be freed explicitly; the tag list must also be freed */
2401 assert(sym->dim.arglist != NULL);
2402 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2403 {
2404 if (arg->ident == iREFARRAY && arg->hasdefault)
2405 free(arg->defvalue.array.data);
2406 else if (arg->ident == iVARIABLE
2407 && ((arg->hasdefault & uSIZEOF) != 0
2408 || (arg->hasdefault & uTAGOF) != 0))
2409 free(arg->defvalue.size.symname);
2410 assert(arg->tags != NULL);
2411 free(arg->tags);
2412 } /* for */
2413 free(sym->dim.arglist);
2414 } /* if */
2415 assert(sym->refer != NULL);
2416 free(sym->refer);
2417 free(sym);
2418}
2419
2420void
2421delete_symbol(symbol * root, symbol * sym)
2422{
2423 /* find the symbol and its predecessor
2424 * (this function assumes that you will never delete a symbol that is not
2425 * in the table pointed at by "root")
2426 */
2427 assert(root != sym);
2428 while (root->next != sym)
2429 {
2430 root = root->next;
2431 assert(root != NULL);
2432 } /* while */
2433
2434 /* unlink it, then free it */
2435 root->next = sym->next;
2436 free_symbol(sym);
2437}
2438
2439void
2440delete_symbols(symbol * root, int level, int delete_labels,
2441 int delete_functions)
2442{
2443 symbol *sym;
2444
2445 /* erase only the symbols with a deeper nesting level than the
2446 * specified nesting level */
2447 while (root->next)
2448 {
2449 sym = root->next;
2450 if (sym->compound < level)
2451 break;
2452 if ((delete_labels || sym->ident != iLABEL)
2453 && (delete_functions || sym->ident != iFUNCTN
2454 || (sym->usage & uNATIVE) != 0) && (delete_functions
2455 || sym->ident != iCONSTEXPR
2456 || (sym->usage & uPREDEF) ==
2457 0) && (delete_functions
2458 || (sym->ident !=
2459 iVARIABLE
2460 && sym->ident !=
2461 iARRAY)))
2462 {
2463 root->next = sym->next;
2464 free_symbol(sym);
2465 }
2466 else
2467 {
2468 /* if the function was prototyped, but not implemented in this source,
2469 * mark it as such, so that its use can be flagged
2470 */
2471 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2472 sym->usage |= uMISSING;
2473 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2474 || sym->ident == iARRAY)
2475 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2476 /* for user defined operators, also remove the "prototyped" flag, as
2477 * user-defined operators *must* be declared before use
2478 */
2479 if (sym->ident == iFUNCTN && !isalpha(*sym->name)
2480 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2481 sym->usage &= ~uPROTOTYPED;
2482 root = sym; /* skip the symbol */
2483 } /* if */
2484 } /* if */
2485}
2486
2487/* The purpose of the hash is to reduce the frequency of a "name"
2488 * comparison (which is costly). There is little interest in avoiding
2489 * clusters in similar names, which is why this function is plain simple.
2490 */
2491unsigned int
2492namehash(char *name)
2493{
2494 unsigned char *ptr = (unsigned char *)name;
2495 int len = strlen(name);
2496
2497 if (len == 0)
2498 return 0L;
2499 assert(len < 256);
2500 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2501 (ptr[len >> 1Lu]);
2502}
2503
2504static symbol *
2505find_symbol(symbol * root, char *name, int fnumber)
2506{
2507 symbol *ptr = root->next;
2508 unsigned long hash = namehash(name);
2509
2510 while (ptr)
2511 {
2512 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2513 && !ptr->parent && (ptr->fnumber < 0
2514 || ptr->fnumber == fnumber))
2515 return ptr;
2516 ptr = ptr->next;
2517 } /* while */
2518 return NULL;
2519}
2520
2521static symbol *
2522find_symbol_child(symbol * root, symbol * sym)
2523{
2524 symbol *ptr = root->next;
2525
2526 while (ptr)
2527 {
2528 if (ptr->parent == sym)
2529 return ptr;
2530 ptr = ptr->next;
2531 } /* while */
2532 return NULL;
2533}
2534
2535/* Adds "bywhom" to the list of referrers of "entry". Typically,
2536 * bywhom will be the function that uses a variable or that calls
2537 * the function.
2538 */
2539int
2540refer_symbol(symbol * entry, symbol * bywhom)
2541{
2542 int count;
2543
2544 assert(bywhom != NULL); /* it makes no sense to add a "void" referrer */
2545 assert(entry != NULL);
2546 assert(entry->refer != NULL);
2547
2548 /* see if it is already there */
2549 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2550 count++)
2551 /* nothing */ ;
2552 if (count < entry->numrefers)
2553 {
2554 assert(entry->refer[count] == bywhom);
2555 return TRUE;
2556 } /* if */
2557
2558 /* see if there is an empty spot in the referrer list */
2559 for (count = 0; count < entry->numrefers && entry->refer[count];
2560 count++)
2561 /* nothing */ ;
2562 assert(count <= entry->numrefers);
2563 if (count == entry->numrefers)
2564 {
2565 symbol **refer;
2566 int newsize = 2 * entry->numrefers;
2567
2568 assert(newsize > 0);
2569 /* grow the referrer list */
2570 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2571 if (!refer)
2572 return FALSE; /* insufficient memory */
2573 /* initialize the new entries */
2574 entry->refer = refer;
2575 for (count = entry->numrefers; count < newsize; count++)
2576 entry->refer[count] = NULL;
2577 count = entry->numrefers; /* first empty spot */
2578 entry->numrefers = newsize;
2579 } /* if */
2580
2581 /* add the referrer */
2582 assert(entry->refer[count] == NULL);
2583 entry->refer[count] = bywhom;
2584 return TRUE;
2585}
2586
2587void
2588markusage(symbol * sym, int usage)
2589{
2590 sym->usage |= (char)usage;
2591 /* check if (global) reference must be added to the symbol */
2592 if ((usage & (uREAD | uWRITTEN)) != 0)
2593 {
2594 /* only do this for global symbols */
2595 if (sym->vclass == sGLOBAL)
2596 {
2597 /* "curfunc" should always be valid, since statements may not occurs
2598 * outside functions; in the case of syntax errors, however, the
2599 * compiler may arrive through this function
2600 */
2601 if (curfunc)
2602 refer_symbol(sym, curfunc);
2603 } /* if */
2604 } /* if */
2605}
2606
2607/* findglb
2608 *
2609 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2610 */
2611symbol *
2612findglb(char *name)
2613{
2614 return find_symbol(&glbtab, name, fcurrent);
2615}
2616
2617/* findloc
2618 *
2619 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2620 * See add_symbol() how the deepest nesting level is searched first.
2621 */
2622symbol *
2623findloc(char *name)
2624{
2625 return find_symbol(&loctab, name, -1);
2626}
2627
2628symbol *
2629findconst(char *name)
2630{
2631 symbol *sym;
2632
2633 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2634 if (!sym || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2635 sym = find_symbol(&glbtab, name, fcurrent);
2636 if (!sym || sym->ident != iCONSTEXPR)
2637 return NULL;
2638 assert(sym->parent == NULL); /* constants have no hierarchy */
2639 return sym;
2640}
2641
2642symbol *
2643finddepend(symbol * parent)
2644{
2645 symbol *sym;
2646
2647 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2648 if (!sym) /* not found */
2649 sym = find_symbol_child(&glbtab, parent);
2650 return sym;
2651}
2652
2653/* addsym
2654 *
2655 * Adds a symbol to the symbol table (either global or local variables,
2656 * or global and local constants).
2657 */
2658symbol *
2659addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2660{
2661 symbol entry, **refer;
2662
2663 /* global variables/constants/functions may only be defined once */
2664 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2665 || findglb(name) == NULL);
2666 /* labels may only be defined once */
2667 assert(ident != iLABEL || findloc(name) == NULL);
2668
2669 /* create an empty referrer list */
2670 if (!(refer = (symbol **)malloc(sizeof(symbol *))))
2671 {
2672 error(103); /* insufficient memory */
2673 return NULL;
2674 } /* if */
2675 *refer = NULL;
2676
2677 /* first fill in the entry */
2678 strcpy(entry.name, name);
2679 entry.hash = namehash(name);
2680 entry.addr = addr;
2681 entry.vclass = (char)vclass;
2682 entry.ident = (char)ident;
2683 entry.tag = tag;
2684 entry.usage = (char)usage;
2685 entry.compound = 0; /* may be overridden later */
2686 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2687 entry.numrefers = 1;
2688 entry.refer = refer;
2689 entry.parent = NULL;
2690
2691 /* then insert it in the list */
2692 if (vclass == sGLOBAL)
2693 return add_symbol(&glbtab, &entry, TRUE);
2694 else
2695 return add_symbol(&loctab, &entry, FALSE);
2696}
2697
2698symbol *
2699addvariable(char *name, cell addr, int ident, int vclass, int tag,
2700 int dim[], int numdim, int idxtag[])
2701{
2702 symbol *sym, *parent, *top;
2703 int level;
2704
2705 /* global variables may only be defined once */
2706 assert(vclass != sGLOBAL || (sym = findglb(name)) == NULL
2707 || (sym->usage & uDEFINE) == 0);
2708
2709 if (ident == iARRAY || ident == iREFARRAY)
2710 {
2711 parent = NULL;
2712 sym = NULL; /* to avoid a compiler warning */
2713 for (level = 0; level < numdim; level++)
2714 {
2715 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2716 top->dim.array.length = dim[level];
2717 top->dim.array.level = (short)(numdim - level - 1);
2718 top->x.idxtag = idxtag[level];
2719 top->parent = parent;
2720 parent = top;
2721 if (level == 0)
2722 sym = top;
2723 } /* for */
2724 }
2725 else
2726 {
2727 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2728 } /* if */
2729 return sym;
2730}
2731
2732/* getlabel
2733 *
2734 * Return next available internal label number.
2735 */
2736int
2737getlabel(void)
2738{
2739 return labnum++;
2740}
2741
2742/* itoh
2743 *
2744 * Converts a number to a hexadecimal string and returns a pointer to that
2745 * string.
2746 */
2747char *
2748itoh(ucell val)
2749{
2750 static char itohstr[15]; /* hex number is 10 characters long at most */
2751 char *ptr;
2752 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2753 int max;
2754
2755#if defined(BIT16)
2756 max = 4;
2757#else
2758 max = 8;
2759#endif
2760 ptr = itohstr;
2761 for (i = 0; i < max; i += 1)
2762 {
2763 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2764 val >>= 4;
2765 } /* endfor */
2766 i = max - 1;
2767 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2768 i -= 1;
2769 while (i >= 0)
2770 {
2771 if (nibble[i] >= 10)
2772 *ptr++ = (char)('a' + (nibble[i] - 10));
2773 else
2774 *ptr++ = (char)('0' + nibble[i]);
2775 i -= 1;
2776 } /* while */
2777 *ptr = '\0'; /* and a zero-terminator */
2778 return itohstr;
2779}