aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/embryo/src/bin/embryo_cc_sc7.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/embryo/src/bin/embryo_cc_sc7.c')
-rw-r--r--libraries/embryo/src/bin/embryo_cc_sc7.c688
1 files changed, 688 insertions, 0 deletions
diff --git a/libraries/embryo/src/bin/embryo_cc_sc7.c b/libraries/embryo/src/bin/embryo_cc_sc7.c
new file mode 100644
index 0000000..a2f9f4c
--- /dev/null
+++ b/libraries/embryo/src/bin/embryo_cc_sc7.c
@@ -0,0 +1,688 @@
1/* Small compiler - Staging buffer and optimizer
2 *
3 * The staging buffer
4 * ------------------
5 * The staging buffer allows buffered output of generated code, deletion
6 * of redundant code, optimization by a tinkering process and reversing
7 * the ouput of evaluated expressions (which is used for the reversed
8 * evaluation of arguments in functions).
9 * Initially, stgwrite() writes to the file directly, but after a call to
10 * stgset(TRUE), output is redirected to the buffer. After a call to
11 * stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
12 * only one routine is used for writing to the output, which can be
13 * buffered output or direct output.
14 *
15 * staging buffer variables: stgbuf - the buffer
16 * stgidx - current index in the staging buffer
17 * staging - if true, write to the staging buffer;
18 * if false, write to file directly.
19 *
20 * Copyright (c) ITB CompuPhase, 1997-2003
21 *
22 * This software is provided "as-is", without any express or implied warranty.
23 * In no event will the authors be held liable for any damages arising from
24 * the use of this software.
25 *
26 * Permission is granted to anyone to use this software for any purpose,
27 * including commercial applications, and to alter it and redistribute it
28 * freely, subject to the following restrictions:
29 *
30 * 1. The origin of this software must not be misrepresented; you must not
31 * claim that you wrote the original software. If you use this software in
32 * a product, an acknowledgment in the product documentation would be
33 * appreciated but is not required.
34 * 2. Altered source versions must be plainly marked as such, and must not be
35 * misrepresented as being the original software.
36 * 3. This notice may not be removed or altered from any source distribution.
37 *
38 * Version: $Id: embryo_cc_sc7.c 52451 2010-09-19 03:00:12Z raster $
39 */
40
41
42#ifdef HAVE_CONFIG_H
43# include <config.h>
44#endif
45
46#include <assert.h>
47#include <stdio.h>
48#include <stdlib.h> /* for atoi() */
49#include <string.h>
50#include <ctype.h>
51
52#include "embryo_cc_sc.h"
53
54#if defined _MSC_VER
55#pragma warning(push)
56#pragma warning(disable:4125) /* decimal digit terminates octal escape sequence */
57#endif
58
59#include "embryo_cc_sc7.scp"
60
61#if defined _MSC_VER
62#pragma warning(pop)
63#endif
64
65static void stgstring(char *start, char *end);
66static void stgopt(char *start, char *end);
67
68#define sSTG_GROW 512
69#define sSTG_MAX 20480
70
71static char *stgbuf = NULL;
72static int stgmax = 0; /* current size of the staging buffer */
73
74#define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1)
75
76static void
77grow_stgbuffer(int requiredsize)
78{
79 char *p;
80 int clear = !stgbuf; /* if previously none, empty buffer explicitly */
81
82 assert(stgmax < requiredsize);
83 /* if the staging buffer (holding intermediate code for one line) grows
84 * over a few kBytes, there is probably a run-away expression
85 */
86 if (requiredsize > sSTG_MAX)
87 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
88 stgmax = requiredsize + sSTG_GROW;
89 if (stgbuf)
90 p = (char *)realloc(stgbuf, stgmax * sizeof(char));
91 else
92 p = (char *)malloc(stgmax * sizeof(char));
93 if (!p)
94 error(102, "staging buffer"); /* staging buffer overflow (fatal error) */
95 stgbuf = p;
96 if (clear)
97 *stgbuf = '\0';
98}
99
100void
101stgbuffer_cleanup(void)
102{
103 if (stgbuf)
104 {
105 free(stgbuf);
106 stgbuf = NULL;
107 stgmax = 0;
108 } /* if */
109}
110
111/* the variables "stgidx" and "staging" are declared in "scvars.c" */
112
113/* stgmark
114 *
115 * Copies a mark into the staging buffer. At this moment there are three
116 * possible marks:
117 * sSTARTREORDER identifies the beginning of a series of expression
118 * strings that must be written to the output file in
119 * reordered order
120 * sENDREORDER identifies the end of 'reverse evaluation'
121 * sEXPRSTART + idx only valid within a block that is evaluated in
122 * reordered order, it identifies the start of an
123 * expression; the "idx" value is the argument position
124 *
125 * Global references: stgidx (altered)
126 * stgbuf (altered)
127 * staging (referred to only)
128 */
129void
130stgmark(char mark)
131{
132 if (staging)
133 {
134 CHECK_STGBUFFER(stgidx);
135 stgbuf[stgidx++] = mark;
136 } /* if */
137}
138
139static int
140filewrite(char *str)
141{
142 if (sc_status == statWRITE)
143 return sc_writeasm(outf, str);
144 return TRUE;
145}
146
147/* stgwrite
148 *
149 * Writes the string "st" to the staging buffer or to the output file. In the
150 * case of writing to the staging buffer, the terminating byte of zero is
151 * copied too, but... the optimizer can only work on complete lines (not on
152 * fractions of it. Therefore if the string is staged, if the last character
153 * written to the buffer is a '\0' and the previous-to-last is not a '\n',
154 * the string is concatenated to the last string in the buffer (the '\0' is
155 * overwritten). This also means an '\n' used in the middle of a string isn't
156 * recognized and could give wrong results with the optimizer.
157 * Even when writing to the output file directly, all strings are buffered
158 * until a whole line is complete.
159 *
160 * Global references: stgidx (altered)
161 * stgbuf (altered)
162 * staging (referred to only)
163 */
164void
165stgwrite(char *st)
166{
167 int len;
168
169 CHECK_STGBUFFER(0);
170 if (staging)
171 {
172 if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0'
173 && stgbuf[stgidx - 2] != '\n')
174 stgidx -= 1; /* overwrite last '\0' */
175 while (*st != '\0')
176 { /* copy to staging buffer */
177 CHECK_STGBUFFER(stgidx);
178 stgbuf[stgidx++] = *st++;
179 } /* while */
180 CHECK_STGBUFFER(stgidx);
181 stgbuf[stgidx++] = '\0';
182 }
183 else
184 {
185 CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1);
186 strcat(stgbuf, st);
187 len = strlen(stgbuf);
188 if (len > 0 && stgbuf[len - 1] == '\n')
189 {
190 filewrite(stgbuf);
191 stgbuf[0] = '\0';
192 } /* if */
193 } /* if */
194}
195
196/* stgout
197 *
198 * Writes the staging buffer to the output file via stgstring() (for
199 * reversing expressions in the buffer) and stgopt() (for optimizing). It
200 * resets "stgidx".
201 *
202 * Global references: stgidx (altered)
203 * stgbuf (referred to only)
204 * staging (referred to only)
205 */
206void
207stgout(int index)
208{
209 if (!staging)
210 return;
211 stgstring(&stgbuf[index], &stgbuf[stgidx]);
212 stgidx = index;
213}
214
215typedef struct
216{
217 char *start, *end;
218} argstack;
219
220/* stgstring
221 *
222 * Analyses whether code strings should be output to the file as they appear
223 * in the staging buffer or whether portions of it should be re-ordered.
224 * Re-ordering takes place in function argument lists; Small passes arguments
225 * to functions from right to left. When arguments are "named" rather than
226 * positional, the order in the source stream is indeterminate.
227 * This function calls itself recursively in case it needs to re-order code
228 * strings, and it uses a private stack (or list) to mark the start and the
229 * end of expressions in their correct (reversed) order.
230 * In any case, stgstring() sends a block as large as possible to the
231 * optimizer stgopt().
232 *
233 * In "reorder" mode, each set of code strings must start with the token
234 * sEXPRSTART, even the first. If the token sSTARTREORDER is represented
235 * by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
236 * '[]...' valid, but useless; no output
237 * '[|...] valid, but useless; only one string
238 * '[|...|...] valid and useful
239 * '[...|...] invalid, first string doesn't start with '|'
240 * '[|...|] invalid
241 */
242static void
243stgstring(char *start, char *end)
244{
245 char *ptr;
246 int nest, argc, arg;
247 argstack *stack;
248
249 while (start < end)
250 {
251 if (*start == sSTARTREORDER)
252 {
253 start += 1; /* skip token */
254 /* allocate a argstack with sMAXARGS items */
255 stack = (argstack *) malloc(sMAXARGS * sizeof(argstack));
256 if (!stack)
257 error(103); /* insufficient memory */
258 nest = 1; /* nesting counter */
259 argc = 0; /* argument counter */
260 arg = -1; /* argument index; no valid argument yet */
261 do
262 {
263 switch (*start)
264 {
265 case sSTARTREORDER:
266 nest++;
267 start++;
268 break;
269 case sENDREORDER:
270 nest--;
271 start++;
272 break;
273 default:
274 if ((*start & sEXPRSTART) == sEXPRSTART)
275 {
276 if (nest == 1)
277 {
278 if (arg >= 0)
279 stack[arg].end = start - 1; /* finish previous argument */
280 arg = (unsigned char)*start - sEXPRSTART;
281 stack[arg].start = start + 1;
282 if (arg >= argc)
283 argc = arg + 1;
284 } /* if */
285 start++;
286 }
287 else
288 {
289 start += strlen(start) + 1;
290 } /* if */
291 } /* switch */
292 }
293 while (nest); /* enddo */
294 if (arg >= 0)
295 stack[arg].end = start - 1; /* finish previous argument */
296 while (argc > 0)
297 {
298 argc--;
299 stgstring(stack[argc].start, stack[argc].end);
300 } /* while */
301 free(stack);
302 }
303 else
304 {
305 ptr = start;
306 while (ptr < end && *ptr != sSTARTREORDER)
307 ptr += strlen(ptr) + 1;
308 stgopt(start, ptr);
309 start = ptr;
310 } /* if */
311 } /* while */
312}
313
314/* stgdel
315 *
316 * Scraps code from the staging buffer by resetting "stgidx" to "index".
317 *
318 * Global references: stgidx (altered)
319 * staging (referred to only)
320 */
321void
322stgdel(int index, cell code_index)
323{
324 if (staging)
325 {
326 stgidx = index;
327 code_idx = code_index;
328 } /* if */
329}
330
331int
332stgget(int *index, cell * code_index)
333{
334 if (staging)
335 {
336 *index = stgidx;
337 *code_index = code_idx;
338 } /* if */
339 return staging;
340}
341
342/* stgset
343 *
344 * Sets staging on or off. If it's turned off, the staging buffer must be
345 * initialized to an empty string. If it's turned on, the routine makes sure
346 * the index ("stgidx") is set to 0 (it should already be 0).
347 *
348 * Global references: staging (altered)
349 * stgidx (altered)
350 * stgbuf (contents altered)
351 */
352void
353stgset(int onoff)
354{
355 staging = onoff;
356 if (staging)
357 {
358 assert(stgidx == 0);
359 stgidx = 0;
360 CHECK_STGBUFFER(stgidx);
361 /* write any contents that may be put in the buffer by stgwrite()
362 * when "staging" was 0
363 */
364 if (stgbuf[0] != '\0')
365 filewrite(stgbuf);
366 } /* if */
367 stgbuf[0] = '\0';
368}
369
370/* phopt_init
371 * Initialize all sequence strings of the peehole optimizer. The strings
372 * are embedded in the .EXE file in compressed format, here we expand
373 * them (and allocate memory for the sequences).
374 */
375static SEQUENCE *sequences;
376
377int
378phopt_init(void)
379{
380 int number, i, len;
381 char str[160];
382
383 /* count number of sequences */
384 for (number = 0; sequences_cmp[number].find; number++)
385 /* nothing */ ;
386 number++; /* include an item for the NULL terminator */
387
388 if (!(sequences = (SEQUENCE *)malloc(number * sizeof(SEQUENCE))))
389 return FALSE;
390
391 /* pre-initialize all to NULL (in case of failure) */
392 for (i = 0; i < number; i++)
393 {
394 sequences[i].find = NULL;
395 sequences[i].replace = NULL;
396 sequences[i].savesize = 0;
397 } /* for */
398
399 /* expand all strings */
400 for (i = 0; i < number - 1; i++)
401 {
402 len =
403 strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str,
404 SCPACK_TABLE);
405 assert(len <= (int)(sizeof(str)));
406 assert(len == (int)(strlen(str) + 1));
407 sequences[i].find = (char *)malloc(len);
408 if (sequences[i].find)
409 strcpy(sequences[i].find, str);
410 len =
411 strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str,
412 SCPACK_TABLE);
413 assert(len <= (int)(sizeof(str)));
414 assert(len == (int)(strlen(str) + 1));
415 sequences[i].replace = (char *)malloc(len);
416 if (sequences[i].replace)
417 strcpy(sequences[i].replace, str);
418 sequences[i].savesize = sequences_cmp[i].savesize;
419 if (!sequences[i].find || !sequences[i].replace)
420 return phopt_cleanup();
421 } /* for */
422
423 return TRUE;
424}
425
426int
427phopt_cleanup(void)
428{
429 int i;
430
431 if (sequences)
432 {
433 i = 0;
434 while (sequences[i].find || sequences[i].replace)
435 {
436 if (sequences[i].find)
437 free(sequences[i].find);
438 if (sequences[i].replace)
439 free(sequences[i].replace);
440 i++;
441 } /* while */
442 free(sequences);
443 sequences = NULL;
444 } /* if */
445 return FALSE;
446}
447
448#define _maxoptvars 4
449#define _aliasmax 10 /* a 32-bit number can be represented in
450 * 9 decimal digits */
451
452static int
453matchsequence(char *start, char *end, char *pattern,
454 char symbols[_maxoptvars][_aliasmax + 1], int *match_length)
455{
456 int var, i;
457 char str[_aliasmax + 1];
458 char *start_org = start;
459
460 *match_length = 0;
461 for (var = 0; var < _maxoptvars; var++)
462 symbols[var][0] = '\0';
463
464 while (*start == '\t' || *start == ' ')
465 start++;
466 while (*pattern)
467 {
468 if (start >= end)
469 return FALSE;
470 switch (*pattern)
471 {
472 case '%': /* new "symbol" */
473 pattern++;
474 assert(isdigit(*pattern));
475 var = atoi(pattern) - 1;
476 assert(var >= 0 && var < _maxoptvars);
477 assert(alphanum(*start));
478 for (i = 0; start < end && alphanum(*start); i++, start++)
479 {
480 assert(i <= _aliasmax);
481 str[i] = *start;
482 } /* for */
483 str[i] = '\0';
484 if (symbols[var][0] != '\0')
485 {
486 if (strcmp(symbols[var], str) != 0)
487 return FALSE; /* symbols should be identical */
488 }
489 else
490 {
491 strcpy(symbols[var], str);
492 } /* if */
493 break;
494 case ' ':
495 if (*start != '\t' && *start != ' ')
496 return FALSE;
497 while ((start < end && *start == '\t') || *start == ' ')
498 start++;
499 break;
500 case '!':
501 while ((start < end && *start == '\t') || *start == ' ')
502 start++; /* skip trailing white space */
503 if (*start != '\n')
504 return FALSE;
505 assert(*(start + 1) == '\0');
506 start += 2; /* skip '\n' and '\0' */
507 if (*(pattern + 1) != '\0')
508 while ((start < end && *start == '\t') || *start == ' ')
509 start++; /* skip leading white space of next instruction */
510 break;
511 default:
512 if (tolower(*start) != tolower(*pattern))
513 return FALSE;
514 start++;
515 } /* switch */
516 pattern++;
517 } /* while */
518
519 *match_length = (int)(start - start_org);
520 return TRUE;
521}
522
523static char *
524replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1],
525 int *repl_length)
526{
527 char *lptr;
528 int var;
529 char *buffer;
530
531 /* calculate the length of the new buffer
532 * this is the length of the pattern plus the length of all symbols (note
533 * that the same symbol may occur multiple times in the pattern) plus
534 * line endings and startings ('\t' to start a line and '\n\0' to end one)
535 */
536 assert(repl_length != NULL);
537 *repl_length = 0;
538 lptr = pattern;
539 while (*lptr)
540 {
541 switch (*lptr)
542 {
543 case '%':
544 lptr++; /* skip '%' */
545 assert(isdigit(*lptr));
546 var = atoi(lptr) - 1;
547 assert(var >= 0 && var < _maxoptvars);
548 assert(symbols[var][0] != '\0'); /* variable should be defined */
549 *repl_length += strlen(symbols[var]);
550 break;
551 case '!':
552 *repl_length += 3; /* '\t', '\n' & '\0' */
553 break;
554 default:
555 *repl_length += 1;
556 } /* switch */
557 lptr++;
558 } /* while */
559
560 /* allocate a buffer to replace the sequence in */
561 if (!(buffer = malloc(*repl_length)))
562 {
563 error(103);
564 return NULL;
565 }
566
567 /* replace the pattern into this temporary buffer */
568 lptr = buffer;
569 *lptr++ = '\t'; /* the "replace" patterns do not have tabs */
570 while (*pattern)
571 {
572 assert((int)(lptr - buffer) < *repl_length);
573 switch (*pattern)
574 {
575 case '%':
576 /* write out the symbol */
577 pattern++;
578 assert(isdigit(*pattern));
579 var = atoi(pattern) - 1;
580 assert(var >= 0 && var < _maxoptvars);
581 assert(symbols[var][0] != '\0'); /* variable should be defined */
582 strcpy(lptr, symbols[var]);
583 lptr += strlen(symbols[var]);
584 break;
585 case '!':
586 /* finish the line, optionally start the next line with an indent */
587 *lptr++ = '\n';
588 *lptr++ = '\0';
589 if (*(pattern + 1) != '\0')
590 *lptr++ = '\t';
591 break;
592 default:
593 *lptr++ = *pattern;
594 } /* switch */
595 pattern++;
596 } /* while */
597
598 assert((int)(lptr - buffer) == *repl_length);
599 return buffer;
600}
601
602static void
603strreplace(char *dest, char *replace, int sub_length, int repl_length,
604 int dest_length)
605{
606 int offset = sub_length - repl_length;
607
608 if (offset > 0) /* delete a section */
609 memmove(dest, dest + offset, dest_length - offset);
610 else if (offset < 0) /* insert a section */
611 memmove(dest - offset, dest, dest_length);
612 memcpy(dest, replace, repl_length);
613}
614
615/* stgopt
616 *
617 * Optimizes the staging buffer by checking for series of instructions that
618 * can be coded more compact. The routine expects the lines in the staging
619 * buffer to be separated with '\n' and '\0' characters.
620 *
621 * The longest sequences must be checked first.
622 */
623
624static void
625stgopt(char *start, char *end)
626{
627 char symbols[_maxoptvars][_aliasmax + 1];
628 int seq, match_length, repl_length;
629
630 assert(sequences != NULL);
631 while (start < end)
632 {
633 if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE)
634 {
635 /* do not match anything if debug-level is maximum */
636 filewrite(start);
637 }
638 else
639 {
640 seq = 0;
641 while (sequences[seq].find)
642 {
643 assert(seq >= 0);
644 if (matchsequence
645 (start, end, sequences[seq].find, symbols, &match_length))
646 {
647 char *replace =
648 replacesequence(sequences[seq].replace, symbols,
649 &repl_length);
650 /* If the replacement is bigger than the original section, we may need
651 * to "grow" the staging buffer. This is quite complex, due to the
652 * re-ordering of expressions that can also happen in the staging
653 * buffer. In addition, it should not happen: the peephole optimizer
654 * must replace sequences with *shorter* sequences, not longer ones.
655 * So, I simply forbid sequences that are longer than the ones they
656 * are meant to replace.
657 */
658 assert(match_length >= repl_length);
659 if (match_length >= repl_length)
660 {
661 strreplace(start, replace, match_length,
662 repl_length, (int)(end - start));
663 end -= match_length - repl_length;
664 free(replace);
665 code_idx -= sequences[seq].savesize;
666 seq = 0; /* restart search for matches */
667 }
668 else
669 {
670 /* actually, we should never get here (match_length<repl_length) */
671 assert(0);
672 seq++;
673 } /* if */
674 }
675 else
676 {
677 seq++;
678 } /* if */
679 } /* while */
680 assert(sequences[seq].find == NULL);
681 filewrite(start);
682 } /* if */
683 assert(start < end);
684 start += strlen(start) + 1; /* to next string */
685 } /* while (start<end) */
686}
687
688#undef SCPACK_TABLE