Препроцессор языка СИ
Лабораторная работа 1
Задание:
Построить лексический анализатор (сканер), который будет в
дальнейшем использоваться при разборе HTML-документов.
Текст программы:
#include <io.h>
#include <ctype.h>
#include <string.h>
#include <alloc.h>
#include "parser\htm_cnst.h"
#include "parser\htm_glob.h"
#include "parser\htm_tokn.h"
#include "parser\htm_err.h"
#include "parse.h"
#pragma hdrstop
/* Semantic value shared with the parser; yylex() stores the token text in yylval.pchar. */
extern YYSTYPE yylval;
/* Lexer states; the current one is kept in `state`. */
enum {TEXT=0, PRE, KEYWORD, ATTR, AVALUE, IN_QUOTE};
/* Printable names of the lexer states, listed in the same order as the enum above. */
char *states [ 6] = {"TEXT", "PRE", "KEYWORD", "ATTR", "AVALUE", "IN_QUOTE"};
/* Printable names of the token types. */
/* NOTE(review): the numeric values of the token constants are declared elsewhere; */
/* confirm that this order matches them before indexing by token type. */
char *tktypes[10] = {"_OpenTag", "_CloseTag", "_EndTag", "_C_KEYWORD",
"_S_KEYWORD", "_A_KEYWORD", "_V_KEYWORD", "_NUM",
"_IDENTIFIER", "_QUOTED_ATTR"};
/* text, preformatted text, HTML KEYWORD, attribute KEYWORD, */
/* attribute value KEYWORD */
/* Set by nexttok(): TRUE after a closing-tag opener was recognised. */
int in_close = FALSE;
/* Set by nexttok(): TRUE after an opening '<' was recognised. */
int in_open = FALSE;
/* Maintained by yylex(); presumably tracks whether a P element is currently open. */
int opened_par = FALSE;
/* Current lexer state (one of the enum values above). */
int state = TEXT;
/* State saved by nexttok() when it enters a tag; also adjusted when the PRE keyword is seen. */
int old_state = TEXT;
/* Not referenced by the code visible in this listing. */
int cUKSZ = 0;
/* TRUE while yylex() still has buffered tokens in lex_buff to hand out. */
int USE_BUFFER = FALSE;
/* Number of tokens currently held in lex_buff. */
int lex_buff_size = 0;
/* One buffered token: its type and its text. */
typedef struct {
int tktyp;
char tkval[NMSZ];
} tbuff;
/* Pending tokens; yylex() fills it and then pops from the highest index down. */
tbuff lex_buff[5];
/* Lookahead character: nexttok() treats it as always one character ahead of the input. */
int c;
/* Not referenced by the code visible in this listing. */
int lineno;
/* Incremented by nexttok() after every nextchar() call. */
long charno;
/* Not referenced by the code visible in this listing. */
long f_size;
/* Not referenced by the code visible in this listing. */
int icm;
/* !!!!!! */
/* Not referenced by the code visible in this listing. */
char comment[CMSZ];
/* !!!!!! */
/* Keyword table entry: spelling, token type and a per-keyword flag read by yylex(). */
typedef struct {
char name[TKSZ];
int kw_token;
int in_paragraph;
} kw_table;
/********** functions declarations ************/
/* fixfile, nextchar, nlproc and lexinit are not defined in the visible part of the listing. */
void fixfile(FILE *, char*);
int nextchar(FILE *, FILE *);
void nlproc (FILE *);
int bsearch (char *, kw_table *, int);
int nexttok (char *);
void lexinit();
int yylex();
/**********************************************/
/* Possible KEYWORDS - directives */
kw_table keyword_table[KWSZ] = {
- 2 -
{"A", _C_KEYWORD, TRUE },
// ...
{"WBR", _S_KEYWORD, TRUE }
};
/* Possible KEYWORDS - attributes */
/*
 * Attribute-name table. nexttok() searches it with bsearch() when a word is
 * not found in keyword_table, so the entries must stay sorted in strcmp()
 * order. Only name and kw_token are given; in_paragraph is left to its
 * zero default. The `// ...` line presumably stands for omitted entries.
 */
kw_table attr_table[ATSZ] = {
{"ALIGN", _A_KEYWORD },
// ...
{"WRAP", _A_KEYWORD }
};
/* Possible KEYWORDS - attribute's values */
/*
 * Attribute-value table. It is not referenced by the code visible in this
 * listing. Only name and kw_token are given; in_paragraph is left to its
 * zero default. The `// ...` line presumably stands for omitted entries.
 */
kw_table aval_table[AVSZ] = {
{"ABSBOTTOM", _V_KEYWORD },
// ...
{"_top", _V_KEYWORD }
};
/*
 * Functions
 */
/*
 * Binary search for a name in a name table. Returns the index of the
 * matching element, or -1 if nothing was found.
 * The table is an array of records, each holding a NUL-terminated ASCII
 * keyword string and the int token type of that keyword. The records
 * must be sorted in strcmp() order for the search to work.
 */
int bsearch( char word[], kw_table word_table[], int tbsize)
{
    int lo = 0;
    int hi = tbsize - 1;

    while (lo <= hi) {
        int mid = (lo + hi) / 2;
        int cmp = strcmp(word, word_table[mid].name);

        if (cmp == 0)
            return mid;
        if (cmp < 0)
            hi = mid - 1;       /* continue in the lower half */
        else
            lo = mid + 1;       /* continue in the upper half */
    }
    return -1;
}
int nexttok(char *val)
{
register int i, i2;
char tokenvalue[NMSZ];
int c1, tokentype, tokenend;
i = 0;
while (isspace(c)) /* c is always one char ahead */ {
if (c == '\n') nlproc(listfp);
if (state == PRE) {
tokenvalue[0] = c; tokenvalue[1] = '\0';
tokentype = _IDENTIFIER;
c = nextchar(infp, listfp); charno++;
strcpy(val, tokenvalue); return(tokentype);
}
c = nextchar(infp, listfp); charno++;
- 3 -
}
if ( ( (state == TEXT) || (state == PRE) ) && (c == '<') ) {
ungetc(c1 = getc(infp), infp);
old_state = state; state = KEYWORD;
if (c1 == '\/') {
c = nextchar(infp, listfp); charno++;
c = nextchar(infp, listfp); charno++;
tokenvalue[0] = '<'; tokenvalue[1] = '\/'; tokenvalue[2] = '\0';
tokentype = _CloseTag; in_close = TRUE; in_open = FALSE;
strcpy(val, tokenvalue); return(tokentype);
} else {
c = nextchar(infp, listfp); charno++;
tokenvalue[0] = '<'; tokenvalue[1] = '\0';
tokentype = _OpenTag; in_close = FALSE; in_open = TRUE;
strcpy(val, tokenvalue); return(tokentype);
}
}
if (state == KEYWORD) {
if (c == '\!') { /* Comments! */
while ((c != '>') && ( c != EOF )) {
c = nextchar(infp, listfp); charno++;
}
state = ATTR;
tokenvalue[0] = '\0'; tokentype = _IDENTIFIER;
strcpy(val, tokenvalue); return(tokentype);
}
while (isalnum(c)) {
tokenvalue[i++] = toupper(c);
c = nextchar(infp, listfp); charno++;
}
tokenvalue[i++] = '\0';
if ((i = bsearch(tokenvalue, keyword_table, KWSZ)) >= 0) {
tokentype = keyword_table[i].kw_token;
state = ATTR;
if ( strcmp("PRE", tokenvalue) == 0 ) {
if (in_close) old_state = TEXT;
else old_state = PRE;
}
strcpy(val, tokenvalue); return(tokentype);
} else {
if ((i = bsearch(tokenvalue, attr_table, ATSZ)) >= 0) {
tokentype = attr_table[i].kw_token;
state = AVALUE;
strcpy(val, tokenvalue); return(tokentype);
} else { /* Unknown attribute. Actually, it's much more easier */
/* just ignoring it in YACC then trying to skip it here*/
tokentype = _IDENTIFIER;
state = AVALUE;
strcpy(val, tokenvalue); return(tokentype);
}
}
}
if (state == IN_QUOTE) {
if ( c == '\"' ) {
c = nextchar(infp, listfp); charno++;
state = ATTR;
tokenvalue[0] = '\"'; tokenvalue[1] = '\0';
tokentype = '\"';
strcpy(val, tokenvalue); return(tokentype);
}
tokentype = _QUOTED_ATTR; /* maybe URL, maybe rain, maybe snow... */
/* state = IN_QUOTE; */
while ( c != '\"' ) {
tokenvalue[i++] = c;
c = nextchar(infp, listfp); charno++;
}
- 4 -
tokenvalue[i++] = '\0';
/* c = nextchar(infp, listfp); Skip the closing quotation mark */
strcpy(val, tokenvalue); return(tokentype);
} /* end if for (state == IN_QUOTE) */
return 0; /* ’ ª®£® ¥ ¬®¦¥â ¡ëâì... */
}
/*
 * Token source for the parser.
 *
 * When no buffered tokens are pending, one token is read with nexttok()
 * and, depending on its type, lex_buff may be filled with a sequence of
 * tokens. Buffered tokens are handed out from the highest index down,
 * one per call. The text of the returned token is copied into freshly
 * malloc'ed memory and stored in yylval.pchar; the malloc result is not
 * checked here.
 *
 * NOTE(review): many lines of this function are cut off ("...") in the
 * printed listing, so the comments below cover only what is visible.
 */
int yylex()
{
int tktyp;
char tkval[NMSZ];
if (!USE_BUFFER) {
tktyp = nexttok(tkval);
switch (tktyp) {
case _OpenTag: {
/* read the word that follows the tag opener */
tktyp = nexttok(tkval);
if (opened_par) {
/* NOTE(review): when the word is neither _C_KEYWORD nor _S_KEYWORD nothing */
/* is buffered, so only this second token is returned - confirm intent. */
if (tktyp == _C_KEYWORD || tktyp == _S_KEYWORD) {
if (!keyword_table[bsearch(tkval, keyword_table, KW...
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[...
lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...
lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...
lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 5; USE_BUFFER = TRUE;
opened_par = (strcmp(tkval, "P")==0);
} else {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[...
lex_buff_size = 2; USE_BUFFER = TRUE;
}
}
} else { // i.e. opened_par == FALSE
opened_par = (strcmp(tkval, "P")==0);
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[1].tkv...
lex_buff_size = 2; USE_BUFFER = TRUE;
}
break;
}
case _CloseTag: {
/* read the word that follows the closing-tag opener */
tktyp = nexttok(tkval);
if (opened_par) {
/* NOTE(review): bsearch() returns -1 when tkval is not in keyword_table; */
/* no check is visible before the result is used as an index here. */
if (keyword_table[bsearch(tkval, keyword_table, KWSZ)]...
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0]....
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1]....
lex_buff_size = 2; USE_BUFFER = TRUE;
} else {
if (strcmp(tkval, "P")==0) {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 2; USE_BUFFER = TRUE;
opened_par = FALSE;
} else {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...
lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...
lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 5; USE_BUFFER = TRUE;
opened_par = FALSE;
}
}
} else {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkv...
/* NOTE(review): the next line is a page-number artifact of the printed listing, not code. */
- 5 -
lex_buff_size = 2; USE_BUFFER = TRUE;
}
break;
}
case _IDENTIFIER: {
/* an identifier outside any tag while no paragraph is open: */
/* four tokens are buffered and the paragraph is marked as open */
if ( !(in_open || in_close) && (!opened_par)) {
lex_buff[0].tktyp = _IDENTIFIER; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...
lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...
lex_buff[3].tktyp = _OpenTag ; strcpy(lex_buff[3].tkv...
lex_buff_size = 4; USE_BUFFER = TRUE;
opened_par = TRUE;
}
break;
}
case 0: { // EOF
if (opened_par) {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...
lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...
lex_buff[3].tktyp = _CloseTag ; strcpy(lex_buff[3].tkv...
lex_buff_size = 4; USE_BUFFER = TRUE;
/* NOTE(review): a closing sequence was just buffered, so FALSE looks */
/* more plausible than TRUE here - confirm. */
opened_par = TRUE;
}
}
}
}
if (USE_BUFFER) {
/* pop the next pending token; the highest index is handed out first */
tktyp = lex_buff[--lex_buff_size].tktyp;
strcpy(tkval, lex_buff[ lex_buff_size].tkval);
if (lex_buff_size == 0) USE_BUFFER = FALSE;
}
/* hand the token text to the parser in newly allocated memory */
yylval.pchar = (char *) malloc(1 + strlen(tkval));
strcpy(yylval.pchar, tkval);
return(tktyp);
}
Примечание: распечатки файлов htm_cnst.h, htm_glob.h, htm_tokn.h и
htm_err.h см. в приложении А apendix'а 4.4.
- 6 -
Лабораторная работа 2
Задание:
Построить форматизатор С-файлов. Необходимо реализовать
следующие функции:
- `{` - всегда c новой строки без отступа
- `{` - никогда не переносится
- `{` - всегда c новой строки с отступом
- 0 - вставлять символ TAB при отступе
- 1..8 вставлять x пробелов
- форматировать комментарии c xx по yy позиции
- несколько команд на одной строке
- `=` выделять пробелами
Текст программы:
#include <stdio.h>
#include <dos.h>
#include <ctype.h>
#include <string.h>
// Token classes. getNext() as listed returns IDENT, KEYWORD or OTHER;
// ERROR and BRACKETS are not used in the visible code.
#define ERROR 0
#define IDENT 1
#define KEYWORD 2
#define BRACKETS 3
#define OTHER 4
// Single-character token codes used as case labels in format()
// (COMMA is not used in the visible code).
#define BEGIN '{'
#define END '}'
#define COMMA ','
#define SEMI ';'
#define LB '\n'
// Text of the most recent token: filled by getNext(), written out by outVal().
char val[100];
// Pending comment text; outCR() calls outComment() when it is non-empty.
char comment[200];
// Output column counter: reset after a newline, advanced for every character written.
int pos=0;
// Lookahead token code stored by format(); getNext() as listed does not act on it.
int undo;
// Set by outCR() to request one more indent unit from the next outVal() call.
int backspace;
// Number of entries in keywords[].
#define N_KW 8
// Control-flow keywords that getNext() reports as KEYWORD.
char keywords[N_KW][20]={
"for",
"while",
"do",
"if",
"switch",
"else",
"case",
"default"};
// -----------------------------------------------------------------
// Copy one quoted literal (character or string constant) from f to s.
//
// s - write cursor, passed by reference; on return it points just past the
//     last character stored. No terminating NUL is written (the caller adds
//     it) and the destination size is not known here, so the caller must
//     supply a buffer large enough for the literal.
// f - input stream positioned at the opening quote character.
//
// Copying stops after the closing delimiter, which must be the same
// character as the opening one, so an apostrophe inside "..." does not end
// the literal. A backslash copies the following character verbatim, so an
// escaped quote does not end it either. End of input stops the copy
// instead of looping forever.
void blockQuote (char *(&s),FILE *f)
{
    int quote=fgetc(f);                 // opening delimiter
    if (quote==EOF)
        return;
    *(s++)=(char)quote;
    for (;;) {
        int ch=fgetc(f);
        if (ch==EOF)
            break;
        *(s++)=(char)ch;
        if (ch=='\\') {
            // escape sequence: take the next character as-is
            int esc=fgetc(f);
            if (esc==EOF)
                break;
            *(s++)=(char)esc;
            continue;
        }
        if (ch==quote)
            break;
    }
}
// -----------------------------------------------------------------
// Read the next token from f into the global buffer val.
//
// Returns:
//   KEYWORD - an identifier-like word found in keywords[]; val holds the
//             word followed by one space;
//   IDENT   - any other word, or a quoted literal copied by blockQuote();
//   OTHER   - any other single character; a '=' is stored as " = " when
//             option E is set and the previous token was not a single
//             "other" character.
//
// Words longer than val can hold are read in full but truncated on store.
// Quoted literals are not length-checked because blockQuote() has no size
// argument.
//
// NOTE(review): E is defined outside this listing; presumably it is the
// "surround '=' with spaces" option.
// NOTE(review): the '=' branch leaves `symbol` unchanged, so "==" comes
// out as two spaced '=' signs - confirm whether that is intended.
int getNext (FILE *f)
{
    static int symbol=0;        // 1 when the previous token was a single "other" character
    char *s=val;
    int c=fgetc(f);             // int, so that EOF stays distinct from any character

    if (isalnum(c) || c=='_') {
        symbol=0;
        *(s++)=(char)c;
        for (;;) {
            c=fgetc(f);
            if (c==EOF || !(isalnum(c) || c=='_' || c=='.'))
                break;
            // keep room for the keyword's trailing space and the NUL
            if (s < val + sizeof val - 2)
                *(s++)=(char)c;
        }
        if (c!=EOF)
            ungetc (c,f);       // the terminator belongs to the next token
        *s=0;

        int kw;
        for (kw=0; kw<N_KW && strcmp(keywords[kw],val)!=0; kw++)
            ;
        if (kw!=N_KW) {
            *(s++)=' ';
            *s=0;
            return KEYWORD;
        }
        return IDENT;
    }
    if (c=='\'' || c=='\"') {
        symbol=0;
        ungetc(c,f);            // blockQuote() expects to read the opening quote itself
        blockQuote (s,f);
        *s=0;
        return IDENT;
    }
    if (c=='=' && E && !symbol)
    {
        *(s++)=' ';
        *(s++)=(char)c;
        *(s++)=' ';
        *s=0;
        return OTHER;
    }
    *s=(char)c, *(s+1)=0;
    symbol=1;
    return OTHER;
}
// Write the current token text (val) to f and advance the column counter.
//
// When an indent unit is pending (backspace is set) and back is 0, one
// indent unit is written first: a TAB counted as F3 columns when T is 0,
// otherwise T spaces. The pending flag is cleared in every case.
void outVal (FILE *f,int back=0)
{
    if (backspace && !back) {
        if (T) {
            for (int n=0;n<T;n++) {
                fputc (' ',f);
                pos++;
            }
        }
        else {
            fputc ('\t',f);
            pos+=F3;
        }
    }
    backspace=0;
    for (const char *p=val;*p;++p) {
        fputc (*p,f);
        pos++;
    }
}
// Finish the current output line and indent the next one.
//
// f       - output stream.
// tab     - nesting depth to indent to.
// newLine - when non-zero, no line break is written and one more indent
//           unit is emitted instead.
//
// A pending comment is flushed first through outComment(). tab-1+newLine
// indent units are then written (a TAB counted as F3 columns when T is 0,
// otherwise T spaces each). When tab is non-zero and a line break was
// written, backspace is set so that the next outVal() call supplies the
// final indent unit.
//
// NOTE(review): outComment, T and F3 are defined outside this listing.
void outCR (FILE *f,int tab,int newLine=0)
{
    int limit=tab-1+newLine;
    // Out Comments
    if (*comment)
        outComment (f);
    if (!newLine) {
        fputc ('\n',f);
        pos=0;
    }
    for (int i=0;i<limit;i++)
    {
        if (!T) {
            fputc (9,f);
            pos+=F3;
        }
        else {
            for (int j=0;j<T;j++) {
                fputc (' ',f);
                pos++;
            }
        }
    }
    if (tab && !newLine)
        backspace=1;
}
// -----------------------------------------------------------------
// Reformat the source read from f_in and write the result to f_out.
//
// Tokens come from getNext(); the text of the current token is in val and
// is written with outVal(); line breaks and indentation are produced by
// outCR(). `tab` is the current nesting depth and `lb` records that a line
// break has just been emitted. Always returns 1.
//
// NOTE(review): N is defined outside this listing; presumably it selects
// the brace placement style (2 = brace stays on the line, 3 = brace on a
// new line with indent).
// NOTE(review): getNext() as listed returns only IDENT, KEYWORD or OTHER,
// so the BEGIN/END/SEMI/LB/EOF cases are unreachable with that version;
// the END case also never writes the brace or decreases `tab`. The printed
// listing is presumably abridged - confirm against the real source.
int format (FILE *f_in,FILE *f_out)
{
    int tab=0,lb=0;
    int lex;
    while (!feof(f_in))
    {
        lex=getNext(f_in);
        switch (lex) {
        case KEYWORD: {
            lb=0;
            outVal (f_out);
            lex=getNext(f_in);
            break;
        }
        case BEGIN: {
            if (!lb && N!=2) outCR (f_out,tab);
            if (N==3) outCR (f_out,1,1);
            outVal(f_out);
            outCR (f_out,++tab);
            lb=1;
            break;
        }
        case END: {
            if (!lb) outCR (f_out,tab);
            lb=1;
            lex=getNext (f_in);
            if (lex==LB) lex=getNext(f_in);
            undo=lex;
            outCR (f_out,tab);
            lb=1;
            break;
        }
        case SEMI: {
            lb=0;
            outVal (f_out);
            lex=getNext(f_in);
            undo=lex;
            break;
        }
        case IDENT: {
            lb=0;
            outVal (f_out);
            lex=getNext (f_in);
            undo=lex;
            break;
        }
        case LB: {
            // must start at 0: it is read below even when no comment was pending
            int used=0;
            if (*comment) {
                outCR (f_out,tab);
                used=1;
            }
            lex=getNext (f_in);
            if (N!=2 || lex!=BEGIN) {
                if (!used) outCR (f_out,tab);
                lb=1;
            }
            undo=lex;
            break;
        }
        case EOF:
            return 1;
        default: {
            lb=0;
            outVal(f_out);
        }
        }
    }
    return 1;
}
Примечание: необходимые функции были реализованы в полном объеме
и при сдаче нареканий не вызвали. (Тестовый пример
приведен в приложении apendix'а 4.5)
- 10 -
Лабораторная работа 3
Задание:
Проанализировать хф и выбрать лучшую для случайного распределения ид-ов
Обобщенная формула вычисления хф:
h0 = 0;
h(i) = Alf* h(i-1) [+] C(i), i=1..k
k - длина строки. [+] - некоторая произвольная операция
(+, -, <<, _rotl, ^ |, &)
Проанализировать эти и любые другие хф на количество коллизий (конфликтов)
для некоторых случайных последовательностей. Например:
1) id'ы языка C (до 50).
2) ---- "" ---- (до 100).
3) ---- "" ---- (до 1000).
4) Внешние имена стандартной библиотеки BC++.
5) Внешние имена графической библиотеки BC++.
6) Случайно генерируемые имена (~600)
7) Английские слова с префиксами и/или суффиксами (xxx) - около 200
8) 300 имен вида: w000, w001, w002, etc
Анализ статистики свести в таблицу/график
Текст программы анализатора:
#include <stdio.h>
#include <stdlib.h>
/*
 * Shift-and-add hash: for every character the running value is shifted
 * left by one bit and the character code is added.
 * Returns 0 for the empty string.
 */
unsigned hash_shift(char *s)
{
    unsigned acc = 0;

    for (; *s != '\0'; s++)
        acc = (acc << 1) + *s;
    return acc;
}
/*
 * Rotate-and-xor hash: for every character the running value is rotated
 * left by one bit and xor-ed with the character code.
 * Returns 0 for the empty string.
 *
 * The rotation is written out in standard C instead of calling the
 * compiler-specific _rotl(), so the function builds with any compiler.
 */
unsigned hash_rotl(char *s)
{
    unsigned hash = 0;

    while (*s) {
        /* the top bit, which a plain left shift would drop */
        unsigned carry = hash > (~0u >> 1);

        hash = ((hash << 1) | carry) ^ *s++;
    }
    return hash;
}
/*
 * Shift-by-four hash with folding: each character is added after shifting
 * the running value left by 4 bits; whenever any of bits 28..31 become
 * set, they are xor-ed back in 24 bits lower and then cleared.
 * Returns 0 for the empty string.
 */
long hash_pgw(char *s)
{
    unsigned long h = 0;

    while (*s) {
        unsigned long top;

        h = (h << 4) + *s++;
        top = h & 0xF0000000;
        if (top != 0)
            h ^= (top >> 24) ^ top;
    }
    return h;
}
/*
 * Multiplicative hash: h = D*h + c for every character, with D = 5.
 * Returns 0 for the empty string.
 *
 * The multiplier has an explicit type; the implicit-int form `const D=5;`
 * is not valid in C99 and later.
 */
unsigned hash_4 (char *s)
{
    const unsigned D = 5;
    unsigned h = 0;

    while (*s)
        h = D*h + *s++;
    return h;
}
/*
 * Hash-function statistics.
 *
 * Usage: HASH.COM <n> <file_name>
 *   n         - number of table slots (at least 10);
 *   file_name - text file with whitespace-separated identifiers.
 *
 * Every identifier is hashed with hash_shift, hash_rotl, hash_pgw and
 * hash_4 and counted in a separate n-slot table per function. Each word is
 * echoed to stdout. For every function the program prints the average
 * number of identifiers per occupied slot and the largest slot count.
 * Always returns 0.
 *
 * Words are read with a field width of 99 so they cannot overflow buf,
 * and an empty input prints an average of 0 instead of dividing by zero.
 *
 * NOTE(review): the message strings appear mis-encoded in this listing
 * (presumably CP866 Russian text); they are reproduced unchanged.
 */
int main (int argc,char **argv)
{
    int n=0,total=0;
    int placed1=0,placed2=0,placed3=0,placed4=0;
    int max1=0,max2=0,max3=0,max4=0;
    int *table1,*table2,*table3,*table4;
    FILE *f;
    char buf[100];
    puts ("‹ ¡®à â®à ï à ¡®â 3. ˆáá«¥¤®¢ ¨¥ åíè-äãªæ¨¨.");
    puts ("‹ï¯ã®¢ ˆ.‚. €-61");
    if (argc<3)
    {
        puts ("\n HASH.COM <n> <file_name>");
        puts (" n - ç¨á«® í«¥¬¥â®¢ ¢ â ¡«¨æ¥");
        puts (" file_name - ¨¬ï ä ©« á ¨¤¥â¨ä¨ª â®à ¬¨ \n");
        return 0;
    }
    n=atoi(argv[1]);
    if (n<10)
    {
        puts ("\n ‘«¨èª®¬ ¬ «¥ìª ï â ¡«¨æ \n");
        return 0;
    }
    table1=(int*)calloc(n,sizeof(int));
    table2=(int*)calloc(n,sizeof(int));
    table3=(int*)calloc(n,sizeof(int));
    table4=(int*)calloc(n,sizeof(int));
    if (!table1 || !table2 || !table3 || !table4)
    {
        puts ("\n¥ å¢ â ¥â ¯ ¬ï⨠¤«ï â ¡«¨æ\n");
        free (table1); free (table2); free (table3); free (table4);
        return 0;
    }
    if ((f=fopen(argv[2],"r"))==NULL)
    {
        puts ("\n¥ ¬®£ã ®âªàëâì 㪠§ ë© ä ©«\n");
        free (table1); free (table2); free (table3); free (table4);
        return 0;
    }
    /* %99s leaves room for the terminating NUL in buf[100] */
    while (fscanf (f,"%99s",buf)==1)
    {
        table1[hash_shift(buf)%n]++;
        table2[hash_rotl(buf)%n]++;
        table3[hash_pgw(buf)%n]++;
        table4[hash_4(buf)%n]++;
        puts (buf);
        total++;
    }
    puts ("‘â â¨á⨪ :");
    for (int i=0;i<n;i++)
    {
        if (table1[i]) {
            if (max1<table1[i]) max1=table1[i];
            placed1++;
        }
        if (table2[i]) {
            if (max2<table2[i]) max2=table2[i];
            placed2++;
        }
        if (table3[i]) {
            if (max3<table3[i]) max3=table3[i];
            placed3++;
        }
        if (table4[i]) {
            if (max4<table4[i]) max4=table4[i];
            placed4++;
        }
    }
    /* placedN is 0 only when no identifier was read; report 0 in that case */
    printf (" HASH_SHIFT : %f max=%d\n",placed1 ? (float)total/placed1 : 0.0f,max1);
    printf (" HASH_ROTL : %f max=%d\n",placed2 ? (float)total/placed2 : 0.0f,max2);
    printf (" HASH_PGW : %f max=%d\n",placed3 ? (float)total/placed3 : 0.0f,max3);
    printf (" HASH_4 : %f max=%d\n",placed4 ? (float)total/placed4 : 0.0f,max4);
    fclose (f);
    free (table1); free (table2); free (table3); free (table4);
    return 0;
}