turner.c
/* Notice of Copyright, License and Warranty
**
** This software is Copyright 1998, 1999, 2000 Jeffrey S. Dutky
** This software is licensed for use under the terms of the GNU General
** Public License (also called the GPL), a copy of which must be included
** with any distribution of this software. You may also find a copy of the
** GPL at the Free Software Foundation's web site at http://www.fsf.org/
** or http://www.gnu.org.
**
** This software is provided "as is" and without any express or implied
** warranties, including, without limitation, the implied warranties of
** merchantability and fitness for a particular purpose.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "gettoken.h"
#include "parseargs.h"
/*
** This is a small program that takes C source code as input and
** outputs a syntax-highlighted HTML version. The name (turner) is a
** reference to Ted Turner's colorization of black and white films
** back in the early nineties.
**
** The highlighting colors and the other settings can be specified on the
** command line with the following options:
**
** -b <background color>
** -c <char literal color>
** -e <output file extension>
** -f <float literal color>
** -h <print out a help screen>
** -i <int literal color>
** -j force java mode
** -k <keyword color>
** -o <operator color>
** -p <preprocessor color>
** -r <remark color>
** -s <string literal color>
** -t <text color>
** -w <tab width>
** -x force c++ mode
** -A <author's name>
** -B don't bold keywords
** -C create default config files, if any
** -F don't specify alternate fonts for remarks
** -H <home directory URL>
** -I don't italicize remarks
** -K <alternate keyword file name>
** -M <contact e-mail address>
** -O <alternate operator file name>
** -R <comment font names>
** -T <document title>
**
** Colors can be specified either as hexadecimal rrggbb numbers or
** with standard HTML color names.
**
** all other values can be specified in the .turner file, in exactly
** the same way you would specify them on the command line.
**
** The keywords are listed in a file called .keywords, each keyword
** on a line of its own; blank lines are ignored.
**
** Similarly, operators are defined in a file called .operators, each
** operator on a line of its own; blank lines are ignored.
**
** color, keyword, and operator default values are also hard coded
** into the program so it will happily work on C code even if you
** don't have the configuration files.
**
** also by default, keywords are bolded and remarks are italicized, the
** entire block of source code is enclosed in <PRE> ... </PRE> tags, and
** tabs are replaced with spaces
**
** Overall, the program will happily process completely bogus C code. It
** is not meant as a syntax checker of any kind. Its only purpose is to
** generate syntax-highlighted HTML documents from C code: I assume that
** you have access to a real syntax checker if that's what you need.
*/
/*
** cppkeys: NULL-terminated list of the words highlighted as keywords in
** C++ mode. It is searched linearly with match(), so the order of the
** entries does not matter; they are kept alphabetical for readability.
**
** Besides the language keywords the list carries a few common compiler
** extensions (far, near, fortran, pascal) and the two brace characters,
** which are listed so that they receive the keyword styling too.
**
** The C++98 keywords that were missing from the table (const_cast,
** dynamic_cast, namespace, reinterpret_cast, static_cast, typeid, typename,
** using, wchar_t) are included so that ordinary C++ code is fully colored.
*/
static char *cppkeys[]={"asm","auto","bool","break","case","catch","char",
    "class","const","const_cast","continue","default","delete","do","double",
    "dynamic_cast","else","enum","explicit","extern","false","far","float",
    "for","fortran","friend","goto","if","inline","int","long","mutable",
    "namespace","near","new","operator","pascal","private","protected",
    "public","register","reinterpret_cast","return","short","signed","sizeof",
    "static","static_cast","struct","switch","template","this","throw","true",
    "try","typedef","typeid","typename","union","unsigned","using","virtual",
    "void","volatile","wchar_t","while","{","}",NULL};
/*
** ckeys: NULL-terminated list of the words highlighted as keywords in C
** mode. This is the table main starts out with, and the one it selects for
** file names whose extension appears in cext. It is searched linearly with
** match(), so the order of the entries does not matter.
**
** In addition to the standard C keywords the list holds some historical and
** vendor words (asm, entry, far, near, fortran, pascal) and the two brace
** characters, which are listed so that they receive the keyword styling too.
*/
static char *ckeys[]={"asm","auto","break","case","char","const","continue",
"default","do","double","else","entry","enum","extern","far","float","for",
"fortran","goto","if","int","long","near","pascal","register","return",
"short","signed","sizeof","static","struct","switch","typedef","union",
"unsigned","void","volatile","while","{","}",NULL};
/*
** jkeys: NULL-terminated list of the words highlighted as keywords in Java
** mode (the -j option, or a file name whose extension appears in javaext).
** It is searched linearly with match(), so the order of the entries does
** not matter; they are kept alphabetical for readability.
**
** The list holds the Java keywords, the literals true, false and null, and
** two words that were reserved in early Java (byvalue, threadsafe). The
** keywords that were missing from the table (assert, enum, interface,
** protected, strictfp, throws, volatile) are included so that ordinary
** Java code is fully colored.
*/
static char *jkeys[]={"abstract","assert","boolean","break","byte","byvalue",
    "case","catch","char","class","const","continue","default","do","double",
    "else","enum","extends","false","final","finally","float","for","goto",
    "if","implements","import","int","instanceof","interface","long","native",
    "new","null","package","private","protected","public","return","short",
    "static","strictfp","super","switch","synchronized","this","threadsafe",
    "throw","throws","transient","true","try","void","volatile","while",NULL};
/*
** operatordef: NULL-terminated list of the built-in operator spellings.
** main points its `operators` variable at this table; nothing in this file
** looks anything up in it afterwards.
** NOTE(review): presumably intended for operator coloring (see the "ops"
** style in cssheader) -- confirm whether that feature is still planned.
*/
static char *operatordef[]={"+","-","*","/","%","&","|","^","[","]","&&",
"||","=","==","+=","-=","<=",">=","!=","/=","%=","&=","|=","^=","~","!",
"?",":","<",">","<<",">>","(",")","::",NULL};
/*
** javaext: NULL-terminated list of file name extensions (leading dot
** included) that make main select the Java keyword table. The comparison
** done by match() is case sensitive, which is why several capitalizations
** of each extension are listed.
*/
static char *javaext[]={".java",".Java",".JAVA",".jav",".JAV",".Jav",
".j",".J",".j++",".J++",".jpp",".JPP",".Jpp",NULL};
/*
** cext: NULL-terminated list of file name extensions (leading dot included)
** that make main select the C keyword table. Only the lower case forms are
** listed here; the upper case ".C" and ".H" are in cppext.
*/
static char *cext[]={".c",".h",NULL};
/*
** cppext: NULL-terminated list of file name extensions that make main
** select the C++ keyword table. The comparison done by match() is case
** sensitive, which is why several capitalizations of each extension are
** listed.
**
** Every entry must start with a dot, because the extension it is compared
** against includes the dot. The ".c++" entry used to be spelled "c++" and
** could therefore never match.
*/
static char *cppext[]={".C",".H",".cc",".CC",".hh",".HH",".cpp",".CPP",
    ".Cpp",".hpp",".HPP",".Hpp",".c++",".C++",".h++",".H++",".cxx",".CXX",
    ".Cxx",".hxx",".HXX",".Hxx",NULL};
/*
** cssheader: printf format for the <STYLE> block written into the document
** head in HTML 4.0 mode. It defines one CSS class per highlighted token
** type (rmk, chr, int, flt, str, key, pre, ops) and takes ten string
** arguments, in this order:
**
**   extra declarations for .rmk, remark color, char color, int color,
**   float color, string color, extra declarations for .key, keyword color,
**   preprocessor color, operator color
**
** The two "extra declarations" arguments are inserted verbatim in front of
** the color line, so each must be either empty or a sequence of complete
** declarations, each ending in ";\n".
*/
static char *cssheader="<STYLE TYPE=\"text/css\">\n"
"\t.rmk {\n%s\t\tcolor: %s\n\t}\n"
"\t.chr{\n\t\tcolor: %s\n\t}\n"
"\t.int{\n\t\tcolor: %s\n\t}\n"
"\t.flt{\n\t\tcolor: %s\n\t}\n"
"\t.str{\n\t\tcolor: %s\n\t}\n"
"\t.key{\n%s\t\tcolor: %s\n\t}\n"
"\t.pre{\n\t\tcolor: %s\n\t}\n"
"\t.ops{\n\t\tcolor: %s\n\t}\n"
"</STYLE>\n";
/*
** match: report whether string s occurs in the NULL-terminated string
** table m. The comparison is exact and case sensitive.
**
** Returns 1 when an entry equal to s is found, 0 otherwise (including
** when the table is empty).
*/
static int match(char *s, char **m){
    char **entry;

    for(entry=m; *entry!=NULL; entry++){
        if(!strcmp(*entry,s)){
            return 1;
        }
    }
    return 0;
}
/*
** getext: copy the extension of file name s into e.
**
** The extension is the text from the last '.' in s through the end of the
** string, dot included (so "prog.tar.c" gives ".c"). When s contains no
** dot, e is set to the empty string.
**
**   s    file name to examine; a NULL pointer is treated like a name
**        without an extension
**   e    destination buffer
**   len  size of e in bytes; at most len-1 characters are stored, followed
**        by a terminating NUL, so an over-long extension is truncated
**
** Nothing is written when e is NULL or len is not positive.
**
** The earlier implementation copied the name backwards starting with its
** terminating NUL, which left e[0] == '\0'; the in-place reversal that
** followed was driven by strlen(e), saw an empty string and did nothing,
** so the result was always "".
*/
static void getext(char *s, char *e, int len){
    char *dot;
    size_t n;

    if(e==NULL || len<=0)
        return;
    e[0]='\0';
    if(s==NULL)
        return;
    dot=strrchr(s,'.');
    if(dot==NULL)
        return;
    n=strlen(dot);
    if(n>(size_t)(len-1))       /* keep room for the terminator */
        n=(size_t)(len-1);
    memcpy(e,dot,n);
    e[n]='\0';
}
/*
** File-scope buffers and tag templates used by main.
**
** The color and name buffers are given their default values by main (with
** strcpy) and are then handed to parseargs, which may replace them with
** values from the command line. All of them hold NUL-terminated strings.
**
** NOTE(review): tcolor, keyfile and operatorfile are filled in but never
** read again in this file, and ocolor is only used for the ".ops" style in
** the HTML 4.0 style sheet -- confirm whether the features behind them are
** unfinished.
** NOTE(review): the names _fontt, _boldt and _italict start with an
** underscore, which the C standard reserves at file scope; consider
** renaming them together with their uses in main.
*/
static char
tcolor[100], /* default text color is black */
rcolor[100], /* default remark color is gray */
ccolor[100], /* default char color is red */
icolor[100], /* default int color is red */
fcolor[100], /* default float color is maroon */
scolor[100], /* default string color is green */
kcolor[100], /* default keyword color is blue */
pcolor[100], /* default preprocessor color is purple */
ocolor[100], /* default operator color is navy blue */
bcolor[100], /* default background color is white */
/* document title (replaced by the input file name when files are named),
** author name, and the keyword and operator file names */
doctitle[256],author[256],keyfile[256],operatorfile[256],
/* output file extension, extension found on the input file name, tab width
** as text, the option letters seen by parseargs, home page URL, and the
** time stamp for the 'Last Modified' line */
ext[256],iext[256],tabw[256],opts[256],homeurl[256],today[256],
/* text of the current token (gettoken is told it holds 1000 bytes), input
** and output file names, and the contact e-mail address */
token[1000],infname[256],outfname[256],mailto[256],
/* font list for remarks, and the extra CSS declarations inserted into the
** .rmk and .key styles of cssheader in HTML 4.0 mode */
remarkf[100], v4remarkf[100], v4keystyle[100],
/* printf formats for the opening tag of a colored run: color only, and
** color plus font face */
*fontct="<FONT COLOR=%s>",*fontcft="<FONT COLOR=%s FACE=\"%s\">",
/* closing tag of a colored run */
*_fontt="</FONT>",
/* bold and italic tag pairs */
*boldt="<B>",*_boldt="</B>",
*italict="<I>",*_italict="</I>",
/* HTML 4.0 replacements for the font tags; the %s takes a CSS class name */
*v4fonttag="<SPAN CLASS=\"%s\">", *v4_fonttag="</SPAN>";
int main(int args, char *arg[]){
int n,t,t0,rv,nfiles;
FILE *in, *out;
char **keywords, **operators;
char *argfmt="34A:Bb:Cc:e:Ff:H:Ii:jK:k:M:mO:o:p:R:r:s:T:t:w:x?";
int moddate=0, html3=1, html4=0, bold=1, italic=1, fonts=1,tabwidth=4;
int start=1;
time_t now;
time(&now);
strftime(today,256,"%X %x",localtime(&now));
keywords=ckeys;
operators=operatordef;
in=stdin;
out=stdout;
strcpy(infname,""); /* set up a bunch of default values */
strcpy(outfname,"");
strcpy(doctitle,"");
strcpy(author,"");
strcpy(homeurl,"");
strcpy(mailto,"");
strcpy(tabw,"");
strcpy(ext,".html");
strcpy(keyfile,".keywords");
strcpy(operatorfile,".operators");
strcpy(tcolor,"Black");
strcpy(rcolor,"Gray");
strcpy(ccolor,"Red");
strcpy(icolor,"Red");
strcpy(fcolor,"Maroon");
strcpy(scolor,"Green");
strcpy(kcolor,"Blue");
strcpy(pcolor,"Purple");
strcpy(ocolor,"Navy");
strcpy(bcolor,"White");
remarkf[0]='\0';
v4remarkf[0]=v4keystyle[0]='\0';
/* parse the command line arguments and adjust assorted variables accordingly */
rv=parseargs(args,arg,argfmt,opts,author,bcolor,ccolor,ext,fcolor,
homeurl,icolor,keyfile,kcolor,mailto,operatorfile,ocolor,pcolor,
remarkf,rcolor,scolor,doctitle,tcolor,tabw);
if(rv<0 || strchr(opts,'h')){
if(badusage(rv) || strchr(opts,'h')){
fprintf(stderr,"\n\tturner, version 0.9.3, (C) 1998, 1998, 2000, "
"2004 Jeffrey S. Dutky\n");
fprintf(stderr,"\n\tGenerate syntax hilighted source code in "
"HTML format from C, C++ \n\tor Java source code.\n");
fprintf(stderr,"\n\tUsage:\n");
fprintf(stderr,"\n\t%s\n",parseusage(arg[0],75,argfmt,"author",
"back-color","char-color","extension","float-color",
"home-URL","int-color","keyword-file","keyword-color",
"e-mail","operator-file","operator-color","preprocessor-color",
"remark-font","remark-color","string-color","title",
"text-color","tab-width"));
fprintf(stderr,"\n\tfurther arguments will be taken as file names ");
fprintf(stderr,"to process.\n");
fprintf(stderr,"\n\t\t-3 generate version 3.0 HTML\n");
fprintf(stderr,"\n\t\t-4 generate version 4.0 HTML\n");
fprintf(stderr,"\n\t\t-B don't bold keywords\n");
fprintf(stderr,"\n\t\t-C create default configuration files\n");
fprintf(stderr,"\n\t\t-F don't specifiy alternate fonts for "
"remarks\n");
fprintf(stderr,"\n\t\t-I don't italicize remarks\n");
fprintf(stderr,"\n\t\t-j use Java keywords\n");
fprintf(stderr,"\n\t\t-m add a 'Last Modified' line\n");
fprintf(stderr,"\n\t\t-x use C++ keywords\n");
fprintf(stderr,"\n\tThis software is licensed under the terms of "
"the GNU General \n\tPublic License (also called the GPL), "
"a copy of which must be \n\tincluded with any distribution "
"of this software. You may also \n\tfind a copy of the GPL "
"at the Free Software Foundation's web \n\tsite at "
"http://www.fsf.org or http://www.gnu.org\n");
fprintf(stderr,"\n\tThis software is provided \"AS IS\" and "
"without any express or \n\timplied warranties, include, "
"without limitation, the implied \n\twarranties of "
"merchantability and fitness for a paricular "
"\n\tpurpose.\n");
return 0;
}else fprintf(stderr,"Argument Parsing Error #%d: %s\n",rv,parseerror(rv));
return -1;
}
if(strchr(opts,'m')) /* check for various command line options */
moddate=1;
if(strchr(opts,'B'))
bold=0;
if(strchr(opts,'I'))
italic=0;
if(strchr(opts,'F'))
fonts=0;
if(strchr(opts,'4'))
html3=0, html4=1;
if(strchr(opts,'j'))
keywords=jkeys;
if(strchr(opts,'x'))
keywords=cppkeys;
if(strlen(tabw))
tabwidth=atoi(tabw);
if(remarkf[0]=='\0')
strcpy(remarkf, "'New Century Schoolbook',Bookman,Palatino,Times");
else fonts=1;
if(rv<args-1){
nfiles=args-rv-1;
strcpy(infname,arg[rv+1]); /* copy the input filename */
strcpy(doctitle,infname);
strcpy(outfname,infname); /* create the output file name */
strcat(outfname,ext);
getext(infname,iext,256); /* figure out what kind of source code this is */
if(match(iext,cext)) /* select the correct set of keywords for this source */
keywords=ckeys;
if(match(iext,javaext))
keywords=jkeys;
if(match(iext,cppext))
keywords=cppkeys;
in=fopen(infname,"r"); /* open the input file */
if(in==NULL)
return -2;
out=fopen(outfname,"w"); /* open the output file */
if(out==NULL){
fclose(in);
return -3;
}
}else nfiles=1;
for(n=0;n<nfiles;n++){
if(nfiles>1 && n>0){
strcpy(infname,arg[rv+1+n]); /* copy the input filename */
strcpy(doctitle,infname);
strcpy(outfname,infname); /* create the output file name */
strcat(outfname,ext);
getext(infname,iext,256); /* figure out what kind of source code this is */
if(match(iext,cext)) /* select the correct set of keywords for this source */
keywords=ckeys;
if(match(iext,javaext))
keywords=jkeys;
if(match(iext,cppext))
keywords=cppkeys;
in=fopen(infname,"r"); /* open the input file */
if(in==NULL)
return -2;
out=fopen(outfname,"w"); /* open the output file */
if(out==NULL){
fclose(in);
return -3;
}
}
if(html4){ /* NOTICE!!! First HTML 4.0 compatability part! NOTICE!!! */
fprintf(out,"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n");
fontct=v4fonttag; /* setup CSS format tags */
fontcft=v4fonttag;
_fontt=v4_fonttag;
if(bold)
strcpy(v4keystyle, "\t\tfont-weight: bold;\n");
bold=0; /* all type styles are taken care of with CSS */
boldt=""; /* so I'm turning off bolding and italicizing */
_boldt=""; /* as well as nulling out the bold and italic */
if(fonts){
strcat(v4remarkf, "\t\tfont-family: ");
strcat(v4remarkf, remarkf);
strcat(v4remarkf, ";\n");
}
fonts=0; /* I'm turning off font faces: that's done with CSS also */
if(italic)
strcat(v4remarkf, "\t\tfont-style: italic;\n");
italic=0; /* tags. */
italict="";
_italict="";
}
fprintf(out,"<HTML>\n<HEAD>\n"); /* dump a header to the output */
if(strlen(doctitle)>0)
fprintf(out,"<TITLE>%s</TITLE>\n",doctitle);
if(strlen(author)>0)
fprintf(out,"<META NAME=\"Author\" CONTENT=\"%s\">\n",author);
fprintf(out,"<META NAME=\"Generator\" CONTENT=\"turner ver.0.9.3\">\n");
if(html4){ /* NOTICE!!! Second HTML 4.0 compatability part! NOTICE!!! */
/* write the CSS header block */
fprintf(out, cssheader, v4remarkf, rcolor, ccolor, icolor,
fcolor, scolor, v4keystyle, kcolor, pcolor, ocolor);
strcpy(rcolor,"rmk"); /* setup CSS names */
strcpy(ccolor,"chr");
strcpy(icolor,"int");
strcpy(fcolor,"flt");
strcpy(scolor,"str");
strcpy(kcolor,"key");
strcpy(pcolor,"pre");
strcpy(ocolor,"ops");
}
fprintf(out,"</HEAD>\n<BODY BGCOLOR=%s>\n",bcolor);
if(strlen(homeurl))
fprintf(out,"<P ALIGN=Center><A HREF=\"%s\">HOME</A></P>\n",homeurl);
if(strlen(doctitle)>0)
fprintf(out,"<H1>%s</H1>\n",doctitle);
fprintf(out,"<PRE>\n"); /* start writing the actual content */
t0=GENERIC;
while((t=gettoken(in,token,1000,tabwidth,start))!=ENDOFFILE){ /* process the file */
start=0;
if(t==KEYWORD && !match(token, keywords)) /* is it a real keyword */
t=GENERIC; /* or just a normal identifier? */
if(t0!=t){ /* if the token type has changed */
switch(t0){ /* complete the last token */
case KEYWORD:
if(bold)
fprintf(out,_boldt);
fprintf(out,_fontt);
break;
case STRING:
case INTEGER:
case CHARACTER:
case PREPROCESSOR:
case FLOAT:
fprintf(out,_fontt);
break;
case COMMENT:
if(italic)
fprintf(out,_italict);
fprintf(out,_fontt);
break;
default:
break;
}
switch(t){ /* and write a new start tag for this token */
case KEYWORD:
fprintf(out,fontct,kcolor);
if(bold)
fprintf(out,boldt);
break;
case STRING:
fprintf(out,fontct,scolor);
break;
case INTEGER:
fprintf(out,fontct,icolor);
break;
case CHARACTER:
fprintf(out,fontct,ccolor);
break;
case PREPROCESSOR:
fprintf(out,fontct,pcolor);
break;
case COMMENT:
if(fonts)
fprintf(out,fontcft,rcolor,remarkf);
else
fprintf(out,fontct,rcolor);
if(italic)
fprintf(out,italict);
break;
case FLOAT:
fprintf(out,fontct,fcolor);
break;
default:
break;
}
}
fprintf(out,"%s",token); /* write the token text itself */
fflush(out);
t0=t;
}
fprintf(out,"</PRE>\n"); /* dump a tailer to the output */
if(strlen(homeurl))
fprintf(out,"<P ALIGN=Center><A HREF=\"%s\">HOME</A></P>\n",homeurl);
if(moddate)
fprintf(out,"<P><I>Last Modified: %s<I></P>\n",today);
if(strlen(mailto)){
if(strlen(author))
fprintf(out,"<ADDRESS>Author: <A HREF=\"mailto:%s\">%s</A></ADDRESS>\n",mailto,author);
else
fprintf(out,"<ADDRESS><A HREF=\"mailto:%s\">Contact the Author.</A></ADDRESS>\n",mailto);
}
fprintf(out,"</BODY>\n</HTML>\n");
if(in!=stdin)
fclose(in);
if(out!=stdout)
fclose(out);
}
return 0;
}