/* turner.c */

/* Notice of Copyright, License and Warranty
**
** This software is Copyright 1998, 1999, 2000 Jeffrey S. Dutky
** This software is licensed for use under the terms of the GNU General
** Public License (also called the GPL), a copy of which must be included
** with any distribution of this software. You may also find a copy of the
** GPL at the Free Software Foundation's web site at http://www.fsf.org/
** or http://www.gnu.org.
**
** This software is provided "as is" and without any express or implied
** warranties, including, without limitation, the implied warranties of
** merchantability and fitness for a particular purpose.
*/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "gettoken.h"
#include "parseargs.h"

/*
** This is a small program that takes C source code as input and
** outputs a syntax-highlighted HTML version. The name (turner) is a
** reference to Ted Turner's colorization of black and white films
** back in the early nineties.
**
** The highlighting colors can be specified on the command line for the
** following syntactic elements:
**
** -b <background color>
** -c <char literal color>
** -e <output file extension>
** -f <float literal color>
** -h <print out a help screen>
** -i <int literal color>
** -j    force java mode
** -k <keyword color>
** -o <operator color>
** -p <preprocessor color>
** -r <remark color>
** -s <string literal color>
** -t <text color>
** -w <tab width>
** -x    force c++ mode
** -A <author's name>
** -B    don't bold keywords
** -C    create default config files, if any
** -F    don't specify alternate fonts for remarks
** -H <home directory URL>
** -I    don't italicize remarks
** -K <alternate keyword file name>
** -M <contact e-mail address>
** -O <alternate operator file name>
** -R <comment font names>
** -T <document title>
**
** Colors can be specified either as hexadecimal rrggbb numbers or
** with standard HTML color names.
**
** all other values can be specified in the .turner file, in exactly
** the same way you would specify them on the command line.
**
** The keywords are listed in a file called .keywords, each keyword
** on a line of its own; blank lines are ignored.
**
** Similarly, operators are defined in a file called .operators, each
** operator on a line of its own; blank lines are ignored.
**
** color, keyword, and operator default values are also hard coded
** into the program so it will happily work on C code even if you
** don't have the configuration files.
**
** also by default, keywords are bolded and remarks are italicized, the
** entire block of source code is enclosed in <PRE> ... </PRE> tags, and
** tabs are replaced with spaces
**
** Overall, the program will happily process completely bogus C code. It
** is not meant as a syntax checker of any kind. Its only purpose is to
** generate syntax-highlighted HTML documents from C code: I assume that
** you have access to a real syntax checker if that's what you need.
*/

/*
** Words highlighted as keywords in C++ mode, NULL terminated.
** Besides the C++98 keywords the table keeps the historical compiler
** extensions (asm, far, near, pascal, fortran) and treats the braces
** as keywords so that they are highlighted too.
*/
static char *cppkeys[]={
    "asm","auto","bool","break","case","catch","char","class",
    "const","const_cast","continue","default","delete","do","double",
    "dynamic_cast","else","enum","explicit","export","extern","false","far",
    "float","for","fortran","friend","goto","if","inline","int","long",
    "mutable","namespace","near","new","operator","pascal","private",
    "protected","public","register","reinterpret_cast","return","short",
    "signed","sizeof","static","static_cast","struct","switch","template",
    "this","throw","true","try","typedef","typeid","typename","union",
    "unsigned","using","virtual","void","volatile","wchar_t","while",
    "{","}",NULL};

/*
** Words highlighted as keywords in C mode, NULL terminated. The braces
** are listed so that they are highlighted like keywords.
*/
static char *ckeys[]={
    "asm",      "auto",     "break",    "case",     "char",
    "const",    "continue", "default",  "do",       "double",
    "else",     "entry",    "enum",     "extern",   "far",
    "float",    "for",      "fortran",  "goto",     "if",
    "int",      "long",     "near",     "pascal",   "register",
    "return",   "short",    "signed",   "sizeof",   "static",
    "struct",   "switch",   "typedef",  "union",    "unsigned",
    "void",     "volatile", "while",    "{",        "}",
    NULL
};

/*
** Words highlighted as keywords in Java mode, NULL terminated.
** The literals true, false and null are listed with the keywords, and
** byvalue and threadsafe are kept from the original table.
*/
static char *jkeys[]={
    "abstract","boolean","break","byte","byvalue","case","catch","char",
    "class","const","continue","default","do","double","else","extends",
    "false","final","finally","float","for","goto","if","implements",
    "import","int","instanceof","interface","long","native","new","null",
    "package","private","protected","public","return","short","static",
    "super","switch","synchronized","this","threadsafe","throw","throws",
    "transient","true","try","void","volatile","while",NULL};

/* Built-in operator list, NULL terminated. */
static char *operatordef[]={
    "+",  "-",  "*",  "/",  "%",  "&",  "|",  "^",  "[",  "]",
    "&&", "||", "=",  "==", "+=", "-=", "<=", ">=", "!=", "/=",
    "%=", "&=", "|=", "^=", "~",  "!",  "?",  ":",  "<",  ">",
    "<<", ">>", "(",  ")",  "::",
    NULL
};

/* File name extensions (leading dot included) taken to mean Java source. */
static char *javaext[]={
    ".java", ".Java", ".JAVA",
    ".jav",  ".JAV",  ".Jav",
    ".j",    ".J",
    ".j++",  ".J++",
    ".jpp",  ".JPP",  ".Jpp",
    NULL
};

/* File name extensions (leading dot included) taken to mean C source. */
static char *cext[]={
    ".c",
    ".h",
    NULL
};

/*
** File name extensions taken to mean C++ source. Every entry carries
** its leading dot, because the table is compared against whole
** extensions such as ".cpp".
*/
static char *cppext[]={".C",".H",".cc",".CC",".hh",".HH",".cpp",".CPP",
".Cpp",".hpp",".HPP",".Hpp",".c++",".C++",".h++",".H++",".cxx",".CXX",
".Cxx",".hxx",".HXX",".Hxx",NULL};

/*
** printf template for the style sheet written in HTML 4.0 mode.
** It takes ten string arguments, in this order:
**   extra .rmk declarations, .rmk color, .chr color, .int color,
**   .flt color, .str color, extra .key declarations, .key color,
**   .pre color, .ops color
*/
static char *cssheader=
    "<STYLE TYPE=\"text/css\">\n"
    "\t.rmk {\n" "%s" "\t\tcolor: %s\n" "\t}\n"
    "\t.chr{\n"       "\t\tcolor: %s\n" "\t}\n"
    "\t.int{\n"       "\t\tcolor: %s\n" "\t}\n"
    "\t.flt{\n"       "\t\tcolor: %s\n" "\t}\n"
    "\t.str{\n"       "\t\tcolor: %s\n" "\t}\n"
    "\t.key{\n"  "%s" "\t\tcolor: %s\n" "\t}\n"
    "\t.pre{\n"       "\t\tcolor: %s\n" "\t}\n"
    "\t.ops{\n"       "\t\tcolor: %s\n" "\t}\n"
    "</STYLE>\n";

/*
** match: report whether the string s occurs in the NULL terminated
** string table m. The comparison is exact and case sensitive.
** Returns 1 when s is found and 0 otherwise.
*/
static int match(char *s, char **m){
    char **entry=m;

    while(*entry!=NULL){
        if(strcmp(*entry,s)==0)
            return 1;
        entry++;
    }
    return 0;
}

/*
** getext: copy the extension of the file name s into e.
**
**   s    file name, possibly with leading directories
**   e    receives the extension including its leading '.', e.g. ".cpp"
**   len  number of bytes available at e
**
** e is set to the empty string when s has no '.', when the last '.'
** belongs to a directory name rather than to the file name, or when the
** extension does not fit into len-1 characters (a cut-off extension
** could be mistaken for a different one). Nothing is written when
** len is not positive.
*/
static void getext(char *s, char *e, int len){
    char *dot;
    size_t n;

    if(e==NULL || len<=0)
        return;
    e[0]='\0';
    if(s==NULL)
        return;

    dot=strrchr(s,'.');
    if(dot==NULL)
        return;
    /* a dot followed by a path separator is part of a directory name */
    if(strchr(dot,'/')!=NULL || strchr(dot,'\\')!=NULL)
        return;

    n=strlen(dot);
    if(n>=(size_t)len)
        return;
    memcpy(e,dot,n+1); /* n+1 takes the terminator along */
}

/*
** File-scope working storage. The color buffers receive their defaults
** in main() before the command line is parsed.
*/
static char tcolor[100]; /* default text color is black */
static char rcolor[100]; /* default remark color is gray */
static char ccolor[100]; /* default char color is red */
static char icolor[100]; /* default int color is red */
static char fcolor[100]; /* default float color is maroon */
static char scolor[100]; /* default string color is green */
static char kcolor[100]; /* default keyword color is blue */
static char pcolor[100]; /* default preprocessor color is purple */
static char ocolor[100]; /* default operator color is navy blue */
static char bcolor[100]; /* default background color is white */

/* option values, file names and scratch buffers */
static char doctitle[256],author[256],keyfile[256],operatorfile[256];
static char ext[256],iext[256],tabw[256],opts[256],homeurl[256],today[256];
static char token[1000],infname[256],outfname[256],mailto[256];

/* font list used for remarks */
static char remarkf[100];

/*
** Extra CSS declarations for the .rmk and .key rules in HTML 4.0 mode.
** v4remarkf has to hold "\t\tfont-family: " + remarkf + ";\n" +
** "\t\tfont-style: italic;\n", i.e. 39 fixed characters on top of the
** font list, so it must be larger than remarkf.
*/
static char v4remarkf[256];
static char v4keystyle[100];

/* tag templates; the HTML 4.0 path in main() redirects these */
static char *fontct="<FONT COLOR=%s>";
static char *fontcft="<FONT COLOR=%s FACE=\"%s\">";
static char *_fontt="</FONT>";
static char *boldt="<B>";
static char *_boldt="</B>";
static char *italict="<I>";
static char *_italict="</I>";
static char *v4fonttag="<SPAN CLASS=\"%s\">";
static char *v4_fonttag="</SPAN>";

int main(int args, char *arg[]){
   int n,t,t0,rv,nfiles;
   FILE *in, *out;
   char **keywords, **operators;
   char *argfmt="34A:Bb:Cc:e:Ff:H:Ii:jK:k:M:mO:o:p:R:r:s:T:t:w:x?";
   int moddate=0, html3=1, html4=0, bold=1, italic=1, fonts=1,tabwidth=4;
   int start=1;
   time_t now;
       
   time(&now);
   strftime(today,256,"%X %x",localtime(&now));
   
   keywords=ckeys;
   operators=operatordef;
   in=stdin;
   out=stdout;
   
   strcpy(infname,""); /* set up a bunch of default values */
   strcpy(outfname,"");
   strcpy(doctitle,"");
   strcpy(author,"");
   strcpy(homeurl,"");
   strcpy(mailto,"");
   strcpy(tabw,"");
   strcpy(ext,".html");
   strcpy(keyfile,".keywords");
   strcpy(operatorfile,".operators");
   
   strcpy(tcolor,"Black");
   strcpy(rcolor,"Gray");
   strcpy(ccolor,"Red");
   strcpy(icolor,"Red");
   strcpy(fcolor,"Maroon");
   strcpy(scolor,"Green");
   strcpy(kcolor,"Blue");
   strcpy(pcolor,"Purple");
   strcpy(ocolor,"Navy");
   strcpy(bcolor,"White");
   
   remarkf[0]='\0';
   
   v4remarkf[0]=v4keystyle[0]='\0';
   
   /* parse the command line arguments and adjust assorted variables accordingly */
       rv=parseargs(args,arg,argfmt,opts,author,bcolor,ccolor,ext,fcolor,
           homeurl,icolor,keyfile,kcolor,mailto,operatorfile,ocolor,pcolor,
           remarkf,rcolor,scolor,doctitle,tcolor,tabw);
   
   if(rv<0 || strchr(opts,'h')){
       if(badusage(rv) || strchr(opts,'h')){
           fprintf(stderr,"\n\tturner, version 0.9.3, (C) 1998, 1998, 2000, "
               "2004 Jeffrey S. Dutky\n");
           fprintf(stderr,"\n\tGenerate syntax hilighted source code in "
               "HTML format from C, C++ \n\tor Java source code.\n");
           fprintf(stderr,"\n\tUsage:\n");
           fprintf(stderr,"\n\t%s\n",parseusage(arg[0],75,argfmt,"author",
               "back-color","char-color","extension","float-color",
               "home-URL","int-color","keyword-file","keyword-color",
               "e-mail","operator-file","operator-color","preprocessor-color",
               "remark-font","remark-color","string-color","title",
               "text-color","tab-width"));
           fprintf(stderr,"\n\tfurther arguments will be taken as file names ");
           fprintf(stderr,"to process.\n");
           fprintf(stderr,"\n\t\t-3  generate version 3.0 HTML\n");
           fprintf(stderr,"\n\t\t-4  generate version 4.0 HTML\n");
           fprintf(stderr,"\n\t\t-B  don't bold keywords\n");
           fprintf(stderr,"\n\t\t-C  create default configuration files\n");
           fprintf(stderr,"\n\t\t-F  don't specifiy alternate fonts for "
               "remarks\n");
           fprintf(stderr,"\n\t\t-I  don't italicize remarks\n");
           fprintf(stderr,"\n\t\t-j  use Java keywords\n");
           fprintf(stderr,"\n\t\t-m  add a 'Last Modified' line\n");
           fprintf(stderr,"\n\t\t-x  use C++ keywords\n");
           fprintf(stderr,"\n\tThis software is licensed under the terms of "
               "the GNU General \n\tPublic License (also called the GPL), "
               "a copy of which must be \n\tincluded with any distribution "
               "of this software. You may also \n\tfind a copy of the GPL "
               "at the Free Software Foundation's web \n\tsite at "
               "http://www.fsf.org or http://www.gnu.org\n");
           fprintf(stderr,"\n\tThis software is provided \"AS IS\" and "
               "without any express or \n\timplied warranties, include, "
               "without limitation, the implied \n\twarranties of "
               "merchantability and fitness for a paricular "
               "\n\tpurpose.\n");
           return 0;
       }else fprintf(stderr,"Argument Parsing Error #%d: %s\n",rv,parseerror(rv));
       return -1;
   }
   
   if(strchr(opts,'m'))  /* check for various command line options */
       moddate=1;
   if(strchr(opts,'B'))
       bold=0;
   if(strchr(opts,'I'))
       italic=0;
   if(strchr(opts,'F'))
       fonts=0;
   if(strchr(opts,'4'))
       html3=0, html4=1;
   if(strchr(opts,'j'))
       keywords=jkeys;
   if(strchr(opts,'x'))
       keywords=cppkeys;
   if(strlen(tabw))
       tabwidth=atoi(tabw);
   
   if(remarkf[0]=='\0')
       strcpy(remarkf, "'New Century Schoolbook',Bookman,Palatino,Times");
   else fonts=1;
   
   if(rv<args-1){
       nfiles=args-rv-1;
       strcpy(infname,arg[rv+1]); /* copy the input filename */
       strcpy(doctitle,infname);
       strcpy(outfname,infname); /* create the output file name */
       strcat(outfname,ext);
       getext(infname,iext,256); /* figure out what kind of source code this is */
       if(match(iext,cext)) /* select the correct set of keywords for this source */
           keywords=ckeys;
       if(match(iext,javaext))
           keywords=jkeys;
       if(match(iext,cppext))
           keywords=cppkeys;
       in=fopen(infname,"r"); /* open the input file */
       if(in==NULL)
           return -2;
       out=fopen(outfname,"w"); /* open the output file */
       if(out==NULL){
           fclose(in);
           return -3;
       }
   }else nfiles=1;
   
   for(n=0;n<nfiles;n++){
       if(nfiles>1 && n>0){
           strcpy(infname,arg[rv+1+n]); /* copy the input filename */
           strcpy(doctitle,infname);
           strcpy(outfname,infname); /* create the output file name */
           strcat(outfname,ext);
           getext(infname,iext,256); /* figure out what kind of source code this is */
           if(match(iext,cext)) /* select the correct set of keywords for this source */
               keywords=ckeys;
           if(match(iext,javaext))
               keywords=jkeys;
           if(match(iext,cppext))
               keywords=cppkeys;
           in=fopen(infname,"r"); /* open the input file */
           if(in==NULL)
               return -2;
           out=fopen(outfname,"w"); /* open the output file */
           if(out==NULL){
               fclose(in);
               return -3;
           }
       }
       if(html4){ /* NOTICE!!! First HTML 4.0 compatability part! NOTICE!!! */
           fprintf(out,"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n");
           fontct=v4fonttag; /* setup CSS format tags */
           fontcft=v4fonttag;
           _fontt=v4_fonttag;
           if(bold)
               strcpy(v4keystyle, "\t\tfont-weight: bold;\n");
           bold=0;        /* all type styles are taken care of with CSS */
           boldt="";      /* so I'm turning off bolding and italicizing */
           _boldt="";     /* as well as nulling out the bold and italic */
           if(fonts){
               strcat(v4remarkf, "\t\tfont-family: ");
               strcat(v4remarkf, remarkf);
               strcat(v4remarkf, ";\n");
           }
           fonts=0;            /* I'm turning off font faces: that's done with CSS also */
           if(italic)
               strcat(v4remarkf, "\t\tfont-style: italic;\n");
           italic=0;           /* tags. */
           italict="";
           _italict="";
       }
       fprintf(out,"<HTML>\n<HEAD>\n"); /* dump a header to the output */
       if(strlen(doctitle)>0)
           fprintf(out,"<TITLE>%s</TITLE>\n",doctitle);
       if(strlen(author)>0)
           fprintf(out,"<META NAME=\"Author\" CONTENT=\"%s\">\n",author);
       fprintf(out,"<META NAME=\"Generator\" CONTENT=\"turner ver.0.9.3\">\n");
       if(html4){ /* NOTICE!!! Second HTML 4.0 compatability part! NOTICE!!! */
           /* write the CSS header block */
           fprintf(out, cssheader, v4remarkf, rcolor, ccolor, icolor,
               fcolor, scolor, v4keystyle, kcolor, pcolor, ocolor);
           strcpy(rcolor,"rmk"); /* setup CSS names */
           strcpy(ccolor,"chr");
           strcpy(icolor,"int");
           strcpy(fcolor,"flt");
           strcpy(scolor,"str");
           strcpy(kcolor,"key");
           strcpy(pcolor,"pre");
           strcpy(ocolor,"ops");
       }
       fprintf(out,"</HEAD>\n<BODY BGCOLOR=%s>\n",bcolor);
       if(strlen(homeurl))
           fprintf(out,"<P ALIGN=Center><A HREF=\"%s\">HOME</A></P>\n",homeurl);
       if(strlen(doctitle)>0)
           fprintf(out,"<H1>%s</H1>\n",doctitle);
           
       fprintf(out,"<PRE>\n"); /* start writing the actual content */
       t0=GENERIC;
       while((t=gettoken(in,token,1000,tabwidth,start))!=ENDOFFILE){ /* process the file */
           start=0;
           if(t==KEYWORD && !match(token, keywords)) /* is it a real keyword */
               t=GENERIC; /* or just a normal identifier? */
           if(t0!=t){ /* if the token type has changed */
           
               switch(t0){ /* complete the last token */
               case KEYWORD:
                   if(bold)
                       fprintf(out,_boldt);
                   fprintf(out,_fontt);
                   break;
               case STRING:
               case INTEGER:
               case CHARACTER:
               case PREPROCESSOR:
               case FLOAT:
                   fprintf(out,_fontt);
                   break;
               case COMMENT:
                   if(italic)
                       fprintf(out,_italict);
                   fprintf(out,_fontt);
                   break;
               default:
                   break;
               }
               
               switch(t){ /* and write a new start tag for this token */
               case KEYWORD:
                   fprintf(out,fontct,kcolor);
                   if(bold)
                       fprintf(out,boldt);
                   break;
               case STRING:
                   fprintf(out,fontct,scolor);
                   break;
               case INTEGER:
                   fprintf(out,fontct,icolor);
                   break;
               case CHARACTER:
                   fprintf(out,fontct,ccolor);
                   break;
               case PREPROCESSOR:
                   fprintf(out,fontct,pcolor);
                   break;
               case COMMENT:
                   if(fonts)
                       fprintf(out,fontcft,rcolor,remarkf);
                   else
                       fprintf(out,fontct,rcolor);
                   if(italic)
                       fprintf(out,italict);
                   break;
               case FLOAT:
                   fprintf(out,fontct,fcolor);
                   break;
               default:
                   break;
               }
           }
           fprintf(out,"%s",token); /* write the token text itself */
           fflush(out);
           t0=t;
       }
       fprintf(out,"</PRE>\n"); /* dump a tailer to the output */
       if(strlen(homeurl))
           fprintf(out,"<P ALIGN=Center><A HREF=\"%s\">HOME</A></P>\n",homeurl);
       if(moddate)
           fprintf(out,"<P><I>Last Modified: %s<I></P>\n",today);
       if(strlen(mailto)){
           if(strlen(author))
               fprintf(out,"<ADDRESS>Author: <A HREF=\"mailto:%s\">%s</A></ADDRESS>\n",mailto,author);
           else
               fprintf(out,"<ADDRESS><A HREF=\"mailto:%s\">Contact the Author.</A></ADDRESS>\n",mailto);
       }
       fprintf(out,"</BODY>\n</HTML>\n");
       
       if(in!=stdin)
           fclose(in);
       if(out!=stdout)
           fclose(out);
   }
   return 0;
}