aboutsummaryrefslogtreecommitdiff
path: root/tests/poppler/utils/HtmlOutputDev.cc
diff options
context:
space:
mode:
author Alon Zakai <alonzakai@gmail.com> 2011-03-16 17:19:57 -0700
committer Alon Zakai <alonzakai@gmail.com> 2011-03-16 17:19:57 -0700
commit cad92b918bed03be4b822c7579b0f5d1affb9401 (patch)
tree 73117aee864581675bcb120caff847dc8f89fa0d /tests/poppler/utils/HtmlOutputDev.cc
parent ae5aa844848c071d3ee98130b7f658b423db054c (diff)
poppler test
Diffstat (limited to 'tests/poppler/utils/HtmlOutputDev.cc')
-rw-r--r-- tests/poppler/utils/HtmlOutputDev.cc 1683
1 files changed, 1683 insertions, 0 deletions
diff --git a/tests/poppler/utils/HtmlOutputDev.cc b/tests/poppler/utils/HtmlOutputDev.cc
new file mode 100644
index 00000000..ede5700d
--- /dev/null
+++ b/tests/poppler/utils/HtmlOutputDev.cc
@@ -0,0 +1,1683 @@
+//========================================================================
+//
+// HtmlOutputDev.cc
+//
+// Copyright 1997-2002 Glyph & Cog, LLC
+//
+// Changed 1999-2000 by G.Ovtcharov
+//
+// Changed 2002 by Mikhail Kruk
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2005-2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
+// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
+// Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
+// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
+// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
+// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org>
+// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
+// Copyright (C) 2010 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifdef __GNUC__
+#pragma implementation
+#endif
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <ctype.h>
+#include <math.h>
+#include "goo/GooString.h"
+#include "goo/GooList.h"
+#include "UnicodeMap.h"
+#include "goo/gmem.h"
+#include "Error.h"
+#include "GfxState.h"
+#include "Page.h"
+#include "PNGWriter.h"
+#ifdef ENABLE_LIBJPEG
+#include "DCTStream.h"
+#endif
+#include "GlobalParams.h"
+#include "HtmlOutputDev.h"
+#include "HtmlFonts.h"
+
+int HtmlPage::pgNum=0; // definition of the static member HtmlPage::pgNum
+int HtmlOutputDev::imgNum=1; // static image counter; set back to 1 by HtmlPage::dump() and by the HtmlOutputDev constructor
+GooList *HtmlOutputDev::imgList=new GooList(); // image file names (GooString) of the current page; emptied by HtmlPage::dump()
+
+extern GBool complexMode; // option flags, defined in another translation unit; complexMode: coalesce() only merges strings with equal fonts
+extern GBool singleHtml; // dumpComplex() appends every page to one "-html.html" file
+extern GBool ignore; // when set, dumpComplex() does not emit the background <IMG>
+extern GBool printCommands;
+extern GBool printHtml;
+extern GBool noframes; // output without frames: pages go to the single output file
+extern GBool stout; // with noframes: write to stdout instead of a file
+extern GBool xml; // produce XML (dumpAsXML) instead of HTML
+extern GBool showHidden;
+extern GBool noMerge; // when set, coalesce() never merges strings across a line break
+// Returns a newly allocated GooString holding the part of str that follows
+// its last SLASH character.  When str contains no SLASH the result is a
+// copy of the whole string.  The caller owns the returned object.
+static GooString* basename(GooString* str){
+  char *chars = str->getCString();
+  int total = str->getLength();
+
+  // scan backwards for the last path separator
+  int idx = total - 1;
+  while (idx >= 0 && chars[idx] != SLASH)
+    --idx;
+
+  if (idx < 0)
+    return new GooString(str);
+  return new GooString(chars + idx + 1, total - idx - 1);
+}
+
+#if 0 // disabled helper; its only visible use is in the disabled logging section of startPage()
+static GooString* Dirname(GooString* str){ // returns a new GooString with everything up to and including the last SLASH
+
+ char *p=str->getCString();
+ int len=str->getLength();
+ for (int i=len-1;i>=0;i--) // search backwards for the last SLASH
+ if (*(p+i)==SLASH)
+ return new GooString(p,i+1);
+ return new GooString(); // no SLASH found: empty string
+}
+#endif
+
+//------------------------------------------------------------------------
+// HtmlString
+//------------------------------------------------------------------------
+
+// Creates an empty string anchored at the current text position of "state"
+// (converted with state->transform()).  yMin/yMax are derived from the
+// font's ascent and descent scaled by fontSize; the font itself is
+// registered in "fonts" and its index is remembered in fontpos.
+// xMin/xMax are not set here -- they become valid with the first addChar().
+HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) {
+  double devX, devY;
+  state->transform(state->getCurX(), state->getCurY(), &devX, &devY);
+
+  GfxFont *curFont = state->getFont();
+  if (curFont) {
+    // clamp implausible font metrics before using them
+    double ascent = curFont->getAscent();
+    double descent = curFont->getDescent();
+    if (ascent > 1.05)
+      ascent = 1.05;
+    if (descent < -0.4)
+      descent = -0.4;
+    yMin = devY - ascent * fontSize;
+    yMax = devY - descent * fontSize;
+
+    // register the font (name, size, fill colour) with the accumulator
+    GfxRGB fill;
+    state->getFillRGB(&fill);
+    GooString *fontName = state->getFont()->getName();
+    if (!fontName)
+      fontName = HtmlFont::getDefaultFont();
+    HtmlFont htmlFont = HtmlFont(fontName, static_cast<int>(fontSize-1), fill);
+    fontpos = fonts->AddFont(htmlFont);
+  } else {
+    // the PDF file draws text without a current font, which should
+    // never happen; fall back to fixed proportions
+    yMin = devY - 0.95 * fontSize;
+    yMax = devY + 0.35 * fontSize;
+    fontpos = 0;
+  }
+
+  // sanity check for a case that shouldn't happen: a zero-height string
+  // would lead to a division by zero later
+  if (yMin == yMax) {
+    yMin = devY;
+    yMax = devY + 1;
+  }
+
+  // no characters, no link and no list neighbours yet
+  len = 0;
+  size = 0;
+  text = NULL;
+  xRight = NULL;
+  htext = new GooString();
+  link = NULL;
+  yxNext = NULL;
+  xyNext = NULL;
+  col = 0;
+  dir = textDirUnknown;
+}
+
+
+// Frees the two character arrays (allocated with grealloc) and the HTML
+// text object owned by this string.
+HtmlString::~HtmlString() {
+  gfree(xRight);
+  gfree(text);
+  delete htext;
+}
+
+// Appends the character u, which starts at x and is dx wide, to the string.
+// text[] and xRight[] are parallel arrays that grow in steps of 16 entries.
+// The parameters state, y and dy are not used.
+void HtmlString::addChar(GfxState *state, double x, double y,
+                         double dx, double dy, Unicode u) {
+  // direction detection is switched off: every string is left-to-right
+  if (dir == textDirUnknown)
+    dir = textDirLeftRight;
+
+  if (len == size) {
+    size += 16;
+    text = (Unicode *)grealloc(text, size * sizeof(Unicode));
+    xRight = (double *)grealloc(xRight, size * sizeof(double));
+  }
+
+  // the first character defines the left edge of the string
+  if (len == 0)
+    xMin = x;
+
+  const double rightEdge = x + dx;
+  text[len] = u;
+  xRight[len] = rightEdge;
+  xMax = rightEdge;
+  ++len;
+}
+
+// Reverses text[] in place when the string is right-to-left and has more
+// than one character.  xRight[] is left as it is.
+void HtmlString::endString()
+{
+  if (dir != textDirRightLeft || len <= 1)
+    return;
+
+  // swap characters from both ends towards the middle
+  int lo = 0;
+  int hi = len - 1;
+  while (lo < hi) {
+    Unicode saved = text[lo];
+    text[lo] = text[hi];
+    text[hi] = saved;
+    ++lo;
+    --hi;
+  }
+}
+
+//------------------------------------------------------------------------
+// HtmlPage
+//------------------------------------------------------------------------
+
+// Creates an empty page.  rawOrder is stored for endString()/coalesce();
+// imgExtVal is copied into imgExt (used by dumpComplex() for the background
+// image name).  The page owns its font accumulator and link list.
+HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) {
+  // no strings yet
+  curStr = NULL;
+  yxStrings = NULL;
+  xyStrings = NULL;
+  yxCur1 = NULL;
+  yxCur2 = NULL;
+
+  // page geometry and naming
+  pageWidth = 0;
+  pageHeight = 0;
+  DocName = NULL;
+  firstPage = -1;
+  this->rawOrder = rawOrder;
+
+  // helper objects owned by the page
+  fontsPageMarker = 0;
+  fonts = new HtmlFontAccu();
+  links = new HtmlLinks();
+  imgExt = new GooString(imgExtVal);
+}
+
+// Destroys all strings (via clear()) and the objects owned by the page.
+HtmlPage::~HtmlPage() {
+  clear();
+  // deleting a null pointer is a no-op, so no explicit checks are needed
+  delete DocName;
+  delete fonts;
+  delete links;
+  delete imgExt;
+}
+
+// Recomputes fontSize from the graphics state.  For Type 3 fonts the size
+// is additionally corrected with a heuristic, see below.
+void HtmlPage::updateFont(GfxState *state) {
+  // start from the transformed font size
+  fontSize = state->getTransformedFontSize();
+
+  GfxFont *curFont = state->getFont();
+  if (!curFont || curFont->getType() != fontType3)
+    return;
+
+  // This is a hack which makes it possible to deal with some Type 3
+  // fonts.  It is impossible to know the base coordinate system used in
+  // the font without actually rendering it, so guess from the width of
+  // the character 'm' (this breaks if the font is a subset that doesn't
+  // contain 'm').
+  Gfx8BitFont *type3 = (Gfx8BitFont *)curFont;
+  int mCode = 0;
+  while (mCode < 256) {
+    char *glyph = type3->getCharName(mCode);
+    if (glyph && glyph[0] == 'm' && glyph[1] == '\0')
+      break;
+    ++mCode;
+  }
+  if (mCode < 256) {
+    double mWidth = type3->getWidth(mCode);
+    if (mWidth != 0) {
+      // 600 is a generic average 'm' width -- yes, this is a hack
+      fontSize *= mWidth / 0.6;
+    }
+  }
+
+  // account for the scaling contained in the font matrix
+  double *matrix = curFont->getFontMatrix();
+  if (matrix[0] != 0)
+    fontSize *= fabs(matrix[3] / matrix[0]);
+}
+
+// Starts a new string at the current text position, using the font size
+// computed by updateFont().  The parameter s is not used.
+void HtmlPage::beginString(GfxState *state, GooString *s) {
+  HtmlString *fresh = new HtmlString(state, fontSize, fonts);
+  curStr = fresh;
+}
+
+
+// Rebuilds htext for every string of the yxStrings list from its Unicode
+// text (HtmlFont::simple) and attaches the link whose area contains the
+// string, if links->inLink() reports one.
+void HtmlPage::conv(){
+  int linkIdx = 0;
+
+  HtmlString *cur = yxStrings;
+  while (cur) {
+    HtmlFont *font = fonts->Get(cur->fontpos);
+
+    // replace the previous HTML text; deleting NULL is harmless
+    delete cur->htext;
+    cur->htext = HtmlFont::simple(font, cur->text, cur->len);
+
+    if (links->inLink(cur->xMin, cur->yMin, cur->xMax, cur->yMax, linkIdx))
+      cur->link = links->getLink(linkIdx);
+
+    cur = cur->yxNext;
+  }
+}
+
+
+// Adds the uLen Unicode values in u, drawn at (x, y) with advance (dx, dy),
+// to the current string.  If the new character starts too far away from
+// the end of the current string, that string is finished and a new one is
+// begun first.  The parameters ox and oy are not used.
+void HtmlPage::addChar(GfxState *state, double x, double y,
+                       double dx, double dy,
+                       double ox, double oy, Unicode *u, int uLen) {
+  double devX, devY;
+  state->transform(x, y, &devX, &devY);
+
+  // check that the new character is not too far away from the current
+  // string before adding (the direction check is disabled)
+  int prevLen = curStr->len;
+  if (prevLen > 0) {
+    double gap = fabs(devX - curStr->xRight[prevLen-1]);
+    double lineHeight = curStr->yMax - curStr->yMin;
+    if (gap > 0.1 * lineHeight) {
+      endString();
+      beginString(state, NULL);
+    }
+  }
+
+  // remove the character spacing from the advance
+  double spaceDx, spaceDy;
+  state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
+                            0, &spaceDx, &spaceDy);
+  dx -= spaceDx;
+  dy -= spaceDy;
+
+  // split the advance evenly between the Unicode values
+  double stepX, stepY;
+  state->transformDelta(dx, dy, &stepX, &stepY);
+  if (uLen != 0) {
+    stepX /= uLen;
+    stepY /= uLen;
+  }
+  for (int k = 0; k < uLen; ++k)
+    curStr->addChar(state, devX + k*stepX, devY + k*stepY, stepX, stepY, u[k]);
+}
+/* Finishes the string currently being built (curStr) and links it into the
+ * yxStrings list through yxNext.  Empty strings are deleted instead of
+ * being inserted.  curStr is NULL when this function returns.
+ *
+ * yxCur1 remembers the string inserted last and yxCur2 its successor, so
+ * that a string which belongs right after the previous one can be placed
+ * without scanning the whole list.
+ */
+void HtmlPage::endString() {
+ HtmlString *p1, *p2; // p1: node to insert after (NULL = new head), p2: node to insert before
+ double h, y1, y2;
+
+ // throw away zero-length strings -- they don't have valid xMin/xMax
+ // values, and they're useless anyway
+ if (curStr->len == 0) {
+ delete curStr;
+ curStr = NULL;
+ return;
+ }
+
+ curStr->endString(); // reverses the text of right-to-left strings
+
+#if 0 //~tmp
+ if (curStr->yMax - curStr->yMin > 20) {
+ delete curStr;
+ curStr = NULL;
+ return;
+ }
+#endif
+
+ // insert string in y-major list
+ h = curStr->yMax - curStr->yMin;
+ y1 = curStr->yMin + 0.5 * h; // reference height at 50% of the string's vertical extent
+ y2 = curStr->yMin + 0.8 * h; // reference height at 80% of the string's vertical extent
+ if (rawOrder) { // raw order: always insert directly after the previously inserted string
+ p1 = yxCur1;
+ p2 = NULL;
+ } else if ((!yxCur1 ||
+ (y1 >= yxCur1->yMin &&
+ (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
+ (!yxCur2 ||
+ (y1 < yxCur2->yMin ||
+ (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) { // fast path: the string fits between yxCur1 and yxCur2
+ p1 = yxCur1;
+ p2 = yxCur2;
+ } else { // slow path: scan from the head for the first string that must follow curStr
+ for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
+ if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
+ break;
+ }
+ yxCur2 = p2;
+ }
+ yxCur1 = curStr;
+ if (p1)
+ p1->yxNext = curStr;
+ else
+ yxStrings = curStr; // no predecessor: curStr becomes the new head of the list
+ curStr->yxNext = p2;
+ curStr = NULL;
+}
+
+// Returns a pointer to the last occurrence of ss in s, or NULL when ss does
+// not occur in s.  Occurrences may overlap.  For an empty ss the result
+// points at the terminating NUL of s.
+static const char *strrstr( const char *s, const char *ss )
+{
+  const char *last = NULL;
+  const char *hit = strstr( s, ss );
+  while( hit != NULL ){
+    last = hit;
+    // A hit on the terminating NUL is only possible for an empty ss;
+    // searching on from hit+1 would then read past the end of s.
+    if( *hit == '\0' )
+      break;
+    hit = strstr( hit+1, ss );
+  }
+  return last;
+}
+/* Appends the closing tags </a>, </i> and </b> to htext for every flag that
+ * is set on entry.  When more than one tag has to be closed, the position
+ * of the last "<a ", "<i>" and "<b>" in htext decides the order, so that a
+ * tag opened later is closed earlier.  finish_a and finish_italic may be
+ * reset to false by this function; finish_bold is only read.
+ */
+static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic, GBool &finish_bold )
+{
+ const char *last_italic = finish_italic && ( finish_bold || finish_a ) ? strrstr( htext->getCString(), "<i>" ) : NULL; // only looked up when the order matters
+ const char *last_bold = finish_bold && ( finish_italic || finish_a ) ? strrstr( htext->getCString(), "<b>" ) : NULL;
+ const char *last_a = finish_a && ( finish_italic || finish_bold ) ? strrstr( htext->getCString(), "<a " ) : NULL;
+ if( finish_a && ( finish_italic || finish_bold ) && last_a > ( last_italic > last_bold ? last_italic : last_bold ) ){ // the link was opened last: close it first
+ htext->append("</a>", 4);
+ finish_a = false;
+ }
+ if( finish_italic && finish_bold && last_italic > last_bold ){ // italic was opened after bold: close it before </b>
+ htext->append("</i>", 4);
+ finish_italic = false;
+ }
+ if( finish_bold )
+ htext->append("</b>", 4);
+ if( finish_italic )
+ htext->append("</i>", 4);
+ if( finish_a )
+ htext->append("</a>");
+}
+/* Merges neighbouring strings of the yxStrings list into longer runs and
+ * decorates their htext with <b>, <i>, link and <br> markup.
+ *
+ * Steps, in order:
+ *  1. unless complexMode is set, delete strings that duplicate an earlier
+ *     string at almost the same position (fake boldface, drop shadows);
+ *  2. walk the list pairwise: when str2 continues str1 on the same line,
+ *     or starts the following line at the same left edge, append str2 to
+ *     str1 and delete str2; otherwise close the tags that are still open
+ *     on str1 and continue with str2;
+ *  3. close the tags that are still open on the last string.
+ */
+void HtmlPage::coalesce() {
+ HtmlString *str1, *str2;
+ HtmlFont *hfont1, *hfont2;
+ double space, horSpace, vertSpace, vertOverlap;
+ GBool addSpace, addLineBreak;
+ int n, i;
+ double curX, curY; // xMin/yMin of the first string of the current run, restored when the run ends
+
+#if 0 //~ for debugging
+ for (str1 = yxStrings; str1; str1 = str1->yxNext) {
+ printf("x=%f..%f y=%f..%f size=%2d '",
+ str1->xMin, str1->xMax, str1->yMin, str1->yMax,
+ (int)(str1->yMax - str1->yMin));
+ for (i = 0; i < str1->len; ++i) {
+ fputc(str1->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ printf("\n------------------------------------------------------------\n\n");
+#endif
+ str1 = yxStrings;
+
+ if( !str1 ) return; // nothing to do for an empty page
+
+ //----- discard duplicated text (fake boldface, drop shadows)
+ if( !complexMode )
+ { /* if not in complex mode get rid of duplicate strings */
+ HtmlString *str3;
+ GBool found;
+ while (str1)
+ {
+ double size = str1->yMax - str1->yMin;
+ double xLimit = str1->xMin + size * 0.2; // candidates must start within 20% of the string height to the right
+ found = gFalse;
+ for (str2 = str1, str3 = str1->yxNext;
+ str3 && str3->xMin < xLimit;
+ str2 = str3, str3 = str2->yxNext) // str2 always is the predecessor of the candidate str3
+ {
+ if (str3->len == str1->len &&
+ !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
+ fabs(str3->yMin - str1->yMin) < size * 0.2 &&
+ fabs(str3->yMax - str1->yMax) < size * 0.2 &&
+ fabs(str3->xMax - str1->xMax) < size * 0.2)
+ {
+ found = gTrue;
+ //printf("found duplicate!\n");
+ break;
+ }
+ }
+ if (found)
+ { // unlink and delete the duplicate, then test str1 again
+ str2->xyNext = str3->xyNext;
+ str2->yxNext = str3->yxNext;
+ delete str3;
+ }
+ else
+ {
+ str1 = str1->yxNext;
+ }
+ }
+ } /*- !complexMode */
+
+ str1 = yxStrings;
+
+ hfont1 = getFont(str1);
+ if( hfont1->isBold() ) // open the tags of the first string; CloseTags() closes them later
+ str1->htext->insert(0,"<b>",3);
+ if( hfont1->isItalic() )
+ str1->htext->insert(0,"<i>",3);
+ if( str1->getLink() != NULL ) {
+ GooString *ls = str1->getLink()->getLinkStart();
+ str1->htext->insert(0, ls);
+ delete ls;
+ }
+ curX = str1->xMin; curY = str1->yMin;
+
+ while (str1 && (str2 = str1->yxNext)) {
+ hfont2 = getFont(str2);
+ space = str1->yMax - str1->yMin; // height of str1, used as the unit for all distance tests
+ horSpace = str2->xMin - str1->xMax;
+ addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4); // same left edge: str2 may be the next line of str1
+ vertSpace = str2->yMin - str1->yMax;
+
+//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
+
+ if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax)
+ {
+ vertOverlap = str1->yMax - str2->yMin;
+ } else
+ if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax)
+ {
+ vertOverlap = str2->yMax - str1->yMin;
+ } else
+ {
+ vertOverlap = 0;
+ }
+
+ if (
+ (
+ (
+ (
+ (rawOrder && vertOverlap > 0.5 * space)
+ ||
+ (!rawOrder && str2->yMin < str1->yMax)
+ ) &&
+ (horSpace > -0.5 * space && horSpace < space)
+ ) ||
+ (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)
+ ) &&
+ (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not matter
+ str1->dir == str2->dir // text direction the same
+ )
+ {
+// printf("yes\n");
+ n = str1->len + str2->len;
+ if ((addSpace = horSpace > 0.1 * space)) {
+ ++n;
+ }
+ if (addLineBreak) {
+ ++n;
+ }
+
+ str1->size = (n + 15) & ~15; // round the capacity up to a multiple of 16
+ str1->text = (Unicode *)grealloc(str1->text,
+ str1->size * sizeof(Unicode));
+ str1->xRight = (double *)grealloc(str1->xRight,
+ str1->size * sizeof(double));
+ if (addSpace) {
+ str1->text[str1->len] = 0x20;
+ str1->htext->append(xml?" ":"&nbsp;");
+ str1->xRight[str1->len] = str2->xMin;
+ ++str1->len;
+ }
+ if (addLineBreak) {
+ str1->text[str1->len] = '\n';
+ str1->htext->append("<br>");
+ str1->xRight[str1->len] = str2->xMin;
+ ++str1->len;
+ str1->yMin = str2->yMin; // the original yMin of the run is kept in curY
+ str1->yMax = str2->yMax;
+ str1->xMax = str2->xMax;
+ int fontLineSize = hfont1->getLineSize();
+ int curLineSize = (int)(vertSpace + space);
+ if( curLineSize != fontLineSize )
+ {
+ HtmlFont *newfnt = new HtmlFont(*hfont1);
+ newfnt->setLineSize(curLineSize);
+ str1->fontpos = fonts->AddFont(*newfnt);
+ delete newfnt;
+ hfont1 = getFont(str1);
+ // we have to reget hfont2 because its location could have
+ // changed on resize
+ hfont2 = getFont(str2);
+ }
+ }
+ for (i = 0; i < str2->len; ++i) {
+ str1->text[str1->len] = str2->text[i];
+ str1->xRight[str1->len] = str2->xRight[i];
+ ++str1->len;
+ }
+
+ /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
+ HtmlLink *hlink1 = str1->getLink();
+ HtmlLink *hlink2 = str2->getLink();
+ bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
+ GBool finish_a = switch_links && hlink1 != NULL;
+ GBool finish_italic = hfont1->isItalic() && ( !hfont2->isItalic() || finish_a );
+ GBool finish_bold = hfont1->isBold() && ( !hfont2->isBold() || finish_a || finish_italic );
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
+ if( switch_links && hlink2 != NULL ) {
+ GooString *ls = hlink2->getLinkStart();
+ str1->htext->append(ls);
+ delete ls;
+ }
+ if( ( !hfont1->isItalic() || finish_italic ) && hfont2->isItalic() )
+ str1->htext->append("<i>", 3);
+ if( ( !hfont1->isBold() || finish_bold ) && hfont2->isBold() )
+ str1->htext->append("<b>", 3);
+
+
+ str1->htext->append(str2->htext);
+ // str1 now contains href for link of str2 (if it is defined)
+ str1->link = str2->link;
+ hfont1 = hfont2;
+ if (str2->xMax > str1->xMax) {
+ str1->xMax = str2->xMax;
+ }
+ if (str2->yMax > str1->yMax) {
+ str1->yMax = str2->yMax;
+ }
+ str1->yxNext = str2->yxNext; // unlink str2; the loop continues with the same str1
+ delete str2;
+ } else { // keep strings separate
+// printf("no\n");
+ GBool finish_a = str1->getLink() != NULL;
+ GBool finish_bold = hfont1->isBold();
+ GBool finish_italic = hfont1->isItalic();
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
+
+ str1->xMin = curX; str1->yMin = curY; // restore the origin of the finished run
+ str1 = str2;
+ curX = str1->xMin; curY = str1->yMin;
+ hfont1 = hfont2;
+ if( hfont1->isBold() )
+ str1->htext->insert(0,"<b>",3);
+ if( hfont1->isItalic() )
+ str1->htext->insert(0,"<i>",3);
+ if( str1->getLink() != NULL ) {
+ GooString *ls = str1->getLink()->getLinkStart();
+ str1->htext->insert(0, ls);
+ delete ls;
+ }
+ }
+ }
+ str1->xMin = curX; str1->yMin = curY;
+
+ GBool finish_bold = hfont1->isBold(); // close whatever is still open on the last string
+ GBool finish_italic = hfont1->isItalic();
+ GBool finish_a = str1->getLink() != NULL;
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
+
+#if 0 //~ for debugging
+ for (str1 = yxStrings; str1; str1 = str1->yxNext) {
+ printf("x=%3d..%3d y=%3d..%3d size=%2d ",
+ (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
+ (int)(str1->yMax - str1->yMin));
+ printf("'%s'\n", str1->htext->getCString());
+ }
+ printf("\n------------------------------------------------------------\n\n");
+#endif
+
+}
+
+// Writes the page to f as a <page> element: first the font specifications
+// added since fontsPageMarker, then one <text> element per string with its
+// position, size and font index.
+void HtmlPage::dumpAsXML(FILE* f,int page){
+  fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
+  fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth);
+
+  // font specifications belonging to this page
+  int fontIdx = fontsPageMarker;
+  while (fontIdx < fonts->size()) {
+    GooString *spec = fonts->CSStyle(fontIdx);
+    fprintf(f,"\t%s\n",spec->getCString());
+    delete spec;
+    ++fontIdx;
+  }
+
+  // the text itself, in list order
+  for (HtmlString *cur = yxStrings; cur; cur = cur->yxNext) {
+    if (!cur->htext)
+      continue;
+
+    GooString *plain = new GooString(cur->htext);
+    fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(cur->yMin),xoutRound(cur->xMin));
+    fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(cur->xMax-cur->xMin),xoutRound(cur->yMax-cur->yMin));
+    fprintf(f,"font=\"%d\">", cur->fontpos);
+    GooString *styled = fonts->getCSStyle(cur->fontpos, plain);
+    fputs(styled->getCString(),f);
+    delete plain;
+    delete styled;
+    fputs("</text>\n",f);
+  }
+  fputs("</page>\n",f);
+}
+
+/* Writes the page as HTML with absolutely positioned text.
+ *
+ * With frames (noframes not set) the page goes to a file of its own,
+ * "<DocName>-<page>.html", or is appended to "<DocName>-html.html" when
+ * singleHtml is set; that file is opened and closed here and "file" is not
+ * used.  Without frames the page is written to "file", which stays open.
+ */
+void HtmlPage::dumpComplex(FILE *file, int page){
+ FILE* pageFile;
+ GooString* tmp;
+ char* htmlEncoding;
+
+ if( firstPage == -1 ) firstPage = page; // remember the first page dumped; used for the background image number
+
+ if( !noframes )
+ {
+ GooString* pgNum=GooString::fromInt(page);
+ tmp = new GooString(DocName);
+ if (!singleHtml){
+ tmp->append('-')->append(pgNum)->append(".html");
+ pageFile = fopen(tmp->getCString(), "w");
+ } else {
+ tmp->append("-html")->append(".html");
+ pageFile = fopen(tmp->getCString(), "a"); // append: all pages share this file
+ }
+ delete pgNum;
+ if (!pageFile) {
+ error(-1, "Couldn't open html file '%s'", tmp->getCString());
+ delete tmp;
+ return;
+ }
+
+ if (!singleHtml)
+ fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
+ else
+ fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
+
+ delete tmp;
+
+ htmlEncoding = HtmlOutputDev::mapEncodingToHtml
+ (globalParams->getTextEncodingName());
+ if (!singleHtml)
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+ else
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n <br>\n", htmlEncoding);
+ }
+ else
+ {
+ pageFile = file;
+ fprintf(pageFile,"<!-- Page %d -->\n", page);
+ fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
+ }
+
+ fprintf(pageFile,"<DIV style=\"position:relative;width:%d;height:%d;\">\n",
+ pageWidth, pageHeight);
+
+ tmp=basename(DocName); // document name without directory, used in the image file name below
+
+ fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
+ for(int i=fontsPageMarker;i!=fonts->size();i++) { // only the fonts added since fontsPageMarker
+ GooString *fontCSStyle;
+ if (!singleHtml)
+ fontCSStyle = fonts->CSStyle(i);
+ else
+ fontCSStyle = fonts->CSStyle(i,page);
+ fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
+ delete fontCSStyle;
+ }
+
+ fputs("-->\n</STYLE>\n",pageFile);
+
+ if( !noframes )
+ {
+ fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile);
+ }
+
+ if( !ignore )
+ {
+ fprintf(pageFile,
+ "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\">\n",
+ pageWidth, pageHeight, tmp->getCString(),
+ (page-firstPage+1), imgExt->getCString()); // images are numbered from 1, relative to the first dumped page
+ }
+
+ delete tmp;
+
+ GooString *str, *str1 = NULL;
+ for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
+ if (tmp1->htext){
+ str=new GooString(tmp1->htext);
+ fprintf(pageFile,
+ "<DIV style=\"position:absolute;top:%d;left:%d\">",
+ xoutRound(tmp1->yMin),
+ xoutRound(tmp1->xMin));
+ fputs("<nobr>",pageFile);
+ if (!singleHtml)
+ str1=fonts->getCSStyle(tmp1->fontpos, str);
+ else
+ str1=fonts->getCSStyle(tmp1->fontpos, str, page);
+ fputs(str1->getCString(),pageFile);
+ delete str;
+ delete str1;
+ fputs("</nobr></DIV>\n",pageFile);
+ }
+ }
+
+ fputs("</DIV>\n", pageFile);
+
+ if( !noframes )
+ {
+ fputs("</BODY>\n</HTML>\n",pageFile);
+ fclose(pageFile); // the file was opened in this function
+ }
+}
+
+
+// Writes the page to f.  In complex or single-HTML mode the work is done
+// by dumpAsXML() or dumpComplex(); otherwise the page is written as simple
+// HTML: an anchor, the images collected in HtmlOutputDev::imgList, then
+// every string followed by <br>, and a final <hr>.
+void HtmlPage::dump(FILE *f, int pageNum)
+{
+  if (complexMode || singleHtml) {
+    if (xml)
+      dumpAsXML(f, pageNum);
+    else
+      dumpComplex(f, pageNum);
+    return;
+  }
+
+  fprintf(f,"<A name=%d></a>",pageNum);
+
+  // emit and discard the image names collected for this page
+  int pending = HtmlOutputDev::imgList->getLength();
+  while (pending-- > 0) {
+    GooString *imgName = (GooString *)HtmlOutputDev::imgList->del(0);
+    fprintf(f,"<IMG src=\"%s\"><br>\n",imgName->getCString());
+    delete imgName;
+  }
+  HtmlOutputDev::imgNum = 1;
+
+  // the text, one string per line
+  HtmlString *cur = yxStrings;
+  while (cur) {
+    if (cur->htext) {
+      GooString *copy = new GooString(cur->htext);
+      fputs(copy->getCString(),f);
+      delete copy;
+      fputs("<br>\n",f);
+    }
+    cur = cur->yxNext;
+  }
+  fputs("<hr>\n",f);
+}
+
+
+
+// Resets the page for reuse: deletes the string under construction and
+// all strings of the list, and replaces the link list by an empty one.
+// With frames the font accumulator is replaced as well; without frames it
+// is kept and fontsPageMarker is moved to its current size.
+void HtmlPage::clear() {
+  // deleting NULL is harmless, so no check is needed
+  delete curStr;
+  curStr = NULL;
+
+  HtmlString *node = yxStrings;
+  while (node) {
+    HtmlString *following = node->yxNext;
+    delete node;
+    node = following;
+  }
+  yxStrings = NULL;
+  xyStrings = NULL;
+  yxCur1 = NULL;
+  yxCur2 = NULL;
+
+  if (noframes) {
+    fontsPageMarker = fonts->size();
+  } else {
+    delete fonts;
+    fonts = new HtmlFontAccu();
+    fontsPageMarker = 0;
+  }
+
+  delete links;
+  links = new HtmlLinks();
+}
+
+// Stores a copy of fname as the document name of this page.  The name is
+// used by dumpComplex() to build output and image file names.
+void HtmlPage::setDocName(char *fname){
+  // Release the name set by an earlier call so that it is not leaked.
+  // DocName starts out as NULL (see the constructor) and deleting NULL
+  // is harmless.
+  delete DocName;
+  DocName=new GooString(fname);
+}
+
+//------------------------------------------------------------------------
+// HtmlMetaVar
+//------------------------------------------------------------------------
+
+// Stores copies of the name and the content of one META variable.
+HtmlMetaVar::HtmlMetaVar(char *_name, char *_content)
+{
+  GooString *nameCopy = new GooString(_name);
+  GooString *contentCopy = new GooString(_content);
+  name = nameCopy;
+  content = contentCopy;
+}
+
+// Releases the two strings allocated by the constructor.
+HtmlMetaVar::~HtmlMetaVar()
+{
+  delete content;
+  delete name;
+}
+
+// Returns a newly allocated string of the form
+//   <META name="NAME" content="CONTENT">
+// name and content are inserted as they are.  The caller owns the result.
+GooString* HtmlMetaVar::toString()
+{
+  GooString *tag = new GooString("<META name=\"");
+  tag->append(name)->append("\" content=\"")->append(content)->append("\">");
+  return tag;
+}
+
+//------------------------------------------------------------------------
+// HtmlOutputDev
+//------------------------------------------------------------------------
+
+static char* HtmlEncodings[][2] = { // rows of {text encoding name, HTML charset name}; searched by mapEncodingToHtml()
+ {"Latin1", "ISO-8859-1"},
+ {NULL, NULL} // end marker
+};
+
+
+// Translates a text encoding name into the charset name to be written into
+// the HTML/XML header.  Names without an entry in HtmlEncodings are
+// returned unchanged; in that case the result points into "encoding".
+char* HtmlOutputDev::mapEncodingToHtml(GooString* encoding)
+{
+  char* requested = encoding->getCString();
+
+  int row = 0;
+  while (HtmlEncodings[row][0] != NULL) {
+    if (!strcmp(requested, HtmlEncodings[row][0]))
+      return HtmlEncodings[row][1];
+    ++row;
+  }
+  return requested;
+}
+
+// Writes the frameset document "<Docname>.html".  It shows
+// "<basename>_ind.html" in the left frame ("links") and, in the right
+// frame ("contents"), "<basename>-<firstPage>.html" in complex mode or
+// "<basename>s.html" otherwise.
+//
+// fContentsFrame is used as the file handle while the document is written
+// and is NULL when the function returns, whether it succeeded or not.
+void HtmlOutputDev::doFrame(int firstPage){
+  GooString* fName=new GooString(Docname);
+  char* htmlEncoding;
+  fName->append(".html");
+
+  if (!(fContentsFrame = fopen(fName->getCString(), "w"))){
+    error(-1, "Couldn't open html file '%s'", fName->getCString());
+    delete fName;
+    return;
+  }
+
+  delete fName;
+
+  // the frame sources are given relative to the frameset document
+  fName=basename(Docname);
+  fputs(DOCTYPE_FRAMES, fContentsFrame);
+  fputs("\n<HTML>",fContentsFrame);
+  fputs("\n<HEAD>",fContentsFrame);
+  fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString());
+  htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
+  fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+  dumpMetaVars(fContentsFrame);
+  fprintf(fContentsFrame, "</HEAD>\n");
+  fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame);
+  fprintf(fContentsFrame,"<FRAME name=\"links\" src=\"%s_ind.html\">\n",fName->getCString());
+  fputs("<FRAME name=\"contents\" src=",fContentsFrame);
+  if (complexMode)
+    fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage);
+  else
+    fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString());
+
+  fputs(">\n</FRAMESET>\n</HTML>\n",fContentsFrame);
+
+  delete fName;
+  fclose(fContentsFrame);
+  // Do not leave a pointer to the closed file behind: the destructor
+  // writes to fContentsFrame whenever it is not NULL.
+  fContentsFrame = NULL;
+}
+
+// Creates the output device and opens the output files.
+//
+//   fileName   base name of all output files
+//   title      document title, written into <TITLE>
+//   author, keywords, subject, date
+//              optional (may be NULL); each one that is given becomes a
+//              META variable
+//   extension  image file extension, handed on to the HtmlPage
+//   rawOrder   handed on to the HtmlPage
+//   firstPage  number of the first page, used for the frameset document
+//   outline    whether a link to the outline is written into the index
+//
+// Which files are opened depends on the global option flags:
+//   - HTML with frames, not singleHtml: the frameset (doFrame) and the
+//     index "<fileName>_ind.html" (fContentsFrame);
+//   - HTML with frames, not complexMode: "<fileName>s.html" (page);
+//   - noframes: stdout, "<fileName>.html" or "<fileName>.xml" (page).
+//
+// "ok" is only set to gTrue when everything could be opened; when an
+// fopen() fails the constructor reports the error and returns early.
+HtmlOutputDev::HtmlOutputDev(char *fileName, char *title,
+                             char *author, char *keywords, char *subject, char *date,
+                             char *extension,
+                             GBool rawOrder, int firstPage, GBool outline)
+{
+  char *htmlEncoding;
+
+  fContentsFrame = NULL;
+  // Not every combination of options opens "page", and the early returns
+  // below may happen before it is assigned; give it a defined value.
+  page = NULL;
+  docTitle = new GooString(title);
+  pages = NULL;
+  dumpJPEG=gTrue;
+  //write = gTrue;
+  this->rawOrder = rawOrder;
+  this->doOutline = outline;
+  ok = gFalse;
+  imgNum=1;
+  //this->firstPage = firstPage;
+  //pageNum=firstPage;
+  // open file
+  needClose = gFalse;
+  pages = new HtmlPage(rawOrder, extension);
+
+  glMetaVars = new GooList();
+  glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36"));
+  if( author ) glMetaVars->append(new HtmlMetaVar("author", author));
+  if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords));
+  if( date ) glMetaVars->append(new HtmlMetaVar("date", date));
+  if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject));
+
+  maxPageWidth = 0;
+  maxPageHeight = 0;
+
+  pages->setDocName(fileName);
+  Docname=new GooString (fileName);
+
+  // for non-xml output (complex or simple) with frames generate the left frame
+  if(!xml && !noframes)
+  {
+    if (!singleHtml)
+    {
+      GooString* left=new GooString(fileName);
+      left->append("_ind.html");
+
+      doFrame(firstPage);
+
+      if (!(fContentsFrame = fopen(left->getCString(), "w")))
+      {
+        error(-1, "Couldn't open html file '%s'", left->getCString());
+        delete left;
+        return;
+      }
+      delete left;
+      fputs(DOCTYPE, fContentsFrame);
+      fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
+
+      if (doOutline)
+      {
+        GooString *str = basename(Docname);
+        fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
+        delete str;
+      }
+    }
+    if (!complexMode)
+    { /* not in complex mode */
+
+      GooString* right=new GooString(fileName);
+      right->append("s.html");
+
+      if (!(page=fopen(right->getCString(),"w"))){
+        error(-1, "Couldn't open html file '%s'", right->getCString());
+        delete right;
+        return;
+      }
+      delete right;
+      fputs(DOCTYPE, page);
+      fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",page);
+    }
+  }
+
+  if (noframes) {
+    if (stout) page=stdout;
+    else {
+      GooString* right=new GooString(fileName);
+      if (!xml) right->append(".html");
+      if (xml) right->append(".xml");
+      if (!(page=fopen(right->getCString(),"w"))){
+        error(-1, "Couldn't open html file '%s'", right->getCString());
+        delete right;
+        return;
+      }
+      delete right;
+    }
+
+    // write the document header
+    htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
+    if (xml)
+    {
+      fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding);
+      fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
+      fputs("<pdf2xml>\n",page);
+    }
+    else
+    {
+      fprintf(page,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n",
+              DOCTYPE, docTitle->getCString());
+
+      fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+
+      dumpMetaVars(page);
+      fprintf(page,"</HEAD>\n");
+      fprintf(page,"<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
+    }
+  }
+  ok = gTrue;
+}
+
+// Writes the closing markup to the files that are still open, closes them
+// and releases everything owned by the device.
+HtmlOutputDev::~HtmlOutputDev() {
+  HtmlFont::clear();
+
+  delete Docname;
+  delete docTitle;
+
+  deleteGooList(glMetaVars, HtmlMetaVar);
+
+  if (fContentsFrame){
+    fputs("</BODY>\n</HTML>\n",fContentsFrame);
+    fclose(fContentsFrame);
+  }
+
+  // The constructor returns early when fopen() fails and then leaves a
+  // NULL pointer in "page"; writing to it or closing it would crash.
+  if (xml) {
+    if (page) {
+      fputs("</pdf2xml>\n",page);
+      fclose(page);
+    }
+  } else if ( !complexMode || noframes ) {
+    // xml is known to be false in this branch
+    if (page) {
+      fputs("</BODY>\n</HTML>\n",page);
+      fclose(page);
+    }
+  }
+
+  // deleting NULL is harmless
+  delete pages;
+}
+
+void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
+#if 0
+ if (mode&&!xml){
+ if (write){
+ write=gFalse;
+ GooString* fname=Dirname(Docname);
+ fname->append("image.log");
+ if((tin=fopen(getFileNameFromPath(fname->getCString(),fname->getLength()),"w"))==NULL){
+ printf("Error : can not open %s",fname);
+ exit(1);
+ }
+ delete fname;
+ // if(state->getRotation()!=0)
+ // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
+ // else
+ fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());
+ }
+ }
+#endif
+
+ this->pageNum = pageNum;
+ GooString *str=basename(Docname);
+ pages->clear();
+ if(!noframes)
+ {
+ if (fContentsFrame)
+ {
+ if (complexMode)
+ fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",str->getCString(),pageNum);
+ else
+ fp