/*
 * WebSPHINX web crawling toolkit
 * Copyright (C) 1998,1999 Carnegie Mellon University 
 * 
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library
 * General Public License as published by the Free Software 
 * Foundation, version 2.
 *
 * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/
 */
package websphinx;

import java.io.*;
import java.net.URL;
import websphinx.util.Str;

/**
 * Transformer that writes a sequence of records (arrays of fields) to its
 * output.  With the default templates the output is an HTML table with one
 * row per record: the first cell holds the record number linked to the URL
 * of the page the record came from, and each field gets its own cell.
 * <P>
 * Every piece of boilerplate is configurable through the setters below.
 * A template set to <code>null</code> is treated as "emit nothing".
 * In the record-start and record-end templates, <code>%n</code> is replaced
 * by the 1-based record number and <code>%u</code> by the URL of the first
 * Region field of the record (or the empty string if there is none).
 */
public class RecordTransformer extends RewritableLinkTransformer {

    /** Emitted once, before the first record. */
    String prolog = "<HTML><HEAD><TITLE>Extracted Records</TITLE></HEAD><BODY><TABLE>\n";
    /** Emitted after the last record each time the output is flushed. */
    String epilog = "</TABLE></BODY></HTML>\n";

    /** Template emitted before the fields of each record (%n and %u are expanded). */
    String recordStart = "<TR>\n<TD><A HREF=\"%u\">%n.</A>\n";
    /** Template emitted after the fields of each record (%n and %u are expanded). */
    String recordEnd =   "\n";
    /** Emitted between consecutive records (before every record except the first). */
    String recordDivider = "";
    
    /** Emitted before each field. */
    String fieldStart = "  <TD>";
    /** Emitted after each field. */
    String fieldEnd   = "\n";
    /** Emitted between consecutive fields of one record. */
    String fieldDivider = "";

    /** Number of records passed to writeRecord so far. */
    int nRecords = 0;

    /**
     * Make a RecordTransformer.
     * @param filename passed unchanged to the RewritableLinkTransformer
     * constructor (presumably the name of the output file -- see superclass)
     * @exception IOException if the superclass constructor fails
     */
    public RecordTransformer (String filename) throws IOException {
        super (filename);
    }

    /**
     * Set the text emitted before the first record.
     * @param prolog new prolog, or null to emit nothing
     */
    public synchronized void setProlog (String prolog) {
        this.prolog = prolog;
    }
    /**
     * Get the text emitted before the first record.
     * @return current prolog
     */
    public synchronized String getProlog () {
        return prolog;
    }

    /**
     * Set the text emitted after the last record when the output is flushed.
     * @param epilog new epilog, or null to emit nothing
     */
    public synchronized void setEpilog (String epilog) {
        this.epilog = epilog;
    }
    /**
     * Get the text emitted after the last record when the output is flushed.
     * @return current epilog
     */
    public synchronized String getEpilog () {
        return epilog;
    }

    /**
     * Set the template emitted at the start of each record.
     * @param recordStart new template (%n = record number, %u = record URL),
     * or null to emit nothing
     */
    public synchronized void setRecordStart (String recordStart) {
        this.recordStart = recordStart;
    }
    /**
     * Get the template emitted at the start of each record.
     * @return current record-start template
     */
    public synchronized String getRecordStart () {
        return recordStart;
    }

    /**
     * Set the template emitted at the end of each record.
     * @param recordEnd new template (%n = record number, %u = record URL),
     * or null to emit nothing
     */
    public synchronized void setRecordEnd (String recordEnd) {
        this.recordEnd = recordEnd;
    }
    /**
     * Get the template emitted at the end of each record.
     * @return current record-end template
     */
    public synchronized String getRecordEnd () {
        return recordEnd;
    }

    /**
     * Set the text emitted between consecutive records.
     * @param recordDivider new divider, or null to emit nothing
     */
    public synchronized void setRecordDivider (String recordDivider) {
        this.recordDivider = recordDivider;
    }
    /**
     * Get the text emitted between consecutive records.
     * @return current record divider
     */
    public synchronized String getRecordDivider () {
        return recordDivider;
    }

    /**
     * Set the text emitted before each field.
     * @param fieldStart new field prefix, or null to emit nothing
     */
    public synchronized void setFieldStart (String fieldStart) {
        this.fieldStart = fieldStart;
    }
    /**
     * Get the text emitted before each field.
     * @return current field prefix
     */
    public synchronized String getFieldStart () {
        return fieldStart;
    }

    /**
     * Set the text emitted after each field.
     * @param fieldEnd new field suffix, or null to emit nothing
     */
    public synchronized void setFieldEnd (String fieldEnd) {
        this.fieldEnd = fieldEnd;
    }
    /**
     * Get the text emitted after each field.
     * @return current field suffix
     */
    public synchronized String getFieldEnd () {
        return fieldEnd;
    }

    /**
     * Set the text emitted between consecutive fields of a record.
     * @param fieldDivider new divider, or null to emit nothing
     */
    public synchronized void setFieldDivider (String fieldDivider) {
        this.fieldDivider = fieldDivider;
    }
    /**
     * Get the text emitted between consecutive fields of a record.
     * @return current field divider
     */
    public synchronized String getFieldDivider () {
        return fieldDivider;
    }

    /**
     * Flush the record page to disk.  Temporarily writes the epilog
     * (preceded by the prolog if no record has been written yet), then
     * seeks back to where the file pointer was on entry, so that output
     * written afterwards starts at that position again rather than after
     * the epilog.
     * @exception IOException if emitting, seeking or the superclass flush fails
     */
    public synchronized void flush () throws IOException {
        long resumeAt = getFilePointer ();
        if (nRecords == 0) {
            // Nothing written yet: include the prolog so the flushed page
            // is complete.  writeRecord emits it again for record 1, after
            // the seek below has moved back in front of this copy.
            emitNonNull (prolog);
        }
        emitNonNull (epilog);
        seek (resumeAt);
        super.flush ();
    }
        

    /**
     * Get the number of records written.
     * @return number of calls made to writeRecord so far (the counter is
     * advanced at the start of each call)
     */
    public synchronized int getRecordCount () {
        return nRecords;
    }

    /**
     * Write one record.  Emits the prolog (first record) or the record
     * divider (later records), the record-start template, every field
     * wrapped in fieldStart/fieldEnd and separated by fieldDivider, and
     * finally the record-end template.
     * <P>
     * A field that is a Region is written either as a Region or, if asText
     * is true, as the string returned by its toText().  Any other non-null
     * field is written as its toString().  A null field is written as an
     * empty field (only the surrounding field markup is emitted).
     *
     * @param fields fields of the record; must not be null
     * @param asText true to write Region fields via toText(), false to
     * write the Regions themselves
     * @exception IOException if writing to the output fails
     * @exception NullPointerException if fields is null
     */
    public synchronized void writeRecord (Object[] fields, boolean asText) throws IOException {
        // Fail before touching the counter or the output, so a bad call
        // cannot leave a half-written record behind.
        if (fields == null) {
            throw new NullPointerException ("fields");
        }

        ++nRecords;

        emitNonNull ((nRecords == 1) ? prolog : recordDivider);
        
        URL url = urlOfFirstRegion (fields);
        
        emitTemplate (recordStart, url, nRecords);
        for (int i=0; i<fields.length; ++i) {
            if (i > 0) {
                emitNonNull (fieldDivider);
            }
            emitNonNull (fieldStart);
            
            Object f = fields[i];
            if (f instanceof Region) {
                Region r = (Region)f;
                if (asText) {
                    write (r.toText());
                }
                else {
                    write (r);
                }
            }
            else if (f != null) {
                write (f.toString ());
            }
            // else: null field -- leave the cell empty instead of throwing
            // in the middle of the record
                
            emitNonNull (fieldEnd);
        }
        emitTemplate (recordEnd, url, nRecords);
    }
    
    /**
     * Find the URL to associate with a record.
     * @param fields fields of the record
     * @return URL of the source of the first field that is a Region, or
     * null if no field is a Region
     */
    private URL urlOfFirstRegion (Object[] fields) {
        for (int i=0; i<fields.length; ++i) {
            if (fields[i] instanceof Region) {
                Region r = (Region)fields[i];
                // NOTE(review): assumes getSource() is never null for a
                // Region -- confirm against Region.
                return r.getSource().getURL();
            }
        }
        return null;
    }

    /**
     * Emit a string unless it is null.  Keeps the plain boilerplate strings
     * (prolog, epilog, dividers, field markers) consistent with
     * emitTemplate, which also treats a null template as "emit nothing".
     * @param text string to emit, or null
     * @exception IOException if emitting fails
     */
    private void emitNonNull (String text) throws IOException {
        if (text != null) {
            emit (text);
        }
    }

    /**
     * Expand and emit a record template.  Null and empty templates emit
     * nothing.
     * @param template template text; %n is replaced by the record number
     * and %u by the URL
     * @param url URL substituted for %u; null is substituted as ""
     * @param record record number substituted for %n
     * @exception IOException if emitting fails
     */
    private void emitTemplate (String template, URL url, int record) throws IOException {
        if (template == null || template.length() == 0) {
            return;
        }
            
        // %n is expanded first, so a "%n" occurring inside the URL text
        // inserted by the second replacement is left untouched.
        // NOTE(review): the URL is inserted verbatim (no HTML escaping),
        // because the templates are not required to be HTML.
        template = Str.replace (template, "%n", String.valueOf (record));
        template = Str.replace (template, "%u", url != null ? url.toString () : "");
        emit (template);
    }

    /*
     * Testing
     *
    public static void main (String[] args) throws Exception {
        Pattern p = new Tagexp (args[0].replace ('_', ' ') );
        RecordTransformer records = new RecordTransformer (args[1]);
        for (int i=2; i<args.length; ++i) {
            Page page = new Page (new Link (args[i]));
            PatternMatcher m = p.match (page);
            for (Region r = m.nextMatch(); r != null; r = m.nextMatch())
                records.writeRecord (r.getFields (Pattern.groups), false);
        }
        records.close ();
    }
     */

}
