org.apache.nutch.indexer
Class Indexer

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.nutch.indexer.Indexer
All Implemented Interfaces:
Closeable, org.apache.hadoop.conf.Configurable, org.apache.hadoop.mapred.JobConfigurable, org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,NutchWritable>, org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,NutchWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable>, org.apache.hadoop.util.Tool

public class Indexer
extends org.apache.hadoop.conf.Configured
implements org.apache.hadoop.util.Tool, org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,NutchWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable>, org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,NutchWritable>

Creates indexes for segments.


Nested Class Summary
static class Indexer.LuceneDocumentWrapper
          A utility class used to pass a Lucene document from Indexer.reduce to Indexer.OutputFormat.
static class Indexer.OutputFormat
          Unwraps Lucene documents created by reduce and adds them to an index.
 
Field Summary
static String DONE_NAME
           
static org.apache.commons.logging.Log LOG
           
 
Constructor Summary
Indexer()
           
Indexer(org.apache.hadoop.conf.Configuration conf)
           
 
Method Summary
 void close()
           
 void configure(org.apache.hadoop.mapred.JobConf job)
           
 void index(org.apache.hadoop.fs.Path indexDir, org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, org.apache.hadoop.fs.Path[] segments)
           
static void main(String[] args)
           
 void map(org.apache.hadoop.io.Text key, org.apache.hadoop.io.Writable value, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,NutchWritable> output, org.apache.hadoop.mapred.Reporter reporter)
           
 void reduce(org.apache.hadoop.io.Text key, Iterator<NutchWritable> values, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable> output, org.apache.hadoop.mapred.Reporter reporter)
           
 int run(String[] args)
           
 
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.hadoop.conf.Configurable
getConf, setConf
 

Field Detail

DONE_NAME

public static final String DONE_NAME
See Also:
Constant Field Values

LOG

public static final org.apache.commons.logging.Log LOG
Constructor Detail

Indexer

public Indexer()

Indexer

public Indexer(org.apache.hadoop.conf.Configuration conf)
Method Detail

configure

public void configure(org.apache.hadoop.mapred.JobConf job)
Specified by:
configure in interface org.apache.hadoop.mapred.JobConfigurable

close

public void close()
Specified by:
close in interface Closeable

reduce

public void reduce(org.apache.hadoop.io.Text key,
                   Iterator<NutchWritable> values,
                   org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable> output,
                   org.apache.hadoop.mapred.Reporter reporter)
            throws IOException
Specified by:
reduce in interface org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,NutchWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable>
Throws:
IOException

index

public void index(org.apache.hadoop.fs.Path indexDir,
                  org.apache.hadoop.fs.Path crawlDb,
                  org.apache.hadoop.fs.Path linkDb,
                  org.apache.hadoop.fs.Path[] segments)
           throws IOException
Throws:
IOException

main

public static void main(String[] args)
                 throws Exception
Throws:
Exception

run

public int run(String[] args)
        throws Exception
Specified by:
run in interface org.apache.hadoop.util.Tool
Throws:
Exception

map

public void map(org.apache.hadoop.io.Text key,
                org.apache.hadoop.io.Writable value,
                org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,NutchWritable> output,
                org.apache.hadoop.mapred.Reporter reporter)
         throws IOException
Specified by:
map in interface org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,NutchWritable>
Throws:
IOException


Copyright © 2006 The Apache Software Foundation