org.apache.nutch.crawl
Class CrawlDb
java.lang.Object
  └ org.apache.hadoop.conf.Configured
      └ org.apache.nutch.crawl.CrawlDb
- All Implemented Interfaces:
- org.apache.hadoop.conf.Configurable, org.apache.hadoop.util.Tool
public class CrawlDb
- extends org.apache.hadoop.conf.Configured
- implements org.apache.hadoop.util.Tool
This class takes the output of the fetcher and updates the
crawldb accordingly.
Constructor Summary
CrawlDb()
CrawlDb(org.apache.hadoop.conf.Configuration conf)
Method Summary
static org.apache.hadoop.mapred.JobConf createJob(org.apache.hadoop.conf.Configuration config, org.apache.hadoop.fs.Path crawlDb)
static void install(org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.fs.Path crawlDb)
static void main(String[] args)
int run(String[] args)
void update(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path[] segments, boolean normalize, boolean filter)
void update(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force)
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.hadoop.conf.Configurable
getConf, setConf
LOG
public static final org.apache.commons.logging.Log LOG
CRAWLDB_ADDITIONS_ALLOWED
public static final String CRAWLDB_ADDITIONS_ALLOWED
- See Also:
- Constant Field Values
CURRENT_NAME
public static final String CURRENT_NAME
- See Also:
- Constant Field Values
LOCK_NAME
public static final String LOCK_NAME
- See Also:
- Constant Field Values
CrawlDb
public CrawlDb()
CrawlDb
public CrawlDb(org.apache.hadoop.conf.Configuration conf)
update
public void update(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path[] segments,
boolean normalize,
boolean filter)
throws IOException
- Throws:
IOException
update
public void update(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path[] segments,
boolean normalize,
boolean filter,
boolean additionsAllowed,
boolean force)
throws IOException
- Throws:
IOException
createJob
public static org.apache.hadoop.mapred.JobConf createJob(org.apache.hadoop.conf.Configuration config,
org.apache.hadoop.fs.Path crawlDb)
throws IOException
- Throws:
IOException
install
public static void install(org.apache.hadoop.mapred.JobConf job,
org.apache.hadoop.fs.Path crawlDb)
throws IOException
- Throws:
IOException
main
public static void main(String[] args)
throws Exception
- Throws:
Exception
run
public int run(String[] args)
throws Exception
- Specified by: run in interface org.apache.hadoop.util.Tool
- Throws:
Exception
Copyright © 2006 The Apache Software Foundation