/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.indexer; import java.net.URL; import java.util.ArrayList; import java.util.Enumeration; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.common.SolrInputDocument; /** Adapter class that hides the API of Solr java client from Nutch. 
*/
public class SolrClientAdapter extends Configured {

  private static final Log LOG = LogFactory.getLog(SolrClientAdapter.class);

  /** Configuration key that holds the URL of the Solr server to index into. */
  private static final String INDEXER_SOLR_URL = "indexer.solr.url";

  /** Names of Lucene fields that are never copied into the Solr document. */
  private static final ArrayList<String> ignoreFields = new ArrayList<String>();

  static {
    ignoreFields.add("boost");
  }

  /** Solr client that receives every add and commit request issued by this adapter. */
  SolrServer solr;

  /**
   * Creates an adapter bound to the Solr server named by the
   * {@code indexer.solr.url} configuration property.
   *
   * @param conf configuration to read the Solr URL from; also handed to
   *        {@link Configured}
   * @throws RuntimeException if the URL property is not set, or if the Solr
   *         client cannot be created for that URL (the underlying exception
   *         is attached as the cause)
   */
  public SolrClientAdapter(Configuration conf) {
    super(conf);
    String url = conf.get(INDEXER_SOLR_URL);
    if (url == null) {
      throw new RuntimeException("Solr server url (" + INDEXER_SOLR_URL + ") is not defined");
    }
    try {
      solr = new CommonsHttpSolrServer(new URL(url));
    } catch (Exception e) {
      // Keep the original exception as the cause so its stack trace is not lost.
      throw new RuntimeException("Cannot instantiate Solr client with url: '" + url
          + "'. Reason: " + e.getMessage(), e);
    }
  }

  /**
   * Adds single Lucene document to index.
   *
   * <p>Every field of the document whose name is not in the ignore list is
   * copied to a new {@link SolrInputDocument} with its string value and boost.
   * A failure to send the document is logged as a warning and not rethrown.
   *
   * @param doc Lucene document whose fields are sent to Solr
   */
  public void index(Document doc) {
    SolrInputDocument solrDoc = new SolrInputDocument();
    // Wildcard plus explicit cast compiles whether fields() is declared raw or generic.
    for (Enumeration<?> e = doc.fields(); e.hasMoreElements();) {
      Field field = (Field) e.nextElement();
      String fieldName = field.name();
      if (!ignoreFields.contains(fieldName)) {
        solrDoc.addField(fieldName, field.stringValue(), field.getBoost());
      }
    }
    try {
      solr.add(solrDoc);
    } catch (Exception e) {
      LOG.warn("Could not index document, reason:" + e.getMessage(), e);
    }
  }

  /**
   * Commits changes.
   *
   * <p>A failure to commit is logged as a warning and not rethrown.
   */
  public void commit() {
    try {
      // NOTE(review): the flags are presumably (waitFlush, waitSearcher) -
      // confirm against the SolrJ version in use.
      solr.commit(true, false);
    } catch (Exception e) {
      LOG.warn("Could not commit, reason:" + e.getMessage(), e);
    }
  }
}