1. Java POJO:
Add the Java POJO with the required fields:
import org.apache.solr.client.solrj.beans.Field; /** * Created by yash on 18/11/14. */ public class ProductBean { @Field private int id; @Field("rank") private int rank; @Field("prodid") private long prodid; @Field("cat") private int cat; @Field("subcat") private int subcat; public ProductBean(){} // Required by Solr to initialize bean. public ProductBean(int id, int rank, long prodid, int cat, int subcat) { this.id = id; this.rank = rank; this.prodid = prodid; this.cat = cat; this.subcat = subcat; } public int getRank() { return rank; } public void setRank(int rank) { this.rank = rank; } public long getprodid() { return prodid; } public void setprodid(long prodid) { this.prodid = prodid; } public int getCat() { return cat; } public void setCat(int cat) { this.cat = cat; } public int getSubcat() { return subcat; } public void setSubcat(int subcat) { this.subcat = subcat; } }
2. Index the data file in Solr
Use the POJO to index the data file into Solr:
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.common.SolrInputDocument; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.util.Scanner; /** * Created by yash on 6/11/14. */ @Service public class SolrIndexerService { private static final Logger log = LoggerFactory.getLogger(SolrIndexerService.class); private static final String SOLR_URL = "http://54.254.192.149:8983/solr/feeddata/"; private static final String FILE_PATH = "/home/yash/Desktop/solr-data/testdata.txt"; public void indexFile() throws IOException, SolrServerException { SolrServer server = new CommonsHttpSolrServer(SOLR_URL); Scanner sc = new Scanner(new File(FILE_PATH)); ProductBean bean; String record; String[] columns; int recordCount = 0; long currentTstmp = System.currentTimeMillis(); while(sc.hasNextLine()) { record = sc.nextLine(); if(record==null || record.length()<1){ continue; } System.out.println(record); columns = record.split(","); bean = new ProductBean(recordCount, Integer.parseInt(columns[0]), Long.parseLong(columns[1]), Integer.parseInt(columns[2]), Integer.parseInt(columns[3]), Integer.parseInt(columns[4])); server.addBean(bean); recordCount++; if(recordCount%1000==0) server.commit(); // periodically flush } server.commit(); /* Remove all records with updated time less than current updated timestamp */ server.deleteByQuery("-last_updated:"+ String.valueOf(currentTstmp)); server.commit(); server.optimize(); System.out.println("Done !!"); } public static void main(String[] args) throws IOException, SolrServerException { new SolrIndexerService().indexFile(); } }