Contoh menghitung skor sebuah kalimat dengan query tertentu

Berikut hasil coba-coba yang gw lakukan untuk mengeluarkan skor suatu query terhadap file (bukan kalimat; yang itu nanti gw coba lagi, tapi satu-satu dulu…).
Jadi ceritanya kita punya sekumpulan file, dan dari file-file tersebut mau dicari berdasarkan kata tertentu. Inilah sintaksnya: jangan lupa disimpan ke SkorFileText.java, atau klik di sini kalau mau download… makasih.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.File;
import java.io.BufferedReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.util.ArrayList;

public class SkorFileText {

private IndexWriter writer;
private static int jmldokakhir;
public static void main (String[] args) throws IOException {
System.out.println(“Masukin alamat foldernya mo d taruh d mana indexnya: “);
//nerima inputan, udah gitu aja
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
String inputan = br.readLine();
String DirIndex = inputan;
// instansiasi analyzer
StandardAnalyzer analyzer = new StandardAnalyzer();
//nulis indexnya
try {
IndexWriter writer = new IndexWriter(DirIndex, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
while (!inputan.equalsIgnoreCase(“s”)) {
try {

System.out.println(“sekarang, masukin nama file atau folder yg mau d index: (kalo udah ketik s, s=setop)”);
System.out.println(“[format file : .xml, .html, .html, .txt]”);
inputan = br.readLine();
if (inputan.equalsIgnoreCase(“s”)) {
break;
}

//try to add file into the index
//artinya, nyobain nambah file ke index tsb…
// indexer.indexFileOrDirectory(inputan);
File file = new File(inputan);
//masukin file2 ke antrian
ArrayList antrian = new ArrayList();
//cek file or foldernya ada kagak
if (!file.exists()) {
System.out.println(file + ” kagak ada.”);
}
if (file.isDirectory()) {// file nya direktori atau folder
for (File f : file.listFiles()) {
String filename = f.getName().toLowerCase();
// cuma ngindex file format text
if (filename.endsWith(“.htm”) || filename.endsWith(“.html”) ||
filename.endsWith(“.xml”) || filename.endsWith(“.txt”)) {
antrian.add(f);
} else {
System.out.println(filename + ” di sekip”);
}
}
} else {// file nya langsung file ajah
String filename = file.getName().toLowerCase();
if (filename.endsWith(“.htm”) || filename.endsWith(“.html”) ||
filename.endsWith(“.xml”) || filename.endsWith(“.txt”)) {
antrian.add(file);
} else {
System.out.println(filename + ” di sekip”);
}
}

//skarang baca isi file terus, masukin dah ke index… nah loh
int jmldokawal = writer.numDocs();
for (File f : antrian) {
FileReader fr = null;

try {
Document dok = new Document();
// nambah konten file
fr = new FileReader(f);
dok.add(new Field(“namafile”,f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));
dok.add(new Field(“konten”, fr));
//field yg kedua, isinya path file, alamak tempat file tsb
dok.add(new Field(“path”, inputan,Field.Store.YES,Field.Index.NOT_ANALYZED));

writer.addDocument(dok);
//System.out.println(“ketambah: ” + f);
} catch (Exception e) {
System.out.println(“gak bs d tambah: ” + f + ” karena eh because ” + e.getMessage());
} finally {
fr.close();
}
}
jmldokakhir = writer.numDocs();

System.out.println(“”);
System.out.println(“************************”);
System.out.println((jmldokakhir – jmldokawal) + ” dokumen d tambahin “);
System.out.println(“************************”);

antrian.clear();

} catch (Exception e) {
System.out.println(“Error indexing ” + inputan + ” : ” + e.getMessage());
}
}
writer.optimize();
writer.close();

//masukin querynya….
System.out.println(“nah kalo sekarang masukin query nya : “);
inputan = br.readLine();
String querystr = args.length > 0 ? args [0] : inputan;
Query queri = new QueryParser(“konten”,analyzer).parse(querystr);

IndexSearcher pencari = new IndexSearcher(DirIndex);

// nah mulai ngitung skornya… buset pnjang kali ya, udah baris 114 aja
TopDocCollector kolektor = new TopDocCollector(jmldokakhir);
pencari.search(queri,kolektor);// mencari sebanyak jmldokakhir di pencari… ya gitu lah pokonya
System.out.println(“the file = ” + pencari.doc(11).get(“path”) +”\\”+pencari.doc(11).get(“namafile”));
ScoreDoc[] hit = kolektor.topDocs().scoreDocs;
System.out.println(“ketemu sejumlah ” + hit.length + ” hit.”); //nampilin jumlahnya

//sekarang nampilin yg mana aja yg kena hit…
for(int i=0;i<hits.length;i++) {
int IdDok = hit[i].doc;
Document d = pencari.doc(IdDok);
System.out.println((i+1) + ". "+ d.get("path") +"\\"+d.get("namafile") + " " + hit[i].score);
//System.out.println(pencari.explain(queri,hit[i].doc));
}

pencari.close();
} catch (Exception ex) {
System.out.println("sori index g bs d bikin, coba cek lagi… makasih");
System.exit(-1);
}
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s