Named Entity Extraction Example in openNLP
Named Entity Extraction Example in OpenNLP – In this OpenNLP tutorial, we shall extract named entities from a sentence using OpenNLP's pre-built models, which have already been trained to find named entities.
What is Named Entity Recognition/Extraction (NER)?
Named Entity Recognition is the task of finding the named entities in a text that belong to categories such as persons, organizations, dates, percentages, etc., and assigning each identified entity to one of these categories.
How is Named Entity Extraction done in OpenNLP?
In OpenNLP, Named Entity Extraction is done using statistical models, i.e., machine learning techniques. Specifically, maximum entropy (Maxent) modeling is used.
Example: Named Entity Extraction Example in openNLP
The following example, NameFinderExample.java, shows how to use the NameFinderME class to extract two kinds of named entities: persons and places.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.Span;
/**
 * Demonstrates how to use the {@link NameFinderME} class for Named Entity
 * Recognition/Extraction with pre-trained OpenNLP models.
 *
 * <p>The model files {@code en-ner-person.bin} and {@code en-ner-location.bin} are
 * opened by relative path, so they must be present in the working directory.
 *
 * @author tutorialkart.com
 */
public class NameFinderExample {

    /** Relative path of the pre-trained person-name model. */
    private static final String PERSON_MODEL_FILE = "en-ner-person.bin";

    /** Relative path of the pre-trained location model. */
    private static final String LOCATION_MODEL_FILE = "en-ner-location.bin";

    /**
     * Runs the person-name example and then the place-name example.
     * An I/O failure in one example is reported and does not stop the other.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // find person name
        try {
            System.out.println("-------Finding entities belonging to category : person name------");
            new NameFinderExample().findName();
            System.out.println();
        } catch (IOException e) {
            e.printStackTrace();
        }

        // find place
        try {
            System.out.println("-------Finding entities belonging to category : place name------");
            new NameFinderExample().findLocation();
            System.out.println();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Finds person names in a fixed, pre-tokenized sample sentence and prints
     * each detected span followed by the tokens it covers.
     *
     * @throws IOException if the person model file cannot be opened or loaded
     */
    public void findName() throws IOException {
        // the name finder works on tokens, so the sentence is supplied pre-tokenized
        String[] sentence = {
            "John", "Smith", "is", "standing", "next", "to", "bus", "stop",
            "and", "waiting", "for", "Mike", "."
        };
        printEntities(PERSON_MODEL_FILE, sentence);
    }

    /**
     * Finds locations in a fixed, pre-tokenized sample sentence and prints
     * each detected span followed by the tokens it covers.
     *
     * @throws IOException if the location model file cannot be opened or loaded
     */
    public void findLocation() throws IOException {
        String[] sentence = {"John", "Smith", "is", "from", "Atlanta", "."};
        printEntities(LOCATION_MODEL_FILE, sentence);
    }

    /**
     * Loads the model at {@code modelPath}, runs it over {@code tokens} and prints
     * one line per detected entity in the form {@code <span> : <token> <token> ...}.
     *
     * @param modelPath path of the name finder model file to load
     * @param tokens    the tokens of a single sentence
     * @throws IOException if the model file cannot be opened or loaded
     */
    private static void printEntities(String modelPath, String[] tokens) throws IOException {
        TokenNameFinderModel model;
        // try-with-resources closes the stream even when loading the model throws
        try (InputStream modelStream = new FileInputStream(modelPath)) {
            model = new TokenNameFinderModel(modelStream);
        }

        // feed the model to the name finder class
        NameFinderME nameFinder = new NameFinderME(model);

        // spans holds every entity the model detected in the token array
        Span[] spans = nameFinder.find(tokens);
        for (Span span : spans) {
            System.out.print(span.toString());
            System.out.print(" : ");
            // getStart() is the index of the entity's first token;
            // getEnd() is used as an exclusive upper bound
            for (int index = span.getStart(); index < span.getEnd(); index++) {
                System.out.print(tokens[index] + " ");
            }
            System.out.println();
        }
    }
}
When the example program, NameFinderExample.java, is run, the output to the console is:
The project structure, the model file locations, etc., are shown below:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.Span;
/**
 * Demonstrates how to use the {@link NameFinderME} class for Named Entity
 * Recognition/Extraction with pre-trained OpenNLP models.
 *
 * <p>The model files {@code en-ner-person.bin} and {@code en-ner-location.bin} are
 * opened by relative path, so they must be present in the working directory.
 *
 * @author tutorialkart.com
 */
public class NameFinderExample {

    /** Relative path of the pre-trained person-name model. */
    private static final String PERSON_MODEL_FILE = "en-ner-person.bin";

    /** Relative path of the pre-trained location model. */
    private static final String LOCATION_MODEL_FILE = "en-ner-location.bin";

    /**
     * Runs the person-name example and then the place-name example.
     * An I/O failure in one example is reported and does not stop the other.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // find person name
        try {
            System.out.println("-------Finding entities belonging to category : person name------");
            new NameFinderExample().findName();
            System.out.println();
        } catch (IOException e) {
            e.printStackTrace();
        }

        // find place
        try {
            System.out.println("-------Finding entities belonging to category : place name------");
            new NameFinderExample().findLocation();
            System.out.println();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Finds person names in a fixed, pre-tokenized sample sentence and prints
     * each detected span followed by the tokens it covers.
     *
     * @throws IOException if the person model file cannot be opened or loaded
     */
    public void findName() throws IOException {
        // the name finder works on tokens, so the sentence is supplied pre-tokenized
        String[] sentence = {
            "John", "Smith", "is", "standing", "next", "to", "bus", "stop",
            "and", "waiting", "for", "Mike", "."
        };
        printEntities(PERSON_MODEL_FILE, sentence);
    }

    /**
     * Finds locations in a fixed, pre-tokenized sample sentence and prints
     * each detected span followed by the tokens it covers.
     *
     * @throws IOException if the location model file cannot be opened or loaded
     */
    public void findLocation() throws IOException {
        String[] sentence = {"John", "Smith", "is", "from", "Atlanta", "."};
        printEntities(LOCATION_MODEL_FILE, sentence);
    }

    /**
     * Loads the model at {@code modelPath}, runs it over {@code tokens} and prints
     * one line per detected entity in the form {@code <span> : <token> <token> ...}.
     *
     * @param modelPath path of the name finder model file to load
     * @param tokens    the tokens of a single sentence
     * @throws IOException if the model file cannot be opened or loaded
     */
    private static void printEntities(String modelPath, String[] tokens) throws IOException {
        TokenNameFinderModel model;
        // try-with-resources closes the stream even when loading the model throws
        try (InputStream modelStream = new FileInputStream(modelPath)) {
            model = new TokenNameFinderModel(modelStream);
        }

        // feed the model to the name finder class
        NameFinderME nameFinder = new NameFinderME(model);

        // spans holds every entity the model detected in the token array
        Span[] spans = nameFinder.find(tokens);
        for (Span span : spans) {
            System.out.print(span.toString());
            System.out.print(" : ");
            // getStart() is the index of the entity's first token;
            // getEnd() is used as an exclusive upper bound
            for (int index = span.getStart(); index < span.getEnd(); index++) {
                System.out.print(tokens[index] + " ");
            }
            System.out.println();
        }
    }
}
|
No comments:
Post a Comment