23
23
import ai .philterd .phileas .model .objects .Span ;
24
24
import ai .philterd .phileas .model .policy .Policy ;
25
25
import ai .philterd .phileas .model .services .MetricsService ;
26
-
27
26
import com .google .gson .Gson ;
28
27
import com .google .gson .reflect .TypeToken ;
29
- import okhttp3 .Authenticator ;
30
- import okhttp3 .ConnectionPool ;
31
- import okhttp3 .Credentials ;
32
- import okhttp3 .OkHttpClient ;
33
- import okhttp3 .Request ;
34
- import okhttp3 .Route ;
35
28
import org .apache .commons .collections4 .CollectionUtils ;
36
29
import org .apache .commons .lang3 .StringUtils ;
37
30
import org .apache .commons .math3 .stat .descriptive .DescriptiveStatistics ;
31
+ import org .apache .hc .client5 .http .classic .methods .HttpPost ;
32
+ import org .apache .hc .client5 .http .config .RequestConfig ;
33
+ import org .apache .hc .client5 .http .impl .classic .CloseableHttpClient ;
34
+ import org .apache .hc .client5 .http .impl .classic .HttpClientBuilder ;
35
+ import org .apache .hc .client5 .http .impl .classic .HttpClients ;
36
+ import org .apache .hc .client5 .http .impl .io .PoolingHttpClientConnectionManager ;
37
+ import org .apache .hc .core5 .http .HttpEntity ;
38
+ import org .apache .hc .core5 .http .io .HttpClientResponseHandler ;
39
+ import org .apache .hc .core5 .http .io .entity .EntityUtils ;
40
+ import org .apache .hc .core5 .http .io .entity .StringEntity ;
41
+ import org .apache .hc .core5 .net .URIBuilder ;
38
42
import org .apache .logging .log4j .LogManager ;
39
43
import org .apache .logging .log4j .Logger ;
40
- import retrofit2 .Response ;
41
- import retrofit2 .Retrofit ;
42
- import retrofit2 .converter .gson .GsonConverterFactory ;
43
- import retrofit2 .converter .scalars .ScalarsConverterFactory ;
44
44
45
45
import java .io .IOException ;
46
46
import java .lang .reflect .Type ;
47
+ import java .net .URI ;
47
48
import java .util .ArrayList ;
48
49
import java .util .Collection ;
49
50
import java .util .LinkedList ;
@@ -57,9 +58,11 @@ public class PhEyeFilter extends NerFilter {
57
58
58
59
private final boolean removePunctuation ;
59
60
60
- private final transient PhEyeService service ;
61
+ private final PhEyeConfiguration phEyeConfiguration ;
61
62
private final Collection <String > labels ;
62
-
63
+ private final Gson gson ;
64
+ final PoolingHttpClientConnectionManager connectionManager ;
65
+
63
66
public PhEyeFilter (final FilterConfiguration filterConfiguration ,
64
67
final PhEyeConfiguration phEyeConfiguration ,
65
68
final Map <String , DescriptiveStatistics > stats ,
@@ -69,53 +72,35 @@ public PhEyeFilter(final FilterConfiguration filterConfiguration,
69
72
70
73
super (filterConfiguration , stats , metricsService , thresholds , FilterType .AGE );
71
74
75
+ this .phEyeConfiguration = phEyeConfiguration ;
72
76
this .removePunctuation = removePunctuation ;
73
77
this .labels = phEyeConfiguration .getLabels ();
78
+ this .gson = new Gson ();
74
79
75
- final OkHttpClient . Builder builder = new OkHttpClient . Builder ();
80
+ this . connectionManager = new PoolingHttpClientConnectionManager ();
76
81
77
- if (StringUtils .isNotEmpty (phEyeConfiguration .getUsername ()) && StringUtils .isNotEmpty (phEyeConfiguration .getPassword ())) {
78
- builder .authenticator (new Authenticator () {
79
- @ Override
80
- public Request authenticate (final Route route , final okhttp3 .Response response ) {
81
- final String credential = Credentials .basic (phEyeConfiguration .getUsername (), phEyeConfiguration .getPassword ());
82
- return response .request ().newBuilder ().header ("Authorization" , credential ).build ();
83
- }
84
- });
82
+ if (phEyeConfiguration .getMaxIdleConnections () > 0 ) {
83
+ connectionManager .setMaxTotal (phEyeConfiguration .getMaxIdleConnections ());
84
+ connectionManager .setDefaultMaxPerRoute (phEyeConfiguration .getMaxIdleConnections ());
85
85
}
86
86
87
- builder .retryOnConnectionFailure (true );
88
- builder .connectTimeout (phEyeConfiguration .getTimeout (), TimeUnit .SECONDS );
89
- builder .writeTimeout (phEyeConfiguration .getTimeout (), TimeUnit .SECONDS );
90
- builder .readTimeout (phEyeConfiguration .getTimeout (), TimeUnit .SECONDS );
91
- builder .connectionPool (new ConnectionPool (phEyeConfiguration .getMaxIdleConnections (), phEyeConfiguration .getKeepAliveDurationMs (), TimeUnit .MILLISECONDS ));
92
-
93
- final OkHttpClient okHttpClient = builder .build ();
94
-
95
- final Retrofit retrofit = new Retrofit .Builder ()
96
- .baseUrl (phEyeConfiguration .getEndpoint ())
97
- .client (okHttpClient )
98
- .callFactory (okHttpClient )
99
- .addConverterFactory (ScalarsConverterFactory .create ())
100
- .addConverterFactory (GsonConverterFactory .create ())
101
- .build ();
102
-
103
- service = retrofit .create (PhEyeService .class );
104
-
105
87
}
106
88
107
89
@ Override
108
90
public FilterResult filter (final Policy policy , final String context , final String documentId , final int piece ,
109
- String input , final Map <String , String > attributes ) throws Exception {
91
+ final String input , final Map <String , String > attributes ) throws Exception {
110
92
111
93
final List <Span > spans = new LinkedList <>();
112
94
113
95
// Remove punctuation if instructed to do so.
114
96
// It is replacing each punctuation mark with an empty space. This will allow span indexes
115
97
// to remain constant as opposed to removing the punctuation and causing the string to then
116
98
// have a shorter length.
99
+ final String formattedInput ;
117
100
if (removePunctuation ) {
118
- input = input .replaceAll ("\\ p{Punct}" , " " );
101
+ formattedInput = input .replaceAll ("\\ p{Punct}" , " " );
102
+ } else {
103
+ formattedInput = input ;
119
104
}
120
105
121
106
final PhEyeRequest phEyeRequest = new PhEyeRequest ();
@@ -124,34 +109,77 @@ public FilterResult filter(final Policy policy, final String context, final Stri
124
109
phEyeRequest .setDocumentId (documentId );
125
110
phEyeRequest .setPiece (piece );
126
111
phEyeRequest .setLabels (labels );
127
-
128
- final Response <String > response = service .find (phEyeRequest ).execute ();
129
-
130
- if (response .isSuccessful ()) {
131
112
132
- final Type listType = new TypeToken <ArrayList <PhEyeSpan >>(){}.getType ();
133
- final List <PhEyeSpan > phEyeSpans = new Gson ().fromJson (response .body (), listType );
134
-
135
- if (CollectionUtils .isNotEmpty (phEyeSpans )) {
113
+ final String json = gson .toJson (phEyeRequest );
114
+
115
+ final URI uri = new URIBuilder (phEyeConfiguration .getEndpoint () + "/find" )
116
+ .build ();
117
+
118
+ final RequestConfig requestConfig = RequestConfig .custom ()
119
+ .setConnectionRequestTimeout (phEyeConfiguration .getTimeout (), TimeUnit .SECONDS )
120
+ .setResponseTimeout (phEyeConfiguration .getTimeout (), TimeUnit .SECONDS )
121
+ .build ();
122
+
123
+ final HttpPost httpPost = new HttpPost (uri );
124
+ httpPost .setConfig (requestConfig );
125
+ httpPost .setEntity (new StringEntity (json ));
126
+ httpPost .setHeader ("Content-Type" , "application/json" );
127
+ httpPost .setHeader ("Accept" , "application/json" );
128
+
129
+ if (StringUtils .isNotEmpty (phEyeConfiguration .getBearerToken ())) {
130
+ httpPost .setHeader ("Authorization" , "Bearer " + phEyeConfiguration .getBearerToken ());
131
+ }
132
+
133
+ final HttpClientBuilder httpClientBuilder = HttpClients .custom ().setConnectionManager (connectionManager );
136
134
137
- for ( final PhEyeSpan phEyeSpan : phEyeSpans ) {
135
+ try ( CloseableHttpClient httpClient = httpClientBuilder . build () ) {
138
136
139
- // Only interested in spans matching the tag we are looking for, e.g. PER, LOC.
140
- if (labels .contains (phEyeSpan .getLabel ())) {
137
+ final HttpClientResponseHandler <String > responseHandler = response -> {
141
138
142
- // Check the confidence threshold.
143
- if (!thresholds .containsKey (phEyeSpan .getLabel ().toUpperCase ()) || phEyeSpan .getScore () >= thresholds .get (phEyeSpan .getLabel ().toUpperCase ())) {
139
+ if (response .getCode () == 200 ) {
144
140
145
- // Get the window of text surrounding the token.
146
- final String [] window = getWindow ( input , phEyeSpan . getStart (), phEyeSpan . getEnd ()) ;
141
+ final HttpEntity responseEntity = response . getEntity ();
142
+ return responseEntity != null ? EntityUtils . toString ( responseEntity ) : null ;
147
143
148
- final Span span = createSpan (policy , context , documentId , phEyeSpan .getText (),
149
- window , phEyeSpan .getLabel (), phEyeSpan .getStart (), phEyeSpan .getEnd (),
150
- phEyeSpan .getScore (), attributes );
144
+ } else {
145
+
146
+ // The request to philter-ner was not successful.
147
+ LOGGER .error ("PhEyeFilter failed with code {}" , response .getCode ());
148
+ throw new IOException ("Unable to process document. Received error response from philter-ner." );
149
+
150
+ }
151
+
152
+ };
153
+
154
+ final String responseBody = httpClient .execute (httpPost , responseHandler );
155
+
156
+ if (responseBody != null ) {
157
+
158
+ final Type listType = new TypeToken <ArrayList <PhEyeSpan >>() {}.getType ();
159
+ final List <PhEyeSpan > phEyeSpans = new Gson ().fromJson (responseBody , listType );
160
+
161
+ if (CollectionUtils .isNotEmpty (phEyeSpans )) {
162
+
163
+ for (final PhEyeSpan phEyeSpan : phEyeSpans ) {
164
+
165
+ // Only interested in spans matching the tag we are looking for, e.g. PER, LOC.
166
+ if (labels .contains (phEyeSpan .getLabel ())) {
167
+
168
+ // Check the confidence threshold.
169
+ if (!thresholds .containsKey (phEyeSpan .getLabel ().toUpperCase ()) || phEyeSpan .getScore () >= thresholds .get (phEyeSpan .getLabel ().toUpperCase ())) {
170
+
171
+ // Get the window of text surrounding the token.
172
+ final String [] window = getWindow (formattedInput , phEyeSpan .getStart (), phEyeSpan .getEnd ());
173
+
174
+ final Span span = createSpan (policy , context , documentId , phEyeSpan .getText (),
175
+ window , phEyeSpan .getLabel (), phEyeSpan .getStart (), phEyeSpan .getEnd (),
176
+ phEyeSpan .getScore (), attributes );
177
+
178
+ // Span will be null if no span was created due to it being excluded.
179
+ if (span != null ) {
180
+ spans .add (span );
181
+ }
151
182
152
- // Span will be null if no span was created due to it being excluded.
153
- if (span != null ) {
154
- spans .add (span );
155
183
}
156
184
157
185
}
@@ -160,16 +188,15 @@ public FilterResult filter(final Policy policy, final String context, final Stri
160
188
161
189
}
162
190
163
- LOGGER .debug ("Returning {} NER spans." , spans .size ());
191
+ LOGGER .debug ("Returning {} NER spans from ph-eye." , spans .size ());
192
+ return new FilterResult (context , documentId , piece , spans );
164
193
165
- }
166
-
167
- return new FilterResult (context , documentId , piece , spans );
194
+ } else {
168
195
169
- } else {
196
+ // We received null back which is not expected.
197
+ throw new IOException ("Unable to process document. Received error response from philter-ner." );
170
198
171
- // The request to philter-ner was not successful.
172
- throw new IOException ("Unable to process document. Received error response from philter-ner." );
199
+ }
173
200
174
201
}
175
202
0 commit comments