File tree Expand file tree Collapse file tree 2 files changed +9
-1
lines changed Expand file tree Collapse file tree 2 files changed +9
-1
lines changed Original file line number Diff line number Diff line change 11import argparse
2+ import re
23
34from math import ceil
45from urllib .parse import urlencode
@@ -199,9 +200,14 @@ def sort_by_arg_checker(arg_value):
199200 # Also add each issue's body (description/first comment) to the
200201 # comment corpus array to be processed as well.
201202 issue_body = r ['body' ] or ""
203+ # Replace any code blocks (fenced or inline backticks) in the issue's description with the CODE token.
204+ issue_body = re .sub ('```([^`]*)```|`([^`]*)`' , 'CODE' , issue_body )
202205 issue_body_lines = issue_body .splitlines ()
203206 for line in issue_body_lines :
204- if line != "" :
207+ # Strip leading/trailing newline and tab characters before the emptiness check
208+ line = line .strip ('\n ' )
209+ line = line .strip ('\t ' )
210+ if line :
205211 CORPUS .append ({
206212 "issueID" : r ['id' ],
207213 "issueURL_API" : r ['url' ],
Original file line number Diff line number Diff line change @@ -78,6 +78,8 @@ def gitHubCommentAPI(issues):
7878
7979 comment_lines = code_tokenized_comment .splitlines ()
8080 for line in comment_lines :
81+ line = line .strip ('\n ' )
82+ line = line .strip ('\t ' )
8183 if line :
8284 results .append ({
8385 "issueID" : i ['issueID' ],
You can’t perform that action at this time.
0 commit comments