Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve report functionality #357

Merged
merged 3 commits into from
Jul 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions orangecontrib/text/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,17 @@ def __str__(self):

def report(self):
    """Return the preprocessor settings as ``(label, value)`` report rows.

    Each pipeline component contributes exactly one row.  A component that
    is unset or empty gets ``None`` as its value instead of a string such
    as ``'None'`` or ``''`` (presumably so the report can omit the row --
    confirm against the consumer of these rows).

    Returns:
        tuple: seven ``(label, value)`` pairs, where ``value`` is a string
        or ``None``.
    """
    def joined(components):
        # Comma-separated names of a collection of components, or None
        # when the collection is empty/unset.
        if not components:
            return None
        return ', '.join(str(component) for component in components)

    def shown(component):
        # String form of a single component, or None when it is unset.
        return str(component) if component else None

    return (
        ('Transformers', joined(self.transformers)),
        ('Tokenizer', shown(self.tokenizer)),
        ('Normalizer', shown(self.normalizer)),
        ('Filters', joined(self.filters)),
        ('Ngrams range', shown(self.ngrams_range)),
        ('Frequency filter', shown(self.freq_filter)),
        ('Pos tagger', shown(self.pos_tagger)),
    )


Expand Down
6 changes: 4 additions & 2 deletions orangecontrib/text/widgets/owcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from Orange.widgets.settings import Setting, ContextSetting, PerfectDomainContextHandler
from Orange.widgets.widget import OWWidget, Msg, Input, Output
from orangecontrib.text.corpus import Corpus, get_sample_corpora_dir
from orangecontrib.text.widgets.utils import widgets
from orangecontrib.text.widgets.utils import widgets, QSize


class OWCorpus(OWWidget):
Expand Down Expand Up @@ -101,11 +101,13 @@ def __init__(self):
get_sample_corpora_dir()),
autoDefault=False,
)
box.layout().addWidget(self.report_button)

# load first file
self.file_widget.select(0)

def sizeHint(self):
    """Return the size hint for this widget: a 400 x 300 ``QSize``."""
    width, height = 400, 300
    return QSize(width, height)

@Inputs.data
def set_data(self, data):
have_data = data is not None
Expand Down
15 changes: 11 additions & 4 deletions orangecontrib/text/widgets/owcorpusviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,15 +406,22 @@ def commit(self):
self.Outputs.matching_docs.send(None)
self.Outputs.other_docs.send(None)

def send_report(self):
    """Report the current regexp query and the number of matching documents."""
    rows = (
        ("Query", self.regexp_filter),
        ("Matching documents", self.n_matching),
    )
    self.report_items(rows)


if __name__ == '__main__':
    # Manual smoke test: show the viewer with a small POS-tagged corpus.
    from orangecontrib.text.tag.pos import AveragedPerceptronTagger

    app = QApplication([])
    widget = OWCorpusViewer()
    widget.show()

    # Three documents are enough to exercise the viewer.
    corpus = Corpus.from_file('book-excerpts')
    corpus = corpus[:3]

    # Tag once and send once; the module-level ``pos_tagger`` path is
    # superseded by an explicit tagger instance.
    tagger = AveragedPerceptronTagger()
    tagged_corpus = tagger.tag_corpus(corpus)
    tagged_corpus.ngram_range = (1, 2)
    widget.set_data(tagged_corpus)
    app.exec()
1 change: 0 additions & 1 deletion orangecontrib/text/widgets/owguardian.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ def __init__(self):

# Buttons
self.button_box = gui.hBox(self.controlArea)
self.button_box.layout().addWidget(self.report_button)

self.search_button = gui.button(self.button_box, self, 'Search',
self.start_stop,
Expand Down
11 changes: 11 additions & 0 deletions orangecontrib/text/widgets/owimportdocuments.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,17 @@ def dirpath(event):

return super().eventFilter(receiver, event)

def send_report(self):
    """Report the import path and document counts.

    Nothing is reported when no path is set.  The category and skipped
    counts are included only when they are non-zero.
    """
    if not self.currentPath:
        return

    items = [
        ('Path', self.currentPath),
        ('Number of documents', self.n_text_data),
    ]
    if self.n_text_categories:
        items.append(('Categories', self.n_text_categories))
    if self.n_skipped:
        items.append(('Number of skipped', self.n_skipped))
    self.report_items(items)


class UserInterruptError(BaseException):
"""
Expand Down
1 change: 0 additions & 1 deletion orangecontrib/text/widgets/ownyt.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ def __init__(self):

# Buttons
self.button_box = gui.hBox(self.controlArea)
self.button_box.layout().addWidget(self.report_button)

self.search_button = gui.button(self.button_box, self, 'Search', self.start_stop,
focusPolicy=Qt.NoFocus)
Expand Down
28 changes: 28 additions & 0 deletions orangecontrib/text/widgets/owpubmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,34 @@ def open_calendar(self, widget):
if cal_dlg.exec_():
widget.setText(cal_dlg.picked_date)

def send_report(self):
    """Report the active PubMed query and how many records were retrieved.

    Nothing is reported when there is no ``pubmed_api``.  When the search
    tab at index 0 is active, the keyword, author and date-range fields
    are reported; otherwise the advanced query text is reported.  Empty
    query/author strings are passed as ``None``.
    """
    api = self.pubmed_api
    if not api:
        return

    # Retrieved count is shown against the smaller of the API limit and
    # the number of records the search found.
    limit = min(api.MAX_RECORDS, api.search_record_count)
    n_retrieved = len(self.output_corpus) if self.output_corpus else 0
    retrieved_row = ('Number of records retrieved',
                     '{}/{}'.format(n_retrieved, limit))

    if self.search_tabs.currentIndex() == 0:
        terms = self.keyword_combo.currentText()
        authors = self.author_input.text()
        date_range = 'from {} to {}'.format(self.pub_date_from,
                                            self.pub_date_to)
        rows = (
            ('Query', terms or None),
            ('Authors', authors or None),
            ('Date', date_range),
            retrieved_row,
        )
    else:
        query = self.advanced_query_input.toPlainText()
        rows = (
            ('Query', query or None),
            retrieved_row,
        )
    self.report_items(rows)


class CalendarDialog(QDialog):

Expand Down
3 changes: 1 addition & 2 deletions orangecontrib/text/widgets/owsentimentanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def __init__(self):

ac = gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
'Autocommit is on')
ac.layout().insertWidget(0, self.report_button)
ac.layout().insertSpacing(1, 8)

@Inputs.corpus
Expand Down Expand Up @@ -66,7 +65,7 @@ def send_report(self):
def main():
app = QApplication([])
widget = OWSentimentAnalysis()
corpus = Corpus.from_file('bookexcerpts')
corpus = Corpus.from_file('book-excerpts')
corpus = corpus[:3]
widget.set_corpus(corpus)
widget.show()
Expand Down
1 change: 0 additions & 1 deletion orangecontrib/text/widgets/owtweetprofiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def __init__(self):

# Auto commit
buttons_layout = QHBoxLayout()
buttons_layout.addWidget(self.report_button)
buttons_layout.addSpacing(15)
buttons_layout.addWidget(
gui.auto_commit(None, self, 'auto_commit', 'Commit', box=False)
Expand Down
1 change: 0 additions & 1 deletion orangecontrib/text/widgets/owtwitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ def add_row(label, items):

# Buttons
self.button_box = gui.hBox(self.controlArea)
self.button_box.layout().addWidget(self.report_button)

self.search_button = gui.button(self.button_box, self, 'Search',
self.start_stop,
Expand Down
10 changes: 6 additions & 4 deletions orangecontrib/text/widgets/owwikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.language_codes import lang2code, code2lang
from orangecontrib.text.widgets.utils import ComboBox, ListEdit, CheckListLayout, asynchronous

from orangecontrib.text.wikipedia import WikipediaAPI


Expand Down Expand Up @@ -53,9 +54,11 @@ def __init__(self, *args, **kwargs):
layout.setSpacing(7)

row = 0
query_edit = ListEdit(self, 'query_list', "Each line represents a separate query.", None, self)
self.query_edit = ListEdit(self, 'query_list', "Each line represents a "
"separate query.", 100, self)
layout.addWidget(QLabel('Query word list:'), row, 0, 1, self.label_width)
layout.addWidget(query_edit, row, self.label_width, 1, self.widgets_width)
layout.addWidget(self.query_edit, row, self.label_width, 1,
self.widgets_width)

# Language
row += 1
Expand All @@ -81,7 +84,6 @@ def __init__(self, *args, **kwargs):
self.result_label = gui.label(self.info_box, self, self.info_label.format(0))

self.button_box = gui.hBox(self.controlArea)
self.button_box.layout().addWidget(self.report_button)

self.search_button = gui.button(self.button_box, self, 'Search', self.start_stop)
self.search_button.setFocusPolicy(Qt.NoFocus)
Expand Down Expand Up @@ -133,7 +135,7 @@ def set_text_features(self):
def send_report(self):
if self.result:
items = (('Language', code2lang[self.language]),
('Query', self.query_list),
('Query', self.query_edit.toPlainText()),
('Articles count', len(self.result)))
self.report_items('Query', items)

Expand Down
3 changes: 3 additions & 0 deletions orangecontrib/text/widgets/owwordcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,9 @@ def commit(self):
topic.name = 'Selected Words'
self.Outputs.selected_words.send(topic)

def send_report(self):
    """Add the contents of ``self.tableview`` to the report as a table."""
    table_view = self.tableview
    self.report_table(table_view)


def main():
from Orange.data import Table, Domain, ContinuousVariable, StringVariable
Expand Down
38 changes: 37 additions & 1 deletion orangecontrib/text/widgets/owwordenrichment.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import numpy as np
from AnyQt.QtWidgets import QTreeWidget, QTreeView, QTreeWidgetItem
from AnyQt.QtWidgets import QTreeWidget, QTreeView, QTreeWidgetItem, \
QApplication

from Orange.data import Table, Domain
from Orange.widgets import gui
from Orange.widgets.settings import Setting
from Orange.widgets.widget import OWWidget, Msg, Input
from PyQt5.QtCore import QSize
from orangecontrib.text import Corpus
from orangecontrib.text.util import np_sp_sum
from orangecontrib.text.stats import false_discovery_rate, hypergeom_p_values
from orangecontrib.text.vectorization import BowVectorizer


class OWWordEnrichment(OWWidget):
Expand Down Expand Up @@ -92,6 +95,9 @@ def __init__(self):
self.sig_words.resizeColumnToContents(i)
self.mainArea.layout().addWidget(self.sig_words)

def sizeHint(self):
    """Return the size hint for this widget: a 450 x 240 ``QSize``."""
    width, height = 450, 240
    return QSize(width, height)

@Inputs.data
def set_data(self, data=None):
self.data = data
Expand Down Expand Up @@ -199,6 +205,19 @@ def apply(self):
self.filter_enabled(True)
self.progressBarFinished()

def tree_to_table(self):
    """Convert the ``sig_words`` tree into a list-of-lists table.

    The first row is the header ``self.cols``; every following row holds
    the cell texts of one top-level tree item, in the tree's current
    order.  Each data row has as many cells as the header, so header and
    body always line up.

    Returns:
        list: ``[self.cols, [text, ...], ...]``; only the header row when
        the tree has no top-level items.
    """
    tree = self.sig_words
    n_columns = len(self.cols)
    rows = [self.cols]
    for index in range(tree.topLevelItemCount()):
        # Look the item up once per row rather than once per cell.
        item = tree.topLevelItem(index)
        rows.append([item.text(column) for column in range(n_columns)])
    return rows

def send_report(self):
    """Report the enriched-words table; do nothing when ``self.words`` is empty."""
    if not self.words:
        return
    table = self.tree_to_table()
    self.report_table("Enriched words", table)

def fp(score):
    """Format ``score`` as a short string.

    Values above 0.01 (``10e-3``) use fixed-point notation with 5
    decimals; all other values use scientific notation with 1 decimal.
    """
    return "%0.5f" % score if score > 10e-3 else "%0.1e" % score


def fpt(score):
    """Format ``score`` as a longer, higher-precision string.

    Values above 0.01 (``10e-3``) use fixed-point notation with 9
    decimals; all other values use scientific notation with 5 decimals.
    """
    return "%0.9f" % score if score > 10e-3 else "%0.5e" % score
Expand All @@ -217,3 +236,20 @@ def __init__(self, word, p_value, f_value, parent):
def __lt__(self, other):
    """Order items by their ``data`` value in the tree's current sort column."""
    sort_column = self.treeWidget().sortColumn()
    own_value = self.data[sort_column]
    other_value = other.data[sort_column]
    return own_value < other_value

def main():
    """Manual demo: open the widget on 'book-excerpts' with a 10-row subset selected."""
    # Build the bag-of-words corpus before any Qt object exists.
    excerpts = Corpus.from_file('book-excerpts')
    vectorizer = BowVectorizer()
    bow_corpus = vectorizer.transform(excerpts)

    app = QApplication([])
    widget = OWWordEnrichment()

    # Full data first, then the first ten rows as the selected subset.
    widget.set_data(bow_corpus)
    selected = bow_corpus[:10]
    widget.set_data_selected(selected)
    widget.handleNewSignals()

    widget.show()
    app.exec()


if __name__ == '__main__':
    main()
1 change: 0 additions & 1 deletion orangecontrib/text/widgets/utils/owbasevectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def __init__(self):
self.controlArea.layout().addWidget(box)

buttons_layout = QHBoxLayout()
buttons_layout.addWidget(self.report_button)
buttons_layout.addSpacing(15)
buttons_layout.addWidget(
gui.auto_commit(None, self, 'autocommit', 'Commit', box=False)
Expand Down