-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
.Ruserdata |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
library(tidyverse) | ||
library(jsonlite) | ||
|
||
# ================================
# Load data
# ================================

# Read one JSONL annotation export into a tibble.
# `flatten = TRUE` is forwarded by stream_in() to the JSON parser so that
# nested records (e.g. the `meta` object) become ordinary columns.
load_annotations <- function(path) {
  as_tibble(stream_in(file(path), flatten = TRUE))
}

# Annotated exports, one file per annotator (ids 1, 3 and 4)
annotator_1 <- load_annotations("annotated/annotator_1.jsonl")
annotator_3 <- load_annotations("annotated/annotator_3.jsonl")
annotator_4 <- load_annotations("annotated/annotator_4.jsonl")

# The original (unannotated) datasets handed to the same annotators
unannotated_1 <- read_csv("datasets/annotator_1.csv")
unannotated_3 <- read_csv("datasets/annotator_3.csv")
unannotated_4 <- read_csv("datasets/annotator_4.csv")

# Stack the per-annotator tables into one table each
unannotated_data <- bind_rows(unannotated_1, unannotated_3, unannotated_4)
annotated_data <- bind_rows(annotator_1, annotator_3, annotator_4)

# External dataset that the annotated data is later combined with
zeerack_data <- read_csv("find-out/data/external/zeerack/zeerack_data.csv")
|
||
|
||
# ================================
# Wrangle Data
# ================================

# unannotated_data
# Keep one row per distinct text (the first occurrence wins)
unannotated_data <- unannotated_data %>%
  distinct(text, .keep_all = TRUE)

# annotated_data
# Drop unwanted cols
# Drop ignored Tweets (only "accept" / "reject" answers are kept)
# Remove duplicates
annotated_data <- annotated_data %>%
  select(text, answer) %>%
  filter(answer %in% c("accept", "reject")) %>%
  distinct(text, .keep_all = TRUE)

# Add annotation labels to original data (with meta data)
# Join explicitly on `text` rather than relying on the implicit natural join,
# so the key is visible and no "Joining with `by = ...`" message is emitted.
# NOTE(review): assumes `text` is the only column shared with the CSVs -
# confirm the CSVs have no `answer` column of their own.
# Rename answer col
# Rename annotation labels
# Change created_at column to char to make bindable to zeerack's data
opt_out_data <- annotated_data %>%
  left_join(unannotated_data, by = "text") %>%
  rename(annotation = answer) %>%
  mutate(
    annotation = recode(
      annotation,
      accept = "misogynistic",
      reject = "not_misogynistic"
    ),
    created_at = as.character(created_at)
  )

# Keep only the columns of opt_out_data that also appear in zeerack data.
# intersect() preserves the order of its first argument, so the columns stay
# in opt_out_data's original order.
shared_cols <- intersect(colnames(opt_out_data), colnames(zeerack_data))
opt_out_data <- opt_out_data %>%
  select(all_of(shared_cols))

# Combine opt_out_data and zeerack_data
nlp_test_data <- bind_rows(opt_out_data, zeerack_data)

# Write to csv
write_csv(nlp_test_data, "nlp_test_data.csv")
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{"text":"@DavidDuguidMP @theSNP @NicolaSturgeon @theresa_may And your party's answer is to drown out the smaller boats in fav of 5 familys who fav the Union not the EU. https://t.co/Jp8nsoX7Ux","_input_hash":-637431715,"_task_hash":-1810602418,"label":"MISOGYNY","score":0.3518103957,"priority":0.3518103957,"spans":[],"meta":{"score":0.3518103957},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@UnburntWitch https://t.co/orBdjutGeQ","_input_hash":447219898,"_task_hash":-676576730,"label":"MISOGYNY","score":0.350058645,"priority":0.350058645,"spans":[],"meta":{"score":0.350058645},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@VersaceViv https://t.co/BSZpkIC39q","_input_hash":1962611394,"_task_hash":39237949,"label":"MISOGYNY","score":0.3496937454,"priority":0.3496937454,"spans":[],"meta":{"score":0.3496937454},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@simzsimple @DanielleDASH https://t.co/k4jCgWKkHI","_input_hash":371622904,"_task_hash":-1406415518,"label":"MISOGYNY","score":0.349434942,"priority":0.349434942,"spans":[],"meta":{"score":0.349434942},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@DanielleDASH Can you keep your voice down plz","_input_hash":-2001394314,"_task_hash":-1065562606,"label":"MISOGYNY","score":0.349352479,"priority":0.349352479,"spans":[],"meta":{"score":0.349352479},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"Stupid ass bitch unfollowed me LMAO","_input_hash":1499868745,"_task_hash":139421075,"label":"MISOGYNY","score":0.3492530584,"priority":0.3492530584,"spans":[],"meta":{"score":0.3492530584},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@JAngello85 @AOC @TeamPelosi Let me see if I can explain this simply to you. Law = Congress, Police = Trump. Tell her to fix the laws that she is responsible for as a member of congress and stop blaming the President for doing his job enforcing them.","_input_hash":-387413993,"_task_hash":-1311366814,"label":"MISOGYNY","score":0.3487482071,"priority":0.3487482071,"spans":[],"meta":{"score":0.3487482071},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@JennyGilruth @tavishscott @kezdugdale Looking remarkably alert, all things considered! Congrats, both.","_input_hash":161560655,"_task_hash":842121279,"label":"MISOGYNY","score":0.3479855657,"priority":0.3479855657,"spans":[],"meta":{"score":0.3479855657},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@amandawinnlee @UnburntWitch Yeah. I can sort of understand the ongoing war over whether subs or dubs are better, but the miniscule details of a script aren't the end of the world. Mind you, your dub was amazing.","_input_hash":-1538833768,"_task_hash":-241476288,"label":"MISOGYNY","score":0.3479712307,"priority":0.3479712307,"spans":[],"meta":{"score":0.3479712307},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@VirginActiveUK @EverydaySexism Lol. Can\u2019t even DM you on it... you need to follow me...","_input_hash":-1545960329,"_task_hash":1321691901,"label":"MISOGYNY","score":0.3476383388,"priority":0.3476383388,"spans":[],"meta":{"score":0.3476383388},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@billyt501 @kezdugdale @patrickharvie @THTScotland Billy are you shorter than Patrick? \ud83d\udc40","_input_hash":1226539107,"_task_hash":-909900886,"label":"MISOGYNY","score":0.3467655778,"priority":0.3467655778,"spans":[],"meta":{"score":0.3467655778},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@CarlosD22428844 @Maureen6Johnson @NicolaSturgeon Scumbag !!!! \ud83e\udd2c\ud83e\udd2c\ud83e\udd2c","_input_hash":-1903014502,"_task_hash":453979558,"label":"MISOGYNY","score":0.346683085,"priority":0.346683085,"spans":[],"meta":{"score":0.346683085},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@TeaByAli There was many more I didn\u2019t get screenshots of because she deleted them. I went on and seen she had 595 comments then an hour later it was down to 480. She\u2019s a dirty deleter and only accepts ass kissing and coddling her daughter.","_input_hash":-111907008,"_task_hash":-1936296971,"label":"MISOGYNY","score":0.3410734236,"priority":0.3410734236,"spans":[],"meta":{"score":0.3410734236},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{"text":"@DavidDuguidMP @theSNP @NicolaSturgeon @theresa_may And your party's answer is to drown out the smaller boats in fav of 5 familys who fav the Union not the EU. https://t.co/Jp8nsoX7Ux","_input_hash":-637431715,"_task_hash":-1810602418,"label":"MISOGYNY","score":0.3518103957,"priority":0.3518103957,"spans":[],"meta":{"score":0.3518103957},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@UnburntWitch https://t.co/orBdjutGeQ","_input_hash":447219898,"_task_hash":-676576730,"label":"MISOGYNY","score":0.350058645,"priority":0.350058645,"spans":[],"meta":{"score":0.350058645},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@VersaceViv https://t.co/BSZpkIC39q","_input_hash":1962611394,"_task_hash":39237949,"label":"MISOGYNY","score":0.3496937454,"priority":0.3496937454,"spans":[],"meta":{"score":0.3496937454},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@simzsimple @DanielleDASH https://t.co/k4jCgWKkHI","_input_hash":371622904,"_task_hash":-1406415518,"label":"MISOGYNY","score":0.349434942,"priority":0.349434942,"spans":[],"meta":{"score":0.349434942},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@DanielleDASH Can you keep your voice down plz","_input_hash":-2001394314,"_task_hash":-1065562606,"label":"MISOGYNY","score":0.349352479,"priority":0.349352479,"spans":[],"meta":{"score":0.349352479},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"Stupid ass bitch unfollowed me LMAO","_input_hash":1499868745,"_task_hash":139421075,"label":"MISOGYNY","score":0.3492530584,"priority":0.3492530584,"spans":[],"meta":{"score":0.3492530584},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@JAngello85 @AOC @TeamPelosi Let me see if I can explain this simply to you. Law = Congress, Police = Trump. Tell her to fix the laws that she is responsible for as a member of congress and stop blaming the President for doing his job enforcing them.","_input_hash":-387413993,"_task_hash":-1311366814,"label":"MISOGYNY","score":0.3487482071,"priority":0.3487482071,"spans":[],"meta":{"score":0.3487482071},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@JennyGilruth @tavishscott @kezdugdale Looking remarkably alert, all things considered! Congrats, both.","_input_hash":161560655,"_task_hash":842121279,"label":"MISOGYNY","score":0.3479855657,"priority":0.3479855657,"spans":[],"meta":{"score":0.3479855657},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@amandawinnlee @UnburntWitch Yeah. I can sort of understand the ongoing war over whether subs or dubs are better, but the miniscule details of a script aren't the end of the world. Mind you, your dub was amazing.","_input_hash":-1538833768,"_task_hash":-241476288,"label":"MISOGYNY","score":0.3479712307,"priority":0.3479712307,"spans":[],"meta":{"score":0.3479712307},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@VirginActiveUK @EverydaySexism Lol. Can\u2019t even DM you on it... you need to follow me...","_input_hash":-1545960329,"_task_hash":1321691901,"label":"MISOGYNY","score":0.3476383388,"priority":0.3476383388,"spans":[],"meta":{"score":0.3476383388},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@billyt501 @kezdugdale @patrickharvie @THTScotland Billy are you shorter than Patrick? \ud83d\udc40","_input_hash":1226539107,"_task_hash":-909900886,"label":"MISOGYNY","score":0.3467655778,"priority":0.3467655778,"spans":[],"meta":{"score":0.3467655778},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@CarlosD22428844 @Maureen6Johnson @NicolaSturgeon Scumbag !!!! \ud83e\udd2c\ud83e\udd2c\ud83e\udd2c","_input_hash":-1903014502,"_task_hash":453979558,"label":"MISOGYNY","score":0.346683085,"priority":0.346683085,"spans":[],"meta":{"score":0.346683085},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@TeaByAli There was many more I didn\u2019t get screenshots of because she deleted them. I went on and seen she had 595 comments then an hour later it was down to 480. She\u2019s a dirty deleter and only accepts ass kissing and coddling her daughter.","_input_hash":-111907008,"_task_hash":-1936296971,"label":"MISOGYNY","score":0.3410734236,"priority":0.3410734236,"spans":[],"meta":{"score":0.3410734236},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@rosscolquhoun @NicolaSturgeon You might want to revise the figure on your front page #LiarsForIndy https://t.co/Z1pHRHdn8q","_input_hash":-145004525,"_task_hash":-1231632553,"label":"MISOGYNY","score":0.5339503288,"priority":0.5339503288,"spans":[],"meta":{"score":0.5339503288},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@Sadiq_Filastin @NicolaSturgeon No, you specifically brought up trans women \"often\" perpetrating violence against women, shifting the focus from trans women (and this person in particular) as victims to instead portray them as offenders, minimizing this event and perpetuating harmful views against them.","_input_hash":-1125152035,"_task_hash":-151393820,"label":"MISOGYNY","score":0.5471567512,"priority":0.5471567512,"spans":[],"meta":{"score":0.5471567512},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@Snewsma @AgentP22 @NicolaSturgeon A crowd of racists. Open to all? What a joke. So anti English. You should be ashamed","_input_hash":-1283001516,"_task_hash":-1224910754,"label":"MISOGYNY","score":0.5934074521,"priority":0.5934074521,"spans":[],"meta":{"score":0.5934074521},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@erika_rushton @Alison_McGovern @WirralCouncil @ThisGirlCanUK @EverydaySexism Yep agreed!!!! This isn\u2019t on..Thanks for this Ali... We need to get this taken down ASAP and sort this #sexist advertising out pronto!! #womensport","_input_hash":-101544549,"_task_hash":35899175,"label":"MISOGYNY","score":0.6088513732,"priority":0.6088513732,"spans":[],"meta":{"score":0.6088513732},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@Janela_X @TheEuroGirl @theSNP @NicolaSturgeon @AngusRobertson They won't. They support it from the top down!\n\nNasty nationalists","_input_hash":-1557038507,"_task_hash":1263137448,"label":"MISOGYNY","score":0.6146041751,"priority":0.6146041751,"spans":[],"meta":{"score":0.6146041751},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@AndreaLaybourn @EverydaySexism And by charms of life, I now imagine he means the ladies! \ud83d\udc83\ud83d\udc6f\u200d\u2640\ufe0f\ud83d\udc83\ud83d\udc6f\u200d\u2640\ufe0f","_input_hash":-202060094,"_task_hash":-981577199,"label":"MISOGYNY","score":0.3513704836,"priority":0.3513704836,"spans":[],"meta":{"score":0.3513704836},"_session_id":"optout_proto-default","_view_id":"classification","answer":"ignore"} | ||
{"text":"@UnburntWitch \u2728Dramatic contrasting beans\u2728","_input_hash":-1845483070,"_task_hash":-1209400327,"label":"MISOGYNY","score":0.3388901353,"priority":0.3388901353,"spans":[],"meta":{"score":0.3388901353},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@sportsjunkie781 @aimeeallison @JoeBiden @AOC Exactly, that's why I like it.. Four more years guaranteed for Trump, thanks God!\n@POTUS","_input_hash":-2008380657,"_task_hash":-280816422,"label":"MISOGYNY","score":0.6728252172,"priority":0.6728252172,"spans":[],"meta":{"score":0.6728252172},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} | ||
{"text":"@venus_strikes @man0niem @JessicaValenti https://t.co/wnYW2ylqZx","_input_hash":1810660776,"_task_hash":-973474938,"label":"MISOGYNY","score":0.2863336802,"priority":0.2863336802,"spans":[],"meta":{"score":0.2863336802},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"Wow I\u2019ve been spiraling on this bitch for a week hahah","_input_hash":997872362,"_task_hash":-1991387533,"label":"MISOGYNY","score":0.1755436212,"priority":0.1755436212,"spans":[],"meta":{"score":0.1755436212},"_session_id":"optout_proto-default","_view_id":"classification","answer":"accept"} | ||
{"text":"@AndrewYang I disagree... anytime a leader starves his people, kills them for having opinions, and teaches others should be killed IN GRADE SCHOOL isn't looking for peace! Period!","_input_hash":1816046522,"_task_hash":240659263,"label":"MISOGYNY","score":0.2001698613,"priority":0.2001698613,"spans":[],"meta":{"score":0.2001698613},"_session_id":"optout_proto-default","_view_id":"classification","answer":"reject"} |