Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0459bbf
author urls in style name/id
Jan 3, 2021
d94ef20
autofix
Jan 3, 2021
5468f1a
Merge branch 'master' into author-url
akoehn Apr 6, 2021
bc0e86e
Merge branch 'master' into author-url
mjpost Dec 21, 2021
2e9bf73
Merge branch 'master' into author-url
mjpost Jan 11, 2022
3b5758e
Merge remote-tracking branch 'origin/master' into author-url
mjpost Feb 23, 2022
395b279
Merge branch 'master' into author-url
mbollmann Apr 29, 2023
045a619
Merge branch 'master' into author-url
mjpost May 26, 2025
e541c45
Add ORCID script and apply to 2025.naacl main (#623)
mjpost May 30, 2025
9a2ea36
Add orcid to schema, fix pattern
mjpost May 30, 2025
2808774
Add orcids for NAACL 2025 short
mjpost May 30, 2025
8b14f28
Industry
mjpost May 30, 2025
c0c305c
Orcids for Findings:NAACL 2025
mjpost May 30, 2025
7f2c9c7
Add ORCIDs for 2024.conll-babylm
mjpost Jun 6, 2025
fe4ba0e
Ingest ORCIDS for 2025.trustnlp-main
mjpost Jun 6, 2025
9f8668a
Ingest ORCIDS for 2025.c3nlp-1
mjpost Jun 6, 2025
acbf9ab
Ingest ORCIDS for 2025.privatenlp-main
mjpost Jun 6, 2025
471994d
Ingest ORCIDS for 2025.knowledgenlp-1
mjpost Jun 6, 2025
b9c0581
Ingest ORCIDS for 2025.queerinai-main
mjpost Jun 6, 2025
efe1d3b
Adjust outputs on script
mjpost Jun 6, 2025
1af01e9
Add ORCIDS for 2025.cmcl-1
mjpost Jun 6, 2025
701ae50
Ingest ORCIDS for a number of 2025 workshops
mjpost Jun 6, 2025
d75a9dd
Merge remote-tracking branch 'origin/master' into author-url
mjpost Jun 6, 2025
0d40aca
black
mjpost Jun 6, 2025
962eec2
Add ORCIDS for 2024 venues
mjpost Jun 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 258 additions & 0 deletions bin/create_hugo_pages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2019 Marcel Bollmann <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Usage: create_hugo_pages.py [--dir=DIR] [-c] [--debug]

Creates page stubs for the full anthology based on the YAML data files.

This script can only be run after create_hugo_yaml.py!

Options:
--dir=DIR Hugo project directory. [default: {scriptdir}/../build/]
--debug Output debug-level log messages.
-c, --clean Delete existing files in target directory before generation.
-h, --help Display this helpful text.
"""

from docopt import docopt
from glob import glob
from tqdm import tqdm
import logging as log
import os
import shutil
import yaml

try:
from yaml import CLoader as Loader
except ImportError:
log.info("Can't load yaml C bindings, reverting to slow pure Python version")
from yaml import Loader

from anthology.utils import SeverityTracker


def check_directory(cdir, clean=False):
    """Prepare a target directory for freshly generated content.

    Creates ``cdir`` if it does not exist yet.  If it already exists, every
    entry other than ``_index.md`` counts as existing content: without
    ``clean`` that content is left untouched and the call reports failure;
    with ``clean`` it is deleted.

    Args:
        cdir: Path of the directory to prepare.
        clean: Whether existing content may be deleted.

    Returns:
        True if the directory is ready to be written to, False if it holds
        content and ``clean`` was not set.
    """
    if not os.path.exists(cdir):
        os.makedirs(cdir)
        return True
    # "_index.md" is never treated as generated content and is always kept
    entries = [entry for entry in os.listdir(cdir) if entry != "_index.md"]
    if entries and not clean:
        log.critical("Directory already exists and has content files: {}".format(cdir))
        log.info(
            "Call this script with the -c/--clean flag to automatically DELETE existing files"
        )
        return False
    for entry in entries:
        path = "{}/{}".format(cdir, entry)
        # os.path.isdir() follows symlinks, but shutil.rmtree() refuses to
        # operate on one; unlink links like plain files, which also leaves
        # the link target untouched.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
    return True


def create_papers(srcdir, clean=False):
    """Write one Markdown stub per paper found in data/papers/*.yaml.

    Each stub goes to content/papers/<prefix>/<anthology_id>.md, where
    <prefix> is the part of the Anthology ID before its first hyphen.  The
    stub consists of a YAML front-matter block holding the ID and the title.
    Nothing is written if the target directory cannot be prepared.
    """
    log.info("Creating stubs for papers...")
    papers_root = f"{srcdir}/content/papers"
    if not check_directory(papers_root, clean=clean):
        return

    # Every YAML file under data/papers is processed in turn
    for volume_file in tqdm(glob(f"{srcdir}/data/papers/*.yaml")):
        log.debug(f"Processing {volume_file}")
        with open(volume_file, "r") as source:
            papers = yaml.load(source, Loader=Loader)
        # One stub per entry of the file
        for anthology_id, entry in papers.items():
            prefix = anthology_id.split("-")[0]
            paper_dir = f"{papers_root}/{prefix}"
            if not os.path.exists(paper_dir):
                os.makedirs(paper_dir)
            with open(f"{paper_dir}/{anthology_id}.md", "w") as stub:
                print("---", file=stub)
                yaml.dump(
                    {"anthology_id": anthology_id, "title": entry["title"]},
                    default_flow_style=False,
                    stream=stub,
                )
                print("---", file=stub)


def create_volumes(srcdir, clean=False):
    """Write one Markdown stub per proceedings volume in data/volumes.yaml.

    Each stub goes to content/volumes/<anthology_id>.md and consists of a
    YAML front-matter block holding the volume's ID and title.

    Returns the mapping loaded from data/volumes.yaml, or None if the target
    directory could not be prepared.
    """
    log.info("Creating stubs for volumes...")
    volumes_root = f"{srcdir}/content/volumes"
    if not check_directory(volumes_root, clean=clean):
        return

    volumes_file = f"{srcdir}/data/volumes.yaml"
    log.debug(f"Processing {volumes_file}")
    with open(volumes_file, "r") as source:
        volumes = yaml.load(source, Loader=Loader)

    # One stub per proceedings volume
    for anthology_id, entry in volumes.items():
        with open(f"{volumes_root}/{anthology_id}.md", "w") as stub:
            print("---", file=stub)
            yaml.dump(
                {"anthology_id": anthology_id, "title": entry["title"]},
                default_flow_style=False,
                stream=stub,
            )
            print("---", file=stub)

    return volumes


def create_people(srcdir, clean=False):
    """Creates page stubs for all authors/editors in the Anthology.

    Reads every data/people/*.yaml file and writes, for each person in it, a
    stub to content/people/<first character of slug>/<slug>/<name>.md, where
    <name> is the person's key in the YAML file.

    Returns the mapping loaded from the last people file that was processed,
    or None if there were no people files or the target directory could not
    be prepared.
    """
    log.info("Creating stubs for people...")
    if not check_directory("{}/content/people".format(srcdir), clean=clean):
        return

    # Stays None when no people file matches, so the final return cannot
    # fail with an unbound local.
    data = None
    for yamlfile in tqdm(glob("{}/data/people/*.yaml".format(srcdir))):
        log.debug("Processing {}".format(yamlfile))
        with open(yamlfile, "r") as f:
            data = yaml.load(f, Loader=Loader)
        # Create a page stub for each person
        for name, entry in data.items():
            slug = entry["slug"]
            person_dir = "{}/content/people/{}/{}".format(srcdir, slug[0], slug)
            os.makedirs(person_dir, exist_ok=True)
            # "lastname" is dumped to allow sorting by it in Hugo
            yaml_data = {"name": name, "title": entry["full"], "lastname": entry["last"]}
            with open("{}/{}.md".format(person_dir, name), "w") as f:
                print("---", file=f)
                yaml.dump(yaml_data, default_flow_style=False, stream=f)
                print("---", file=f)

    return data


def create_venues(srcdir, clean=False):
    """Write one Markdown stub per venue listed in data/venues.yaml.

    Each stub goes to content/venues/<slug>.md and consists of a YAML
    front-matter block holding the venue's slug, acronym and name.  The data
    file is loaded before the target directory is checked; nothing is written
    if that check fails.
    """
    venues_file = f"{srcdir}/data/venues.yaml"
    log.debug(f"Processing {venues_file}")
    with open(venues_file, "r") as source:
        venues = yaml.load(source, Loader=Loader)

    log.info("Creating stubs for venues...")
    venues_root = f"{srcdir}/content/venues"
    if not check_directory(venues_root, clean=clean):
        return

    # One stub per venue (e.g. ACL); only the values of the mapping are needed
    for venue_data in venues.values():
        slug = venue_data["slug"]
        with open(f"{venues_root}/{slug}.md", "w") as stub:
            print("---", file=stub)
            front_matter = {
                "venue": slug,
                "acronym": venue_data["acronym"],
                "title": venue_data["name"],
            }
            yaml.dump(front_matter, default_flow_style=False, stream=stub)
            print("---", file=stub)


def create_events(srcdir, clean=False):
    """
    Write one Markdown stub per event listed in data/events.yaml.

    The data file is expected to be a mapping keyed by event slug, where each
    value carries at least a "title" field, e.g.:

        "acl-2022": {
            "title": "Annual Meeting of the Association for Computational Linguistics (2022)",
            "volumes": ["2022.acl-main", "2022.acl-srw", ...]
        },
        ...

    For each event, a "{event_slug}.md" stub is written to content/events.
    The stub lists only the event slug and the event title; only the "title"
    field of each entry is read here.
    """
    events_file = f"{srcdir}/data/events.yaml"
    log.debug(f"Processing {events_file}")
    with open(events_file, "r") as source:
        events = yaml.load(source, Loader=Loader)

    log.info("Creating stubs for events...")
    events_root = f"{srcdir}/content/events"
    if not check_directory(events_root, clean=clean):
        return

    # One stub per event
    for event_slug, event_data in events.items():
        with open(f"{events_root}/{event_slug}.md", "w") as stub:
            print("---", file=stub)
            front_matter = {"event_slug": event_slug, "title": event_data["title"]}
            yaml.dump(front_matter, default_flow_style=False, stream=stub)
            print("---", file=stub)


def create_sigs(srcdir, clean=False):
    """Write one Markdown stub per SIG listed in data/sigs.yaml.

    Each stub goes to content/sigs/<slug>.md and consists of a YAML
    front-matter block holding the SIG's acronym (its key in the data file),
    the acronym without a leading "SIG", and the SIG's name.  The data file
    is loaded before the target directory is checked; nothing is written if
    that check fails.
    """
    sigs_file = f"{srcdir}/data/sigs.yaml"
    log.debug(f"Processing {sigs_file}")
    with open(sigs_file, "r") as source:
        sigs = yaml.load(source, Loader=Loader)

    log.info("Creating stubs for SIGs...")
    sigs_root = f"{srcdir}/content/sigs"
    if not check_directory(sigs_root, clean=clean):
        return

    # One stub per SIG (e.g. SIGMORPHON)
    for acronym, sig_data in sigs.items():
        slug = sig_data["slug"]
        with open(f"{sigs_root}/{slug}.md", "w") as stub:
            print("---", file=stub)
            # Drop the "SIG" prefix for the short form, if there is one
            if acronym.startswith("SIG"):
                short_acronym = acronym[3:]
            else:
                short_acronym = acronym
            front_matter = {
                "acronym": acronym,
                "short_acronym": short_acronym,
                "title": sig_data["name"],
            }
            yaml.dump(front_matter, default_flow_style=False, stream=stub)
            print("---", file=stub)


if __name__ == "__main__":
    args = docopt(__doc__)
    # The default for --dir in the usage text contains a "{scriptdir}"
    # placeholder; resolve it relative to the location of this script.
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    if "{scriptdir}" in args["--dir"]:
        args["--dir"] = args["--dir"].format(scriptdir=scriptdir)
    dir_ = os.path.abspath(args["--dir"])

    log_level = log.DEBUG if args["--debug"] else log.INFO
    log.basicConfig(format="%(levelname)-8s %(message)s", level=log_level)
    # Attached as a handler on the root logger; its `highest` attribute is
    # checked below (presumably the highest severity logged -- defined in
    # anthology.utils, not visible here).
    tracker = SeverityTracker()
    log.getLogger().addHandler(tracker)

    clean = args["--clean"]
    create_papers(dir_, clean=clean)
    create_volumes(dir_, clean=clean)
    create_people(dir_, clean=clean)
    create_venues(dir_, clean=clean)
    create_events(dir_, clean=clean)
    create_sigs(dir_, clean=clean)

    if tracker.highest >= log.ERROR:
        # The builtin exit() is injected by the site module for interactive
        # use and is missing under `python -S`; raise SystemExit directly.
        raise SystemExit(1)
Loading
Loading