diff --git a/README.md b/README.md
index bb0765b5..462d49c1 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@
 qlever setup-config olympics # Get Qleverfile (config file) for this dataset
 qlever get-data # Download the dataset
 qlever index # Build index data structures for this dataset
 qlever start # Start a QLever server using that index
-qlever example-queries # Launch some example queries
+qlever query # Launch an example query
 qlever ui # Launch the QLever UI
 ```
diff --git a/pyproject.toml b/pyproject.toml
index b053fe62..8c5a0aed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
   "Topic :: Database :: Front-Ends"
 ]

-dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml" ]
+dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml", "rdflib" ]

 [project.urls]
 Github = "https://github.com/ad-freiburg/qlever"
diff --git a/src/qlever/commands/benchmark_queries.py b/src/qlever/commands/benchmark_queries.py
new file mode 100644
index 00000000..18eb1321
--- /dev/null
+++ b/src/qlever/commands/benchmark_queries.py
@@ -0,0 +1,1022 @@
+from __future__ import annotations
+
+import csv
+import json
+import re
+import shlex
+import subprocess
+import time
+import traceback
+from io import StringIO
+from pathlib import Path
+from typing import Any
+
+import rdflib
+import yaml
+from termcolor import colored
+
+from qlever.command import QleverCommand
+from qlever.commands.clear_cache import ClearCacheCommand
+from qlever.commands.ui import dict_to_yaml
+from qlever.log import log, mute_log
+from qlever.util import run_command, run_curl_command
+
+
+class BenchmarkQueriesCommand(QleverCommand):
+    """
+    Class for running a given sequence of benchmark or example queries and
+    showing their processing times and result sizes.
+    """
+
+    def __init__(self):
+        pass
+
+    def description(self) -> str:
+        return (
+            "Run the given benchmark or example queries and show their "
+            "processing times and result sizes"
+        )
+
+    def should_have_qleverfile(self) -> bool:
+        return False
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {"server": ["host_name", "port"], "ui": ["ui_config"]}
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
+        )
+        subparser.add_argument(
+            "--sparql-endpoint-preset",
+            choices=[
+                "https://qlever.dev/api/wikidata",
+                "https://qlever.dev/api/uniprot",
+                "https://qlever.dev/api/pubchem",
+                "https://qlever.dev/api/osm-planet",
+                "https://wikidata.demo.openlinksw.com/sparql",
+                "https://sparql.uniprot.org/sparql",
+            ],
+            help="SPARQL endpoint from fixed list (to save typing)",
+        )
+        subparser.add_argument(
+            "--queries-tsv",
+            type=str,
+            default=None,
+            help=(
+                "Path to a TSV file containing benchmark queries "
+                "(query_description, full_sparql_query)"
+            ),
+        )
+        subparser.add_argument(
+            "--queries-yml",
+            type=str,
+            default=None,
+            help=(
+                "Path to a YAML file containing benchmark queries. "
+                "The YAML file should have a top-level "
+                "key called 'queries', which is a list of dictionaries. "
+                "Each dictionary should contain 'query' for the query "
+                "description and 'sparql' for the full SPARQL query."
+            ),
+        )
+        subparser.add_argument(
+            "--query-ids",
+            type=str,
+            default="1-$",
+            help="Query IDs as comma-separated list of "
+            "ranges (e.g., 1-5,7,12-$)",
+        )
+        subparser.add_argument(
+            "--query-regex",
+            type=str,
+            help="Only consider example queries matching "
+            "this regex (using grep -Pi)",
+        )
+        subparser.add_argument(
+            "--example-queries",
+            action="store_true",
+            default=False,
+            help=(
+                "Run the example queries for the given --ui-config "
+                "instead of the benchmark queries from a TSV or YML file"
+            ),
+        )
+        subparser.add_argument(
+            "--download-or-count",
+            choices=["download", "count"],
+            default="download",
+            help="Whether to download the full result "
+            "or just compute the size of the result",
+        )
+        subparser.add_argument(
+            "--limit", type=int, help="Limit on the number of results"
+        )
+        subparser.add_argument(
+            "--remove-offset-and-limit",
+            action="store_true",
+            default=False,
+            help="Remove OFFSET and LIMIT from the query",
+        )
+        subparser.add_argument(
+            "--accept",
+            type=str,
+            choices=[
+                "text/tab-separated-values",
+                "text/csv",
+                "application/sparql-results+json",
+                "application/qlever-results+json",
+                "application/octet-stream",
+                "text/turtle",
+                "AUTO",
+            ],
+            default="application/sparql-results+json",
+            help="Accept header for the SPARQL query; AUTO means "
+            "`text/turtle` for CONSTRUCT and DESCRIBE queries, "
+            "`application/sparql-results+json` for all others",
+        )
+        subparser.add_argument(
+            "--clear-cache",
+            choices=["yes", "no"],
+            default="no",
+            help="Clear the cache before each query (only works for QLever)",
+        )
+        subparser.add_argument(
+            "--width-query-description",
+            type=int,
+            default=70,
+            help="Width for printing the query description",
+        )
+        subparser.add_argument(
+            "--width-error-message",
+            type=int,
+            default=50,
+            help="Width for printing the error message (0 = no limit)",
+        )
+        subparser.add_argument(
+            "--width-result-size",
+            type=int,
+            default=14,
+            help="Width for printing the result size",
+        )
+        subparser.add_argument(
+            "--add-query-type-to-description",
+            action="store_true",
+            default=False,
+            help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, "
+            "UNKNOWN) to the description",
+        )
+        subparser.add_argument(
+            "--show-query",
+            choices=["always", "never", "on-error"],
+            default="never",
+            help="Show the queries that will be executed (always, never, on error)",
+        )
+        subparser.add_argument(
+            "--show-prefixes",
+            action="store_true",
+            default=False,
+            help="When showing the query, also show the prefixes",
+        )
+        subparser.add_argument(
+            "--results-dir",
+            type=str,
+            default=".",
+            help=(
+                "The directory where the YML result file will be saved "
+                "for the evaluation web app (Default = current working directory)"
+            ),
+        )
+        subparser.add_argument(
+            "--result-file",
+            type=str,
+            default=None,
+            help=(
+                "Base name used for the result YML file, should be of the "
+                "form `<dataset>.<engine>`, e.g., `wikidata.qlever`"
+            ),
+        )
+        subparser.add_argument(
+            "--max-results-output-file",
+            type=int,
+            default=5,
+            help=(
+                "Maximum number of results per query in the output result "
+                "YML file (Default = 5)"
+            ),
+        )
+
+    def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
+        remove_prefixes_cmd = (
+            " | sed '/^PREFIX /Id'" if not show_prefixes else ""
+        )
+        pretty_print_query_cmd = (
+            f"echo {shlex.quote(query)}"
+            f" | docker run -i --rm sparqling/sparql-formatter"
+            f"{remove_prefixes_cmd} | grep -v '^$'"
+        )
+        try:
+            query_pretty_printed = run_command(
+                pretty_print_query_cmd, return_output=True
+            )
+            return query_pretty_printed.rstrip()
+        except Exception as e:
+            log.error(
+                f"Failed to pretty-print query, returning original query: {e}"
+            )
+            return query.rstrip()
+
+    def sparql_query_type(self, query: str) -> str:
+        match = re.search(
+            r"(SELECT|ASK|CONSTRUCT|DESCRIBE)\s", query, re.IGNORECASE
+        )
+        if match:
+            return match.group(1).upper()
+        else:
+            return "UNKNOWN"
+
+    @staticmethod
+    def filter_queries(
+        queries: list[tuple[str, str]], query_ids: str, query_regex: str
+    ) -> list[tuple[str, str]]:
+        """
+        Given a list of queries (tuples of query description and full SPARQL
+        query), keep only those whose 1-based index is covered by query_ids
+        and which match query_regex (if given).
+        """
+        # Get the list of query indices to keep
+        total_queries = len(queries)
+        query_indices = []
+        for part in query_ids.split(","):
+            if "-" in part:
+                start, end = part.split("-")
+                if end == "$":
+                    end = total_queries
+                query_indices.extend(range(int(start) - 1, int(end)))
+            else:
+                idx = int(part) if part != "$" else total_queries
+                query_indices.append(idx - 1)
+
+        try:
+            filtered_queries = []
+            pattern = (
+                re.compile(query_regex, re.IGNORECASE) if query_regex else None
+            )
+            for query_idx in query_indices:
+                if query_idx >= total_queries:
+                    continue
+
+                query_desc, sparql = queries[query_idx]
+
+                # Only include queries that match the query_regex if present
+                if pattern and not (
+                    pattern.search(query_desc) or pattern.search(sparql)
+                ):
+                    continue
+
+                filtered_queries.append((query_desc, sparql))
+            return filtered_queries
+        except Exception as exc:
+            log.error(f"Error filtering queries: {exc}")
+            return []
+
+    @staticmethod
+    def parse_queries_tsv(queries_cmd: str) -> list[tuple[str, str]]:
+        """
+        Execute the given bash command to fetch the TSV queries and return a
+        list of queries, i.e., tuples (query_description, full_sparql_query).
+        """
+        try:
+            tsv_queries_str = run_command(queries_cmd, return_output=True)
+            if len(tsv_queries_str) == 0:
+                log.error("No queries found in the TSV queries file")
+                return []
+            return [
+                tuple(line.split("\t"))
+                for line in tsv_queries_str.strip().splitlines()
+            ]
+        except Exception as exc:
+            log.error(f"Failed to read the TSV queries file: {exc}")
+            return []
+
+    @staticmethod
+    def parse_queries_yml(queries_file: str) -> list[tuple[str, str]]:
+        """
+        Parse a YML file, validate its structure, and return a list of
+        queries, i.e., tuples (query_description, full_sparql_query).
+        """
+        with open(queries_file, "r", encoding="utf-8") as q_file:
+            try:
+                data = yaml.safe_load(q_file)  # Load YAML safely
+            except yaml.YAMLError as exc:
+                log.error(f"Error parsing {queries_file} file: {exc}")
+                return []
+
+        # Validate the structure
+        if not isinstance(data, dict) or "queries" not in data:
+            log.error(
+                "Error: YAML file must contain a top-level 'queries' key"
+            )
+            return []
+
+        if not isinstance(data["queries"], list):
+            log.error("Error: 'queries' key in YML file must hold a list.")
+            return []
+
+        for item in data["queries"]:
+            if (
+                not isinstance(item, dict)
+                or "query" not in item
+                or "sparql" not in item
+            ):
+                log.error(
+                    "Error: Each item in 'queries' must contain "
+                    "'query' and 'sparql' keys."
+ ) + return [] + + return [ + (query['query'], query['sparql']) for query in data["queries"] + ] + + def get_result_size( + self, + count_only: bool, + query_type: str, + accept_header: str, + result_file: str, + ) -> tuple[int, dict[str, str] | None]: + """ + Get the result size and error_msg dict (if query failed) for + different accept headers + """ + + def get_json_error_msg(e: Exception) -> dict[str, str]: + error_msg = { + "short": "Malformed JSON", + "long": "curl returned with code 200, " + "but the JSON is malformed: " + re.sub(r"\s+", " ", str(e)), + } + return error_msg + + result_size = 0 + error_msg = None + # CASE 0: The result is empty despite a 200 HTTP code (not a + # problem for CONSTRUCT and DESCRIBE queries). + if Path(result_file).stat().st_size == 0 and ( + not query_type == "CONSTRUCT" and not query_type == "DESCRIBE" + ): + result_size = 0 + error_msg = { + "short": "Empty result", + "long": "curl returned with code 200, but the result is empty", + } + + # CASE 1: Just counting the size of the result (TSV or JSON). + elif count_only: + if accept_header in ("text/tab-separated-values", "text/csv"): + result_size = run_command( + f"sed 1d {result_file}", return_output=True + ) + elif accept_header == "application/qlever-results+json": + try: + # sed cmd to get the number between 2nd and 3rd double_quotes + result_size = run_command( + f"jq '.res[0]' {result_file}" + " | sed 's/[^0-9]*\\([0-9]*\\).*/\\1/'", + return_output=True, + ) + except Exception as e: + error_msg = get_json_error_msg(e) + else: + try: + result_size = run_command( + f'jq -r ".results.bindings[0]' + f" | to_entries[0].value.value" + f' | tonumber" {result_file}', + return_output=True, + ) + except Exception as e: + error_msg = get_json_error_msg(e) + + # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON). + else: + if accept_header in ("text/tab-separated-values", "text/csv"): + result_size = run_command( + f"sed 1d {result_file} | wc -l", return_output=True + ) + elif accept_header == "text/turtle": + result_size = run_command( + f"sed '1d;/^@prefix/d;/^\\s*$/d' {result_file} | wc -l", + return_output=True, + ) + elif accept_header == "application/qlever-results+json": + try: + result_size = run_command( + f'jq -r ".resultsize" {result_file}', + return_output=True, + ) + except Exception as e: + error_msg = get_json_error_msg(e) + else: + try: + result_size = int( + run_command( + f'jq -r ".results.bindings | length"' + f" {result_file}", + return_output=True, + ).rstrip() + ) + except Exception as e: + error_msg = get_json_error_msg(e) + return int(result_size), error_msg + + @staticmethod + def get_single_int_result(result_file: str) -> int | None: + """ + When downloading the full result of a query with accept header as + application/sparql-results+json and result_size == 1, get the single + integer result value (if any). + """ + single_int_result = None + try: + single_int_result = int( + run_command( + f'jq -e -r ".results.bindings[0][] | .value"' + f" {result_file}", + return_output=True, + ).rstrip() + ) + except Exception: + pass + return single_int_result + + def execute(self, args) -> bool: + # We can't have both `--remove-offset-and-limit` and `--limit`. 
+        if args.remove_offset_and_limit and args.limit:
+            log.error("Cannot have both --remove-offset-and-limit and --limit")
+            return False
+
+        # Extract the dataset and SPARQL engine name from the result file.
+        dataset, engine = None, None
+        if args.result_file is not None:
+            result_file_parts = args.result_file.split(".")
+            if len(result_file_parts) != 2:
+                log.error(
+                    "The argument of --result-file should be of the form "
+                    "`<dataset>.<engine>`, e.g., `wikidata.qlever`"
+                )
+                return False
+            results_dir_path = Path(args.results_dir)
+            if results_dir_path.exists():
+                if not results_dir_path.is_dir():
+                    log.error(
+                        f"{results_dir_path} exists but is not a directory"
+                    )
+                    return False
+            else:
+                log.info(
+                    f"Creating results directory: {results_dir_path.absolute()}"
+                )
+                results_dir_path.mkdir(parents=True, exist_ok=True)
+            dataset, engine = result_file_parts
+
+        # If `args.accept` is `application/sparql-results+json` or
+        # `application/qlever-results+json` or `AUTO`, we need `jq`.
+        if args.accept in (
+            "application/sparql-results+json",
+            "application/qlever-results+json",
+            "AUTO",
+        ):
+            try:
+                subprocess.run(
+                    "jq --version",
+                    shell=True,
+                    check=True,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )
+            except Exception as e:
+                log.error(f"Please install `jq` for {args.accept} ({e})")
+                return False
+
+        if not any((args.queries_tsv, args.queries_yml, args.example_queries)):
+            log.error(
+                "No benchmark or example queries to read! Either pass benchmark "
+                "queries using --queries-tsv or --queries-yml, or pass the "
+                "argument --example-queries to run example queries for the "
+                f"given ui_config {args.ui_config}"
+            )
+            return False
+
+        if all((args.queries_tsv, args.queries_yml)):
+            log.error("Cannot have both --queries-tsv and --queries-yml")
+            return False
+
+        if any((args.queries_tsv, args.queries_yml)) and args.example_queries:
+            queries_file_arg = "tsv" if args.queries_tsv else "yml"
+            log.error(
+                f"Cannot have both --queries-{queries_file_arg} and "
+                "--example-queries"
+            )
+            return False
+
+        # Handle shortcuts for SPARQL endpoint.
+        if args.sparql_endpoint_preset:
+            args.sparql_endpoint = args.sparql_endpoint_preset
+
+        # Limit only works with full result.
+        if args.limit and args.download_or_count == "count":
+            log.error("Limit only works with full result")
+            return False
+
+        # Clear cache only works for QLever.
+        is_qlever = (
+            not args.sparql_endpoint
+            or args.sparql_endpoint.startswith("https://qlever")
+        )
+        if engine is not None:
+            is_qlever = is_qlever or "qlever" in engine.lower()
+        if args.clear_cache == "yes":
+            if is_qlever:
+                log.warning(
+                    "Clearing the cache before each query"
+                    " (only works for QLever)"
+                )
+            else:
+                log.warning(
+                    "Clearing the cache only works for QLever"
+                    ", option `--clear-cache` is ignored"
+                )
+                args.clear_cache = "no"
+
+        # Show what the command will do.
+        example_queries_cmd = (
+            "curl -sv https://qlever.cs.uni-freiburg.de/"
+            f"api/examples/{args.ui_config}"
+        )
+        sparql_endpoint = (
+            args.sparql_endpoint
+            if args.sparql_endpoint
+            else f"{args.host_name}:{args.port}"
+        )
+
+        self.show(
+            f"Obtain queries via: {args.queries_yml or args.queries_tsv or example_queries_cmd}\n"
+            f"SPARQL endpoint: {sparql_endpoint}\n"
+            f"Accept header: {args.accept}\n"
+            f"Download result for each query or just count:"
+            f" {args.download_or_count.upper()}"
+            + (f" with LIMIT {args.limit}" if args.limit else ""),
+            only_show=args.show,
+        )
+        if args.show:
+            return True
+
+        if args.queries_yml:
+            queries = self.parse_queries_yml(args.queries_yml)
+        elif args.queries_tsv:
+            queries = self.parse_queries_tsv(f"cat {args.queries_tsv}")
+        else:
+            queries = self.parse_queries_tsv(example_queries_cmd)
+
+        filtered_queries = self.filter_queries(
+            queries, args.query_ids, args.query_regex
+        )
+
+        if len(filtered_queries) == 0 or not filtered_queries[0]:
+            log.error("No queries to process!")
+            return False
+
+        # We want the width of the query description to be an uneven number
+        # (in case we have to truncate it, so that we can put a " ... " in
+        # the middle).
+        width_query_description_half = args.width_query_description // 2
+        width_query_description = 2 * width_query_description_half + 1
+
+        # Launch the queries one after the other and for each print: the
+        # description, the result size (number of rows), and the query
+        # processing time (seconds).
+        query_times = []
+        result_sizes = []
+        result_yml_query_records = {"queries": []}
+        num_failed = 0
+        for description, query in filtered_queries:
+            if len(query) == 0:
+                log.error("Could not parse description and query, line is:")
+                log.info("")
+                log.info(f"{description}\t{query}")
+                return False
+            query_type = self.sparql_query_type(query)
+            if args.add_query_type_to_description or args.accept == "AUTO":
+                description = f"{description} [{query_type}]"
+
+            # Clear the cache.
+            if args.clear_cache == "yes":
+                args.server_url = sparql_endpoint
+                args.complete = False
+                clear_cache_successful = False
+                with mute_log():
+                    clear_cache_successful = ClearCacheCommand().execute(args)
+                if not clear_cache_successful:
+                    log.warning("Failed to clear the cache")
+
+            # Remove OFFSET and LIMIT (after the last closing bracket).
+            if args.remove_offset_and_limit or args.limit:
+                closing_bracket_idx = query.rfind("}")
+                regexes = [
+                    re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
+                    re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE),
+                ]
+                for regex in regexes:
+                    match = re.search(regex, query[closing_bracket_idx:])
+                    if match:
+                        query = (
+                            query[: closing_bracket_idx + match.start()]
+                            + query[closing_bracket_idx + match.end() :]
+                        )
+
+            # Limit query.
+            if args.limit:
+                query += f" LIMIT {args.limit}"
+
+            # Count query.
+            if args.download_or_count == "count":
+                # First find out if there is a FROM clause.
+                regex_from_clause = re.compile(
+                    r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE
+                )
+                match_from_clause = re.search(regex_from_clause, query)
+                from_clause = " "
+                if match_from_clause:
+                    from_clause = match_from_clause.group(0)
+                    query = (
+                        query[: match_from_clause.start()]
+                        + " "
+                        + query[match_from_clause.end() :]
+                    )
+                # Now we can add the outer SELECT COUNT(*).
+                query = (
+                    re.sub(
+                        r"SELECT ",
+                        "SELECT (COUNT(*) AS ?qlever_count_)"
+                        + from_clause
+                        + "WHERE { SELECT ",
+                        query,
+                        count=1,
+                        flags=re.IGNORECASE,
+                    )
+                    + " }"
+                )
+
+            # A bit of pretty-printing.
+ query = re.sub(r"\s+", " ", query) + query = re.sub(r"\s*\.\s*\}", " }", query) + if args.show_query == "always": + log.info("") + log.info( + colored( + self.pretty_printed_query(query, args.show_prefixes), + "cyan", + ) + ) + + # Accept header. For "AUTO", use `text/turtle` for CONSTRUCT + # queries and `application/sparql-results+json` for all others. + accept_header = args.accept + if accept_header == "AUTO": + if query_type == "CONSTRUCT" or query_type == "DESCRIBE": + accept_header = "text/turtle" + else: + accept_header = "application/sparql-results+json" + + # Launch query. + curl_cmd = ( + f"curl -Ls {sparql_endpoint}" + f' -w "HTTP code: %{{http_code}}\\n"' + f' -H "Accept: {accept_header}"' + f" --data-urlencode query={shlex.quote(query)}" + ) + log.debug(curl_cmd) + result_file = ( + f"qlever.example_queries.result.{abs(hash(curl_cmd))}.tmp" + ) + start_time = time.time() + try: + http_code = run_curl_command( + sparql_endpoint, + headers={"Accept": accept_header}, + params={"query": query}, + result_file=result_file, + ).strip() + if http_code == "200": + time_seconds = time.time() - start_time + error_msg = None + else: + time_seconds = time.time() - start_time + error_msg = { + "short": f"HTTP code: {http_code}", + "long": re.sub( + r"\s+", " ", Path(result_file).read_text() + ), + } + except Exception as e: + time_seconds = time.time() - start_time + if args.log_level == "DEBUG": + traceback.print_exc() + error_msg = { + "short": "Exception", + "long": re.sub(r"\s+", " ", str(e)), + } + + # Get result size (via the command line, in order to avoid loading + # a potentially large JSON file into Python, which is slow). + if error_msg is None: + result_size, error_msg = self.get_result_size( + args.download_or_count == "count", + query_type, + accept_header, + result_file, + ) + single_int_result = None + if ( + result_size == 1 + and accept_header == "application/sparql-results+json" + and args.download_or_count == "download" + ): + single_int_result = self.get_single_int_result(result_file) + + # Get the result yaml record if output file needs to be generated + if args.result_file is not None: + result_length = None if error_msg is not None else 1 + result_length = ( + result_size + if args.download_or_count == "download" + and result_length is not None + else result_length + ) + query_results = ( + error_msg if error_msg is not None else result_file + ) + query_record = self.get_result_yml_query_record( + query=description, + sparql=self.pretty_printed_query( + query, args.show_prefixes + ), + client_time=time_seconds, + result=query_results, + result_size=result_length, + max_result_size=args.max_results_output_file, + accept_header=accept_header, + ) + result_yml_query_records["queries"].append(query_record) + + # Print description, time, result in tabular form. + if len(description) > width_query_description: + description = ( + description[: width_query_description_half - 2] + + " ... 
" + + description[-width_query_description_half + 2 :] + ) + if error_msg is None: + result_size = int(result_size) + single_int_result = ( + f" [single int result: {single_int_result:,}]" + if single_int_result is not None + else "" + ) + log.info( + f"{description:<{width_query_description}} " + f"{time_seconds:6.2f} s " + f"{result_size:>{args.width_result_size},}" + f"{single_int_result}" + ) + query_times.append(time_seconds) + result_sizes.append(result_size) + else: + num_failed += 1 + if ( + args.width_error_message > 0 + and len(error_msg["long"]) > args.width_error_message + and args.log_level != "DEBUG" + and args.show_query != "on-error" + ): + error_msg["long"] = ( + error_msg["long"][: args.width_error_message - 3] + + "..." + ) + seperator_short_long = ( + "\n" if args.show_query == "on-error" else " " + ) + log.info( + f"{description:<{width_query_description}} " + f"{colored('FAILED ', 'red')}" + f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}" + f"{seperator_short_long}" + f"{colored(error_msg['long'], 'red')}" + ) + if args.show_query == "on-error": + log.info( + colored( + self.pretty_printed_query( + query, args.show_prefixes + ), + "cyan", + ) + ) + log.info("") + + # Remove the result file (unless in debug mode). + if args.log_level != "DEBUG": + Path(result_file).unlink(missing_ok=True) + + # Check that each query has a time and a result size, or it failed. + assert len(result_sizes) == len(query_times) + assert len(query_times) + num_failed == len(filtered_queries) + + if args.result_file: + if len(result_yml_query_records["queries"]) != 0: + outfile_name = f"{dataset}.{engine}.results.yaml" + outfile = Path(args.results_dir) / outfile_name + self.write_query_records_to_result_file( + query_data=result_yml_query_records, + out_file=outfile, + ) + else: + log.error( + f"Nothing to write to output result YML file: {args.result_file}" + ) + + # Show statistics. + if len(query_times) > 0: + n = len(query_times) + total_query_time = sum(query_times) + average_query_time = total_query_time / n + median_query_time = sorted(query_times)[n // 2] + total_result_size = sum(result_sizes) + average_result_size = round(total_result_size / n) + median_result_size = sorted(result_sizes)[n // 2] + query_or_queries = "query" if n == 1 else "queries" + description = f"TOTAL for {n} {query_or_queries}" + log.info("") + log.info( + f"{description:<{width_query_description}} " + f"{total_query_time:6.2f} s " + f"{total_result_size:>14,}" + ) + description = f"AVERAGE for {n} {query_or_queries}" + log.info( + f"{description:<{width_query_description}} " + f"{average_query_time:6.2f} s " + f"{average_result_size:>14,}" + ) + description = f"MEDIAN for {n} {query_or_queries}" + log.info( + f"{description:<{width_query_description}} " + f"{median_query_time:6.2f} s " + f"{median_result_size:>14,}" + ) + + # Show number of failed queries. + if num_failed > 0: + log.info("") + description = "Number of FAILED queries" + num_failed_string = f"{num_failed:>6}" + if num_failed == len(filtered_queries): + num_failed_string += " [all]" + log.info( + colored( + f"{description:<{width_query_description}} " + f"{num_failed:>24}", + "red", + ) + ) + + # Return success (has nothing to do with how many queries failed). 
+        return True
+
+    def get_result_yml_query_record(
+        self,
+        query: str,
+        sparql: str,
+        client_time: float,
+        result: str | dict[str, str],
+        result_size: int | None,
+        max_result_size: int,
+        accept_header: str,
+    ) -> dict[str, Any]:
+        """
+        Construct a dictionary with the query information for the output
+        result YAML file.
+        """
+        record = {
+            "query": query,
+            "sparql": sparql,
+            "runtime_info": {},
+        }
+        if result_size is None:
+            results = f"{result['short']}: {result['long']}"
+            headers = []
+        else:
+            record["result_size"] = result_size
+            result_size = (
+                max_result_size
+                if result_size > max_result_size
+                else result_size
+            )
+            headers, results = self.get_query_results(
+                result, result_size, accept_header
+            )
+            if accept_header == "application/qlever-results+json":
+                # Use `-r` and strip the output so that the comparison with
+                # the literal string "null" below actually works.
+                runtime_info_cmd = (
+                    f"jq -r 'if .runtimeInformation then"
+                    f" .runtimeInformation else"
+                    f' "null" end\' {result}'
+                )
+                runtime_info_str = run_command(
+                    runtime_info_cmd, return_output=True
+                ).strip()
+                if runtime_info_str != "null":
+                    record["runtime_info"] = json.loads(runtime_info_str)
+        record["runtime_info"]["client_time"] = client_time
+        record["headers"] = headers
+        record["results"] = results
+        return record
+
+    def get_query_results(
+        self, result_file: str, result_size: int, accept_header: str
+    ) -> tuple[list[str], list[list[str]]]:
+        """
+        Return headers and query results as a tuple for the various accept
+        headers.
+        """
+        if accept_header in ("text/tab-separated-values", "text/csv"):
+            separator = "," if accept_header == "text/csv" else "\t"
+            get_result_cmd = f"sed -n '1,{result_size + 1}p' {result_file}"
+            results_str = run_command(get_result_cmd, return_output=True)
+            reader = csv.reader(StringIO(results_str), delimiter=separator)
+            headers = next(reader)
+            results = [row for row in reader]
+            return headers, results
+
+        elif accept_header == "application/qlever-results+json":
+            get_result_cmd = (
+                f"jq '{{headers: .selected, results: .res[0:{result_size}]}}' "
+                f"{result_file}"
+            )
+            results_str = run_command(get_result_cmd, return_output=True)
+            results_json = json.loads(results_str)
+            return results_json["headers"], results_json["results"]
+
+        elif accept_header == "application/sparql-results+json":
+            get_result_cmd = (
+                f"jq '{{headers: .head.vars, "
+                f"bindings: .results.bindings[0:{result_size}]}}' "
+                f"{result_file}"
+            )
+            results_str = run_command(get_result_cmd, return_output=True)
+            results_json = json.loads(results_str)
+            results = []
+            bindings = results_json.get("bindings", [])
+            for binding in bindings:
+                result = []
+                if not binding or not isinstance(binding, dict):
+                    results.append([])
+                    continue
+                for obj in binding.values():
+                    value = '"' + obj["value"] + '"'
+                    if obj["type"] == "uri":
+                        value = "<" + value.strip('"') + ">"
+                    elif "datatype" in obj:
+                        value += "^^<" + obj["datatype"] + ">"
+                    elif "xml:lang" in obj:
+                        value += "@" + obj["xml:lang"]
+                    result.append(value)
+                results.append(result)
+            return results_json["headers"], results
+
+        else:  # text/turtle
+            graph = rdflib.Graph()
+            graph.parse(result_file, format="turtle")
+            headers = ["?subject", "?predicate", "?object"]
+            results = []
+            for i, (s, p, o) in enumerate(graph):
+                if i >= result_size:
+                    break
+                results.append([str(s), str(p), str(o)])
+            return headers, results
+
+    @staticmethod
+    def write_query_records_to_result_file(
+        query_data: dict[str, list[dict[str, Any]]], out_file: Path
+    ) -> None:
+        """
+        Write the YAML records for all queries to the output YAML file.
+        """
+        config_yaml = 
dict_to_yaml(query_data) + with open(out_file, "w") as eval_yaml_file: + eval_yaml_file.write(config_yaml) + log.info("") + log.info( + f"Generated result yaml file: {out_file.stem}{out_file.suffix} " + f"in the directory {out_file.parent.resolve()}" + ) diff --git a/src/qlever/commands/example_queries.py b/src/qlever/commands/example_queries.py deleted file mode 100644 index 38ee47d9..00000000 --- a/src/qlever/commands/example_queries.py +++ /dev/null @@ -1,606 +0,0 @@ -from __future__ import annotations - -import re -import shlex -import subprocess -import time -import traceback -from pathlib import Path - -from termcolor import colored - -from qlever.command import QleverCommand -from qlever.commands.clear_cache import ClearCacheCommand -from qlever.log import log, mute_log -from qlever.util import run_command, run_curl_command - - -class ExampleQueriesCommand(QleverCommand): - """ - Class for running a given sequence of example queries and showing - their processing times and result sizes. - """ - - def __init__(self): - pass - - def description(self) -> str: - return "Run the given queries and show their processing times and result sizes" - - def should_have_qleverfile(self) -> bool: - return False - - def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: - return {"server": ["host_name", "port"], "ui": ["ui_config"]} - - def additional_arguments(self, subparser) -> None: - subparser.add_argument( - "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint" - ) - subparser.add_argument( - "--sparql-endpoint-preset", - choices=[ - "https://qlever.dev/api/wikidata", - "https://qlever.dev/api/uniprot", - "https://qlever.dev/api/pubchem", - "https://qlever.dev/api/osm-planet", - "https://wikidata.demo.openlinksw.com/sparql", - "https://sparql.uniprot.org/sparql", - ], - help="SPARQL endpoint from fixed list (to save typing)", - ) - subparser.add_argument( - "--get-queries-cmd", - type=str, - help="Command to get example queries as TSV " - "(description, query)", - ) - subparser.add_argument( - "--query-ids", - type=str, - default="1-$", - help="Query IDs as comma-separated list of " - "ranges (e.g., 1-5,7,12-$)", - ) - subparser.add_argument( - "--query-regex", - type=str, - help="Only consider example queries matching " - "this regex (using grep -Pi)", - ) - subparser.add_argument( - "--download-or-count", - choices=["download", "count"], - default="download", - help="Whether to download the full result " - "or just compute the size of the result", - ) - subparser.add_argument( - "--limit", type=int, help="Limit on the number of results" - ) - subparser.add_argument( - "--remove-offset-and-limit", - action="store_true", - default=False, - help="Remove OFFSET and LIMIT from the query", - ) - subparser.add_argument( - "--accept", - type=str, - choices=[ - "text/tab-separated-values", - "text/csv", - "application/sparql-results+json", - "application/qlever-results+json", - "application/octet-stream", - "text/turtle", - "AUTO", - ], - default="application/sparql-results+json", - help="Accept header for the SPARQL query; AUTO means " - "`text/turtle` for CONSTRUCT AND DESCRIBE queries, " - "`application/sparql-results+json` for all others", - ) - subparser.add_argument( - "--clear-cache", - choices=["yes", "no"], - default="no", - help="Clear the cache before each query (only works for QLever)", - ) - subparser.add_argument( - "--width-query-description", - type=int, - default=70, - help="Width for printing the query description", - ) - subparser.add_argument( - 
"--width-error-message", - type=int, - default=80, - help="Width for printing the error message " "(0 = no limit)", - ) - subparser.add_argument( - "--width-result-size", - type=int, - default=14, - help="Width for printing the result size", - ) - subparser.add_argument( - "--add-query-type-to-description", - action="store_true", - default=False, - help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, " - "UNKNOWN) to the description", - ) - subparser.add_argument( - "--show-query", - choices=["always", "never", "on-error"], - default="never", - help="Show the queries that will be executed (always, never, on error)", - ) - subparser.add_argument( - "--show-prefixes", - action="store_true", - default=False, - help="When showing the query, also show the prefixes", - ) - - def pretty_printed_query(self, query: str, show_prefixes: bool) -> str: - remove_prefixes_cmd = ( - " | sed '/^PREFIX /Id'" if not show_prefixes else "" - ) - pretty_print_query_cmd = ( - f"echo {shlex.quote(query)}" - f" | docker run -i --rm sparqling/sparql-formatter" - f"{remove_prefixes_cmd} | grep -v '^$'" - ) - try: - query_pretty_printed = run_command( - pretty_print_query_cmd, return_output=True - ) - return query_pretty_printed.rstrip() - except Exception: - log.error( - "Failed to pretty-print query, " - "returning original query: {e}" - ) - return query.rstrip() - - def sparql_query_type(self, query: str) -> str: - match = re.search( - r"(SELECT|ASK|CONSTRUCT|DESCRIBE)\s", query, re.IGNORECASE - ) - if match: - return match.group(1).upper() - else: - return "UNKNOWN" - - def execute(self, args) -> bool: - # We can't have both `--remove-offset-and-limit` and `--limit`. - if args.remove_offset_and_limit and args.limit: - log.error("Cannot have both --remove-offset-and-limit and --limit") - return False - - # If `args.accept` is `application/sparql-results+json` or - # `application/qlever-results+json` or `AUTO`, we need `jq`. - if ( - args.accept == "application/sparql-results+json" - or args.accept == "application/qlever-results+json" - or args.accept == "AUTO" - ): - try: - subprocess.run( - "jq --version", - shell=True, - check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - except Exception as e: - log.error(f"Please install `jq` for {args.accept} ({e})") - return False - - # Handle shotcuts for SPARQL endpoint. - if args.sparql_endpoint_preset: - args.sparql_endpoint = args.sparql_endpoint_preset - - # Limit only works with full result. - if args.limit and args.download_or_count == "count": - log.error("Limit only works with full result") - return False - - # Clear cache only works for QLever. - is_qlever = ( - not args.sparql_endpoint - or args.sparql_endpoint.startswith("https://qlever") - ) - if args.clear_cache == "yes": - if is_qlever: - log.warning( - "Clearing the cache before each query" - " (only works for QLever)" - ) - else: - log.warning( - "Clearing the cache only works for QLever" - ", option `--clear-cache` is ignored" - ) - args.clear_cache = "no" - - # Show what the command will do. 
- get_queries_cmd = ( - args.get_queries_cmd - if args.get_queries_cmd - else f"curl -sv https://qlever.cs.uni-freiburg.de/" - f"api/examples/{args.ui_config}" - ) - sed_arg = args.query_ids.replace(",", "p;").replace("-", ",") + "p" - get_queries_cmd += f" | sed -n '{sed_arg}'" - if args.query_regex: - get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}" - sparql_endpoint = ( - args.sparql_endpoint - if args.sparql_endpoint - else f"{args.host_name}:{args.port}" - ) - self.show( - f"Obtain queries via: {get_queries_cmd}\n" - f"SPARQL endpoint: {sparql_endpoint}\n" - f"Accept header: {args.accept}\n" - f"Download result for each query or just count:" - f" {args.download_or_count.upper()}" - + (f" with LIMIT {args.limit}" if args.limit else ""), - only_show=args.show, - ) - if args.show: - return True - - # Get the example queries. - try: - example_query_lines = run_command( - get_queries_cmd, return_output=True - ) - if len(example_query_lines) == 0: - log.error("No example queries matching the criteria found") - return False - example_query_lines = example_query_lines.splitlines() - except Exception as e: - log.error(f"Failed to get example queries: {e}") - return False - - # We want the width of the query description to be an uneven number (in - # case we have to truncated it, in which case we want to have a " ... " - # in the middle). - width_query_description_half = args.width_query_description // 2 - width_query_description = 2 * width_query_description_half + 1 - - # Launch the queries one after the other and for each print: the - # description, the result size (number of rows), and the query - # processing time (seconds). - query_times = [] - result_sizes = [] - num_failed = 0 - for example_query_line in example_query_lines: - # Parse description and query, and determine query type. - description, query = example_query_line.split("\t") - if len(query) == 0: - log.error("Could not parse description and query, line is:") - log.info("") - log.info(example_query_line) - return False - query_type = self.sparql_query_type(query) - if args.add_query_type_to_description or args.accept == "AUTO": - description = f"{description} [{query_type}]" - - # Clear the cache. - if args.clear_cache == "yes": - args.server_url = sparql_endpoint - args.complete = False - clear_cache_successful = False - with mute_log(): - clear_cache_successful = ClearCacheCommand().execute(args) - if not clear_cache_successful: - log.warn("Failed to clear the cache") - - # Remove OFFSET and LIMIT (after the last closing bracket). - if args.remove_offset_and_limit or args.limit: - closing_bracket_idx = query.rfind("}") - regexes = [ - re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE), - re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE), - ] - for regex in regexes: - match = re.search(regex, query[closing_bracket_idx:]) - if match: - query = ( - query[: closing_bracket_idx + match.start()] - + query[closing_bracket_idx + match.end() :] - ) - - # Limit query. - if args.limit: - query += f" LIMIT {args.limit}" - - # Count query. - if args.download_or_count == "count": - # First find out if there is a FROM clause. - regex_from_clause = re.compile( - r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE - ) - match_from_clause = re.search(regex_from_clause, query) - from_clause = " " - if match_from_clause: - from_clause = match_from_clause.group(0) - query = ( - query[: match_from_clause.start()] - + " " - + query[match_from_clause.end() :] - ) - # Now we can add the outer SELECT COUNT(*). 
- query = ( - re.sub( - r"SELECT ", - "SELECT (COUNT(*) AS ?qlever_count_)" - + from_clause - + "WHERE { SELECT ", - query, - count=1, - flags=re.IGNORECASE, - ) - + " }" - ) - - # A bit of pretty-printing. - query = re.sub(r"\s+", " ", query) - query = re.sub(r"\s*\.\s*\}", " }", query) - if args.show_query == "always": - log.info("") - log.info( - colored( - self.pretty_printed_query(query, args.show_prefixes), - "cyan", - ) - ) - - # Accept header. For "AUTO", use `text/turtle` for CONSTRUCT - # queries and `application/sparql-results+json` for all others. - accept_header = args.accept - if accept_header == "AUTO": - if query_type == "CONSTRUCT" or query_type == "DESCRIBE": - accept_header = "text/turtle" - else: - accept_header = "application/sparql-results+json" - - # Launch query. - try: - curl_cmd = ( - f"curl -Ls {sparql_endpoint}" - f' -w "HTTP code: %{{http_code}}\\n"' - f' -H "Accept: {accept_header}"' - f" --data-urlencode query={shlex.quote(query)}" - ) - log.debug(curl_cmd) - result_file = ( - f"qlever.example_queries.result." - f"{abs(hash(curl_cmd))}.tmp" - ) - start_time = time.time() - http_code = run_curl_command( - sparql_endpoint, - headers={"Accept": accept_header}, - params={"query": query}, - result_file=result_file, - ).strip() - if http_code == "200": - time_seconds = time.time() - start_time - error_msg = None - else: - error_msg = { - "short": f"HTTP code: {http_code}", - "long": re.sub( - r"\s+", " ", Path(result_file).read_text() - ), - } - except Exception as e: - if args.log_level == "DEBUG": - traceback.print_exc() - error_msg = { - "short": "Exception", - "long": re.sub(r"\s+", " ", str(e)), - } - - # Get result size (via the command line, in order to avoid loading - # a potentially large JSON file into Python, which is slow). - if error_msg is None: - single_int_result = None - # CASE 0: The result is empty despite a 200 HTTP code (not a - # problem for CONSTRUCT and DESCRIBE queries). - if Path(result_file).stat().st_size == 0 and ( - not query_type == "CONSTRUCT" - and not query_type == "DESCRIBE" - ): - result_size = 0 - error_msg = { - "short": "Empty result", - "long": "curl returned with code 200, " - "but the result is empty", - } - - # CASE 1: Just counting the size of the result (TSV or JSON). - elif args.download_or_count == "count": - if accept_header == "text/tab-separated-values": - result_size = run_command( - f"sed 1d {result_file}", return_output=True - ) - else: - try: - result_size = run_command( - f'jq -r ".results.bindings[0]' - f" | to_entries[0].value.value" - f' | tonumber" {result_file}', - return_output=True, - ) - except Exception as e: - error_msg = { - "short": "Malformed JSON", - "long": "curl returned with code 200, " - "but the JSON is malformed: " - + re.sub(r"\s+", " ", str(e)), - } - - # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON). 
- else: - if ( - accept_header == "text/tab-separated-values" - or accept_header == "text/csv" - ): - result_size = run_command( - f"sed 1d {result_file} | wc -l", return_output=True - ) - elif accept_header == "text/turtle": - result_size = run_command( - f"sed '1d;/^@prefix/d;/^\\s*$/d' " - f"{result_file} | wc -l", - return_output=True, - ) - elif accept_header == "application/qlever-results+json": - result_size = run_command( - f'jq -r ".resultsize" {result_file}', - return_output=True, - ) - else: - try: - result_size = int( - run_command( - f'jq -r ".results.bindings | length"' - f" {result_file}", - return_output=True, - ).rstrip() - ) - except Exception as e: - error_msg = { - "short": "Malformed JSON", - "long": re.sub(r"\s+", " ", str(e)), - } - if result_size == 1: - try: - single_int_result = int( - run_command( - f'jq -e -r ".results.bindings[0][] | .value"' - f" {result_file}", - return_output=True, - ).rstrip() - ) - except Exception: - pass - - # Remove the result file (unless in debug mode). - if args.log_level != "DEBUG": - Path(result_file).unlink(missing_ok=True) - - # Print description, time, result in tabular form. - if len(description) > width_query_description: - description = ( - description[: width_query_description_half - 2] - + " ... " - + description[-width_query_description_half + 2 :] - ) - if error_msg is None: - result_size = int(result_size) - single_int_result = ( - f" [single int result: {single_int_result:,}]" - if single_int_result is not None - else "" - ) - log.info( - f"{description:<{width_query_description}} " - f"{time_seconds:6.2f} s " - f"{result_size:>{args.width_result_size},}" - f"{single_int_result}" - ) - query_times.append(time_seconds) - result_sizes.append(result_size) - else: - num_failed += 1 - if ( - args.width_error_message > 0 - and len(error_msg["long"]) > args.width_error_message - and args.log_level != "DEBUG" - and args.show_query != "on-error" - ): - error_msg["long"] = ( - error_msg["long"][: args.width_error_message - 3] - + "..." - ) - seperator_short_long = ( - "\n" if args.show_query == "on-error" else " " - ) - log.info( - f"{description:<{width_query_description}} " - f"{colored('FAILED ', 'red')}" - f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}" - f"{seperator_short_long}" - f"{colored(error_msg['long'], 'red')}" - ) - if args.show_query == "on-error": - log.info( - colored( - self.pretty_printed_query( - query, args.show_prefixes - ), - "cyan", - ) - ) - log.info("") - - # Check that each query has a time and a result size, or it failed. - assert len(result_sizes) == len(query_times) - assert len(query_times) + num_failed == len(example_query_lines) - - # Show statistics. 
- if len(query_times) > 0: - n = len(query_times) - total_query_time = sum(query_times) - average_query_time = total_query_time / n - median_query_time = sorted(query_times)[n // 2] - total_result_size = sum(result_sizes) - average_result_size = round(total_result_size / n) - median_result_size = sorted(result_sizes)[n // 2] - query_or_queries = "query" if n == 1 else "queries" - description = f"TOTAL for {n} {query_or_queries}" - log.info("") - log.info( - f"{description:<{width_query_description}} " - f"{total_query_time:6.2f} s " - f"{total_result_size:>14,}" - ) - description = f"AVERAGE for {n} {query_or_queries}" - log.info( - f"{description:<{width_query_description}} " - f"{average_query_time:6.2f} s " - f"{average_result_size:>14,}" - ) - description = f"MEDIAN for {n} {query_or_queries}" - log.info( - f"{description:<{width_query_description}} " - f"{median_query_time:6.2f} s " - f"{median_result_size:>14,}" - ) - - # Show number of failed queries. - if num_failed > 0: - log.info("") - description = "Number of FAILED queries" - num_failed_string = f"{num_failed:>6}" - if num_failed == len(example_query_lines): - num_failed_string += " [all]" - log.info( - colored( - f"{description:<{width_query_description}} " - f"{num_failed:>24}", - "red", - ) - ) - - # Return success (has nothing to do with how many queries failed). - return True diff --git a/src/qlever/commands/ui.py b/src/qlever/commands/ui.py index fb49ee78..b7bbd686 100644 --- a/src/qlever/commands/ui.py +++ b/src/qlever/commands/ui.py @@ -13,13 +13,16 @@ # Return a YAML string for the given dictionary. Format values with # newlines using the "|" style. -def dict_to_yaml(dictionary): - # Custom representer for yaml, which uses the "|" style only for - # multiline strings. - # - # NOTE: We replace all `\r\n` with `\n` because otherwise the `|` style - # does not work as expected. - class MultiLineDumper(yaml.Dumper): +def dict_to_yaml(dictionary: dict) -> str: + """ + Custom representer for yaml, which uses the "|" style only for + multiline strings. + + NOTE: We replace all `\r\n` with `\n` because otherwise the `|` style + does not work as expected. 
+ """ + + class MultiLineDumper(yaml.SafeDumper): def represent_scalar(self, tag, value, style=None): value = value.replace("\r\n", "\n") if isinstance(value, str) and "\n" in value: @@ -30,6 +33,7 @@ def represent_scalar(self, tag, value, style=None): return yaml.dump( dictionary, sort_keys=False, + allow_unicode=True, Dumper=MultiLineDumper, ) diff --git a/test/qlever/commands/test_benchmark_queries_methods.py b/test/qlever/commands/test_benchmark_queries_methods.py new file mode 100644 index 00000000..39f0e3da --- /dev/null +++ b/test/qlever/commands/test_benchmark_queries_methods.py @@ -0,0 +1,188 @@ +import pytest + +from qlever.commands.benchmark_queries import BenchmarkQueriesCommand + +MODULE = "qlever.commands.benchmark_queries" + +JSON_ACCEPT_HEADERS_AND_RESULT_FILES = [ + ("application/sparql-results+json", "result.json"), + ("application/qlever-results+json", "result.json"), +] + +ALL_ACCEPT_HEADERS_AND_RESULT_FILES = [ + ("text/csv", "result.csv"), + ("text/tab-separated-values", "result.tsv"), + *JSON_ACCEPT_HEADERS_AND_RESULT_FILES, +] + + +@pytest.mark.parametrize("download_or_count", ["count", "download"]) +@pytest.mark.parametrize( + "accept_header, result_file", ALL_ACCEPT_HEADERS_AND_RESULT_FILES +) +def test_empty_result_non_construct_describe( + mock_command, + download_or_count, + accept_header, + result_file, +): + mock_path_stat = mock_command(MODULE, "Path.stat") + mock_path_stat.return_value.st_size = 0 + run_cmd_mock = mock_command(MODULE, "run_command") + + size, err = BenchmarkQueriesCommand().get_result_size( + count_only=download_or_count == "count", + query_type="SELECT", + accept_header=accept_header, + result_file=result_file, + ) + + assert size == 0 + assert err["short"] == "Empty result" + assert ( + err["long"] == "curl returned with code 200, but the result is empty" + ) + run_cmd_mock.assert_not_called() + + +@pytest.mark.parametrize("download_or_count", ["count", "download"]) +@pytest.mark.parametrize( + "accept_header, result_file", ALL_ACCEPT_HEADERS_AND_RESULT_FILES +) +@pytest.mark.parametrize("query_type", ["CONSTRUCT", "DESCRIBE"]) +def test_empty_result_construct_describe( + mock_command, + download_or_count, + query_type, + accept_header, + result_file, +): + mock_path_stat = mock_command(MODULE, "Path.stat") + mock_path_stat.return_value.st_size = 0 + run_cmd_mock = mock_command(MODULE, "run_command") + run_cmd_mock.return_value = "42" + + size, err = BenchmarkQueriesCommand().get_result_size( + count_only=download_or_count == "count", + query_type=query_type, + accept_header=accept_header, + result_file=result_file, + ) + + assert size == 42 + assert err is None + + +@pytest.mark.parametrize("download_or_count", ["count", "download"]) +@pytest.mark.parametrize( + "accept_header, result_file", ALL_ACCEPT_HEADERS_AND_RESULT_FILES +) +def test_count_and_download_success( + mock_command, + download_or_count, + accept_header, + result_file, +): + mock_path_stat = mock_command(MODULE, "Path.stat") + mock_path_stat.return_value.st_size = 100 + + run_cmd_mock = mock_command(MODULE, "run_command") + run_cmd_mock.return_value = "42" + + size, err = BenchmarkQueriesCommand().get_result_size( + count_only=download_or_count == "count", + query_type="SELECT", + accept_header=accept_header, + result_file=result_file, + ) + + run_cmd_mock.assert_called_once() + assert size == 42 + assert err is None + + +def test_download_turtle_success(mock_command): + mock_path_stat = mock_command(MODULE, "Path.stat") + mock_path_stat.return_value.st_size = 100 + 
run_cmd_mock = mock_command(MODULE, "run_command") + run_cmd_mock.return_value = "42" + + size, err = BenchmarkQueriesCommand().get_result_size( + count_only=False, + query_type="SELECT", + accept_header="text/turtle", + result_file="result.ttl", + ) + + run_cmd_mock.assert_called_once() + assert size == 42 + assert err is None + + +@pytest.mark.parametrize("download_or_count", ["count", "download"]) +@pytest.mark.parametrize( + "accept_header, result_file", JSON_ACCEPT_HEADERS_AND_RESULT_FILES +) +def test_download_and_count_json_malformed( + mock_command, + download_or_count, + accept_header, + result_file, +): + mock_path_stat = mock_command(MODULE, "Path.stat") + mock_path_stat.return_value.st_size = 100 + + run_cmd_mock = mock_command(MODULE, "run_command") + run_cmd_mock.side_effect = Exception("jq failed") + + size, err = BenchmarkQueriesCommand().get_result_size( + count_only=download_or_count == "count", + query_type="SELECT", + accept_header=accept_header, + result_file=result_file, + ) + + run_cmd_mock.assert_called_once() + assert size == 0 + assert err["short"] == "Malformed JSON" + assert ( + "curl returned with code 200, but the JSON is malformed: " + in err["long"] + ) + assert "jq failed" in err["long"] + + +def test_single_int_result_success(mock_command): + run_cmd_mock = mock_command(MODULE, "run_command") + run_cmd_mock.return_value = "123" + + single_int_result = BenchmarkQueriesCommand().get_single_int_result( + "result.json" + ) + + run_cmd_mock.assert_called_once() + assert single_int_result == 123 + + +def test_single_int_result_non_int_fail(mock_command): + run_cmd_mock = mock_command(MODULE, "run_command") + run_cmd_mock.return_value = "abc" + + single_int_result = BenchmarkQueriesCommand().get_single_int_result( + "result.json" + ) + + run_cmd_mock.assert_called_once() + assert single_int_result is None + + +def test_single_int_result_failure(mock_command): + run_cmd_mock = mock_command(MODULE, "run_command") + run_cmd_mock.side_effect = Exception("jq failed") + + single_int_result = BenchmarkQueriesCommand().get_single_int_result( + "result.json" + ) + + run_cmd_mock.assert_called_once() + assert single_int_result is None diff --git a/test/qlever/conftest.py b/test/qlever/conftest.py new file mode 100644 index 00000000..9f825cb1 --- /dev/null +++ b/test/qlever/conftest.py @@ -0,0 +1,16 @@ +from unittest.mock import MagicMock + +import pytest + + +@pytest.fixture +def mock_command(monkeypatch): + def _mock(module_name: str, function_name: str, override=None): + if override: + monkeypatch.setattr(f"{module_name}.{function_name}", override) + return override + mock = MagicMock(name=f"{function_name}_mock") + monkeypatch.setattr(f"{module_name}.{function_name}", mock) + return mock + + return _mock
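
Reviewer note: below is a minimal sketch (not part of the patch) of how the two new static helpers fit together, matching the `--queries-yml` and `--query-ids`/`--query-regex` help texts above. The file name `queries.yml` and the two SPARQL queries are made-up examples for illustration, not fixtures from this PR.

# Sketch only: exercises parse_queries_yml and filter_queries from
# src/qlever/commands/benchmark_queries.py; all data here is illustrative.
from pathlib import Path

from qlever.commands.benchmark_queries import BenchmarkQueriesCommand

Path("queries.yml").write_text(
    "queries:\n"
    "  - query: Count all triples\n"
    "    sparql: SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }\n"
    "  - query: Ten distinct predicates\n"
    "    sparql: SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT 10\n",
    encoding="utf-8",
)

# parse_queries_yml validates the top-level "queries" list and returns
# (query_description, full_sparql_query) tuples.
queries = BenchmarkQueriesCommand.parse_queries_yml("queries.yml")
assert queries[0] == (
    "Count all triples",
    "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }",
)

# The ID filter and the regex filter are both applied: "1-$" keeps all
# queries, and the regex then keeps only the second one.
subset = BenchmarkQueriesCommand.filter_queries(
    queries, query_ids="1-$", query_regex="predicates"
)
assert subset == [queries[1]]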