Commit 78b1007

jeromedockes committed Jul 31, 2024
Parent: 48290ca
Showing 7 changed files with 75 additions and 67 deletions.
4 changes: 3 additions & 1 deletion _includes/example-report.html
@@ -9,7 +9,9 @@
href="https://cdn.jsdelivr.net/npm/[email protected]/build/pure-min.css"
integrity="sha384-X38yfunGUhNzHpBaEBsWLO+A0HDYOQi8ufWDkZ0k9e0eXz/tH3II7uKZ9msv++Ls"
crossorigin="anonymous">
<link rel="stylesheet" href="../skrub-online-report.css?version4395">
<style>
{% include "skrub-online-report.css" %}
</style>
<link rel="icon" href="../skrub.svg">
</head>

4 changes: 3 additions & 1 deletion _includes/examples-index.html
@@ -9,7 +9,9 @@
href="https://cdn.jsdelivr.net/npm/[email protected]/build/pure-min.css"
integrity="sha384-X38yfunGUhNzHpBaEBsWLO+A0HDYOQi8ufWDkZ0k9e0eXz/tH3II7uKZ9msv++Ls"
crossorigin="anonymous">
<link rel="stylesheet" href="../skrub-online-report.css?version4395">
<style>
{% include "skrub-online-report.css" %}
</style>
<link rel="icon" href="../skrub.svg">
</head>

2 changes: 1 addition & 1 deletion _includes/nav.html
@@ -3,7 +3,7 @@
{% for name, href in nav_links %}
<li>
<a
href="{{ '../' + href if 'examples' in current_page else href }}"
href={{ href | relative }}
{% if href == current_page %}
class="current-page"
{% endif %}
118 changes: 60 additions & 58 deletions build.py
@@ -14,6 +14,21 @@
from skrub import TableReport


NAV_LINKS = [("Demo", "index.html"), ("Examples", "examples/index.html")]


def write(text, path):
path.write_text(bust(text), encoding="utf-8")


@jinja2.pass_context
def relative(context, path):
current_page = context["current_page"]
depth = len(current_page.split("/")) - 1
parts = [".."] * depth + path.split("/")
return "/".join(parts)


def get_jinja_env():
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(
@@ -22,49 +37,47 @@ def get_jinja_env():
),
autoescape=True,
)
env.filters["relative"] = relative
env.globals = {
"nav_links": NAV_LINKS,
}
return env


def bust(text):
return re.sub(r"\?version[a-z0-9]+", f"?version{VERSION}", text)
return re.sub(r"\?__skrub_[a-zA-Z0-9]+__", f"?__skrub_{VERSION}__", text)


def get_datasets():
AMES_HOUSING_CSV = (
"https://www.openml.org/data/get_csv/20649135/file2ed11cebe25.arff"
)
datasets = [((lambda: pd.read_csv(AMES_HOUSING_CSV)), "AMES Housing")]
datasets = [("AMES Housing", (lambda: pd.read_csv(AMES_HOUSING_CSV)))]
skrub_dataset_names = [
"employee_salaries",
"medical_charge",
"traffic_violations",
"drug_directory",
]
datasets.extend(
[
((lambda: getattr(skrub_data, f"fetch_{name}")().X, name))
for name in skrub_dataset_names
]
)
for name in skrub_dataset_names:

sklearn_dataset_names = ["titanic"]
datasets.extend(
[
(
(
lambda: sklearn_data.fetch_openml(
name, as_frame=True, parser="auto", version=1
).frame
),
name,
)
for name in sklearn_dataset_names
]
)
def fetch(name=name):
return getattr(skrub_data, f"fetch_{name}")().X

datasets.append((name, fetch))
sklearn_dataset_names = ["titanic", "iris"]
for name in sklearn_dataset_names:

def fetch(name=name):
return sklearn_data.fetch_openml(
name, as_frame=True, parser="auto", version=1
).frame

datasets.append((name, fetch))
return datasets


def add_report(fetcher, name):
def add_report(name, fetcher):
if ARGS.no_reports:
html = '<div class="report-placeholder">report</div>'
elapsed = 0
@@ -74,55 +87,47 @@ def add_report(fetcher, name):
pretty_name = name.replace("_", " ").capitalize()
start = time.time()
html = TableReport(df, title=pretty_name).html_snippet()
stop = time.time()
elapsed = stop - start
print(f"{name} took {stop - start:.2f}s")
elapsed = time.time() - start
print(f"{name} took {elapsed:.2f}s")
report_template = ENV.get_template("example-report.html")
current_page = f"examples/{name}.html"
html = report_template.render(
{"report": html, "time": elapsed}
| TEMPLATE_DATA
| {
"nav_links": TEMPLATE_DATA["nav_links"]
+ [(name, f"examples/{name}.html")]
}
| {"current_page": f"examples/{name}.html"}
nav_links=NAV_LINKS + [(name, current_page)],
current_page=current_page,
report=html,
time=elapsed,
)
html = bust(html)
(EXAMPLES_DIR / f"{name}.html").write_text(html, "utf-8")
write(html, EXAMPLES_DIR / f"{name}.html")


def build_examples():
datasets = get_datasets()
for name, fetcher in datasets:
add_report(name, fetcher)

examples_index = ENV.get_template("examples-index.html")
html = examples_index.render(
report_names=[name for name, _ in datasets], current_page="examples/index.html"
)
write(html, EXAMPLES_DIR / "index.html")


def build_pages():
all_pages = REPO.glob("*.html")
for page in all_pages:
template = ENV.get_template(page.name)
rendered = template.render(TEMPLATE_DATA | {"current_page": f"{page.name}"})
rendered = bust(rendered)
(BUILD_DIR / page.name).write_text(rendered, "utf-8")
rendered = template.render(current_page=f"{page.name}")
write(rendered, BUILD_DIR / page.name)

for ext in ["css", "js", "svg"]:
for file_path in REPO.glob(f"*.{ext}"):
text = bust(file_path.read_text("utf-8"))
(BUILD_DIR / file_path.name).write_text(text, "utf-8")
text = file_path.read_text("utf-8")
write(text, BUILD_DIR / file_path.name)

for asset in REPO.glob(f"*.whl"):
shutil.copyfile(asset, BUILD_DIR / asset.name)


def build_examples():
datasets = get_datasets()
for fetcher, name in datasets:
add_report(fetcher, name)

examples_index = ENV.get_template("examples-index.html")
html = examples_index.render(
{"report_names": [name for _, name in datasets]}
| TEMPLATE_DATA
| {"current_page": "examples/index.html"}
)
html = bust(html)
(EXAMPLES_DIR / "index.html").write_text(html, "utf-8")


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
@@ -142,9 +147,6 @@ def build_examples():
VERSION = secrets.token_hex()[:4]

ENV = get_jinja_env()
TEMPLATE_DATA = {
"nav_links": [("Demo", "index.html"), ("Examples", "examples/index.html")]
}

build_pages()
build_examples()
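
Aside (not part of the commit): a standalone sketch of what the new `relative` filter computes. It drops the Jinja context argument and takes `current_page` directly, so the path logic can be checked in isolation; the page names used below are illustrative.

```python
def relative(current_page, path):
    # Number of directory levels between the current page and the site root.
    depth = len(current_page.split("/")) - 1
    # Prefix the target path with one ".." per level.
    return "/".join([".."] * depth + path.split("/"))

# Top-level page: links are used as-is.
assert relative("index.html", "examples/index.html") == "examples/index.html"
# Page one level down: links gain a "../" prefix.
assert relative("examples/employee_salaries.html", "index.html") == "../index.html"
```

This is what lets nav.html use a single `{{ href | relative }}` instead of the previous `'examples' in current_page` special case.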
10 changes: 6 additions & 4 deletions index.html
@@ -9,10 +9,12 @@
href="https://cdn.jsdelivr.net/npm/[email protected]/build/pure-min.css"
integrity="sha384-X38yfunGUhNzHpBaEBsWLO+A0HDYOQi8ufWDkZ0k9e0eXz/tH3II7uKZ9msv++Ls"
crossorigin="anonymous">
<link rel="stylesheet" href="skrub-online-report.css?version4395">
<style>
{% include "skrub-online-report.css" %}
</style>
<link rel="icon" href="skrub.svg">
<script src="copybutton.js?version4395" defer></script>
<script src="skrub-online-report.js?version4395" defer></script>
<script src="copybutton.js?__skrub_XXXX__" defer></script>
<script src="skrub-online-report.js?__skrub_XXXX__" defer></script>
</head>

<body>
@@ -37,7 +39,7 @@ <h1 class="banner"><span class="from">from</span> <a href="https://skrub-data.or
<h2>This online demo is experimental.</h2>
<p>Due to the limitations
inherent to running Python in a web page, we recommend choosing
a small file (<strong>less than 100 MB</strong>).
a small file (<strong>less than 100 MB</strong>).
A <strong>more reliable and performant way</strong> of generating
reports is to
<a href="https://skrub-data.org/dev/install.html#advanced-usage-for-contributors">install</a>
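
The `?__skrub_XXXX__` query strings above are placeholders that `bust()` in build.py rewrites with a fresh random token on every build, so browsers re-fetch the assets after each deployment. A minimal standalone sketch of that rewrite (not part of the commit; the sample HTML string is illustrative):

```python
import re
import secrets

# Fresh 4-character hex token per build, as in build.py.
VERSION = secrets.token_hex()[:4]

def bust(text):
    # Replace any existing ?__skrub_...__ token with the current build's token.
    return re.sub(r"\?__skrub_[a-zA-Z0-9]+__", f"?__skrub_{VERSION}__", text)

print(bust('<script src="copybutton.js?__skrub_XXXX__" defer></script>'))
# e.g. <script src="copybutton.js?__skrub_1a2b__" defer></script>
```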
2 changes: 1 addition & 1 deletion skrub-online-report.js
@@ -44,7 +44,7 @@ class App {
this.handlers.push(new cls(elem, this));
}

this.worker = new Worker("skrub-worker.js?version4395");
this.worker = new Worker("skrub-worker.js?__skrub_XXXX__");
this.worker.onmessage = (e) => {
this.receive(e.data);
};
2 changes: 1 addition & 1 deletion skrub-worker.js
@@ -13,7 +13,7 @@ async function startPyodide() {
await self.pyodide.loadPackage("micropip");
const micropip = self.pyodide.pyimport("micropip");
await micropip.install(["pandas", "fastparquet", "matplotlib",
"skrub-0.3.0.dev0-py3-none-any.whl?version4395",
"skrub-0.3.0.dev0-py3-none-any.whl?__skrub_XXXX__",
"clevercsv-0.8.2-cp312-cp312-pyodide_2024_0_wasm32.whl"
]);
await pyodide.runPython(`
