
Commit 9d11ca7

cleanup: simplify content and add ability to remove excerpt and image from post display
1 parent c6b191a commit 9d11ca7

File tree

9 files changed: +44 −36 lines

public/decapcms/config.yml

Lines changed: 2 additions & 0 deletions
@@ -27,3 +27,5 @@ collections:
       - { label: 'Publish Date', name: 'publishDate', widget: 'datetime', required: false }
       - { label: 'Author', name: 'author', widget: 'string' }
       - { label: 'Content', name: 'body', widget: 'markdown' }
+      - { label: 'Display Excerpt', name: 'displayExcerpt', required: false }
+      - { label: 'Display Image', name: 'displayImage', required: false }

src/components/CustomStyles.astro

Lines changed: 0 additions & 9 deletions
@@ -35,10 +35,6 @@ import '@fontsource-variable/inter';
     --aw-color-bg-page: rgb(255 255 255);

     --aw-color-bg-page-dark: rgb(12, 10, 14);
-
-    ::selection {
-      background-color: lavender;
-    }
   }

   .dark {

@@ -54,10 +50,5 @@ import '@fontsource-variable/inter';
     --aw-color-text-default: rgb(229 236 246);
     --aw-color-text-muted: rgb(229 236 246 / 66%);
     --aw-color-bg-page: var(--aw-color-bg-page-dark);
-
-    ::selection {
-      background-color: black;
-      color: snow;
-    }
   }
 </style>

src/components/blog/SinglePost.astro

Lines changed: 10 additions & 7 deletions
@@ -58,14 +58,17 @@ const { Content } = post;
     >
       {post.title}
     </h1>
-    <p
-      class="max-w-3xl mx-auto mt-4 mb-8 px-4 sm:px-6 text-xl md:text-2xl text-muted dark:text-slate-400 text-justify"
-    >
-      {post.excerpt}
-    </p>

     {
-      post.image ? (
+      (post.displayExcerpt ?? true) && (
+        <p class="max-w-3xl mx-auto mt-4 mb-8 px-4 sm:px-6 text-xl md:text-2xl text-muted dark:text-slate-400 text-justify">
+          {post.excerpt}
+        </p>
+      )
+    }
+
+    {
+      post.image && (post.displayImage ?? true) ? (
         <Image
           src={post.image}
           class="max-w-full lg:max-w-[900px] mx-auto mb-6 sm:rounded-md bg-gray-400 dark:bg-slate-700 aspect-auto"

@@ -77,7 +80,7 @@ const { Content } = post;
           decoding="async"
         />
       ) : (
-        <div class="max-w-3xl mx-auto px-4 sm:px-6">
+        <div class="max-w-3xl mx-auto px-4 sm:px-6 mt-2">
           <div class="border-t dark:border-slate-700" />
         </div>
       )
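The `?? true` fallback in the component above makes both new flags opt-out: a post that never sets them renders exactly as before, and only an explicit `false` hides the excerpt or image. A minimal TypeScript sketch of that logic (the helper names are illustrative, not part of the component):

```typescript
interface PostDisplayFlags {
  image?: string;
  displayExcerpt?: boolean;
  displayImage?: boolean;
}

// undefined means "flag not set in frontmatter", which keeps the previous behavior.
function shouldShowExcerpt(post: PostDisplayFlags): boolean {
  return post.displayExcerpt ?? true;
}

// The image additionally has to exist before the flag is consulted.
function shouldShowImage(post: PostDisplayFlags): boolean {
  return Boolean(post.image) && (post.displayImage ?? true);
}

// shouldShowExcerpt({})                        -> true  (flag omitted)
// shouldShowExcerpt({ displayExcerpt: false }) -> false (explicitly hidden)
```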

src/components/widgets/Announcement.astro

Lines changed: 1 addition & 1 deletion
@@ -43,6 +43,6 @@ import { Icon } from 'astro-icon/components';
     title="If you like Trieve, give us a star."
     href="https://github.com/devflowinc/trieve"
   >
-    <Icon name="ri:github-line" class="w-4 h-4 mr-1" /> Star Us | <span id="trieve-starcount">1261</span>
+    <Icon name="ri:github-line" class="w-4 h-4 mr-1" /> Star Us | <span id="trieve-starcount">1317</span>
   </a>
 </div>

src/content/config.ts

Lines changed: 3 additions & 0 deletions
@@ -60,6 +60,9 @@ const postCollection = defineCollection({
     author: z.string().optional(),

     metadata: metadataDefinition(),
+
+    displayExcerpt: z.boolean().optional(),
+    displayImage: z.boolean().optional(),
   }),
 });
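For reference, a reduced sketch of how the two new optional fields behave under zod validation; this is just the pair of fields isolated for illustration, not the full postCollection schema:

```typescript
import { z } from 'zod';

// Only the two new fields, pulled out of the larger post schema.
const displayFlags = z.object({
  displayExcerpt: z.boolean().optional(),
  displayImage: z.boolean().optional(),
});

// Existing posts that omit the flags still validate; the values come back undefined,
// which the component and normalizer treat as "show it".
displayFlags.parse({});                      // {}
displayFlags.parse({ displayImage: false }); // { displayImage: false }
```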

src/content/post/building-blazingly-fast-typo-correction-in-rust.mdx

Lines changed: 17 additions & 17 deletions
@@ -1,25 +1,33 @@
 ---
-publishDate: 2024-09-06T08:45:00Z
+publishDate: 2024-09-09T08:45:00Z
 author: densumesh
 title: Building 300μs Typo Correction for 1.3M Words in Rust
-excerpt: We tell the story of how we built blazingly fast spellcheck in Rust using BKTrees, Redis queues, and Clickhouse in this blog.
+excerpt: We explain how we built blazingly fast spellcheck in Rust using BKTrees, Redis queues, and Clickhouse in this blog.
 image: https://cdn.trieve.ai/blog/building-30%CE%BCs-typo-tolerance-for-1.3M-words%20using%20Rust/building-30us-typo-tolerance-cover.webp
 tags:
   - rust
   - BKTree
   - typo-correction
   - relevance
+displayImage: false
+displayExcerpt: false
 ---

-We launched our [Hacker News search and RAG engine](https://hn.trieve.ai) with a half-baked typo correction system. Our first draft took 30+ms which was slow enough that we defaulted it to off. Our latest version is 100 times faster and you can try it at [hn.trieve.ai](https://hn.trieve.ai). Heres the story of how we did it:
+We launched our [Hacker News search and RAG engine](https://hn.trieve.ai) with a half-baked typo correction system. Our first draft took 30+ms, which was slow enough that we defaulted it to off. Our latest version is 100 times faster, and you can try it at [hn.trieve.ai](https://hn.trieve.ai). We tell you exactly how we did it in this post!

 ![video demo of spellcheck](https://cdn.trieve.ai/blog/building-30%CE%BCs-typo-tolerance-for-1.3M-words%20using%20Rust/typo-tolerance-demo.gif)

+## Sample Queries to Try
+
+- [OpnAI](https://hn.trieve.ai/?score_threshold=5&page_size=30&prefetch_amount=30&rerank_type=none&highlight_delimiters=+%2C-%2C_%2C.%2C%2C&highlight_threshold=0.85&highlight_max_length=50&highlight_max_num=50&highlight_window=0&recency_bias=0&highlight_results=true&use_quote_negated_terms=true&q=OpnAi&storyType=story&matchAnyAuthorNames=&matchNoneAuthorNames=&popularityFilters=%7B%7D&sortby=relevance&dateRange=all&searchType=fulltext&page=1&getAISummary=false)
+- [Cnva devloper platfirm](https://hn.trieve.ai/?score_threshold=5&page_size=30&prefetch_amount=30&rerank_type=none&highlight_delimiters=+%2C-%2C_%2C.%2C%2C&highlight_threshold=0.85&highlight_max_length=50&highlight_max_num=50&highlight_window=0&recency_bias=0&highlight_results=true&use_quote_negated_terms=true&q=Cnva+devloper+platfirm&storyType=story&matchAnyAuthorNames=&matchNoneAuthorNames=&popularityFilters=%7B%7D&sortby=relevance&dateRange=all&searchType=fulltext&page=1&getAISummary=false)
+- [prviacy focsed email](https://hn.trieve.ai/?score_threshold=5&page_size=30&prefetch_amount=30&rerank_type=none&highlight_delimiters=+%2C-%2C_%2C.%2C%2C&highlight_threshold=0.85&highlight_max_length=50&highlight_max_num=50&highlight_window=0&recency_bias=0&highlight_results=true&use_quote_negated_terms=true&q=prviacy+focsed+email&storyType=story&matchAnyAuthorNames=&matchNoneAuthorNames=&popularityFilters=%7B%7D&sortby=relevance&dateRange=all&searchType=fulltext&page=1&getAISummary=false)
+
 ## Creating a dictionary of Words and Frequencies

 For small datasets, this is an easy task. You can scroll ~1000 HN post size text blobs in 10 seconds with one worker and basic word splitting. However, as you scale to the size of our [Hacker News Demo (38M+ posts)](https://hn.trieve.ai), work needs to be distributed.

-Eventually, we decided on 2 distinct workers for dictionary building:
+Eventually, we decided on 2 distinct workers for dictionary building:

 1. [Cronjob](https://github.com/devflowinc/trieve/blob/main/server/src/bin/word-id-cronjob.rs) to scroll all of the documents present in each of our users' search indices and add chunk ids from our database into a Redis queue 500 at a time.
 2. [Word worker](https://github.com/devflowinc/trieve/blob/main/server/src/bin/word-worker.rs) that pops off the queue and processes 500 chunks at a time. Text for each chunk is pulled, split into words, and each word is then loaded into Clickhouse.

@@ -28,17 +36,9 @@ We chose [ClickHouse](https://clickhouse.com/) to store the dictionary as we ran

 ## Using a BKTree data structure to identify and correct typos

-We take the [standard approach to typo correction](https://nullwords.wordpress.com/2013/03/13/the-bk-tree-a-data-structure-for-spell-checking/) and build per-dataset Burkhard-Keller Trees (BKTrees) for efficient comparision of words in the search query and the dataset's dictionary in O(log N) time complexity. Read more on the data structure at [wikipedia.org/BKTree](https://en.wikipedia.org/wiki/BK-tree).
-
-![visualization of BK Tree](https://cdn.trieve.ai/blog/building-30%CE%BCs-typo-tolerance-for-1.3M-words%20using%20Rust/bktree-viz.webp)
-
-BKTrees are a type of metric tree whose nodes are the word (and frequency in our case) and edges are the edit distance between the node and its children.
-This type of tree implements a trait known as the triangle inequality.
-
-Let's say we're at a node n in the BKtree, and we know the distance `d(n, q)` between `n` and our query `q`. For any child `c` of `n`, we know `d(n, c)` (it's stored in the tree). The triangle inequality tells us:
-`d(q, c) ≥ |d(q, n) - d(n, c)|`. This means if `|d(q, n) - d(n, c)| > k`, we can safely skip the entire subtree rooted at `c`, as no string in that subtree can be within distance `k` of `q`.
+We take the [standard approach to typo correction](https://nullwords.wordpress.com/2013/03/13/the-bk-tree-a-data-structure-for-spell-checking/) and build per-dataset Burkhard-Keller Trees (BKTrees) for efficient comparison of words in the search query and the dataset's dictionary in O(log N) time complexity. Explaining this data structure in depth is outside the scope of this blog, but you can read our [Rust implementation here](https://github.com/devflowinc/trieve/blob/6e114abdca5683440e2834eccacf3f850dff810f/server/src/operators/typo_operator.rs#L35-112) or read its [wiki](https://en.wikipedia.org/wiki/BK-tree).

-We utilized a third[ bktree-worker](https://github.com/devflowinc/trieve/blob/main/server/src/bin/bktree-worker.rs) to build the BKTrees. It takes datasets with completed dictonaries stored in Clickhouse then uses their words and frequencies to construct a tree.
+We utilized a third [bktree-worker](https://github.com/devflowinc/trieve/blob/main/server/src/bin/bktree-worker.rs) to build the BKTrees. It takes datasets with completed dictionaries stored in Clickhouse, then uses their words and frequencies to construct a tree.

 Once the BKTree is constructed, the worker then stores it in Redis such that it can be efficiently loaded into the API server's memory when needed at first query time for a given dataset.

@@ -73,7 +73,7 @@ lazy_static! {
 }
 ```

-On the first search with typo-tolerance enabled, we initiate a ~200-400ms cold start to pull the BKTree for the dataset being queried from Redis into server memory. Searches following this operation then use the BKTree to check for typos which only takes 100-300μs.
+On the first search with typo-tolerance enabled, we initiate a ~200-400ms cold start to pull the BKTree for the dataset being queried from Redis into server memory. Searches following this operation then use the BKTree to check for typos, which only takes 100-300μs.

 ### Identifying English Words

@@ -94,7 +94,7 @@ static ref ENGLISH_WORDS: HashSet<String> = {

 #### 2. Affix Analysis

-We then check for if the word is just an english word with a prefix or suffix:
+We then check whether the word is just an English word with a prefix or suffix:

 - We construct separate Tries for common prefixes and suffixes.

@@ -230,6 +230,6 @@ fn is_best_correction(word: &str, correction: &str) -> bool {

 ## Future Ideas

-We plan to leverage this same system to implement query splitting and concatenation as those features share the same requirement of quickly looking up words in a dictionary as does typo tolerance.
+We plan to leverage this same system to implement query splitting and concatenation, as those features share the same requirement of quickly looking up words in a dictionary.

 Trieve will always pursue the best possible relevance out of the box! Try it on our [HN search engine](https://hn.trieve.ai), [sign up for a free cloud account](https://dashboard.trieve.ai), or [see our self-hosting guides](https://docs.trieve.ai/self-hosting/aws).
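The triangle-inequality pruning that the removed paragraphs describe (and that the linked Rust `typo_operator` implements) is compact enough to sketch. This is an illustrative TypeScript version under assumed node and function names, not Trieve's actual code:

```typescript
type BkNode = {
  word: string;
  frequency: number;
  children: Map<number, BkNode>; // keyed by edit distance to this node
};

// Levenshtein distance, the metric the tree is built around.
function editDistance(a: string, b: string): number {
  const dp = Array.from({ length: a.length + 1 }, (_, i) =>
    Array.from({ length: b.length + 1 }, (_, j) => (i === 0 ? j : j === 0 ? i : 0))
  );
  for (let i = 1; i <= a.length; i++) {
    for (let j = 1; j <= b.length; j++) {
      dp[i][j] = Math.min(
        dp[i - 1][j] + 1,
        dp[i][j - 1] + 1,
        dp[i - 1][j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1)
      );
    }
  }
  return dp[a.length][b.length];
}

// Collect every word within edit distance k of the query, skipping subtrees that
// the triangle inequality proves cannot contain a match.
function search(root: BkNode, query: string, k: number): BkNode[] {
  const matches: BkNode[] = [];
  const stack: BkNode[] = [root];
  while (stack.length > 0) {
    const node = stack.pop()!;
    const d = editDistance(query, node.word);
    if (d <= k) matches.push(node);
    for (const [childDistance, child] of node.children) {
      // d(q, c) >= |d(q, n) - d(n, c)|, so only children whose stored distance
      // lies in [d - k, d + k] can possibly hold a match.
      if (Math.abs(d - childDistance) <= k) stack.push(child);
    }
  }
  return matches;
}
```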

src/content/post/hn-search-complaints.mdx

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 ---
-publishDate: 2024-08-30T08:45:00Z
+publishDate: 2024-09-06T08:45:00Z
 author: danielsgriffin
 title: "Examining HN Discovery Quality Using Existing Complaints"
 excerpt: Comparing the search quality between HN search engines with publicly available complaints.

src/types.d.ts

Lines changed: 5 additions & 1 deletion
@@ -42,6 +42,10 @@ export interface Post {

   /** */
   readingTime?: number;
+
+  displayImage?: boolean;
+
+  displayExcerpt?: boolean;
 }

 export interface MetaData {

@@ -280,4 +284,4 @@ export interface Content extends Headline, Widget {
   callToAction?: CallToAction;
 }

-export interface Contact extends Headline, Form, Widget {}
+export interface Contact extends Headline, Form, Widget { }

src/utils/blog.ts

Lines changed: 5 additions & 0 deletions
@@ -55,6 +55,8 @@ const getNormalizedPost = async (post: CollectionEntry<'post'>): Promise<Post> =
     author,
     draft = false,
     metadata = {},
+    displayExcerpt,
+    displayImage
   } = data;

   const slug = cleanSlug(rawSlug); // cleanSlug(rawSlug.split('/').pop());

@@ -87,6 +89,9 @@ const getNormalizedPost = async (post: CollectionEntry<'post'>): Promise<Post> =
     // or 'content' in case you consume from API

     readingTime: remarkPluginFrontmatter?.readingTime,
+
+    displayExcerpt: displayExcerpt,
+    displayImage: displayImage
   };
 };