Update content for NVFP4, MobileLLM-R1, and DeepSeek pages to use HTML entities for apostrophes
- Replaced apostrophes with HTML entities in the NVFP4, MobileLLM-R1, and DeepSeek pages to ensure proper rendering in the browser.
- Enhanced the user experience by maintaining consistent formatting across the documentation.
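The rendered diff below decodes the entities, so each changed line shows the old and new text as identical. Here is a minimal sketch of the before/after pattern; it is not taken from this commit: the component name and copy are invented, and the entity is assumed to be &apos; (the rendered diff does not show which entity was actually used; &#39; would behave the same way).

// Hypothetical sketch of the escaping pattern; nothing here is from the repo.
export function ExampleParagraph() {
  return (
    <p className="text-slate-300 leading-relaxed">
      {/* Before: "Meta's ..." with a raw apostrophe, which trips lint rules such as react/no-unescaped-entities */}
      {/* After: the HTML entity renders as the same character in the browser */}
      Meta&apos;s MobileLLM-R1 challenges two assumptions about reasoning.
    </p>
  );
}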
Meta's MobileLLM-R1 challenges two fundamental assumptions about reasoning in language models: (1) that reasoning only emerges in large models, and (2) that it requires massive datasets. They demonstrate that <strong className="text-blue-400">sub-billion parameter models can achieve strong reasoning</strong> with just 2T tokens of carefully curated data.
</p>
<p className="text-slate-300 leading-relaxed">
Their <strong className="text-purple-400">950M parameter model achieves an AIME score of 15.5</strong>, compared to just 0.6 for OLMo-2-1.48B and 0.3 for SmolLM2-1.7B. Remarkably, despite being trained on only 11.7% of the tokens compared to Qwen3's 36T-token corpus, MobileLLM-R1-950M matches or surpasses Qwen3-0.6B across multiple reasoning benchmarks.
</p>
</div>
</div>
@@ -253,7 +253,7 @@ export default function MobileLLMR1Project() {
<p className="text-slate-300 mb-3">
Adaptive training strategy where the data mixture evolves alongside the model's growing capacity, ensuring optimal challenge levels throughout training.
NVIDIA has figured out how to train massive LLMs using a new <strong className="text-green-400">4-bit number format called NVFP4</strong>, which is a huge deal for efficiency. Training in 4-bit is much faster and uses less memory than the current 8-bit standard (FP8), but it's very difficult to do without the model's performance collapsing.
</p>
<p className="text-slate-300 leading-relaxed">
Their solution combines four key techniques to train a <strong className="text-emerald-400">12-billion-parameter hybrid Mamba-Transformer model on 10 trillion tokens</strong> with performance nearly identical to FP8 training. This marks the first successful demonstration of training billion-parameter language models with 4-bit precision over a multi-trillion-token horizon.
@@ -201,7 +201,7 @@ export default function NVFP4Project() {
NVFP4 vs MXFP4
</h2>
<p className="text-slate-400 text-lg">
How NVIDIA's format improves on the standard
</p>
</div>
@@ -310,7 +310,7 @@ export default function NVFP4Project() {
The 4 Key Techniques
</h2>
<p className="text-slate-400 text-lg">
The "secret sauce" that makes NVFP4 work
</p>
</div>
@@ -399,7 +399,7 @@ export default function NVFP4Project() {
@@ -544,7 +544,7 @@ export default function NVFP4Project() {
NVFP4 vs MXFP4
</h3>
<p className="text-slate-300 mb-4">
In direct comparison on an 8B model, MXFP4 needed <strong className="text-green-400">36% more training data</strong> (1.36T vs 1T tokens) to match NVFP4's performance. This proves NVFP4's superior design.
</p>
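For context on the comparison above, here is a minimal TypeScript sketch of block-scaled 4-bit quantization. It is not part of this commit or the pages it edits, and it assumes the publicly described layouts: NVFP4 scales 16-element blocks with a fractional FP8 (E4M3) factor, while MXFP4 scales 32-element blocks with a power-of-two (E8M0) factor.

// Hypothetical sketch, not from this commit or the pages it edits.
// Magnitudes representable by an E2M1 (FP4) value.
const FP4_GRID = [0, 0.5, 1, 1.5, 2, 3, 4, 6];

// Snap a real number to the nearest FP4 grid point, keeping its sign.
function nearestFp4(x: number): number {
  const mag = Math.abs(x);
  let best = FP4_GRID[0];
  for (const g of FP4_GRID) {
    if (Math.abs(g - mag) < Math.abs(best - mag)) best = g;
  }
  return x < 0 ? -best : best;
}

// Quantize one block and rescale back so the rounding error is easy to inspect.
// The real formats store 4-bit codes plus the block scale; this sketch keeps the
// scale in full precision (NVFP4 would store it in FP8 E4M3) for clarity.
function quantizeBlock(block: number[], powerOfTwoScale: boolean): number[] {
  const amax = Math.max(...block.map((v) => Math.abs(v)));
  let scale = amax > 0 ? amax / 6 : 1; // map the block's largest magnitude onto FP4's max (6)
  if (powerOfTwoScale) scale = 2 ** Math.ceil(Math.log2(scale)); // MXFP4-style coarse scale
  return block.map((x) => nearestFp4(x / scale) * scale);
}

// NVFP4-style usage: quantizeBlock(values.slice(i, i + 16), false)
// MXFP4-style usage: quantizeBlock(values.slice(i, i + 32), true)
// Smaller blocks with finer-grained scales mean a single outlier degrades only
// 15 neighbors instead of 31, which is the intuition behind the data-efficiency
// gap reported above.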