Skip to content

898833 - Added sample for extracting text using OCR #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.12.35707.178 d17.12
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WinForms_TextExtractionByOCR", "WinForms_TextExtractionByOCR\WinForms_TextExtractionByOCR.csproj", "{1B09455E-6F67-4155-AFE2-EB421BAB1190}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1B09455E-6F67-4155-AFE2-EB421BAB1190}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
<startup>
<!-- Declares the supported runtime: version v4.0 with the .NET Framework 4.8 SKU. -->
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8" />
</startup>
</configuration>
Binary file not shown.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using Syncfusion.Windows.PdfViewer;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Windows.Forms.Integration;

namespace WinForms_TextExtractionByOCR
{
    /// <summary>
    /// Main window of the sample. Starts maximized and embeds the WPF
    /// <see cref="PdfViewer"/> user control through an <see cref="ElementHost"/>.
    /// </summary>
    public partial class Form1 : Form
    {
        // Bridge that lets the WPF control live inside this WinForms form.
        private ElementHost elementHost = new ElementHost();

        // The hosted WPF control; created in the constructor.
        private PdfViewer pdfViewer;

        /// <summary>
        /// Builds the designer-generated UI, maximizes the window and adds the
        /// hosted viewer so that it fills the whole client area.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            WindowState = FormWindowState.Maximized;

            pdfViewer = new PdfViewer();

            elementHost.Dock = DockStyle.Fill;
            elementHost.Child = pdfViewer;
            Controls.Add(elementHost);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<UserControl xmlns:PdfViewer="clr-namespace:Syncfusion.Windows.PdfViewer;assembly=Syncfusion.PdfViewer.WPF"
             x:Class="WinForms_TextExtractionByOCR.PdfViewer"
             xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
             xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
             xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
             xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
             xmlns:local="clr-namespace:WinForms_TextExtractionByOCR"
             mc:Ignorable="d"
             d:DesignHeight="450" d:DesignWidth="800">
    <!-- Two rows: a fixed 30-unit toolbar row on top, the PDF viewer in the remaining space. -->
    <Grid>
        <Grid.RowDefinitions>
            <RowDefinition Height="30"/>
            <RowDefinition/>
        </Grid.RowDefinitions>
        <!-- Switches the viewer into rectangle-annotation mode (see Rectangle_Click).
             Sized to its content: a fixed width of 60 is too narrow for the caption
             and clips it. The button stays centered, as it was with the fixed width. -->
        <Button Grid.Row="0" x:Name="Rectangle" Content="Mark and Extract"
                MinWidth="60" Padding="8,0" HorizontalAlignment="Center"
                Click="Rectangle_Click"/>
        <PdfViewer:PdfViewerControl Grid.Row="1" x:Name="pdfViewer"/>
    </Grid>
</UserControl>
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
using Syncfusion.OCRProcessor;
using Syncfusion.Pdf.Parsing;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;

namespace WinForms_TextExtractionByOCR
{
/// <summary>
/// Interaction logic for PdfViewer.xaml. Loads a sample PDF into the viewer and,
/// when the user draws a rectangle annotation, runs OCR over that region of the
/// current page image.
/// </summary>
public partial class PdfViewer : UserControl
{
    /// <summary>Bounds reported for the most recently added rectangle annotation.</summary>
    RectangleF bounds;

    /// <summary>
    /// Builds the XAML-defined UI and loads the sample document from a path
    /// relative to the process working directory.
    /// </summary>
    public PdfViewer()
    {
        InitializeComponent();
        pdfViewer.Load("../../Data/F#.pdf");
    }

    /// <summary>
    /// Puts the viewer into rectangle-annotation mode and listens for the
    /// resulting shape annotation.
    /// </summary>
    private void Rectangle_Click(object sender, RoutedEventArgs e)
    {
        pdfViewer.AnnotationMode = Syncfusion.Windows.PdfViewer.PdfDocumentView.PdfViewerAnnotationMode.Rectangle;

        // Detach before attaching: without this every click stacks another
        // subscription and the OCR handler runs once per previous click.
        pdfViewer.ShapeAnnotationChanged -= PdfViewer_ShapeAnnotationChanged;
        pdfViewer.ShapeAnnotationChanged += PdfViewer_ShapeAnnotationChanged;
    }

    /// <summary>
    /// Runs OCR on the area covered by a newly added shape annotation.
    /// Other annotation actions are ignored.
    /// </summary>
    private void PdfViewer_ShapeAnnotationChanged(object sender, Syncfusion.Windows.PdfViewer.ShapeAnnotationChangedEventArgs e)
    {
        if (e.Action != Syncfusion.Windows.PdfViewer.AnnotationChangedAction.Add)
        {
            return;
        }

        bounds = e.NewBounds;

        // Both disposables are scoped with using so they are released even when
        // cropping or OCR throws.
        using (OCRProcessor processor = new OCRProcessor("../../Tesseract binaries"))
        using (Bitmap image = GetBitmap(pdfViewer.ExportAsImage(pdfViewer.CurrentPageIndex - 1)))
        {
            //Language to process the OCR
            processor.Settings.Language = Languages.English;

            // Bitmap.Clone throws when the rectangle reaches outside the image,
            // so clamp the annotation bounds to the exported page first.
            // NOTE(review): this assumes the annotation bounds and the exported
            // image share the same coordinate scale - confirm against the viewer API.
            RectangleF cropArea = RectangleF.Intersect(bounds, new RectangleF(0, 0, image.Width, image.Height));
            if (cropArea.Width < 1 || cropArea.Height < 1)
            {
                return;
            }

            using (Bitmap clonedImage = image.Clone(cropArea, System.Drawing.Imaging.PixelFormat.Format32bppArgb))
            {
                // The recognized text is not used any further by this sample;
                // display or store it here as needed.
                string ocrText = processor.PerformOCR(clonedImage, @"../../Tessdata/");
            }
        }
    }

    /// <summary>
    /// Copies a WPF <see cref="BitmapSource"/> into a new GDI+ <see cref="Bitmap"/>
    /// in 32-bpp premultiplied ARGB format.
    /// </summary>
    /// <param name="source">The WPF image to copy.</param>
    /// <returns>A new bitmap that the caller owns and must dispose.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    Bitmap GetBitmap(BitmapSource source)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        // The raw copy below is only valid when the source pixels already use
        // the target bitmap's layout, so convert anything else to Pbgra32 first.
        BitmapSource pixels = source.Format == PixelFormats.Pbgra32
            ? source
            : new FormatConvertedBitmap(source, PixelFormats.Pbgra32, null, 0);

        Bitmap bmp = new Bitmap(
            pixels.PixelWidth,
            pixels.PixelHeight,
            System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
        try
        {
            BitmapData data = bmp.LockBits(
                new System.Drawing.Rectangle(System.Drawing.Point.Empty, bmp.Size),
                ImageLockMode.WriteOnly,
                System.Drawing.Imaging.PixelFormat.Format32bppPArgb);
            try
            {
                pixels.CopyPixels(
                    Int32Rect.Empty,
                    data.Scan0,
                    data.Height * data.Stride,
                    data.Stride);
            }
            finally
            {
                // Always release the lock, even if the copy fails.
                bmp.UnlockBits(data);
            }

            return bmp;
        }
        catch
        {
            // The caller never receives the bitmap on failure, so release it here.
            bmp.Dispose();
            throw;
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Windows.Forms;
using WinForms_TextExtractionByOCR;

namespace WindWinForms_TextExtractionByOCR_NETowsFormsApp1
{
    /// <summary>
    /// Hosts the process entry point of the sample application.
    /// </summary>
    internal static class Program
    {
        /// <summary>
        /// Application entry point: applies the WinForms visual settings and
        /// runs the message loop on <see cref="Form1"/>.
        /// </summary>
        [STAThread]
        private static void Main()
        {
            // Both settings are applied before the first window is created.
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            Form1 mainForm = new Form1();
            Application.Run(mainForm);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("WinForms_TextExtractionByOCR")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("WinForms_TextExtractionByOCR")]
[assembly: AssemblyCopyright("Copyright © 2025")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM.
[assembly: Guid("1b09455e-6f67-4155-afe2-eb421bab1190")]

// Version information for an assembly consists of the following four values:
//
//      Major Version
//      Minor Version
//      Build Number
//      Revision
//
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading