-
Notifications
You must be signed in to change notification settings - Fork 4
/
aozora_prepare.rb
executable file
·52 lines (47 loc) · 1.38 KB
/
aozora_prepare.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env ruby
########################################################################
# aozora_prepare.rb: Prepare for Aozora Bunko Text Processing
#
# Description:
# This script processes text files from Aozora Bunko for readability and
# standardization. It converts the encoding, removes annotations, replaces
# full-width spaces, and standardizes newlines.
#
# Author: id774 (More info: http://id774.net)
# Source Code: https://github.com/id774/scripts
# License: LGPLv3 (Details: https://www.gnu.org/licenses/lgpl-3.0.html)
# Contact: [email protected]
#
# Version History:
# v1.1 2023-12-06
# Refactored for improved readability and documentation.
# v1.0 2014-01-22
# Initial release.
#
# Usage:
# ./aozora_prepare.rb <input file> <output file>   (both arguments are required)
#
########################################################################
# Prepares an Aozora Bunko text file for further processing.
#
# The input is read as Windows-31J and transcoded to UTF-8, ruby (furigana)
# annotations of the form 《...》 are stripped, full-width spaces are replaced
# with ASCII spaces, and CRLF line endings are converted to LF.
class Aozora
  # Matches one ruby annotation: an opening 《, at least one character that is
  # not 》, and the closing 》.
  RUBY_ANNOTATION = /《[^》]+》/.freeze

  # Ideographic (full-width) space. Written as an escape sequence so that the
  # character cannot be silently normalized to an ASCII space by editors or
  # copy/paste, which would turn the replacement into a no-op.
  FULL_WIDTH_SPACE = "\u3000"

  # @param args [Array<String>] input path followed by output path. The first
  #   two elements are removed from the array (via +shift+). Missing entries
  #   fall back to "in.txt" and "out.txt" respectively.
  def initialize(args)
    @infile = args.shift || "in.txt"
    @outfile = args.shift || "out.txt"
  end

  # Reads the input file, cleans its content, and writes the result to the
  # output file.
  #
  # The input is read and transcoded completely before the output file is
  # opened, so a failure while reading does not leave a truncated output file.
  #
  # @return [nil]
  def run
    content = File.open(@infile, "r:Windows-31J:UTF-8", &:read)
    cleaned = normalize(content)
    File.open(@outfile, "w") { |data| data.print cleaned }
  end

  private

  # Applies the text clean-up steps to a UTF-8 string.
  #
  # @param text [String] transcoded file content
  # @return [String] a new string; +text+ itself is not modified
  def normalize(text)
    text.gsub(RUBY_ANNOTATION, "")
        .tr(FULL_WIDTH_SPACE, " ")
        .gsub("\r\n", "\n")
  end
end
# Script entry point: runs only when this file is executed directly, not when
# it is loaded by another program. Exactly two command-line arguments (input
# path and output path) are expected; otherwise the syntax line is printed.
if $PROGRAM_NAME == __FILE__
  if ARGV.size != 2
    puts "Syntax: aozora_prepare.rb [infile] [outfile]"
  else
    Aozora.new(ARGV).run
  end
end