11require 'rss'
2- require 'net/http'
3- require 'uri'
4- require 'yaml'
5- require 'time'
6- require 'active_support/broadcast_logger'
7-
# Downloads the body of an HTTP(S) resource.
#
# Only absolute http:// and https:// URLs that carry a host are accepted;
# anything else (local paths, ftp://, ...) is rejected before a connection
# is opened.
#
# @param url [String] absolute HTTP or HTTPS URL
# @return [String] body of the 2xx response
# @raise [RuntimeError] if the URL is not HTTP(S) or the server answers
#   with a non-2xx status
# @raise [URI::InvalidURIError] if +url+ cannot be parsed
def safe_open(url)
  uri = URI.parse(url)
  # URI::HTTPS is a subclass of URI::HTTP, so one check covers both schemes.
  valid = uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
  raise "不正なURLです: #{url} " unless valid

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    response = http.request(Net::HTTP::Get.new(uri))

    # Redirects are not followed, so anything but 2xx means no usable body.
    # Fail loudly instead of handing an error page to the caller as content.
    unless response.is_a?(Net::HTTPSuccess)
      raise "HTTP #{response.code} #{response.message}: #{url}"
    end

    response.body
  end
end
18-
# Downloads one RSS feed and converts its items into plain hashes.
#
# Failures while downloading, parsing or converting are not propagated:
# they are logged as a warning and an empty list is returned.
#
# @param url [String] feed URL, handed to safe_open
# @param logger [#info, #warn] receives progress and failure messages
# @return [Array<Hash>] one hash per feed item (see item_to_hash), or []
#   when the feed could not be processed
def fetch_rss_items(url, logger)
  logger.info("Fetching RSS → #{url} ")

  begin
    # Second argument false: parse without validation.
    parsed_feed = RSS::Parser.parse(safe_open(url), false)
    parsed_feed.items.map { |entry| item_to_hash(entry) }
  rescue StandardError => error
    logger.warn("⚠️ Failed to fetch #{url} : #{error.message} ")
    []
  end
end
302
# Converts a feed item into the string-keyed hash used for news entries.
#
# @param item [#link, #title, #pubDate] a parsed feed item
# @return [Hash{String => Object}] 'url', 'title' and 'published_at';
#   'published_at' is the item's pubDate converted with #to_s
def item_to_hash(item)
  link = item.link
  headline = item.title
  published = item.pubDate.to_s

  { 'url' => link, 'title' => headline, 'published_at' => published }
end
3+ NEWS_YAML_PATH = 'db/news.yml' . freeze
4+ NEWS_LOG_PATH = 'log/news.log' . freeze
385
396namespace :news do
40- desc ' RSS フィードを取得し、db/news.yml に保存'
7+ desc " RSS フィードを取得し、#{ NEWS_YAML_PATH } に保存"
418 task fetch : :environment do
429 # ロガー設定(ファイル+コンソール出力)
43- file_logger = ActiveSupport ::Logger . new ( 'log/news.log' )
4410 console = ActiveSupport ::Logger . new ( STDOUT )
45- logger = ActiveSupport ::BroadcastLogger . new ( file_logger , console )
11+ logger_file = ActiveSupport ::Logger . new ( NEWS_LOG_PATH )
12+ logger = ActiveSupport ::BroadcastLogger . new ( logger_file , console )
4613
4714 logger . info ( '==== START news:fetch ====' )
4815
49- # 既存の news.yml を読み込み
50- yaml_path = Rails . root . join ( 'db' , 'news.yml' )
51- existing_news = if File . exist? ( yaml_path )
52- YAML . safe_load ( File . read ( yaml_path ) , permitted_classes : [ Time ] , aliases : true ) [ 'news' ] || [ ]
53- else
54- [ ]
55- end
56-
57- # テスト/ステージング環境ではサンプルファイル、本番は実サイトのフィード
58- feed_urls = if Rails . env . test? || Rails . env . staging?
59- [ Rails . root . join ( 'spec' , 'fixtures' , 'sample_news.rss' ) . to_s ]
60- else
61- [
62- 'https://news.coderdojo.jp/feed/'
63- # 必要に応じて他 Dojo の RSS もここに追加可能
64- # 'https://coderdojotokyo.org/feed',
65- ]
66- end
67-
68- new_items = feed_urls . flat_map { |url | fetch_rss_items ( url , logger ) }
69-
70- # 既存データをハッシュに変換(URL をキーに)
71- existing_items_hash = existing_news . index_by { |item | item [ 'url' ] }
72-
73- # 新しいアイテムと既存アイテムを分離
74- truly_new_items = [ ]
16+ # 本番/開発環境では実フィード、それ以外(テスト環境など)ではテスト用フィード
17+ DOJO_NEWS_FEED = 'https://news.coderdojo.jp/feed/'
18+ TEST_NEWS_FEED = Rails . root . join ( 'spec' , 'fixtures' , 'sample_news.rss' )
19+ RSS_FEED_LIST = ( Rails . env . test? || Rails . env . staging? ) ?
20+ [ TEST_NEWS_FEED ] :
21+ [ DOJO_NEWS_FEED ]
22+
23+ # RSS のデータ構造を、News のデータ構造に変換
24+ fetched_items = RSS_FEED_LIST . flat_map do |feed |
25+ feed = RSS ::Parser . parse ( feed , false )
26+ feed . items . map { |item |
27+ {
28+ 'url' => item . link ,
29+ 'title' => item . title ,
30+ 'published_at' => item . pubDate . to_s
31+ }
32+ }
33+ end
34+
35+ # 取得済みニュース (YAML) を読み込み、URL をキーとしたハッシュに変換
36+ existing_items = YAML . safe_load ( File . read NEWS_YAML_PATH ) . index_by { it [ 'url' ] }
37+ existing_max_id = existing_items . flat_map { |url , item | item [ 'id' ] . to_i } . max || 0
38+
39+ # 新規記事と既存記事を分離
40+ created_items = [ ]
7541 updated_items = [ ]
7642
77- new_items . each do |new_item |
78- if existing_items_hash . key? ( new_item [ 'url' ] )
79- existing_item = existing_items_hash [ new_item [ 'url' ] ]
80- # タイトルまたは公開日が変わった場合のみ更新
81- if existing_item [ 'title' ] != new_item [ 'title' ] || existing_item [ 'published_at' ] != new_item [ 'published_at' ]
82- updated_items << existing_item . merge ( new_item )
83- end
84- else
85- truly_new_items << new_item
43+ fetched_items . each do |fetched_item |
44+ existing_item = existing_items [ fetched_item [ 'url' ] ]
45+
46+ if existing_item . nil?
47+ # 新規アイテムならそのまま追加
48+ created_items << fetched_item
49+ elsif existing_item [ 'title' ] != fetched_item [ 'title' ] || existing_item [ 'published_at' ] != fetched_item [ 'published_at' ]
50+ # タイトルまたは公開日が変わっていたら更新
51+ updated_items << existing_item . merge ( fetched_item )
8652 end
8753 end
8854
89- # 既存の最大IDを取得
90- max_existing_id = existing_news . map { |item | item [ 'id' ] . to_i } . max || 0
91-
9255 # 新しいアイテムのみに ID を割り当て(古い順)
93- truly_new_items_sorted = truly_new_items . sort_by { |item |
94- Time . parse ( item [ 'published_at' ] )
95- }
96-
97- truly_new_items_sorted . each_with_index do |item , index |
98- item [ 'id' ] = max_existing_id + index + 1
56+ created_items . sort_by! { Time . parse it [ 'published_at' ] }
57+ created_items . each . with_index ( 1 ) do |item , index |
58+ item [ 'id' ] = existing_max_id + index
9959 end
10060
101- # 更新されなかった既存アイテムを取得
102- updated_urls = updated_items . map { |item | item [ 'url' ] }
103- unchanged_items = existing_news . reject { |item | updated_urls . include? ( item [ 'url' ] ) }
104-
105- # 全アイテムをマージ
106- all_items = unchanged_items + updated_items + truly_new_items_sorted
61+ # URL をキーに、更新されなかった既存の YAML データを取得・保持
62+ updated_urls = updated_items . map { it [ 'url' ] }
63+ unchanged_items = existing_items . values . reject { updated_urls . include? ( it [ 'url' ] ) }
10764
108- # 日付降順ソート
109- sorted_items = all_items . sort_by { | item |
110- Time . parse ( item [ 'published_at' ] )
65+ # 新規・更新・既存の各アイテムをマージし、日付降順でソート
66+ merged_items = ( unchanged_items + updated_items + created_items ) . sort_by {
67+ Time . parse ( it [ 'published_at' ] )
11168 } . reverse
11269
11370 # YAML ファイルに書き出し
114- File . open ( 'db/news.yml' , 'w' ) do |f |
115- formatted_items = sorted_items . map do |item |
71+ File . open ( NEWS_YAML_PATH , 'w' ) do |f |
72+ formatted_items = merged_items . map do |item |
11673 {
11774 'id' => item [ 'id' ] ,
11875 'url' => item [ 'url' ] ,
@@ -121,51 +78,49 @@ namespace :news do
12178 }
12279 end
12380
124- f . write ( { 'news' => formatted_items } . to_yaml )
81+ f . write ( formatted_items . to_yaml )
12582 end
12683
127- logger . info ( "✅ Wrote #{ sorted_items . size } items to db/news.yml (#{ truly_new_items_sorted . size } new, #{ updated_items . size } updated)" )
128- logger . info ( '==== END news:fetch ====' )
84+ logger . info "✅ Wrote #{ merged_items . size } items to #{ NEWS_YAML_PATH } (#{ created_items . size } new, #{ updated_items . size } updated)"
85+ logger . info "==== END news:fetch ===="
86+ logger . info ""
12987 end
13088
# news:upsert — loads the news entries stored in the YAML file at
# NEWS_YAML_PATH and mirrors them into the News table, matching on URL.
# Records are written only when they are new or an attribute changed.
# All writes happen inside a single News.transaction.
desc " #{NEWS_YAML_PATH} からデータベースに upsert"
task upsert: :environment do
  # Log to both the console and the news log file.
  console = ActiveSupport::Logger.new(STDOUT)
  logger_file = ActiveSupport::Logger.new(NEWS_LOG_PATH)
  logger = ActiveSupport::BroadcastLogger.new(logger_file, console)

  logger.info "==== START news:upsert ===="

  news_items = YAML.safe_load(File.read(NEWS_YAML_PATH))
  tally = { created: 0, updated: 0 }

  News.transaction do
    news_items.each do |entry|
      record = News.find_or_initialize_by(url: entry['url'])
      record.assign_attributes(title: entry['title'], published_at: entry['published_at'])

      # Capture this before saving: once saved, the record is no longer new.
      brand_new = record.new_record?
      next unless brand_new || record.changed?

      record.save!

      if brand_new
        label = 'new'
        tally[:created] += 1
      else
        label = 'updated'
        tally[:updated] += 1
      end

      logger.info "[News] #{record.published_at.to_date} #{record.title} (#{label} )"
    end
  end

  logger.info "Upserted #{tally[:created] + tally[:updated]} items (#{tally[:created]} new, #{tally[:updated]} updated)."
  logger.info "==== END news:upsert ===="
  # Blank line to separate this run from the next one in the log.
  logger.info ""
end
170-
171126end
0 commit comments