diff --git a/db/news.yml b/db/news.yml index 2e6b9eb5..98f3e89b 100644 --- a/db/news.yml +++ b/db/news.yml @@ -1,5 +1,4 @@ --- -news: - id: 13 url: https://news.coderdojo.jp/2025/10/04/dojoletter-vol-89-2025%e5%b9%b408%e6%9c%88%e5%8f%b7/ title: DojoLetter Vol.89 2025年08月号 diff --git a/lib/tasks/news.rake b/lib/tasks/news.rake index 12f9a399..8f3c654d 100644 --- a/lib/tasks/news.rake +++ b/lib/tasks/news.rake @@ -1,118 +1,75 @@ require 'rss' -require 'net/http' -require 'uri' -require 'yaml' -require 'time' -require 'active_support/broadcast_logger' - -def safe_open(url) - uri = URI.parse(url) - raise "不正なURLです: #{url}" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS) - - Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http| - request = Net::HTTP::Get.new(uri) - response = http.request(request) - response.body - end -end - -def fetch_rss_items(url, logger) - logger.info("Fetching RSS → #{url}") - begin - rss = safe_open(url) - feed = RSS::Parser.parse(rss, false) - feed.items.map { |item| item_to_hash(item) } - rescue => e - logger.warn("⚠️ Failed to fetch #{url}: #{e.message}") - [] - end -end -def item_to_hash(item) - { - 'url' => item.link, - 'title' => item.title, - 'published_at' => item.pubDate.to_s - } -end +NEWS_YAML_PATH = 'db/news.yml'.freeze +NEWS_LOG_PATH = 'log/news.log'.freeze namespace :news do - desc 'RSS フィードを取得し、db/news.yml に保存' + desc "RSS フィードを取得し、#{NEWS_YAML_PATH} に保存" task fetch: :environment do # ロガー設定(ファイル+コンソール出力) - file_logger = ActiveSupport::Logger.new('log/news.log') console = ActiveSupport::Logger.new(STDOUT) - logger = ActiveSupport::BroadcastLogger.new(file_logger, console) + logger_file = ActiveSupport::Logger.new(NEWS_LOG_PATH) + logger = ActiveSupport::BroadcastLogger.new(logger_file, console) logger.info('==== START news:fetch ====') - # 既存の news.yml を読み込み - yaml_path = Rails.root.join('db', 'news.yml') - existing_news = if File.exist?(yaml_path) - YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true)['news'] || [] - else - [] - end - - # テスト/ステージング環境ではサンプルファイル、本番は実サイトのフィード - feed_urls = if Rails.env.test? || Rails.env.staging? - [Rails.root.join('spec', 'fixtures', 'sample_news.rss').to_s] - else - [ - 'https://news.coderdojo.jp/feed/' - # 必要に応じて他 Dojo の RSS もここに追加可能 - # 'https://coderdojotokyo.org/feed', - ] - end - - new_items = feed_urls.flat_map { |url| fetch_rss_items(url, logger) } - - # 既存データをハッシュに変換(URL をキーに) - existing_items_hash = existing_news.index_by { |item| item['url'] } - - # 新しいアイテムと既存アイテムを分離 - truly_new_items = [] + # 本番/開発環境では実フィード、それ以外(テスト環境など)ではテスト用フィード + DOJO_NEWS_FEED = 'https://news.coderdojo.jp/feed/' + TEST_NEWS_FEED = Rails.root.join('spec', 'fixtures', 'sample_news.rss') + RSS_FEED_LIST = (Rails.env.test? || Rails.env.staging?) ? + [TEST_NEWS_FEED] : + [DOJO_NEWS_FEED] + + # RSS のデータ構造を、News のデータ構造に変換 + fetched_items = RSS_FEED_LIST.flat_map do |feed| + feed = RSS::Parser.parse(feed, false) + feed.items.map { |item| + { + 'url' => item.link, + 'title' => item.title, + 'published_at' => item.pubDate.to_s + } + } + end + + # 取得済みニュース (YAML) を読み込み、URL をキーとしたハッシュに変換 + existing_items = YAML.safe_load(File.read NEWS_YAML_PATH).index_by { it['url'] } + existing_max_id = existing_items.flat_map { |url, item| item['id'].to_i }.max || 0 + + # 新規記事と既存記事を分離 + created_items = [] updated_items = [] - new_items.each do |new_item| - if existing_items_hash.key?(new_item['url']) - existing_item = existing_items_hash[new_item['url']] - # タイトルまたは公開日が変わった場合のみ更新 - if existing_item['title'] != new_item['title'] || existing_item['published_at'] != new_item['published_at'] - updated_items << existing_item.merge(new_item) - end - else - truly_new_items << new_item + fetched_items.each do |fetched_item| + existing_item = existing_items[fetched_item['url']] + + if existing_item.nil? + # 新規アイテムならそのまま追加 + created_items << fetched_item + elsif existing_item['title'] != fetched_item['title'] || existing_item['published_at'] != fetched_item['published_at'] + # タイトルまたは公開日が変わっていたら更新 + updated_items << existing_item.merge(fetched_item) end end - # 既存の最大IDを取得 - max_existing_id = existing_news.map { |item| item['id'].to_i }.max || 0 - # 新しいアイテムのみに ID を割り当て(古い順) - truly_new_items_sorted = truly_new_items.sort_by { |item| - Time.parse(item['published_at']) - } - - truly_new_items_sorted.each_with_index do |item, index| - item['id'] = max_existing_id + index + 1 + created_items.sort_by! { Time.parse it['published_at'] } + created_items.each.with_index(1) do |item, index| + item['id'] = existing_max_id + index end - # 更新されなかった既存アイテムを取得 - updated_urls = updated_items.map { |item| item['url'] } - unchanged_items = existing_news.reject { |item| updated_urls.include?(item['url']) } - - # 全アイテムをマージ - all_items = unchanged_items + updated_items + truly_new_items_sorted + # URL をキーに、更新されなかった既存の YAML データを取得・保持 + updated_urls = updated_items.map { it['url'] } + unchanged_items = existing_items.values.reject { updated_urls.include?(it['url']) } - # 日付降順ソート - sorted_items = all_items.sort_by { |item| - Time.parse(item['published_at']) + # 新規・更新・既存の各アイテムをマージし、日付降順でソート + merged_items = (unchanged_items + updated_items + created_items).sort_by { + Time.parse(it['published_at']) }.reverse # YAML ファイルに書き出し - File.open('db/news.yml', 'w') do |f| - formatted_items = sorted_items.map do |item| + File.open(NEWS_YAML_PATH, 'w') do |f| + formatted_items = merged_items.map do |item| { 'id' => item['id'], 'url' => item['url'], @@ -121,51 +78,49 @@ namespace :news do } end - f.write({ 'news' => formatted_items }.to_yaml) + f.write(formatted_items.to_yaml) end - logger.info("✅ Wrote #{sorted_items.size} items to db/news.yml (#{truly_new_items_sorted.size} new, #{updated_items.size} updated)") - logger.info('==== END news:fetch ====') + logger.info "✅ Wrote #{merged_items.size} items to #{NEWS_YAML_PATH} (#{created_items.size} new, #{updated_items.size} updated)" + logger.info "==== END news:fetch ====" + logger.info "" end - desc 'db/news.yml からデータベースに upsert' + desc "#{NEWS_YAML_PATH} からデータベースに upsert" task upsert: :environment do - file_logger = ActiveSupport::Logger.new('log/news.log') console = ActiveSupport::Logger.new(STDOUT) - logger = ActiveSupport::BroadcastLogger.new(file_logger, console) + logger_file = ActiveSupport::Logger.new(NEWS_LOG_PATH) + logger = ActiveSupport::BroadcastLogger.new(logger_file, console) logger.info "==== START news:upsert ====" - yaml_path = Rails.root.join('db', 'news.yml') - raw = YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true) - - entries = raw['news'] || [] - new_count = 0 + news_items = YAML.safe_load File.read(NEWS_YAML_PATH) + created_count = 0 updated_count = 0 News.transaction do - entries.each do |attrs| - news = News.find_or_initialize_by(url: attrs['url']) - is_new = news.new_record? - + news_items.each do |item| + news = News.find_or_initialize_by(url: item['url']) news.assign_attributes( - title: attrs['title'], - published_at: attrs['published_at'] + title: item['title'], + published_at: item['published_at'] ) - - if is_new || news.changed? + + is_new_record = news.new_record? + if is_new_record || news.changed? news.save! - status = is_new ? 'new' : 'updated' - new_count += 1 if is_new - updated_count += 1 unless is_new + + status = is_new_record ? 'new' : 'updated' + created_count += 1 if is_new_record + updated_count += 1 unless is_new_record logger.info "[News] #{news.published_at.to_date} #{news.title} (#{status})" end end end - logger.info "Upserted #{new_count + updated_count} items (#{new_count} new, #{updated_count} updated)." + logger.info "Upserted #{created_count + updated_count} items (#{created_count} new, #{updated_count} updated)." logger.info "==== END news:upsert ====" + logger.info "" end - end