Skip to content

Commit 6fcad50

Browse files
committed
Updated parsing and fixed views
Fixed admin Removed pyppeteer mentions Added crochet and celery
1 parent b16cb14 commit 6fcad50

19 files changed

+430
-75
lines changed

.envs.example/docker.env.example

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# Used in docker compose
22

3-
# ##########
4-
# DATABASE #
5-
# ##########
3+
# ######
4+
# DATA #
5+
# ######
6+
7+
POSTGRES_USER=postgres
8+
POSTGRES_PASSWORD=postgres
9+
POSTGRES_DB=sora
610

711
DATABASE_HOST=db
812
DATABASE_USER=postgres
@@ -11,6 +15,10 @@ DATABASE_PASSWORD=postgres
1115
DATABASE_PORT=5432
1216
DATABASE_NAME=sora
1317

18+
REDIS_URL=redis://redis:6379
19+
20+
ELASTICSEARCH_HOST=elasticsearch:9200
21+
1422
# ########
1523
# DJANGO #
1624
# ########
@@ -20,22 +28,6 @@ SECRET_KEY=DevServer
2028
# Comma-separated list
2129
ALLOWED_HOSTS=*
2230
PAGE_SIZE=20
23-
DJANGO_COLORS="success=green,bold;notice=cyan,bold;error=red,bold"
24-
25-
# ########
26-
# DOCKER #
27-
# ########
28-
29-
# Postgres
30-
POSTGRES_USER=postgres
31-
POSTGRES_PASSWORD=postgres
32-
POSTGRES_DB=sora
33-
34-
# #######
35-
# REDIS #
36-
# #######
37-
38-
REDIS_URL=redis://redis:6379
3931

4032
# #######
4133
# Other #

.envs.example/local.env.example

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Your local config to develop without docker
22

3-
# ##########
4-
# DATABASE #
5-
# ##########
3+
# ######
4+
# DATA #
5+
# ######
66

77
DATABASE_HOST=localhost
88
DATABASE_USER=postgres
@@ -12,6 +12,12 @@ DATABASE_PASSWORD=postgres
1212
DATABASE_PORT=8882
1313
DATABASE_NAME=sora
1414

15+
# Port from docker-compose
16+
REDIS_URL=redis://localhost:8883
17+
18+
# Port from docker-compose
19+
ELASTICSEARCH_HOST=localhost:9200
20+
1521
# ########
1622
# DJANGO #
1723
# ########
@@ -21,14 +27,6 @@ SECRET_KEY=DevServer
2127
# Comma-separated list
2228
ALLOWED_HOSTS=*
2329
PAGE_SIZE=20
24-
DJANGO_COLORS="success=green,bold;notice=cyan,bold;error=red,bold"
25-
26-
# #######
27-
# REDIS #
28-
# #######
29-
30-
# Port from docker-compose
31-
REDIS_URL=redis://localhost:8883
3230

3331
# #######
3432
# Other #

apps/parse/admin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class MangaAdmin(BaseAdmin, ImagePreviewMixin, admin.ModelAdmin):
5454
"status",
5555
"genre_list",
5656
)
57-
list_filter = ("genres", SourceFilter)
57+
# list_filter = ("genres", SourceFilter)
5858

5959
def custom_title(self, obj: Manga):
6060
concat = f"{obj.title}{', ' + obj.year if obj.year else ''}"
@@ -98,4 +98,4 @@ class ChapterAdmin(BaseAdmin, admin.ModelAdmin):
9898
}
9999

100100
def manga_name(self, obj):
101-
return obj.manga_set.first().title
101+
return obj.manga.title

apps/parse/api/views.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ def retrieve(self, _, pk, *args, **kwargs):
3232
manga = self.get_fast_manga(pk)
3333
try:
3434
if needs_update(manga["updated_detail"]):
35-
run_parser(DETAIL_PARSER, manga.source, manga["source_url"])
35+
run_parser(DETAIL_PARSER, manga["source"], manga["source_url"])
3636
except Exception as e:
37-
return format_error_response("Errors occured during parsing" + str(e))
37+
return format_error_response("Errors occured during parsing " + str(e))
3838
return get_fast_response(manga)
3939

4040
def list(self, request):
@@ -67,11 +67,11 @@ def chapters_list(self, _, pk):
6767
manga: Manga = Manga.objects.prefetch_related("chapters").get(pk=pk)
6868

6969
try:
70-
if needs_update(manga["updated_detail"]):
71-
run_parser(DETAIL_PARSER, manga.source, manga["source_url"])
72-
run_parser(CHAPTER_PARSER, manga.source, manga["source_url"])
70+
if needs_update(manga.updated_detail.isoformat()):
71+
run_parser(DETAIL_PARSER, manga.source, manga.source_url)
72+
run_parser(CHAPTER_PARSER, manga.source, manga.source_url)
7373
except Exception as e:
74-
return format_error_response("Errors occured during parsing" + str(e))
74+
return format_error_response("Errors occured during parsing " + str(e))
7575
return get_fast_response(
7676
list(manga.chapters.order_by("-volume", "-number").values(*CHAPTER_FIELDS))
7777
)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import logging
2+
import sys
3+
4+
from django.core.management.base import BaseCommand, CommandParser
5+
6+
from apps.parse.const import (
7+
CATALOGUE_NAMES,
8+
CHAPTER_PARSER,
9+
DETAIL_PARSER,
10+
IMAGE_PARSER,
11+
PARSER_TYPES,
12+
)
13+
from apps.parse.scrapy.utils import run_parser
14+
from apps.parse.utils import mute_logger_stdout
15+
16+
logger = logging.getLogger("management")
17+
18+
19+
class Command(BaseCommand):
    """Management command that runs a single parser for a chosen catalogue.

    Positional arguments select the parser ``type`` (one of ``PARSER_TYPES``)
    and the ``catalogue`` (one of ``CATALOGUE_NAMES``).  ``--url`` supplies the
    link to parse and is mandatory for the detail, chapter and image parsers.
    """

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the ``type`` / ``catalogue`` positionals and the ``--url`` option."""
        parser.add_argument(
            "type",
            type=str,
            choices=PARSER_TYPES,
            help="which type of data to parse",
        )
        parser.add_argument(
            "catalogue",
            type=str,
            # argparse ignores ``default`` on a mandatory positional; making the
            # positional optional lets the declared default actually apply.
            # NOTE(review): assumes "readmanga" is listed in CATALOGUE_NAMES.
            nargs="?",
            default="readmanga",
            choices=CATALOGUE_NAMES,
            help="parser to use which respresents a website source",
        )
        parser.add_argument(
            "--url",
            type=str,
            # Whether the url is mandatory depends on the parsed ``type``, so it
            # is validated in ``handle`` rather than by peeking at ``sys.argv``
            # (which breaks with short argv and under ``call_command``).
            default=None,
            help="A link which to parse (detail/chapter/rss url)",
        )

    def handle(self, *args, **options):
        """Validate the options, silence noisy loggers and run the parser.

        Raises:
            CommandError: if ``--url`` is missing for a parser type that needs it.
        """
        # Local import: the module-level import only brings in BaseCommand/CommandParser.
        from django.core.management.base import CommandError

        # Read the options before the ``try`` so the handlers below can always
        # reference ``catalogue_name``.
        parser_type: str = options["type"]
        catalogue_name: str = options["catalogue"]
        url = options.get("url")

        if parser_type in (DETAIL_PARSER, CHAPTER_PARSER, IMAGE_PARSER) and not url:
            raise CommandError(f"--url is required for the [{parser_type}] parser")

        mute_logger_stdout("scrapy", "elasticsearch", "asyncio", "protego", "urllib3", "requests")
        try:
            logger.info("Running parser")
            run_parser(parser_type, catalogue_name, url=url)
            logger.info("Finished parsing")
        except (AttributeError, KeyError):
            logger.error(f"Can't find Catalogue [{catalogue_name}]")
        except Exception:
            # ``exception`` logs at ERROR level and keeps the traceback, so the
            # cause of the failure is not silently lost.
            logger.exception(f"Some errors occured in the parser {catalogue_name}")

apps/parse/management/commands/parse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,5 @@ def handle(self, *args, **options):
4747
logger.info("Finished parsing")
4848
except (AttributeError, KeyError):
4949
logger.error(f"Can't find Catalogue [{catalogue_name}]")
50-
except Exception:
51-
logger.error(f"Some errors occured in the parser {catalogue_name}")
50+
except Exception as e:
51+
logger.error(f"Some errors occured in the parser {catalogue_name} {e}")
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Generated by Django 3.1 on 2021-11-12 14:48
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
6+
7+
class Migration(migrations.Migration):
    """Alter ``chapter.manga`` to a nullable, cascading foreign key to ``parse.manga``.

    The reverse accessor on the manga side is ``chapters``.  Applies on top of
    migration ``0025_chapters_to_fk`` of the ``parse`` app.
    """

    dependencies = [("parse", "0025_chapters_to_fk")]

    operations = [
        migrations.AlterField(
            model_name="chapter",
            name="manga",
            field=models.ForeignKey(
                to="parse.manga",
                related_name="chapters",
                on_delete=django.db.models.deletion.CASCADE,
                null=True,
            ),
        ),
    ]

apps/parse/readmanga/chapter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ class ReadmangaChapterSpider(scrapy.Spider):
1717
"ITEM_PIPELINES": {"apps.parse.readmanga.pipelines.ReadmangaChapterPipeline": 300}
1818
}
1919

20-
def __init__(self, *, url: str):
21-
self.start_urls = [url]
20+
def __init__(self, *args, url: str):
21+
super().__init__(*args, start_urls=[url])
2222

2323
def parse(self, response: XmlResponse) -> List[MangaChapterItem]:
2424
chapters = []

apps/parse/readmanga/detail.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
class ReadmangaDetailSpider(scrapy.Spider):
1818
name = "readmanga_detail"
1919

20-
def __init__(self, *, url: str):
21-
self.start_urls = [url]
20+
def __init__(self, *args, url: str):
21+
super().__init__(*args, start_urls=[url])
2222

2323
def parse(self, response: HtmlResponse):
2424
year = response.xpath(YEAR_TAG).extract_first("")

apps/parse/readmanga/images.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ class ReadmangaImageSpider(scrapy.Spider):
1515
"ITEM_PIPELINES": {"apps.parse.readmanga.pipelines.ReadmangaImagePipeline": 300}
1616
}
1717

18-
def __init__(self, *, url: str):
19-
self.start_urls = [url]
20-
self.redis_client = init_redis_client()
18+
def __init__(self, *args, url: str, **kwargs):
19+
super().__init__(*args, **kwargs, start_urls=[url], redis_client=init_redis_client())
2120

2221
def parse(self, response: HtmlResponse):
2322
images = re.search(r"rm_h.initReader\(.*(\[{2}.*\]{2}).*\)", response.text)
23+
image_links = []
2424
if images:
2525
image_links = [
2626
"".join(image[:COUNT_LINK_ELEMENTS])

apps/parse/readmanga/pipelines.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def bulk_get_or_create(cls: Type[BaseModel], names: List[str]) -> Tuple:
2828
class ReadmangaImagePipeline:
2929
@staticmethod
3030
def process_item(item: Dict[str, List[str]], spider: ReadmangaImageSpider):
31-
url, images = item.items()[0]
31+
url, images = next(iter(item.items()))
3232
spider.redis_client.delete(url)
3333
spider.redis_client.expire(url, IMAGE_UPDATE_FREQUENCY)
3434
spider.redis_client.rpush(url, *images)

apps/parse/scrapy/utils.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
1-
import os
2-
31
from scrapy.crawler import CrawlerProcess
42
from scrapy.utils.project import get_project_settings
53

64
from apps.parse.const import CATALOGUES
75

86

9-
def run_parser(parser_type: str, catalogue_name: str = "readmanga", *args, **kwargs):
7+
def run_parser(parser_type: str, catalogue_name: str = "readmanga", url: str = None):
108
catalogue = CATALOGUES[catalogue_name]
119
spider = catalogue["parsers"][parser_type]
1210

13-
os.environ.setdefault("SCRAPY_SETTINGS_MODULE", catalogue["settings"])
1411
process = CrawlerProcess(get_project_settings())
1512

16-
process.crawl(spider, *args, **kwargs)
13+
process.crawl(spider, url=url)
1714
process.start()

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ services:
4848

4949
elasticsearch:
5050
image: elasticsearch:7.14.2
51+
restart: unless-stopped
5152
environment:
5253
- discovery.type=single-node
5354
volumes:

docker-entrypoint.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@ echo
2222
echo "Running migrations"
2323
./manage.py migrate --no-input
2424

25+
until curl --output /dev/null --silent --head --fail "http://$ELASTICSEARCH_HOST"; do
26+
echo >&2 "Elasticsearch is unavailable - sleeping"
27+
sleep 1
28+
done
29+
echo
30+
echo "Rebuilding index"
31+
./manage.py search_index --rebuild
32+
2533
echo "Running the server on port $PORT"
2634

2735
core_count=$(grep 'cpu[0-9]' /proc/stat | wc -l)

manga_reader/settings.py

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
BASE_DIR = Path(__file__).resolve().parent.parent
2020
ROOT_URLCONF = "manga_reader.urls"
2121
WSGI_APPLICATION = "manga_reader.wsgi.application"
22+
WEBDRIVER_PATH = os.getenv("WEBDRIVER_PATH", None)
2223

2324
############
2425
# Security #
@@ -50,10 +51,13 @@
5051
"apps.core.apps.CoreConfig",
5152
"django_extensions",
5253
"django.contrib.postgres",
53-
"silk",
5454
"django_elasticsearch_dsl",
5555
]
5656

57+
ELASTICSEARCH_DSL = {
58+
"default": {"hosts": os.getenv("ELASTICSEARCH_HOST", "localhost:9200")},
59+
}
60+
5761
#########
5862
# ADMIN #
5963
#########
@@ -278,29 +282,14 @@
278282
)
279283
ignore_logger("django.security.DisallowedHost")
280284

281-
#############
282-
# Pyppeteer #
283-
#############
284-
285-
WEBDRIVER_PATH = os.getenv("WEBDRIVER_PATH", None)
286-
DEFAULT_LAUNCH_ARGS = {
287-
"headless": True,
288-
"args": ["--no-sandbox", "--disable-setuid-sandbox"],
289-
"executablePath": WEBDRIVER_PATH,
290-
}
291-
292-
293285
########
294286
# Silk #
295287
########
296288

297289
if DEBUG:
290+
INSTALLED_APPS.append("silk")
298291
MIDDLEWARE.append("silk.middleware.SilkyMiddleware")
299292

300-
ELASTICSEARCH_DSL = {
301-
"default": {"hosts": "localhost:9200"},
302-
}
303-
304293
##########
305294
# Logging #
306295
##########

manga_reader/urls.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from django.conf import settings
12
from django.conf.urls import include
23
from django.contrib import admin
34
from django.urls import path, re_path
@@ -10,6 +11,8 @@
1011

1112
urlpatterns = [
1213
path("api/", include(apipatterns)),
13-
path("silk/", include("silk.urls", namespace="silk")),
1414
re_path(r"^(?!api)\w*?", admin.site.urls),
1515
]
16+
17+
if settings.DEBUG:
18+
urlpatterns.append(path("silk/", include("silk.urls", namespace="silk")))

0 commit comments

Comments
 (0)