-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstarbelly.proto
874 lines (760 loc) · 28.9 KB
/
starbelly.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
syntax = "proto2";
// A generic CAPTCHA solver entry. Solver-specific settings are carried in the
// `SolverType` oneof.
message CaptchaSolver {
  // Unique identifier of this solver.
  optional bytes solver_id = 1;

  // Name given to this CAPTCHA solver entry.
  optional string name = 2;

  // Creation time of this entry. (RFC-3339)
  optional string created_at = 3;

  // Time of the most recent update to this entry. (RFC-3339)
  optional string updated_at = 4;

  oneof SolverType {
    // Additional configuration for an Antigate CAPTCHA solver.
    CaptchaSolverAntigate antigate = 5;
  }
}
// Character sets that a CAPTCHA may be limited to.
enum CaptchaSolverAntigateCharacters {
  // Any alphanumeric character may appear in the CAPTCHA.
  ALPHANUMERIC = 1;

  // Only numeric characters appear in the CAPTCHA.
  NUMBERS_ONLY = 2;

  // Only alphabetic characters appear in the CAPTCHA.
  ALPHA_ONLY = 3;
}
// Configuration for a CAPTCHA solver that speaks an Antigate-compatible API.
message CaptchaSolverAntigate {
  // URL endpoint of the CAPTCHA solving service.
  optional string service_url = 1;

  // API key for the service.
  optional string api_key = 2;

  // Set when the CAPTCHA uses a phrase rather than a short sequence of
  // characters.
  optional bool require_phrase = 3;

  // Set when the CAPTCHA is case sensitive.
  optional bool case_sensitive = 4;

  // Which set of characters the CAPTCHA permits.
  optional CaptchaSolverAntigateCharacters characters = 5;

  // Set when the CAPTCHA requires solving a math problem.
  optional bool require_math = 6;

  // Minimum length of the CAPTCHA.
  optional int32 min_length = 7;

  // Maximum length of the CAPTCHA.
  optional int32 max_length = 8;
}
// A crawl response: either something the crawler downloaded, or the
// error/exception that resulted from attempting to download it.
message CrawlResponse {
  // Raw body of the response.
  optional bytes body = 1;

  // Time at which the download of this response finished. (RFC-3339)
  optional string completed_at = 2;

  // MIME type of the content.
  optional string content_type = 3;

  // Crawl cost of this item. (See "Crawl Policy" for more details.)
  optional double cost = 4;

  // Time this item took to download.
  optional double duration = 5;

  // Full text of any exception raised while downloading.
  optional string exception = 6;

  // HTTP headers of this response.
  repeated Header headers = 7;

  // True when the response body is compressed.
  optional bool is_compressed = 8;

  // True when the response counts as a success; false when an HTTP error or
  // an exception occurred.
  optional bool is_success = 9;

  // Identifier of the crawl job this response is associated with.
  optional bytes job_id = 10;

  // Time at which the request for this item was sent. (RFC-3339)
  optional string started_at = 11;

  // HTTP status code returned for this response.
  optional int32 status_code = 12;

  // Request URL as originally given.
  optional string url = 13;

  // Request URL after canonicalization.
  optional string url_can = 14;
}
// A single HTTP header as a name/value pair.
message Header {
  // Header name.
  optional string key = 1;

  // Header value.
  optional string value = 2;
}
// Login information for a domain.
message DomainLogin {
  // Field number 4 is absent from this message's numbering. It is reserved so
  // that it cannot be assigned to a new field by accident.
  // NOTE(review): presumably 4 belonged to a field that was removed; confirm
  // against the file history.
  reserved 4;

  // The domain for which this login information is valid.
  optional string domain = 1;

  // The URL for logging into this domain.
  optional string login_url = 2;

  // NOTE(review): undocumented in the original. Presumably a string used to
  // check whether a login attempt succeeded; confirm with the server code.
  optional string login_test = 3;

  // Credentials for this domain.
  repeated DomainLoginUser users = 5;
}
// One username/password credential for a domain.
message DomainLoginUser {
  // Username half of the credential.
  optional string username = 1;

  // Password half of the credential.
  optional string password = 2;

  // True when this credential is expected to work; false when it is expected
  // to fail.
  optional bool working = 3;
}
// Sent for an active subscription whenever data is available for that
// subscription.
message Event {
  // Unique identifier of the subscription.
  required int32 subscription_id = 1;

  oneof Body {
    // A list of crawl jobs.
    JobList job_list = 2;

    // A list of schedules.
    ScheduleList schedule_list = 7;

    // Resource consumption captured at a point in time.
    ResourceFrame resource_frame = 3;

    // Signals that a subscription has ended.
    SubscriptionClosed subscription_closed = 4;

    // One crawled item, wrapped with metadata that assists synchronization.
    SyncItem sync_item = 5;

    // The Trio tasks running at a point in time.
    TaskTree task_tree = 6;
  }
}
// Metadata and statistics for one crawl job.
message Job {
  // Unique identifier of the crawl job.
  required bytes job_id = 1;

  // Seed URLs of the crawl job.
  repeated string seeds = 2;

  // Crawl policy associated with the crawl job.
  optional Policy policy = 3;

  // Name assigned to the crawl job.
  optional string name = 4;

  // Tags associated with the crawl job.
  repeated string tags = 5;

  // Current run state of the crawl job.
  optional JobRunState run_state = 6;

  // Time at which the crawl job started. (RFC-3339)
  optional string started_at = 7;

  // Time at which the crawl job ended. (RFC-3339)
  optional string completed_at = 8;

  // Number of requests sent for this crawl, inclusive of HTTP errors and
  // exceptions.
  optional int32 item_count = 9 [default = -1];

  // Number of successful responses for this crawl.
  optional int32 http_success_count = 10 [default = -1];

  // Number of HTTP error responses for this crawl.
  optional int32 http_error_count = 11 [default = -1];

  // Number of requests that ended in an exception during this crawl.
  optional int32 exception_count = 12 [default = -1];

  // How many times each HTTP status code has been received during this crawl,
  // keyed by status code.
  map<int32, int32> http_status_counts = 13;
}
// Wrapper around a list of jobs.
//
// A repeated field cannot be a member of a "oneof" directly, so this message
// exists to let a list of jobs be used as a oneof body.
message JobList {
  // The jobs in this list.
  repeated Job jobs = 1;
}
// Run states that a job may be in.
enum JobRunState {
  // The job was cancelled.
  CANCELLED = 1;

  // The job ran to completion.
  COMPLETED = 2;

  // The job is paused and may be resumed later.
  PAUSED = 3;

  // The job has been created but has not yet started running.
  PENDING = 4;

  // The job is currently running.
  RUNNING = 5;

  // Not an actual run state. It is sent to job status subscriptions when a
  // job is deleted, so that subscribers know the job is gone.
  DELETED = 6;
}
// Describes a schedule that creates crawl jobs.
message Schedule {
  // Unique identifier of this schedule.
  optional bytes schedule_id = 1;

  // Time at which this schedule was created. (RFC-3339)
  optional string created_at = 2;

  // Time at which this schedule was last updated. (RFC-3339)
  optional string updated_at = 3;

  // When true, jobs run as scheduled. When false, the schedule is ignored and
  // no new jobs are created.
  optional bool enabled = 4;

  // Unit of measurement that `num_units` refers to.
  optional ScheduleTimeUnit time_unit = 5;

  // Number of time units that elapse between jobs.
  optional int32 num_units = 6;

  // How the timing of a new job relates to the timing of the previous job.
  optional ScheduleTiming timing = 7;

  // Name of this schedule.
  optional string schedule_name = 8;

  // Template used to name the jobs this schedule creates.
  optional string job_name = 9;

  // Seed URLs for the jobs this schedule creates.
  repeated string seeds = 10;

  // Policy for the jobs this schedule creates.
  optional bytes policy_id = 11;

  // Tags for the jobs this schedule creates.
  repeated string tags = 12;

  // Number of jobs this schedule has created.
  optional int32 job_count = 13;
}
// Wrapper around a list of job schedules.
//
// A repeated field cannot be a member of a "oneof" directly, so this message
// exists to let a list of job schedules be used as a oneof body.
message ScheduleList {
  // The schedules in this list.
  repeated Schedule schedules = 1;
}
// Time units available for scheduling a job.
enum ScheduleTimeUnit {
  MINUTES = 1;
  HOURS = 2;
  DAYS = 3;
  WEEKS = 4;
  MONTHS = 5;
  YEARS = 6;
}
// Controls when a scheduled job is started relative to the previous one.
enum ScheduleTiming {
  // Start a job X units of time after the previous job completes.
  AFTER_PREVIOUS_JOB_FINISHED = 1;

  // Start a job every X units of time; a previous job that is still running
  // is ended.
  REGULAR_INTERVAL = 2;
}
// Pagination options for large result sets, similar to LIMIT/OFFSET in SQL.
message Page {
  // Number of items to put on each page.
  optional int32 limit = 1 [default = 10];

  // Number of items to skip before emitting items for the current page.
  optional int32 offset = 2;
}
// Selects how a regex pattern is applied: as-is or inverted.
enum PatternMatch {
  MATCHES = 1;
  DOES_NOT_MATCH = 2;
}
// Settings that dictate crawler behavior. Policy is set on a job-by-job basis.
message Policy {
  // Field number 5 is absent from this message's numbering. It is reserved so
  // that it cannot be assigned to a new field by accident.
  // NOTE(review): presumably 5 belonged to a field that was removed; confirm
  // against the file history.
  reserved 5;

  // Unique identifier for this policy.
  optional bytes policy_id = 1;

  // The name assigned to this policy.
  optional string name = 2;

  // When this policy was created. (RFC-3339)
  optional string created_at = 3;

  // When this policy was last updated. (RFC-3339)
  optional string updated_at = 4;

  // The CAPTCHA solver to use.
  optional bytes captcha_solver_id = 14;

  // The authentication subpolicy controls how the crawler logs in to websites.
  optional PolicyAuthentication authentication = 6;

  // The limits subpolicy controls how long the crawler runs.
  optional PolicyLimits limits = 7;

  // The proxy subpolicy controls what proxies the crawler uses.
  repeated PolicyProxyRule proxy_rules = 8;

  // The MIME-type subpolicy controls what types of content the crawler
  // downloads.
  repeated PolicyMimeTypeRule mime_type_rules = 9;

  // The robots.txt subpolicy controls how the crawler handles robots
  // directives.
  optional PolicyRobotsTxt robots_txt = 10;

  // The normalization subpolicy controls how the crawler normalizes URLs in
  // its crawl frontier.
  optional PolicyUrlNormalization url_normalization = 13;

  // The URL rules subpolicy controls how the crawler computes cost numbers for
  // URLs in its crawl frontier.
  repeated PolicyUrlRule url_rules = 11;

  // The user agent subpolicy controls what user agent strings the crawler
  // sends.
  repeated PolicyUserAgent user_agents = 12;
}
// Subpolicy holding the settings for authenticated crawling.
message PolicyAuthentication {
  // NOTE(review): undocumented in the original. Presumably toggles
  // authenticated crawling on or off; confirm with the server code.
  optional bool enabled = 1;
}
// Limits on how far or how long a crawl runs.
message PolicyLimits {
  // Items whose crawl cost exceeds this value are not fetched.
  optional double max_cost = 1;

  // The crawl ends once this many seconds have elapsed.
  optional double max_duration = 2;

  // The crawl ends once this many items have been downloaded.
  optional int32 max_items = 3;
}
// Decides whether certain responses are saved or discarded, based on their
// MIME type.
//
// A rule with no pattern applies to all responses.
message PolicyMimeTypeRule {
  // Regex pattern matched against the MIME type.
  optional string pattern = 1;

  // Lets the pattern be inverted.
  optional PatternMatch match = 2;

  // When true, items with matching MIME types are saved and non-matches are
  // discarded. When false, it is the opposite.
  optional bool save = 3;
}
// Describes when requests are proxied and through which proxy.
message PolicyProxyRule {
  // Regex pattern matched against the URL.
  optional string pattern = 1;

  // Lets the pattern be inverted.
  optional PatternMatch match = 2;

  // URL of the proxy used for items that match this rule. When it is not
  // provided, no proxy is used.
  optional string proxy_url = 3;
}
// Subpolicy describing how robots.txt is handled.
message PolicyRobotsTxt {
  // The available ways of treating robots.txt rules.
  enum Usage {
    // Follow the robots.txt rules.
    OBEY = 1;

    // Do the opposite of what the robots.txt rules say.
    INVERT = 2;

    // Disregard robots.txt.
    IGNORE = 3;
  }

  // Selected way of treating robots.txt rules.
  required Usage usage = 1;
}
// Subpolicy holding the settings for URL normalization.
message PolicyUrlNormalization {
  // When true, URLs in the crawl frontier are normalized. When false, URLs
  // are not normalized.
  optional bool enabled = 1;

  // URL query parameters that normalization removes.
  repeated string strip_parameters = 2;
}
// One rule that adjusts the cost of a URL.
message PolicyUrlRule {
  // Arithmetic operations a rule can apply to the parent cost.
  enum Action {
    // Parent cost plus `amount`.
    ADD = 1;

    // Parent cost times `amount`.
    MULTIPLY = 2;
  }

  // Regex pattern matched against the URL.
  optional string pattern = 1;

  // Lets the pattern be inverted.
  optional PatternMatch match = 2;

  // Arithmetic operation performed when a URL is matched.
  optional Action action = 3;

  // Right operand of the arithmetic operation. (The left operand is the cost
  // of the parent item.)
  optional double amount = 4;
}
// A user agent string that is sent when downloading a resource.
message PolicyUserAgent {
  // NOTE(review): undocumented in the original. Presumably the user agent
  // string itself; confirm with the server code.
  required string name = 1;
}
// A Request is issued by the client, and the server is expected to send
// exactly 1 Response for each Request.
message Request {
  // Field number 14 is absent from this message's numbering. It is reserved so
  // that it cannot be assigned to a new command by accident.
  // NOTE(review): presumably 14 belonged to a command that was removed;
  // confirm against the file history.
  reserved 14;

  // The request ID is included in the response so that the client can
  // correlate requests and responses. (Responses may arrive in a different
  // order than the requests were sent.)
  required int32 request_id = 1;

  // The command to execute; one member per supported request type.
  oneof Command {
    // CAPTCHA solver commands.
    RequestDeleteCaptchaSolver delete_captcha_solver = 31;
    RequestGetCaptchaSolver get_captcha_solver = 28;
    RequestListCaptchaSolvers list_captcha_solvers = 29;
    RequestSetCaptchaSolver set_captcha_solver = 30;

    // Job commands.
    RequestDeleteJob delete_job = 3;
    RequestGetJob get_job = 6;
    RequestGetJobItems get_job_items = 7;
    RequestListJobs list_jobs = 11;
    RequestSetJob set_job = 16;

    // Schedule commands.
    RequestDeleteSchedule delete_schedule = 24;
    RequestGetSchedule get_schedule = 25;
    RequestListSchedules list_schedules = 26;
    RequestListScheduleJobs list_schedule_jobs = 32;
    RequestSetSchedule set_schedule = 27;

    // Policy commands.
    RequestDeletePolicy delete_policy = 4;
    RequestGetPolicy get_policy = 8;
    RequestListPolicies list_policies = 12;
    RequestSetPolicy set_policy = 17;

    // Domain login commands.
    RequestDeleteDomainLogin delete_domain_login = 2;
    RequestGetDomainLogin get_domain_login = 5;
    RequestListDomainLogins list_domain_logins = 10;
    RequestSetDomainLogin set_domain_login = 15;

    // Rate limit commands.
    RequestListRateLimits list_rate_limits = 9;
    RequestSetRateLimit set_rate_limit = 18;

    // Performance profiling.
    RequestPerformanceProfile performance_profile = 13;

    // Subscription commands.
    RequestSubscribeJobStatus subscribe_job_status = 19;
    RequestSubscribeJobSync subscribe_job_sync = 20;
    RequestSubscribeResourceMonitor subscribe_resource_monitor = 21;
    RequestSubscribeTaskMonitor subscribe_task_monitor = 22;
    RequestUnsubscribe unsubscribe = 23;
  }
}
// The server sends exactly one Response for each Request it receives.
message Response {
  // Field numbers 4 and 18 are absent from this message's numbering. They are
  // reserved so that they cannot be assigned to new fields by accident.
  // NOTE(review): presumably they belonged to fields that were removed;
  // confirm against the file history.
  reserved 4, 18;

  // The request ID will match the request that prompted this response.
  required int32 request_id = 1;

  // If true, this is a successful response to a request. If false, an error
  // occurred.
  required bool is_success = 2;

  // If this is an error response, this field contains information about the
  // error.
  optional string error_message = 3;

  // The body is optional. Some commands only need to return success/error
  // data.
  oneof Body {
    // CAPTCHA solver bodies.
    CaptchaSolver solver = 22;
    ResponseNewCaptchaSolver new_solver = 24;
    ResponseListCaptchaSolvers list_captcha_solvers = 23;

    // Domain login bodies.
    DomainLogin domain_login = 5;
    DomainLoginUser domain_login_user = 6;
    ResponseListDomainLogins list_domain_logins = 9;

    // Job bodies.
    Job job = 7;
    ResponseNewJob new_job = 14;
    ResponseListItems list_items = 10;
    ResponseListJobs list_jobs = 11;

    // Schedule bodies.
    Schedule schedule = 19;
    ResponseNewSchedule new_schedule = 21;
    ResponseListSchedules list_schedules = 20;
    ResponseListScheduleJobs list_schedule_jobs = 25;

    // Policy bodies.
    Policy policy = 8;
    ResponseNewPolicy new_policy = 15;
    ResponseListPolicies list_policies = 12;

    // Rate limit, subscription, and profiling bodies.
    ResponseListRateLimits list_rate_limits = 13;
    ResponseNewSubscription new_subscription = 16;
    ResponsePerformanceProfile performance_profile = 17;
  }
}
// Command: delete a CAPTCHA solver.
message RequestDeleteCaptchaSolver {
  // Identifier of the solver to delete.
  optional bytes solver_id = 1;
}
// Command: fetch a CAPTCHA solver by its ID.
message RequestGetCaptchaSolver {
  // Identifier of the solver to fetch.
  required bytes solver_id = 1;
}
// Command: list CAPTCHA solvers.
message RequestListCaptchaSolvers {
  // Pagination options for the solver list.
  optional Page page = 1;
}
// Response body carrying a list of CAPTCHA solvers.
message ResponseListCaptchaSolvers {
  // The solvers included in this response.
  repeated CaptchaSolver solvers = 1;

  // Total number of solvers (not the count of solvers included in this
  // response).
  optional int32 total = 2;
}
// Command: create or modify a CAPTCHA solver.
message RequestSetCaptchaSolver {
  // The solver to create or modify.
  optional CaptchaSolver solver = 1;
}
// Response body carrying the ID of a newly created CAPTCHA solver.
message ResponseNewCaptchaSolver {
  // Identifier of the new solver.
  required bytes solver_id = 1;
}
// Command: delete a credential and all of its passwords.
message RequestDeleteDomainLogin {
  // Domain whose login is deleted.
  optional string domain = 1;
}
// Command: fetch the credential data for the specified domain.
message RequestGetDomainLogin {
  // Domain whose credential data is requested.
  required string domain = 1;
}
// Command: list domain logins.
message RequestListDomainLogins {
  // Pagination options for the domain login list.
  optional Page page = 1;
}
// Response body carrying a list of domain logins.
message ResponseListDomainLogins {
  // The domain logins included in this response.
  repeated DomainLogin logins = 1;

  // Total number of domains (not the count of domains included in this
  // response).
  optional int32 total = 2;
}
// Command: add or update the metadata of a domain login.
message RequestSetDomainLogin {
  // The domain login to add or update.
  optional DomainLogin login = 1;
}
// Command: delete a job together with all of its items.
message RequestDeleteJob {
  // Identifier of the job to delete.
  required bytes job_id = 1;
}
// Command: fetch the metadata of the specified job.
message RequestGetJob {
  // Identifier of the job to fetch.
  required bytes job_id = 1;
}
// Command: list jobs, optionally filtered.
message RequestListJobs {
  // Pagination options for the job list.
  optional Page page = 1;

  // Restrict the list to jobs started after this datetime. (RFC-3339)
  optional string started_after = 2;

  // Restrict the list to jobs matching this tag.
  optional string tag = 3;

  // Restrict the list to jobs belonging to this schedule.
  optional bytes schedule_id = 4;
}
// Response body carrying a list of jobs.
message ResponseListJobs {
  // The jobs included in this response.
  repeated Job jobs = 1;

  // Total number of jobs (not the count of jobs included in this response).
  optional int32 total = 2;
}
// Command: create a job, or update the state of an existing job.
//
// Name, seeds, tags, and policy may only be given for new jobs. After a job
// has been created, only its run state may be changed.
message RequestSetJob {
  // When specified, the state of that job is updated. Omit when creating a
  // new job.
  optional bytes job_id = 1;

  // Run state to set for the job.
  optional JobRunState run_state = 2;

  // Policy for this job. (Only applicable when creating a new job.)
  optional bytes policy_id = 3;

  // Seeds for this job. (Only applicable when creating a new job.)
  repeated string seeds = 4;

  // Name for this job. (Only applicable when creating a new job.)
  optional string name = 5;

  // Tags for this job. (Only applicable when creating a new job.)
  repeated string tags = 6;
}
// Response body carrying the ID of a newly created job.
message ResponseNewJob {
  // Identifier of the new job.
  required bytes job_id = 1;
}
// Command: list the items (crawl responses) of a job.
message RequestGetJobItems {
  // Items belonging to this job are returned.
  required bytes job_id = 1;

  // NOTE(review): the three include_* flags are undocumented in the original.
  // Presumably they select whether successful, HTTP-error, and exception
  // items are included; confirm with the server code.
  optional bool include_success = 2;
  optional bool include_error = 3;
  optional bool include_exception = 4;

  // When true, some items may have compressed response bodies. The caller
  // should check the compression flag of each item and handle it accordingly.
  optional bool compression_ok = 5 [default = true];

  // Pagination options for job items.
  optional Page page = 6;
}
// Response body carrying a list of items (crawl responses) for a job.
message ResponseListItems {
  // The items included in this response.
  repeated CrawlResponse items = 1;

  // Total number of items (not the count of items included in this response).
  optional int32 total = 2;
}
// Command: delete a job schedule.
message RequestDeleteSchedule {
  // Identifier of the schedule to delete.
  required bytes schedule_id = 1;
}
// Command: fetch the metadata of the specified job schedule.
message RequestGetSchedule {
  // Identifier of the schedule to fetch.
  required bytes schedule_id = 1;
}
// Command: list job schedules.
message RequestListSchedules {
  // Pagination options for the schedule list.
  optional Page page = 1;
}
// Response body carrying a list of job schedules.
message ResponseListSchedules {
  // The schedules included in this response.
  repeated Schedule schedules = 1;

  // Total number of job schedules (not just the ones included in this
  // response).
  optional int32 total = 2;
}
// Command: list the jobs of a given schedule.
message RequestListScheduleJobs {
  // Identifier of the schedule whose jobs are listed.
  required bytes schedule_id = 1;

  // Pagination options for the job list.
  optional Page page = 2;
}
// Response body carrying a list of jobs for a given schedule.
message ResponseListScheduleJobs {
  // The jobs included in this response.
  repeated Job jobs = 1;

  // Total number of jobs associated with this schedule (not just the ones
  // included in this response).
  optional int32 total = 2;
}
// Command: create or update a job schedule.
message RequestSetSchedule {
  // The schedule to create or update.
  optional Schedule schedule = 1;
}
// Response body carrying the ID of a newly created job schedule.
message ResponseNewSchedule {
  // Identifier of the new schedule.
  required bytes schedule_id = 1;
}
// Command: delete a policy.
message RequestDeletePolicy {
  // Identifier of the policy to delete.
  required bytes policy_id = 1;
}
// Command: fetch a policy.
message RequestGetPolicy {
  // Identifier of the policy to fetch.
  required bytes policy_id = 1;
}
// Command: list policies.
message RequestListPolicies {
  // Pagination options for the policy list.
  optional Page page = 1;
}
// Response body carrying a list of policies.
message ResponseListPolicies {
  // The policies included in this response.
  repeated Policy policies = 1;

  // Total number of policies (not the count of policies included in this
  // response).
  optional int32 total = 2;
}
// Command: create or update a crawl policy.
//
// When the policy's ID is blank, the server creates a new policy. When the ID
// is not blank, the server updates the corresponding policy.
message RequestSetPolicy {
  // The policy to create or update.
  required Policy policy = 1;
}
// Response body carrying the ID of a newly created policy.
message ResponseNewPolicy {
  // Identifier of the new policy.
  required bytes policy_id = 1;
}
// Model of a rate limit.
//
// When domain is not specified, the global rate limit is modified. When delay
// is not specified, the rate limit for the specified domain is deleted.
// Either delay or domain must be specified: deleting the global limit is not
// allowed.
//
// A name sent by the client is ignored by the server. The server always sends
// a name to the client.
message RateLimit {
  // Name of the rate limit. (Read-only)
  optional string name = 1;

  // Delay, in seconds, between requests associated with this rate-limit
  // token.
  optional float delay = 2;

  // The rate-limit token. (Read-only)
  optional bytes token = 3;

  // Name of the domain this rate limit applies to.
  optional string domain = 4;
}
// Command: list rate limits.
message RequestListRateLimits {
  // Pagination options for the rate limit list.
  optional Page page = 1;
}
// Response body carrying a list of rate limits.
message ResponseListRateLimits {
  // The rate limits included in this response.
  repeated RateLimit rate_limits = 1;

  // Total number of rate limits (not the count of rate limits included in
  // this response).
  optional int32 total = 2;
}
// Command: set a rate limit.
message RequestSetRateLimit {
  // Domain the rate limit is set for.
  optional string domain = 1;

  // Delay, in seconds, between requests to this domain.
  optional float delay = 2;
}
// Command: request a performance profile.
message RequestPerformanceProfile {
  // Amount of time to spend collecting samples.
  optional double duration = 1 [default = 5.0];

  // NOTE(review): undocumented in the original. Presumably the profile column
  // that results are sorted by; confirm with the server code.
  optional string sort_by = 2 [default = "total_time"];

  // NOTE(review): undocumented in the original. Presumably limits the result
  // to the top N functions; confirm with the server code.
  optional int32 top_n = 3;
}
// Performance profile data for one function.
//
// NOTE(review): the fields below are undocumented in the original. Their names
// suggest a source location, call counts, and timings for the function; the
// time units are not stated. Confirm with the server code.
message PerformanceProfileFunction {
  optional string file = 1;
  optional int32 line_number = 2;
  optional string function = 3;
  optional int32 calls = 4;
  optional int32 non_recursive_calls = 5;
  optional double total_time = 6;
  optional double cumulative_time = 7;
}
// Response body carrying performance profile data.
//
// NOTE(review): the totals are undocumented in the original; presumably they
// aggregate over the whole profile. Confirm with the server code.
message ResponsePerformanceProfile {
  optional int32 total_calls = 1;
  optional double total_time = 2;

  // Per-function profile entries.
  repeated PerformanceProfileFunction functions = 3;
}
// Command: subscribe to job status, e.g. run state, statistics, etc.
message RequestSubscribeJobStatus {
  // Minimum amount of time between job status messages. When several job
  // statuses change in rapid succession, the changes are coalesced into a
  // single event.
  optional double min_interval = 1 [default = 1.0];
}
// Command: subscribe to the crawl responses of a job in order to synchronize
// them.
message RequestSubscribeJobSync {
  // Identifier of the job to subscribe to.
  required bytes job_id = 1;

  // When provided, an earlier sync subscription is resumed at the point this
  // token represents. (Tokens are obtained from previously received sync
  // items.)
  optional bytes sync_token = 2;

  // When true, some response bodies may be compressed. The caller should
  // check the compression flag on each item and handle it accordingly.
  optional bool compression_ok = 3 [default = true];
}
// Item sent while syncing a crawl job. One event is sent per crawl response.
message SyncItem {
  // The item the crawl job downloaded.
  required CrawlResponse item = 1;

  // Sync token associated with this item. It can be used later to resume a
  // subscription from the same point.
  required bytes token = 2;
}
// Envelope for every message the server sends. It holds either a response to
// a command or a subscription event.
message ServerMessage {
  oneof MessageType {
    // A subscription event.
    Event event = 1;

    // A response to a command.
    Response response = 2;
  }
}
// Command: subscribe to resource monitoring, e.g. CPU usage, memory usage,
// downloads/sec, etc.
message RequestSubscribeResourceMonitor {
  // NOTE(review): undocumented in the original. Presumably the number of past
  // resource frames to send when the subscription starts; confirm with the
  // server code.
  optional int32 history = 1 [default = 300];
}
// Command: subscribe to periodic snapshots of the running tasks.
message RequestSubscribeTaskMonitor {
  // Number of seconds in between task snapshots.
  optional double period = 1 [default = 3];
}
// Response body carrying the ID of a newly created subscription.
message ResponseNewSubscription {
  // Identifier of the new subscription.
  required int32 subscription_id = 1;
}
// Command: close the specified subscription.
message RequestUnsubscribe {
  // Identifier of the subscription to close.
  required int32 subscription_id = 1;
}
// Event body sent when the server ends a subscription.
message SubscriptionClosed {
  // Why the subscription ended.
  // NOTE(review): the values are undocumented in the original; the names
  // suggest normal completion versus failure. Confirm with the server code.
  enum Reason {
    COMPLETE = 1;
    ERROR = 2;
  }

  // Reason the subscription was closed.
  required Reason reason = 1;

  // NOTE(review): undocumented in the original. Presumably a human-readable
  // explanation accompanying the reason; confirm with the server code.
  optional string message = 2;
}
// One snapshot of data about resource consumption.
message ResourceFrame {
  // Time at which this snapshot was created. (RFC-3339)
  optional string timestamp = 1;

  // CPU usage information.
  repeated ResourceFrameCpu cpus = 2;

  // Memory usage information.
  optional ResourceFrameMemory memory = 3;

  // Disk usage information.
  repeated ResourceFrameDisk disks = 4;

  // Network interface usage information.
  repeated ResourceFrameNetwork networks = 5;

  // Information about the current crawl jobs.
  repeated ResourceFrameJob jobs = 6;

  // Total number of in-flight requests made by the downloader.
  optional int32 current_downloads = 7;

  // Maximum number of in-flight requests the downloader permits.
  optional int32 maximum_downloads = 8;

  // Number of items buffered inside the rate limiter.
  optional int32 rate_limiter = 9;
}
// CPU usage entry of a resource frame.
message ResourceFrameCpu {
  // CPU utilization as a percentage (0.0-100.0).
  optional double usage = 1;
}
// Resources that one crawl job is using.
message ResourceFrameJob {
  // Identifier of the job.
  optional bytes job_id = 1;

  // Name of the job.
  optional string name = 2;

  // Number of items this job is currently downloading.
  optional int32 current_downloads = 3;
}
// Disk usage entry of a resource frame.
message ResourceFrameDisk {
  // Mountpoint of the file system.
  optional string mount = 1;

  // Space used on the file system, in bytes.
  optional int64 used = 2;

  // Total space on the file system, in bytes.
  optional int64 total = 3;
}
// Memory usage entry of a resource frame.
message ResourceFrameMemory {
  // Memory in use, in bytes.
  optional int64 used = 1;

  // Total amount of memory on the system, in bytes.
  optional int64 total = 2;
}
// Network usage entry of a resource frame.
message ResourceFrameNetwork {
  // Name of the network interface.
  optional string name = 1;

  // Number of bytes sent on this interface.
  optional int64 sent = 2;

  // Number of bytes received on this interface.
  optional int64 received = 3;
}
// One node in a tree of Trio tasks, together with its descendants.
message TaskTree {
  // Name of the Trio task.
  optional string name = 1;

  // Child tasks that belong to this task.
  repeated TaskTree subtasks = 2;
}