diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 0b898836157..fd124656302 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -33,6 +33,7 @@ b12cf444edea15da6274975e1b2ca6a7fce2a090 364c27f5d18ab9dd31825e67a93efabecad06823 d8b4de9076531dd13bdffa20cc10c72290a52356 bdf06bca7534fbc0c4fc3cee3408a51a22615226 +eefc649e17086fbc200e4da114ea673825e79864 # ocp-indent d018d26d6acd4707a23288b327b49e44f732725e diff --git a/doc/content/toolstack/features/Tracing/index.md b/doc/content/toolstack/features/Tracing/index.md index c54441bbb68..4c90c570699 100644 --- a/doc/content/toolstack/features/Tracing/index.md +++ b/doc/content/toolstack/features/Tracing/index.md @@ -81,14 +81,17 @@ and also assist newcomers in onboarding to the project. By default, traces are generated locally in the `/var/log/dt` directory. You can copy or forward these traces to another location or endpoint using the `xs-trace` tool. For example, if you have -a *Jaeger* server running locally, you can run: +a *Jaeger* server running locally, you can copy a trace to an endpoint by running: ```sh -xs-trace /var/log/dt/ http://127.0.0.1:9411/api/v2/spans +xs-trace cp /var/log/dt/ http://127.0.0.1:9411/api/v2/spans ``` You will then be able to visualize the traces in Jaeger. +The `xs-trace` tool also supports trace files in `.ndjson` and compressed `.zst` formats, so +you can copy or forward these files directly as well. + ### Tagging Trace Sessions for Easier Search #### Specific attributes diff --git a/dune-project b/dune-project index 8d329288de3..d855636c0ac 100644 --- a/dune-project +++ b/dune-project @@ -52,6 +52,13 @@ (name tgroup) (depends xapi-log xapi-stdext-unix)) +(package + (name rate-limit) + (synopsis "Simple token bucket-based rate-limiting") + (depends + (ocaml (>= 4.12)) + xapi-log xapi-stdext-unix)) + (package (name xml-light2)) @@ -586,6 +593,7 @@ (depends qcow-stream cmdliner + yojson ) ) @@ -593,7 +601,17 @@ (name varstored-guard)) (package - (name uuid)) + (name uuid) + (synopsis "Library used by xapi to generate database UUIDs") + (description + "This library allows xapi to use UUIDs with phantom types to avoid mixing UUIDs from different classes of objects. 
It's based on `uuidm`.") + (depends + (alcotest :with-test) + (fmt :with-test) + ptime + uuidm + ) +) (package (name stunnel) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index bfe326e4356..63eb2aed2fb 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -10535,6 +10535,7 @@ let all_system = ; Datamodel_vm_group.t ; Datamodel_host_driver.t ; Datamodel_driver_variant.t + ; Datamodel_rate_limit.t ] (* If the relation is one-to-many, the "many" nodes (one edge each) must come before the "one" node (many edges) *) @@ -10786,6 +10787,7 @@ let expose_get_all_messages_for = ; _observer ; _host_driver ; _driver_variant + ; _rate_limit ] let no_task_id_for = [_task; (* _alert; *) _event] @@ -11142,6 +11144,10 @@ let http_actions = ; ("put_bundle", (Put, Constants.put_bundle_uri, true, [], _R_POOL_OP, [])) ] +(* Actions that incorporate the rate limiter from Xapi_rate_limiting within their handler + For now, just RPC calls *) +let custom_rate_limit_http_actions = ["post_root"; "post_RPC2"; "post_jsonrpc"] + (* these public http actions will NOT be checked by RBAC *) (* they are meant to be used in exceptional cases where RBAC is already *) (* checked inside them, such as in the XMLRPC (API) calls *) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 12c548580b1..efe0cb5f2cd 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -315,6 +315,8 @@ let _host_driver = "Host_driver" let _driver_variant = "Driver_variant" +let _rate_limit = "Rate_limit" + let update_guidances = Enum ( "update_guidances" diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 29b5610b226..38bf68a60d0 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1209,6 +1209,41 @@ let license_remove = to the unlicensed edition" ~allowed_roles:_R_POOL_OP () +let host_numa_affinity_policy = + Enum + ( "host_numa_affinity_policy" + , [ + ("any", "VMs are spread across all available NUMA nodes") + ; ( "best_effort" + , "VMs are placed on the smallest number of NUMA nodes that they fit \ + using soft-pinning, but the policy doesn't guarantee a balanced \ + placement, falling back to the 'any' policy." 
+ ) + ; ( "default_policy" + , "Use the NUMA affinity policy that is the default for the current \ + version" + ) + ] + ) + +let latest_synced_updates_applied_state = + Enum + ( "latest_synced_updates_applied_state" + , [ + ( "yes" + , "The host is up to date with the latest updates synced from remote \ + CDN" + ) + ; ( "no" + , "The host is outdated with the latest updates synced from remote CDN" + ) + ; ( "unknown" + , "If the host is up to date with the latest updates synced from \ + remote CDN is unknown" + ) + ] + ) + let create_params = [ { @@ -1398,6 +1433,51 @@ let create_params = ; param_release= numbered_release "25.32.0-next" ; param_default= Some (VMap []) } + ; { + param_type= Bool + ; param_name= "https_only" + ; param_doc= + "updates firewall to open or close port 80 depending on the value" + ; param_release= numbered_release "25.38.0-next" + ; param_default= Some (VBool false) + } + ; { + param_type= host_numa_affinity_policy + ; param_name= "numa_affinity_policy" + ; param_doc= "NUMA-aware VM memory and vCPU placement policy" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VEnum "default_policy") + } + ; { + param_type= latest_synced_updates_applied_state + ; param_name= "latest_synced_updates_applied" + ; param_doc= + "Default as 'unknown', 'yes' if the host is up to date with updates \ + synced from remote CDN, otherwise 'no'" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VSet []) + } + ; { + param_type= Set update_guidances + ; param_name= "pending_guidances_full" + ; param_doc= + "The set of pending full guidances after applying updates, which a \ + user should follow to make some updates, e.g. specific hardware \ + drivers or CPU features, fully effective, but the 'average user' \ + doesn't need to" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VSet []) + } + ; { + param_type= Set update_guidances + ; param_name= "pending_guidances_recommended" + ; param_doc= + "The set of pending recommended guidances after applying updates, \ + which most users should follow to make the updates effective, but if \ + not followed, will not cause a failure" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VSet []) + } ] let create = @@ -1416,6 +1496,7 @@ let create = --console_idle_timeout --ssh_auto_mode options to allow them to be \ configured for new host" ) + ; (Changed, "25.38.0-next", "Added --https_only to disable http") ] ~versioned_params:create_params ~doc:"Create a new host record" ~result:(Ref _host, "Reference to the newly created host object.") @@ -2302,23 +2383,6 @@ let cleanup_pool_secret = ] ~allowed_roles:_R_LOCAL_ROOT_ONLY ~hide_from_docs:true () -let host_numa_affinity_policy = - Enum - ( "host_numa_affinity_policy" - , [ - ("any", "VMs are spread across all available NUMA nodes") - ; ( "best_effort" - , "VMs are placed on the smallest number of NUMA nodes that they fit \ - using soft-pinning, but the policy doesn't guarantee a balanced \ - placement, falling back to the 'any' policy." - ) - ; ( "default_policy" - , "Use the NUMA affinity policy that is the default for the current \ - version" - ) - ] - ) - let set_numa_affinity_policy = call ~name:"set_numa_affinity_policy" ~lifecycle:[] ~doc:"Set VM placement NUMA affinity policy" @@ -2526,24 +2590,6 @@ let update_firewalld_service_status = status." 
~allowed_roles:_R_POOL_OP () -let latest_synced_updates_applied_state = - Enum - ( "latest_synced_updates_applied_state" - , [ - ( "yes" - , "The host is up to date with the latest updates synced from remote \ - CDN" - ) - ; ( "no" - , "The host is outdated with the latest updates synced from remote CDN" - ) - ; ( "unknown" - , "If the host is up to date with the latest updates synced from \ - remote CDN is unknown" - ) - ] - ) - let get_tracked_user_agents = call ~name:"get_tracked_user_agents" ~lifecycle:[] ~doc: diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index a98e52d1dd0..65b893cd855 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -1,4 +1,6 @@ let prototyped_of_class = function + | "Rate_limit" -> + Some "25.39.0" | "Driver_variant" -> Some "25.2.0" | "Host_driver" -> @@ -13,6 +15,14 @@ let prototyped_of_class = function None let prototyped_of_field = function + | "Rate_limit", "fill_rate" -> + Some "25.39.0" + | "Rate_limit", "burst_size" -> + Some "25.39.0" + | "Rate_limit", "client_id" -> + Some "25.39.0" + | "Rate_limit", "uuid" -> + Some "25.39.0" | "Driver_variant", "status" -> Some "25.2.0" | "Driver_variant", "priority" -> diff --git a/ocaml/idl/datamodel_rate_limit.ml b/ocaml/idl/datamodel_rate_limit.ml new file mode 100644 index 00000000000..f870ba73a1a --- /dev/null +++ b/ocaml/idl/datamodel_rate_limit.ml @@ -0,0 +1,40 @@ +(* + * Copyright (C) 2023 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +open Datamodel_types +open Datamodel_common +open Datamodel_roles + +let lifecycle = [] + +let t = + create_obj ~name:_rate_limit ~descr:"Rate limiting policy for a XAPI client" + ~doccomments:[] ~gen_constructor_destructor:true ~gen_events:true + ~in_db:true ~lifecycle:[] ~persist:PersistEverything ~in_oss_since:None + ~messages_default_allowed_roles:_R_POOL_ADMIN + ~contents: + ([uid _rate_limit ~lifecycle] + @ [ + field ~qualifier:StaticRO ~ty:String ~lifecycle "client_id" + "An identifier for the rate limited client" ~ignore_foreign_key:true + ~default_value:(Some (VString "")) + ; field ~qualifier:StaticRO ~ty:Float ~lifecycle "burst_size" + "Amount of tokens that can be consumed in one burst" + ~ignore_foreign_key:true ~default_value:(Some (VFloat 0.)) + ; field ~qualifier:StaticRO ~ty:Float ~lifecycle "fill_rate" + "Tokens added to token bucket per second" ~ignore_foreign_key:true + ~default_value:(Some (VFloat 0.)) + ] + ) + ~messages:[] () diff --git a/ocaml/idl/dune b/ocaml/idl/dune index bc22a311cd7..eb55c786d40 100644 --- a/ocaml/idl/dune +++ b/ocaml/idl/dune @@ -7,7 +7,7 @@ datamodel_values datamodel_schema datamodel_certificate datamodel_diagnostics datamodel_repository datamodel_lifecycle datamodel_vtpm datamodel_observer datamodel_vm_group api_version - datamodel_host_driver datamodel_driver_variant) + datamodel_host_driver datamodel_driver_variant datamodel_rate_limit) (libraries rpclib.core sexplib0 @@ -64,9 +64,9 @@ ) (tests - (names schematest test_datetimes) + (names schematest test_datetimes test_host) (modes exe) - (modules schematest test_datetimes) + (modules schematest test_datetimes test_host) (libraries astring rpclib.core diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 9411d1c3b42..2b70bf725b6 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "3b20f4304cfaaa7b6213af91ae632e64" +let last_known_schema_hash = "4708cb1f0cf7c1231c6958590ee1ed04" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/idl/test_host.ml b/ocaml/idl/test_host.ml new file mode 100644 index 00000000000..b70a9cb72dc --- /dev/null +++ b/ocaml/idl/test_host.ml @@ -0,0 +1,31 @@ +module DT = Datamodel_types +module FieldSet = Astring.String.Set + +let recent_field (f : DT.field) = f.lifecycle.transitions = [] + +let rec field_full_names = function + | DT.Field f -> + if recent_field f then + f.full_name |> String.concat "_" |> Seq.return + else + Seq.empty + | DT.Namespace (_, xs) -> + xs |> List.to_seq |> Seq.concat_map field_full_names + +let () = + let create_params = + Datamodel_host.create_params + |> List.map (fun p -> p.DT.param_name) + |> FieldSet.of_list + and fields = + Datamodel_host.t.contents + |> List.to_seq + |> Seq.concat_map field_full_names + |> FieldSet.of_seq + in + let missing_in_create_params = FieldSet.diff fields create_params in + if not (FieldSet.is_empty missing_in_create_params) then ( + Format.eprintf "Missing fields in create_params: %a@." 
FieldSet.dump + missing_in_create_params ; + exit 1 + ) diff --git a/ocaml/idl/test_host.mli b/ocaml/idl/test_host.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml new file mode 100644 index 00000000000..a80af32a050 --- /dev/null +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -0,0 +1,177 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type rate_limit_data = { + bucket: Token_bucket.t + ; process_queue: + (float * (unit -> unit)) Queue.t (* contains token cost and callback *) + ; process_queue_lock: Mutex.t + ; worker_thread_cond: Condition.t + ; should_terminate: bool ref (* signal termination to worker thread *) + ; worker_thread: Thread.t +} +[@@warning "-69"] + +module StringMap = Map.Make (String) + +module D = Debug.Make (struct let name = "bucket_table" end) + +type t = rate_limit_data StringMap.t Atomic.t + +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute + +let create () = Atomic.make StringMap.empty + +let mem t ~user_agent = + let map = Atomic.get t in + StringMap.mem user_agent map + +(* The worker thread is responsible for calling the callback when the token + amount becomes available *) +let rec worker_loop ~bucket ~process_queue ~process_queue_lock + ~worker_thread_cond ~should_terminate = + let process_item cost callback = + Token_bucket.delay_then_consume bucket cost ; + callback () + in + Mutex.lock process_queue_lock ; + while Queue.is_empty process_queue && not !should_terminate do + Condition.wait worker_thread_cond process_queue_lock + done ; + let item_opt = Queue.take_opt process_queue in + Mutex.unlock process_queue_lock ; + match item_opt with + | None -> + (* Queue is empty only when termination was signalled *) + () + | Some (cost, callback) -> + process_item cost callback ; + worker_loop ~bucket ~process_queue ~process_queue_lock ~worker_thread_cond + ~should_terminate + +(* TODO: Indicate failure reason - did we get invalid config or try to add an + already present user_agent? 
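+ For now the caller only sees [false] in either case; returning a result or a + polymorphic variant (e.g. [`Duplicate_user_agent | `Invalid_config]) would + let call sites report the precise reason.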
*) +let add_bucket t ~user_agent ~burst_size ~fill_rate = + let map = Atomic.get t in + if StringMap.mem user_agent map then + false + else + match Token_bucket.create ~burst_size ~fill_rate with + | Some bucket -> + let process_queue = Queue.create () in + let process_queue_lock = Mutex.create () in + let worker_thread_cond = Condition.create () in + let should_terminate = ref false in + let worker_thread = + Thread.create + (fun () -> + worker_loop ~bucket ~process_queue ~process_queue_lock + ~worker_thread_cond ~should_terminate + ) + () + in + let data = + { + bucket + ; process_queue + ; process_queue_lock + ; worker_thread_cond + ; should_terminate + ; worker_thread + } + in + let updated_map = StringMap.add user_agent data map in + Atomic.set t updated_map ; true + | None -> + false + +let delete_bucket t ~user_agent = + let map = Atomic.get t in + match StringMap.find_opt user_agent map with + | None -> + () + | Some data -> + Mutex.lock data.process_queue_lock ; + data.should_terminate := true ; + Condition.signal data.worker_thread_cond ; + Mutex.unlock data.process_queue_lock ; + Atomic.set t (StringMap.remove user_agent map) + +let try_consume t ~user_agent amount = + let map = Atomic.get t in + match StringMap.find_opt user_agent map with + | None -> + false + | Some data -> + Token_bucket.consume data.bucket amount + +let peek t ~user_agent = + let map = Atomic.get t in + Option.map + (fun contents -> Token_bucket.peek contents.bucket) + (StringMap.find_opt user_agent map) + +(* The callback should return quickly - if it is a longer task it is + responsible for creating a thread to do the task *) +let submit t ~user_agent ~callback amount = + let map = Atomic.get t in + match StringMap.find_opt user_agent map with + | None -> + D.debug "Found no rate limited user_agent for %s, returning" user_agent ; + callback () + | Some {bucket; process_queue; process_queue_lock; worker_thread_cond; _} -> + let run_immediately = + with_lock process_queue_lock (fun () -> + let immediate = + Queue.is_empty process_queue && Token_bucket.consume bucket amount + in + if not immediate then + Queue.add (amount, callback) process_queue ; + Condition.signal worker_thread_cond ; + immediate + ) + in + if run_immediately then callback () + +(* Block and execute on the same thread *) +let submit_sync t ~user_agent ~callback amount = + let map = Atomic.get t in + match StringMap.find_opt user_agent map with + | None -> + callback () + | Some bucket_data -> ( + let channel_opt = + with_lock bucket_data.process_queue_lock (fun () -> + if + Queue.is_empty bucket_data.process_queue + && Token_bucket.consume bucket_data.bucket amount + then + None (* Can run callback immediately after releasing lock *) + else + (* Rate limited, need to retrieve function result via channel *) + let channel = Event.new_channel () in + Queue.add + (amount, fun () -> Event.sync (Event.send channel ())) + bucket_data.process_queue ; + Condition.signal bucket_data.worker_thread_cond ; + Some channel + ) + in + match channel_opt with + | None -> + callback () + | Some channel -> + Event.sync (Event.receive channel) ; + callback () + ) diff --git a/ocaml/libs/rate-limit/bucket_table.mli b/ocaml/libs/rate-limit/bucket_table.mli new file mode 100644 index 00000000000..87bb5f49bd9 --- /dev/null +++ b/ocaml/libs/rate-limit/bucket_table.mli @@ -0,0 +1,51 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public 
License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** Hash table mapping client identifiers to their token buckets for rate limiting. *) +type t + +val create : unit -> t +(** [create ()] creates a new empty bucket table. *) + +val add_bucket : + t -> user_agent:string -> burst_size:float -> fill_rate:float -> bool +(** [add_bucket table ~user_agent ~burst_size ~fill_rate] adds a token bucket + for the given user agent. Returns [false] if a bucket already exists, or if + the bucket configuration is invalid, e.g. negative/zero fill rate. *) + +val mem : t -> user_agent:string -> bool +(** [mem table ~user_agent] returns whether [user_agent] has an associated + token bucket in the bucket table *) + +val peek : t -> user_agent:string -> float option +(** [peek table ~user_agent] returns the current token count for the user agent, + or [None] if no bucket exists. *) + +val delete_bucket : t -> user_agent:string -> unit +(** [delete_bucket table ~user_agent] removes the bucket for the user agent. *) + +val try_consume : t -> user_agent:string -> float -> bool +(** [try_consume table ~user_agent amount] attempts to consume tokens. + Returns [true] on success, [false] if insufficient tokens. *) + +val submit : t -> user_agent:string -> callback:(unit -> unit) -> float -> unit +(** [submit table ~user_agent ~callback amount] submits a callback to be executed + under rate limiting. If tokens are immediately available and no callbacks are + queued, the callback runs synchronously. Otherwise, it is enqueued and will + be executed by a worker thread when tokens become available. Returns immediately. *) + +val submit_sync : t -> user_agent:string -> callback:(unit -> 'a) -> float -> 'a +(** [submit_sync table ~user_agent ~callback amount] submits a callback to be + executed under rate limiting and blocks until it completes, returning the + callback's result. 
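+ For example (a sketch, assuming a table [t] with a bucket already registered + for the ["cli"] user agent, and where [handle_request] and [request] stand in + for the caller's own code), a handler could charge one token per call: + {[ let response = Bucket_table.submit_sync t ~user_agent:"cli" + ~callback:(fun () -> handle_request request) 1.0 ]}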
*) diff --git a/ocaml/libs/rate-limit/dune b/ocaml/libs/rate-limit/dune new file mode 100644 index 00000000000..3436c398228 --- /dev/null +++ b/ocaml/libs/rate-limit/dune @@ -0,0 +1,7 @@ +(library + (name rate_limit) + (public_name rate-limit) + (libraries threads.posix mtime mtime.clock.os xapi-log xapi-stdext-threads clock) +) + + diff --git a/ocaml/libs/rate-limit/test/dune b/ocaml/libs/rate-limit/test/dune new file mode 100644 index 00000000000..ca4afbe782a --- /dev/null +++ b/ocaml/libs/rate-limit/test/dune @@ -0,0 +1,4 @@ +(tests + (names test_token_bucket test_bucket_table) + (package rate-limit) + (libraries rate_limit alcotest qcheck-core qcheck-alcotest mtime mtime.clock.os fmt xapi-log threads.posix)) \ No newline at end of file diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.ml b/ocaml/libs/rate-limit/test/test_bucket_table.ml new file mode 100644 index 00000000000..7b214b9b55a --- /dev/null +++ b/ocaml/libs/rate-limit/test/test_bucket_table.ml @@ -0,0 +1,447 @@ +open Rate_limit + +let test_create () = + let table = Bucket_table.create () in + Alcotest.(check (option (float 0.0))) + "Empty table returns None for peek" None + (Bucket_table.peek table ~user_agent:"test") + +let test_add_bucket () = + let table = Bucket_table.create () in + let success = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + Alcotest.(check bool) "Adding valid bucket should succeed" true success ; + Alcotest.(check (option (float 0.1))) + "Peek should return burst_size" (Some 10.0) + (Bucket_table.peek table ~user_agent:"agent1") + +let test_add_bucket_invalid () = + let table = Bucket_table.create () in + let success = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:0.0 + in + Alcotest.(check bool) + "Adding bucket with zero fill rate should fail" false success ; + let success_neg = + Bucket_table.add_bucket table ~user_agent:"agent2" ~burst_size:10.0 + ~fill_rate:(-1.0) + in + Alcotest.(check bool) + "Adding bucket with negative fill rate should fail" false success_neg + +let test_delete_bucket () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + Alcotest.(check (option (float 0.1))) + "Bucket exists before delete" (Some 10.0) + (Bucket_table.peek table ~user_agent:"agent1") ; + Bucket_table.delete_bucket table ~user_agent:"agent1" ; + Alcotest.(check (option (float 0.0))) + "Bucket removed after delete" None + (Bucket_table.peek table ~user_agent:"agent1") + +let test_delete_nonexistent () = + let table = Bucket_table.create () in + Bucket_table.delete_bucket table ~user_agent:"nonexistent" ; + Alcotest.(check pass) "Deleting nonexistent bucket should not raise" () () + +let test_try_consume () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + let success = Bucket_table.try_consume table ~user_agent:"agent1" 3.0 in + Alcotest.(check bool) "Consuming available tokens should succeed" true success ; + Alcotest.(check (option (float 0.1))) + "Tokens reduced after consume" (Some 7.0) + (Bucket_table.peek table ~user_agent:"agent1") + +let test_try_consume_insufficient () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:5.0 + ~fill_rate:1.0 + in + let success = Bucket_table.try_consume table ~user_agent:"agent1" 10.0 in + Alcotest.(check bool) + "Consuming more 
than available should fail" false success ; + Alcotest.(check (option (float 0.1))) + "Tokens unchanged after failed consume" (Some 5.0) + (Bucket_table.peek table ~user_agent:"agent1") + +let test_try_consume_nonexistent () = + let table = Bucket_table.create () in + let success = Bucket_table.try_consume table ~user_agent:"nonexistent" 1.0 in + Alcotest.(check bool) + "Consuming from nonexistent bucket should fail" false success + +let test_peek_nonexistent () = + let table = Bucket_table.create () in + Alcotest.(check (option (float 0.0))) + "Peek nonexistent bucket returns None" None + (Bucket_table.peek table ~user_agent:"nonexistent") + +let test_multiple_agents () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent2" ~burst_size:20.0 + ~fill_rate:5.0 + in + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + Alcotest.(check (option (float 0.1))) + "Agent1 tokens reduced" (Some 5.0) + (Bucket_table.peek table ~user_agent:"agent1") ; + Alcotest.(check (option (float 0.1))) + "Agent2 tokens unchanged" (Some 20.0) + (Bucket_table.peek table ~user_agent:"agent2") + +let test_submit () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:10.0 + in + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 10.0 in + let executed = ref false in + let start_counter = Mtime_clock.counter () in + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> executed := true) + 5.0 ; + let elapsed_span = Mtime_clock.count start_counter in + let elapsed_seconds = Mtime.Span.to_float_ns elapsed_span *. 
1e-9 in + (* submit should return immediately (non-blocking) *) + Alcotest.(check bool) "submit returns immediately" true (elapsed_seconds < 0.1) ; + (* Wait for callback to be executed by worker *) + Thread.delay 0.6 ; + Alcotest.(check bool) "callback eventually executed" true !executed + +let test_submit_nonexistent () = + let table = Bucket_table.create () in + let executed = ref false in + Bucket_table.submit table ~user_agent:"nonexistent" + ~callback:(fun () -> executed := true) + 1.0 ; + Alcotest.(check bool) + "submit on nonexistent bucket runs callback immediately" true !executed + +let test_submit_fairness () = + (* Test that callbacks are executed in FIFO order regardless of token cost *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:5.0 + ~fill_rate:5.0 + in + (* Drain the bucket *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + let execution_order = ref [] in + let order_mutex = Mutex.create () in + let record_execution id = + Mutex.lock order_mutex ; + execution_order := id :: !execution_order ; + Mutex.unlock order_mutex + in + (* Submit callbacks with varying costs - order should be preserved *) + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 1) + 1.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 2) + 3.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 3) + 1.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 4) + 2.0 ; + (* Wait for all callbacks to complete (total cost = 7 tokens, rate = 5/s) *) + Thread.delay 2.0 ; + let order = List.rev !execution_order in + Alcotest.(check (list int)) + "callbacks execute in FIFO order" [1; 2; 3; 4] order + +let test_submit_sync () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:10.0 + in + (* Test 1: Returns callback result immediately when tokens available *) + let result = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> 42) + 5.0 + in + Alcotest.(check int) "returns callback result" 42 result ; + (* Test 2: Blocks and waits for tokens, then returns result *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + (* drain bucket *) + let start_counter = Mtime_clock.counter () in + let result2 = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> "hello") + 5.0 + in + let elapsed_span = Mtime_clock.count start_counter in + let elapsed_seconds = Mtime.Span.to_float_ns elapsed_span *. 
1e-9 in + Alcotest.(check string) "returns string result" "hello" result2 ; + Alcotest.(check bool) + "blocked waiting for tokens" true (elapsed_seconds >= 0.4) + +let test_submit_sync_nonexistent () = + let table = Bucket_table.create () in + let result = + Bucket_table.submit_sync table ~user_agent:"nonexistent" + ~callback:(fun () -> 99) + 1.0 + in + Alcotest.(check int) + "submit_sync on nonexistent bucket runs callback immediately" 99 result + +let test_submit_sync_with_queued_items () = + (* Test that submit_sync respects FIFO ordering when queue has items *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:5.0 + ~fill_rate:10.0 + in + (* Drain the bucket *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + let execution_order = ref [] in + let order_mutex = Mutex.create () in + let record_execution id = + Mutex.lock order_mutex ; + execution_order := id :: !execution_order ; + Mutex.unlock order_mutex + in + (* Submit async items first *) + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 1) + 1.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 2) + 1.0 ; + (* Now submit_sync should queue behind the async items *) + let result = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 3 ; "sync_result") + 1.0 + in + Alcotest.(check string) + "submit_sync returns correct result" "sync_result" result ; + let order = List.rev !execution_order in + Alcotest.(check (list int)) + "submit_sync executes after queued items" [1; 2; 3] order + +let test_submit_sync_concurrent () = + (* Test multiple concurrent submit_sync calls *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:1.0 + ~fill_rate:10.0 + in + (* Drain the bucket to force queueing *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 1.0 in + let results = Array.make 5 0 in + let threads = + Array.init 5 (fun i -> + Thread.create + (fun () -> + let r = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> i + 1) + 1.0 + in + results.(i) <- r + ) + () + ) + in + Array.iter Thread.join threads ; + (* Each thread should get its own result back *) + for i = 0 to 4 do + Alcotest.(check int) + (Printf.sprintf "thread %d gets correct result" i) + (i + 1) results.(i) + done + +let test_submit_sync_interleaved () = + (* Test interleaving submit and submit_sync *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:2.0 + ~fill_rate:10.0 + in + (* Drain the bucket *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 2.0 in + let async_executed = ref false in + (* Submit async first *) + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> async_executed := true) + 1.0 ; + (* Submit sync should wait for async to complete first *) + let sync_result = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> !async_executed) + 1.0 + in + Alcotest.(check bool) + "sync callback sees async already executed" true sync_result + +let test_concurrent_add_delete_stress () = + (* Stress test: rapidly add and delete entries. + Without proper locking, hashtable can get corrupted. 
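+ In the current implementation the table is an immutable map held in an + [Atomic.t] rather than a hashtable, so this mainly guards against regressions + towards a shared mutable structure.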
*) + let table = Bucket_table.create () in + let iterations = 1000 in + let num_keys = 10 in + let errors = ref 0 in + let errors_mutex = Mutex.create () in + let add_threads = + Array.init 5 (fun t -> + Thread.create + (fun () -> + for i = 0 to iterations - 1 do + let key = + Printf.sprintf "key%d" (((t * iterations) + i) mod num_keys) + in + let _ = + Bucket_table.add_bucket table ~user_agent:key ~burst_size:10.0 + ~fill_rate:1.0 + in + () + done + ) + () + ) + in + let delete_threads = + Array.init 5 (fun t -> + Thread.create + (fun () -> + for i = 0 to iterations - 1 do + let key = + Printf.sprintf "key%d" (((t * iterations) + i) mod num_keys) + in + Bucket_table.delete_bucket table ~user_agent:key + done + ) + () + ) + in + let read_threads = + Array.init 5 (fun t -> + Thread.create + (fun () -> + for i = 0 to iterations - 1 do + let key = + Printf.sprintf "key%d" (((t * iterations) + i) mod num_keys) + in + (* This should never crash, even if key doesn't exist *) + try + let _ = Bucket_table.peek table ~user_agent:key in + () + with _ -> + Mutex.lock errors_mutex ; + incr errors ; + Mutex.unlock errors_mutex + done + ) + () + ) + in + Array.iter Thread.join add_threads ; + Array.iter Thread.join delete_threads ; + Array.iter Thread.join read_threads ; + Alcotest.(check int) "No errors during concurrent operations" 0 !errors + +let test_consume_during_delete_race () = + (* Test that try_consume doesn't crash when bucket is being deleted. + Without proper locking, we could try to access a deleted bucket. *) + let iterations = 500 in + let errors = ref 0 in + let errors_mutex = Mutex.create () in + for _ = 1 to iterations do + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"target" ~burst_size:100.0 + ~fill_rate:1.0 + in + let barrier = ref 0 in + let barrier_mutex = Mutex.create () in + let consumer = + Thread.create + (fun () -> + Mutex.lock barrier_mutex ; + incr barrier ; + Mutex.unlock barrier_mutex ; + while + Mutex.lock barrier_mutex ; + let b = !barrier in + Mutex.unlock barrier_mutex ; b < 2 + do + Thread.yield () + done ; + try + let _ = Bucket_table.try_consume table ~user_agent:"target" 1.0 in + () + with _ -> + Mutex.lock errors_mutex ; incr errors ; Mutex.unlock errors_mutex + ) + () + in + let deleter = + Thread.create + (fun () -> + Mutex.lock barrier_mutex ; + incr barrier ; + Mutex.unlock barrier_mutex ; + while + Mutex.lock barrier_mutex ; + let b = !barrier in + Mutex.unlock barrier_mutex ; b < 2 + do + Thread.yield () + done ; + Bucket_table.delete_bucket table ~user_agent:"target" + ) + () + in + Thread.join consumer ; Thread.join deleter + done ; + Alcotest.(check int) "No crashes during consume/delete race" 0 !errors + +let test = + [ + ("Create empty table", `Quick, test_create) + ; ("Add valid bucket", `Quick, test_add_bucket) + ; ("Add invalid bucket", `Quick, test_add_bucket_invalid) + ; ("Delete bucket", `Quick, test_delete_bucket) + ; ("Delete nonexistent bucket", `Quick, test_delete_nonexistent) + ; ("Try consume", `Quick, test_try_consume) + ; ("Try consume insufficient", `Quick, test_try_consume_insufficient) + ; ("Try consume nonexistent", `Quick, test_try_consume_nonexistent) + ; ("Peek nonexistent", `Quick, test_peek_nonexistent) + ; ("Multiple agents", `Quick, test_multiple_agents) + ; ("Submit", `Slow, test_submit) + ; ("Submit nonexistent", `Quick, test_submit_nonexistent) + ; ("Submit fairness", `Slow, test_submit_fairness) + ; ("Submit sync", `Slow, test_submit_sync) + ; ("Submit sync interleaved", 
`Slow, test_submit_sync_interleaved) + ; ("Submit sync nonexistent", `Slow, test_submit_sync_nonexistent) + ; ("Submit sync concurrent", `Slow, test_submit_sync_concurrent) + ; ("Submit sync with queue", `Slow, test_submit_sync_with_queued_items) + ; ("Concurrent add/delete stress", `Quick, test_concurrent_add_delete_stress) + ; ("Consume during delete race", `Quick, test_consume_during_delete_race) + ] + +let () = Alcotest.run "Bucket table library" [("Bucket table tests", test)] diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.mli b/ocaml/libs/rate-limit/test/test_bucket_table.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.ml b/ocaml/libs/rate-limit/test/test_token_bucket.ml new file mode 100644 index 00000000000..2cd3a7992ef --- /dev/null +++ b/ocaml/libs/rate-limit/test/test_token_bucket.ml @@ -0,0 +1,409 @@ +open Thread +open Rate_limit + +let test_bad_fill_rate () = + let tb_zero = Token_bucket.create ~burst_size:1.0 ~fill_rate:0.0 in + Alcotest.(check bool) + "Creating a token bucket with 0 fill rate should fail" true (tb_zero = None) ; + let tb_negative = Token_bucket.create ~burst_size:1.0 ~fill_rate:~-.1.0 in + Alcotest.(check bool) + "Creating a token bucket with negative fill rate should fail" true + (tb_negative = None) + +let test_consume_removes_correct_amount () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + ) + in + + Alcotest.(check (float 0.0)) + "Initial tokens should be burst_size" 10.0 + (Token_bucket.peek_with_timestamp initial_time tb) ; + + let consume_time = Mtime.Span.of_uint64_ns 1_000_000_000L in + let success = + Token_bucket.consume_with_timestamp (fun () -> consume_time) tb 3.0 + in + Alcotest.(check bool) "Consume 3 tokens should succeed" true success ; + Alcotest.(check (float 0.0)) + "After consume, tokens should be 7" 7.0 + (Token_bucket.peek_with_timestamp consume_time tb) + +let test_consume_more_than_available () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:5.0 + ~fill_rate:1.0 + ) + in + + let _ = Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 4.0 in + + let consume_time = Mtime.Span.of_uint64_ns 1_000_000_000L in + let success = + Token_bucket.consume_with_timestamp (fun () -> consume_time) tb 10.0 + in + Alcotest.(check bool) "Consume more than available should fail" false success ; + Alcotest.(check (float 0.0)) + "After failed consume, tokens should be 2" 2.0 + (Token_bucket.peek_with_timestamp consume_time tb) + +let test_consume_refills_before_removing () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + ) + in + + let first_consume = + Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 5.0 + in + Alcotest.(check bool) "First consume should succeed" true first_consume ; + + let later_time = Mtime.Span.of_uint64_ns 3_000_000_000L in + let second_consume = + Token_bucket.consume_with_timestamp (fun () -> later_time) tb 8.0 + in + + Alcotest.(check bool) + "Second consume after refill should succeed" true second_consume ; + + Alcotest.(check (float 0.0)) + "After refill and consume, tokens should be 2" 2.0 + (Token_bucket.peek_with_timestamp later_time tb) + +let test_peek_respects_burst_size () = + let initial_time = 
Mtime.Span.of_uint64_ns 0L in + let tb = + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:5.0 + ) + in + + let _ = Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 8.0 in + + let later_time = Mtime.Span.of_uint64_ns 10_000_000_000L in + let available = Token_bucket.peek_with_timestamp later_time tb in + Alcotest.(check (float 0.0)) + "Peek should respect burst_size limit" 10.0 available + +let test_concurrent_access () = + let tb = + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:15.0 + ~fill_rate:0.01 + ) + in + let threads = + Array.init 10 (fun _ -> + create + (fun () -> + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb 1.0 + ) + () + ) + in + Array.iter Thread.join threads ; + Alcotest.(check (float 0.0)) + "Threads consuming concurrently should all remove from token amount" + (Token_bucket.peek_with_timestamp Mtime.Span.zero tb) + 5.0 + +let test_sleep () = + let tb = Option.get (Token_bucket.create ~burst_size:20.0 ~fill_rate:5.0) in + let _ = Token_bucket.consume tb 10.0 in + Thread.delay 1.0 ; + Alcotest.(check (float 0.5)) + "Sleep 1 should refill token bucket by fill_rate" 15.0 (Token_bucket.peek tb) + +let test_system_time_versions () = + let tb = Option.get (Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0) in + + let initial_peek = Token_bucket.peek tb in + Alcotest.(check (float 0.01)) + "System time peek should return burst_size initially" 10.0 initial_peek ; + + let consume_result = Token_bucket.consume tb 3.0 in + Alcotest.(check bool) "System time consume should succeed" true consume_result ; + + let after_consume_peek = Token_bucket.peek tb in + Alcotest.(check (float 0.01)) + "After consume, should have 7 tokens" 7.0 after_consume_peek + +let test_concurrent_system_time () = + let tb = Option.get (Token_bucket.create ~burst_size:100.0 ~fill_rate:10.0) in + let num_threads = 20 in + let consume_per_thread = 3 in + + let threads = + Array.init num_threads (fun _ -> + create + (fun () -> + for _ = 1 to consume_per_thread do + ignore (Token_bucket.consume tb 1.0) + done + ) + () + ) + in + Array.iter Thread.join threads ; + + let remaining = Token_bucket.peek tb in + let expected_remaining = + 100.0 -. 
float_of_int (num_threads * consume_per_thread) + in + Alcotest.(check (float 0.1)) + "Concurrent system time consumption should work correctly" + expected_remaining remaining + +let test_consume_more_than_available_concurrent () = + let tb = + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 + ~fill_rate:0.1 + ) + in + let num_threads = 10 in + let consume_per_thread = 1 in + let successful_consumes = ref 0 in + let counter_mutex = Mutex.create () in + + let threads = + Array.init num_threads (fun _ -> + create + (fun () -> + let success = + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb + (float_of_int consume_per_thread) + in + if success then ( + Mutex.lock counter_mutex ; + incr successful_consumes ; + Mutex.unlock counter_mutex + ) + ) + () + ) + in + Array.iter Thread.join threads ; + + Alcotest.(check int) + "Only 5 consumptions should succeed" 5 !successful_consumes ; + Alcotest.(check (float 0.1)) + "Bucket should be empty after consumptions" 0.0 + (Token_bucket.peek_with_timestamp Mtime.Span.zero tb) + +let test_delay_until_available () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + ) + in + + let _ = + Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 10.0 + in + + let delay = + Token_bucket.get_delay_until_available_timestamp initial_time tb 4.0 + in + Alcotest.(check (float 0.01)) + "Delay for 4 tokens at 2 tokens/sec should be 2 seconds" 2.0 delay ; + + let tb_fresh = + Option.get (Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0) + in + let _ = Token_bucket.consume tb_fresh 10.0 in + let delay_system = Token_bucket.get_delay_until_available tb_fresh 4.0 in + + Alcotest.(check (float 0.1)) + "System time delay should be approximately 2 seconds" 2.0 delay_system + +let test_edge_cases () = + let tb = + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 + ~fill_rate:1.0 + ) + in + let success = + Token_bucket.consume_with_timestamp (fun () -> Mtime.Span.zero) tb 0.0 + in + Alcotest.(check bool) "Consuming zero tokens should succeed" true success ; + + let tb_small = + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:1.0 + ~fill_rate:0.1 + ) + in + let success_small = + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb_small 0.001 + in + Alcotest.(check bool) + "Consuming very small amount should succeed" true success_small + +let test_consume_quickcheck = + let open QCheck.Gen in + let gen_operations = + let gen_operation = + pair (float_range 0.0 1000.0) (int_range 0 1_000_000_000) + in + list_size (int_range 1 50) gen_operation + in + + let fail_peek op_num time_ns time_delta expected current added actual diff = + QCheck.Test.fail_reportf + "Operation %d: peek failed\n\ + \ Time: %d ns (delta: %d ns)\n\ + \ Expected tokens: %.3f (current: %.3f + added: %.3f)\n\ + \ Actual tokens: %.3f\n\ + \ Diff: %.6f" op_num time_ns time_delta expected current added actual + diff + in + + let fail_consume op_num time_ns time_delta amount available success expected + actual diff = + QCheck.Test.fail_reportf + "Operation %d: consume failed\n\ + \ Time: %d ns (delta: %d ns)\n\ + \ Consume amount: %.3f\n\ + \ Available before: %.3f\n\ + \ Success: %b\n\ + \ Expected after: %.3f\n\ + \ Actual after: %.3f\n\ + \ Diff: %.6f" op_num time_ns time_delta amount available success expected + actual diff + in + + let property (burst_size, 
fill_rate, operations) = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size ~fill_rate) + in + + let rec check_operations op_num time_ns last_refill_ns current_tokens ops = + match ops with + | [] -> + true + | (consume_amount, time_delta_ns) :: rest -> + let new_time_ns = time_ns + time_delta_ns in + let current_time = + Mtime.Span.of_uint64_ns (Int64.of_int new_time_ns) + in + let time_since_refill_seconds = + float_of_int (new_time_ns - last_refill_ns) *. 1e-9 + in + let tokens_added = time_since_refill_seconds *. fill_rate in + let expected_available = + min burst_size (current_tokens +. tokens_added) + in + let actual_before = + Token_bucket.peek_with_timestamp current_time tb + in + let peek_diff = abs_float (actual_before -. expected_available) in + + if peek_diff >= 0.001 then + fail_peek op_num new_time_ns time_delta_ns expected_available + current_tokens tokens_added actual_before peek_diff + else + let success = + Token_bucket.consume_with_timestamp + (fun () -> current_time) + tb consume_amount + in + let actual_after = + Token_bucket.peek_with_timestamp current_time tb + in + let new_tokens = + if success then + expected_available -. consume_amount + else + expected_available + in + let after_diff = abs_float (actual_after -. new_tokens) in + + if after_diff >= 0.001 then + fail_consume op_num new_time_ns time_delta_ns consume_amount + expected_available success new_tokens actual_after after_diff + else + check_operations (op_num + 1) new_time_ns new_time_ns new_tokens + rest + in + + check_operations 1 0 0 burst_size operations + in + + let gen_all = + map3 + (fun burst fill ops -> (burst, fill, ops)) + pfloat (float_range 1e-9 1e9) gen_operations + in + + let arb_all = + QCheck.make + ~print:(fun (burst, fill, ops) -> + let ops_str = + ops + |> List.mapi (fun i (amount, delta) -> + Printf.sprintf " Op %d: consume %.3f at +%d ns" (i + 1) + amount delta + ) + |> String.concat "\n" + in + Printf.sprintf "burst_size=%.3f, fill_rate=%.3f, %d operations:\n%s" + burst fill (List.length ops) ops_str + ) + gen_all + in + + QCheck.Test.make ~name:"Consume operations maintain correct token count" + ~count:100 arb_all (fun (burst, fill, ops) -> property (burst, fill, ops) + ) + +let test = + [ + ( "A bucket with zero or negative fill rate cannot be created" + , `Quick + , test_bad_fill_rate + ) + ; ( "Consume removes correct amount" + , `Quick + , test_consume_removes_correct_amount + ) + ; ("Consume more than available", `Quick, test_consume_more_than_available) + ; ( "Consume refills before removing" + , `Quick + , test_consume_refills_before_removing + ) + ; ("Peek respects burst size", `Quick, test_peek_respects_burst_size) + ; ("Concurrent access", `Quick, test_concurrent_access) + ; ("Refill after sleep", `Slow, test_sleep) + ; ("System time versions", `Quick, test_system_time_versions) + ; ("Concurrent system time", `Quick, test_concurrent_system_time) + ; ( "Consume more than available concurrent" + , `Quick + , test_consume_more_than_available_concurrent + ) + ; ("Delay until available", `Quick, test_delay_until_available) + ; ("Edge cases", `Quick, test_edge_cases) + ; QCheck_alcotest.to_alcotest test_consume_quickcheck + ] + +let () = Alcotest.run "Token bucket library" [("Token bucket tests", test)] diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.mli b/ocaml/libs/rate-limit/test/test_token_bucket.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/ocaml/libs/rate-limit/token_bucket.ml b/ocaml/libs/rate-limit/token_bucket.ml new file mode 100644 index 00000000000..d59683e02e5 --- /dev/null +++ b/ocaml/libs/rate-limit/token_bucket.ml @@ -0,0 +1,83 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type state = {tokens: float; last_refill: Mtime.span} + +type t = {burst_size: float; fill_rate: float; state: state Atomic.t} + +let create_with_timestamp timestamp ~burst_size ~fill_rate = + if fill_rate <= 0. then + None + else + let state = Atomic.make {tokens= burst_size; last_refill= timestamp} in + Some {burst_size; fill_rate; state} + +let create = create_with_timestamp (Mtime_clock.elapsed ()) + +let compute_tokens timestamp {tokens; last_refill} ~burst_size ~fill_rate = + let time_delta = Mtime.Span.abs_diff last_refill timestamp in + let time_delta_seconds = Mtime.Span.to_float_ns time_delta *. 1e-9 in + min burst_size (tokens +. (time_delta_seconds *. fill_rate)) + +let peek_with_timestamp timestamp tb = + let tb_state = Atomic.get tb.state in + compute_tokens timestamp tb_state ~burst_size:tb.burst_size + ~fill_rate:tb.fill_rate + +let peek tb = peek_with_timestamp (Mtime_clock.elapsed ()) tb + +let consume_with_timestamp get_time tb amount = + let rec try_consume () = + let timestamp = get_time () in + let old_state = Atomic.get tb.state in + let new_tokens = + compute_tokens timestamp old_state ~burst_size:tb.burst_size + ~fill_rate:tb.fill_rate + in + let success, final_tokens = + if new_tokens >= amount then + (true, new_tokens -. amount) + else + (false, new_tokens) + in + let new_state = {tokens= final_tokens; last_refill= timestamp} in + if Atomic.compare_and_set tb.state old_state new_state then + success + else + try_consume () + in + try_consume () + +let consume = consume_with_timestamp Mtime_clock.elapsed + +let get_delay_until_available_timestamp timestamp tb amount = + let {tokens; last_refill} = Atomic.get tb.state in + let current_tokens = + compute_tokens timestamp {tokens; last_refill} ~burst_size:tb.burst_size + ~fill_rate:tb.fill_rate + in + let required_tokens = max 0. (amount -. current_tokens) in + required_tokens /. tb.fill_rate + +let get_delay_until_available tb amount = + get_delay_until_available_timestamp (Mtime_clock.elapsed ()) tb amount + +(* This implementation only works when there is only one thread trying to + consume - fairness needs to be implemented on top of it with a queue. + If there is no contention, it should only delay once. 
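+ Under contention, several sleepers may wake together, find the refilled + tokens already taken, and loop again with a fresh delay; Bucket_table avoids + this by funnelling all delayed work for a client through a single worker + thread.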
*) +let rec delay_then_consume tb amount = + if not (consume tb amount) then ( + Thread.delay (get_delay_until_available tb amount) ; + delay_then_consume tb amount + ) diff --git a/ocaml/libs/rate-limit/token_bucket.mli b/ocaml/libs/rate-limit/token_bucket.mli new file mode 100644 index 00000000000..d04f4fd6174 --- /dev/null +++ b/ocaml/libs/rate-limit/token_bucket.mli @@ -0,0 +1,106 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** This module implements a classic token-bucket rate limiter. Token buckets + contain tokens that are refilled over time, and can be consumed in a + thread-safe way. A token bucket accumulates [fill_rate] tokens per second, + up to [burst_size]. Consumers may take tokens (if available), or query when + enough tokens will become available. + + Token buckets implement rate limiting by allowing operations to proceed + only when sufficient tokens are available - otherwise, the operations can + be delayed until enough tokens are available. + + To avoid doing unnecessary work to refill the bucket, token amounts are + only updated when a consume operation is carried out. The buckets keep a + last_refill timestamp which is updated on consume in tandem with the token + counts, and informs how many tokens should be added by the bucket refill. + + We include versions of functions that take a timestamp as a parameter for + testing purposes only - consumers of this library should use the + timestamp-less versions. +*) + +type t + +val create : burst_size:float -> fill_rate:float -> t option +(** Create token bucket with given parameters. + Returns None if the fill rate is 0 or negative. + @param burst_size Maximum number of tokens that can fit in the bucket + @param fill_rate Number of tokens added to the bucket per second + *) + +val peek : t -> float +(** Retrieve current token amount + @param tb Token bucket + @return Amount of tokens in the token bucket + *) + +val consume : t -> float -> bool +(** Consume tokens from the bucket in a thread-safe manner. + @param tb Token bucket + @param amount How many tokens to consume + @return Whether the tokens were successfully consumed + *) + +val get_delay_until_available : t -> float -> float +(** Get number of seconds that need to pass until bucket is expected to have + enough tokens to fulfil the request + @param tb Token bucket + @param amount How many tokens we want to consume + @return Number of seconds until tokens are available +*) + +val delay_then_consume : t -> float -> unit + +(**/**) + +(* Functions accepting a timestamp are meant for testing only *) + +val create_with_timestamp : + Mtime.span -> burst_size:float -> fill_rate:float -> t option +(** Create token bucket with given parameters and supplied initial timestamp + Returns None if the fill_rate is 0 or negative.
+ @param timestamp Initial timestamp + @param burst_size Maximum number of tokens that can fit in the bucket + @param fill_rate Number of tokens added to the bucket per second + *) + +val peek_with_timestamp : Mtime.span -> t -> float +(** Retrieve token amount in token bucket at given timestamp. + Undefined behaviour when [timestamp] <= [tb.timestamp] + @param timestamp Current time + @param tb Token bucket + @return Amount of tokens in the token bucket + *) + +val consume_with_timestamp : (unit -> Mtime.span) -> t -> float -> bool +(** Consume tokens from the bucket in a thread-safe manner, using supplied + function for obtaining the current time + @param get_time Function to obtain timestamp, e.g. Mtime_clock.elapsed + @param tb Token bucket + @param amount How many tokens to consume + @return Whether the tokens were successfully consumed + *) + +val get_delay_until_available_timestamp : Mtime.span -> t -> float -> float +(** Get number of seconds that need to pass until bucket is expected to have + enough tokens to fulfil the request + @param timestamp + @param tb Token bucket + @param amount How many tokens we want to consume + @return Number of seconds until tokens are available +*) + +(**/**) diff --git a/ocaml/libs/uuid/uuidx.ml b/ocaml/libs/uuid/uuidx.ml index b22c22ebd14..8ae23a84052 100644 --- a/ocaml/libs/uuid/uuidx.ml +++ b/ocaml/libs/uuid/uuidx.ml @@ -64,6 +64,7 @@ type without_secret = | `sr_stat | `subject | `task + | `Rate_limit | `tunnel | `USB_group | `user diff --git a/ocaml/libs/uuid/uuidx.mli b/ocaml/libs/uuid/uuidx.mli index bd0865cf628..e3346480998 100644 --- a/ocaml/libs/uuid/uuidx.mli +++ b/ocaml/libs/uuid/uuidx.mli @@ -75,6 +75,7 @@ type without_secret = | `sr_stat | `subject | `task + | `Rate_limit | `tunnel | `USB_group | `user diff --git a/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli b/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli index 286e545321f..a86104e313d 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli @@ -47,7 +47,7 @@ val setup : unit -> unit (** {1 Static property tests} *) -val as_readable : (([< readable] as 'a), 'b) make -> ([> readable], 'b) make +val as_readable : ([< readable], 'b) make -> ([> readable], 'b) make (** [as_readable_opt t] returns [Some t] when [t] is readable, and [None] otherwise. *) val as_writable : ([< writable], 'b) make -> ([> writable], 'b) make @@ -55,12 +55,10 @@ val as_writable : ([< writable], 'b) make -> ([> writable], 'b) make (** {1 Runtime property tests} *) -val as_readable_opt : - (([< rw] as 'a), 'b) make -> ([> readable], 'b) make option +val as_readable_opt : ([< rw], 'b) make -> ([> readable], 'b) make option (** [as_readable_opt t] returns [Some t] when [t] is readable, and [None] otherwise. *) -val as_writable_opt : - (([< rw] as 'a), 'b) make -> ([> writable], 'b) make option +val as_writable_opt : ([< rw], 'b) make -> ([> writable], 'b) make option (** [as_writable_opt t] returns [Some t] when [t] is readable, and [None] otherwise. 
*) val as_spipe_opt : ('a, [< kind]) make -> ('a, [> espipe]) make option diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 65fa98d62d5..70da9691d7c 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -212,6 +212,7 @@ let reset_state () = ) ; None in + Inventory.reread_inventory () ; config := Network_config.read_management_conf reset_order let set_gateway_interface _dbg name = diff --git a/ocaml/qcow-stream-tool/dune b/ocaml/qcow-stream-tool/dune index 4daf3469dc5..436dd58681c 100644 --- a/ocaml/qcow-stream-tool/dune +++ b/ocaml/qcow-stream-tool/dune @@ -7,5 +7,9 @@ qcow-stream cmdliner unix + lwt.unix + lwt + qcow-types + yojson ) ) diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.ml b/ocaml/qcow-stream-tool/qcow_stream_tool.ml index 7158867c248..41b57c9a366 100644 --- a/ocaml/qcow-stream-tool/qcow_stream_tool.ml +++ b/ocaml/qcow-stream-tool/qcow_stream_tool.ml @@ -1,11 +1,53 @@ +open Cmdliner + module Impl = struct let stream_decode output = Qcow_stream.stream_decode Unix.stdin output ; `Ok () + + let read_headers qcow_path = + let open Lwt.Syntax in + let t = + let* fd = Lwt_unix.openfile qcow_path [Unix.O_RDONLY] 0 in + let* virtual_size, cluster_bits, _, data_cluster_map = + Qcow_stream.start_stream_decode fd + in + let clusters = Qcow_types.Cluster.Map.bindings data_cluster_map in + let clusters = + List.map + (fun (_, virt_address) -> + let ( >> ) = Int64.shift_right_logical in + let address = + Int64.to_int (virt_address >> Int32.to_int cluster_bits) + in + `Int address + ) + clusters + in + let json = + `Assoc + [ + ("virtual_size", `Int (Int64.to_int virtual_size)) + ; ("cluster_bits", `Int (Int32.to_int cluster_bits)) + ; ("data_clusters", `List clusters) + ] + in + let json_string = Yojson.to_string json in + let* () = Lwt_io.print json_string in + let* () = Lwt_io.flush Lwt_io.stdout in + Lwt.return_unit + in + Lwt_main.run t ; `Ok () end module Cli = struct - open Cmdliner + let output default = + let doc = Printf.sprintf "Path to the output file." in + Arg.(value & pos 0 string default & info [] ~doc) + + let input = + let doc = Printf.sprintf "Path to the input file." in + Arg.(required & pos 0 (some string) None & info [] ~doc) let stream_decode_cmd = let doc = "decode qcow2 formatted data from stdin and write a raw image" in @@ -15,15 +57,28 @@ module Cli = struct ; `P "Decode qcow2 formatted data from stdin and write to a raw file." ] in - let output default = - let doc = Printf.sprintf "Path to the output file." in - Arg.(value & pos 0 string default & info [] ~doc) - in Cmd.v (Cmd.info "stream_decode" ~doc ~man) Term.(ret (const Impl.stream_decode $ output "test.raw")) - let main () = Cmd.eval stream_decode_cmd + let read_headers_cmd = + let doc = + "Determine allocated clusters by parsing qcow2 file at the provided \ + path. 
Returns JSON like the following: {'virtual_size': X, \ + 'cluster_bits': Y, 'data_clusters': [1,2,3]}" + in + let man = [`S "DESCRIPTION"; `P doc] in + Cmd.v + (Cmd.info "read_headers" ~doc ~man) + Term.(ret (const Impl.read_headers $ input)) + + let cmds = [stream_decode_cmd; read_headers_cmd] end -let () = exit (Cli.main ()) +let info = + let doc = "minimal CLI for qcow-stream" in + Cmd.info "qcow-stream-tool" ~version:"1.0.0" ~doc + +let () = + let cmd = Cmd.group info Cli.cmds in + exit (Cmd.eval cmd) diff --git a/ocaml/sdk-gen/c/helper.ml b/ocaml/sdk-gen/c/helper.ml index 0079b42ef99..78e53b75b1e 100644 --- a/ocaml/sdk-gen/c/helper.ml +++ b/ocaml/sdk-gen/c/helper.ml @@ -28,19 +28,16 @@ let comment doc ?(indent = 0) s = let buf = Buffer.create 16 in let formatter = Format.formatter_of_buffer buf in let open Format in - let out, flush, newline, spaces = - let funcs = Format.pp_get_formatter_out_functions formatter () in - (funcs.out_string, funcs.out_flush, funcs.out_newline, funcs.out_spaces) - in - + let funcs = Format.pp_get_formatter_out_functions formatter () in + let original_out_newline = funcs.out_newline in let funcs = { - out_string= out - ; out_flush= flush - ; out_newline= - (fun () -> out (Printf.sprintf "\n%s * " indent_str) 0 (indent + 4)) - ; out_spaces= spaces - ; out_indent= spaces + funcs with + out_newline= + (fun () -> + funcs.out_string (Printf.sprintf "\n%s * " indent_str) 0 (indent + 4) + ) + ; out_indent= funcs.out_spaces } in Format.pp_set_formatter_out_functions formatter funcs ; @@ -61,7 +58,7 @@ let comment doc ?(indent = 0) s = Format.fprintf formatter "%!" ; Format.pp_set_formatter_out_functions formatter - {funcs with out_newline= newline} ; + {funcs with out_newline= original_out_newline} ; let result = Buffer.contents buf in let n = String.length result in diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 09f6a3b465a..4a4db92dd0d 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -175,7 +175,7 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(last_software_update = Date.epoch) ?(last_update_hash = "") ?(ssh_enabled = true) ?(ssh_enabled_timeout = 0L) ?(ssh_expiry = Date.epoch) ?(console_idle_timeout = 0L) ?(ssh_auto_mode = false) ?(secure_boot = false) - () = + ?(https_only = false) () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name @@ -184,6 +184,9 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ~secure_boot ~software_version:(Xapi_globs.software_version ()) + ~https_only ~numa_affinity_policy:`default_policy + ~latest_synced_updates_applied:`unknown ~pending_guidances_full:[] + ~pending_guidances_recommended:[] in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host @@ -194,15 +197,14 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ?(external_auth_type = "") ?(external_auth_service_name = "") ?(external_auth_configuration = []) ?(license_params = []) ?(edition = "free") ?(license_server = []) ?(local_cache_sr = Ref.null) - ?(chipset_info = []) ?(ssl_legacy = false) () = + ?(chipset_info = []) ?(ssl_legacy = false) ?(https_only = false) () = let pool = Helpers.get_pool ~__context in let tls_verification_enabled = Db.Pool.get_tls_verification_enabled ~__context ~self:pool in 
Db.Host.create ~__context ~ref ~current_operations:[] ~allowed_operations:[] ~software_version:(Xapi_globs.software_version ()) - ~https_only:false ~enabled:false - ~aPI_version_major:Datamodel_common.api_version_major + ~enabled:false ~aPI_version_major:Datamodel_common.api_version_major ~aPI_version_minor:Datamodel_common.api_version_minor ~aPI_version_vendor:Datamodel_common.api_version_vendor ~aPI_version_vendor_implementation: @@ -224,7 +226,7 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~last_update_hash:"" ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false - ~secure_boot:false ; + ~secure_boot:false ~https_only ; ref let make_pif ~__context ~network ~host ?(device = "eth0") diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index beb5588e66d..9e7ac61275c 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -27,6 +27,9 @@ let add_host __context name = ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false ~secure_boot:false ~software_version:(Xapi_globs.software_version ()) + ~https_only:false ~numa_affinity_policy:`default_policy + ~latest_synced_updates_applied:`unknown ~pending_guidances_full:[] + ~pending_guidances_recommended:[] ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/tests/test_xapi_xenops.ml b/ocaml/tests/test_xapi_xenops.ml index e1f1bf048e2..42f0bb5708d 100644 --- a/ocaml/tests/test_xapi_xenops.ml +++ b/ocaml/tests/test_xapi_xenops.ml @@ -3,6 +3,92 @@ open Test_common module D = Debug.Make (struct let name = "test_xapi_xenops" end) open D +module Date = Clock.Date + +(** Helper to create a Xenops VM state for testing *) +let make_xenops_state ~power_state ?(last_start_time = 0.0) () = + let open Xenops_interface.Vm in + { + power_state + ; domids= [0] + ; consoles= [] + ; memory_target= 0L + ; memory_actual= 0L + ; memory_limit= 0L + ; vcpu_target= 1 + ; shadow_multiplier_target= 1.0 + ; rtc_timeoffset= "" + ; uncooperative_balloon_driver= false + ; guest_agent= [] + ; xsdata_state= [] + ; pv_drivers_detected= false + ; last_start_time + ; hvm= false + ; nomigrate= false + ; nested_virt= false + ; domain_type= Domain_PV + ; featureset= "" + } + +(** Helper to set up VM for testing: sets pending guidances, resident host, and power state *) +let setup_vm_for_test ~__context ~vm ~guidances ~resident_on ~power_state = + Db.VM.set_pending_guidances ~__context ~self:vm ~value:guidances ; + Db.VM.set_resident_on ~__context ~self:vm ~value:resident_on ; + Db.VM.set_power_state ~__context ~self:vm ~value:power_state + +(** Helper to check pending guidances after an operation *) +let check_pending_guidances ~__context ~vm ~expect_restart_vm + ~expect_restart_device_model ~test_description = + let remaining = Db.VM.get_pending_guidances ~__context ~self:vm in + Alcotest.(check bool) + (Printf.sprintf "restart_vm guidance %s - %s" + (if expect_restart_vm then "present" else "cleared") + test_description + ) + expect_restart_vm + (List.mem `restart_vm remaining) ; + Alcotest.(check bool) + (Printf.sprintf "restart_device_model guidance %s - %s" + (if expect_restart_device_model then "present" else "cleared") + test_description + ) + expect_restart_device_model + (List.mem `restart_device_model remaining) + +(** Helper to simulate a VM state update via update_vm_internal *) +let simulate_vm_state_update 
~__context ~vm ~previous_power_state + ~new_power_state ~localhost = + let previous_state = make_xenops_state ~power_state:previous_power_state () in + let new_state = + make_xenops_state ~power_state:new_power_state ~last_start_time:100.0 () + in + let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in + let metrics = Db.VM.get_metrics ~__context ~self:vm in + Db.VM_metrics.set_start_time ~__context ~self:metrics + ~value:(Date.of_unix_time 50.0) ; + ignore + (Xapi_xenops.update_vm_internal ~__context ~id:vm_uuid ~self:vm + ~previous:(Some previous_state) ~info:(Some new_state) ~localhost + ) + +(** Helper to set host software version *) +let set_host_software_version ~__context ~host ~platform_version ~xapi_version = + Db.Host.remove_from_software_version ~__context ~self:host + ~key:Xapi_globs._platform_version ; + Db.Host.add_to_software_version ~__context ~self:host + ~key:Xapi_globs._platform_version ~value:platform_version ; + Db.Host.remove_from_software_version ~__context ~self:host + ~key:Xapi_globs._xapi_version ; + Db.Host.add_to_software_version ~__context ~self:host + ~key:Xapi_globs._xapi_version ~value:xapi_version + +(** Helper to get the pool from the test database *) +let get_pool ~__context = + match Db.Pool.get_all ~__context with + | pool :: _ -> + pool + | [] -> + failwith "No pool found in test database" let simulator_setup = ref false @@ -187,4 +273,132 @@ let test_xapi_restart () = ) unsetup_simulator -let test = [("test_xapi_restart", `Quick, test_xapi_restart)] +(** Test that RestartVM guidance is only cleared when VM starts on up-to-date host *) +let test_pending_guidance_vm_start () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + let vm = make_vm ~__context () in + + (* Set up VM guidances - both restart_vm and restart_device_model *) + let guidances = [`restart_vm; `restart_device_model] in + + (* Test 1: VM starting on up-to-date host - should clear restart_vm *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Halted ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Halted + ~new_power_state:Xenops_interface.Running ~localhost ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + ~expect_restart_device_model:false + ~test_description:"VM started on up-to-date host" ; + + (* Test 2: VM starting on old host - should NOT clear restart_vm *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Halted ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Halted + ~new_power_state:Xenops_interface.Running ~localhost:host2 ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:false + ~test_description:"VM started on old host" + +(** Test that NO guidance is cleared when suspended VM resumes *) +let test_pending_guidance_vm_resume () = + let __context = 
make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + (* Test 1: Suspended VM resumed on up-to-date host - should NOT clear any guidance *) + let vm = make_vm ~__context () in + let guidances = [`restart_vm; `restart_device_model] in + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Suspended ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Suspended + ~new_power_state:Xenops_interface.Running ~localhost ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:true + ~test_description:"suspended VM resumed on up-to-date host" ; + + (* Test 2: Suspended VM resumed on old host - should NOT clear any guidance *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Suspended ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Suspended + ~new_power_state:Xenops_interface.Running ~localhost:host2 ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:true + ~test_description:"suspended VM resumed on old host" + +(** Test that RestartVM guidance is always cleared when VM is halted *) +let test_pending_guidance_vm_halt () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + let vm = make_vm ~__context () in + let guidances = [`restart_vm; `restart_device_model] in + + (* Test 1: VM halted on up-to-date host - should clear both guidances *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Running ; + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:`Halted ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + ~expect_restart_device_model:false + ~test_description:"VM halted on up-to-date host" ; + + (* Test 2: VM halted on old host - should ALSO clear both guidances + because VM.start_on will enforce host version check on next start *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Running ; + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:`Halted ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + ~expect_restart_device_model:false ~test_description:"VM halted on old host" + +let test = + [ + 
("test_xapi_restart", `Quick, test_xapi_restart) + ; ("test_pending_guidance_vm_start", `Quick, test_pending_guidance_vm_start) + ; ("test_pending_guidance_vm_resume", `Quick, test_pending_guidance_vm_resume) + ; ("test_pending_guidance_vm_halt", `Quick, test_pending_guidance_vm_halt) + ] diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 39e0c8ce51f..66679b43390 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -3869,6 +3869,27 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "rate-limit-create" + , { + reqd= ["client-id"; "burst-size"; "fill-rate"] + ; optn= [] + ; help= + "Add a rate limit to a XAPI client, by specifying fill rate \ + (requests per second) and burst size (maximum number of requests at \ + once)" + ; implementation= No_fd Cli_operations.Rate_limit.create + ; flags= [] + } + ) + ; ( "rate-limit-destroy" + , { + reqd= ["uuid"] + ; optn= [] + ; help= "Destroy rate limiter" + ; implementation= No_fd Cli_operations.Rate_limit.destroy + ; flags= [] + } + ) ] let cmdtable : (string, cmd_spec) Hashtbl.t = Hashtbl.create 50 diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index eb6a0eb3a80..14ec2fcfacd 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -1378,6 +1378,11 @@ let gen_cmds rpc session_id = ["uuid"; "vendor-name"; "device-name"; "pci-id"] rpc session_id ) + ; Client.Rate_limit.( + mk get_all_records_where get_by_uuid rate_limit_record "rate-limit" [] + ["uuid"; "client-id"; "burst-size"; "fill-rate"] + rpc session_id + ) ] let message_create (_ : printer) rpc session_id params = @@ -8179,3 +8184,23 @@ module VM_group = struct in Client.VM_group.destroy ~rpc ~session_id ~self:ref end + +module Rate_limit = struct + let create printer rpc session_id params = + let client_id = List.assoc "client-id" params in + let burst_size = float_of_string (List.assoc "burst-size" params) in + let fill_rate = float_of_string (List.assoc "fill-rate" params) in + let ref = + Client.Rate_limit.create ~rpc ~session_id ~client_id ~burst_size + ~fill_rate + in + let uuid = Client.Rate_limit.get_uuid ~rpc ~session_id ~self:ref in + printer (Cli_printer.PMsg uuid) + + let destroy _printer rpc session_id params = + let ref = + Client.Rate_limit.get_by_uuid ~rpc ~session_id + ~uuid:(List.assoc "uuid" params) + in + Client.Rate_limit.destroy ~rpc ~session_id ~self:ref +end diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index ee68f272eb8..0a63099b55c 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -5933,3 +5933,38 @@ let pci_record rpc session_id pci = () ] } + +let rate_limit_record rpc session_id rate_limit = + let _ref = ref rate_limit in + let empty_record = + ToGet (fun () -> Client.Rate_limit.get_record ~rpc ~session_id ~self:!_ref) + in + let record = ref empty_record in + let x () = lzy_get record in + { + setref= + (fun r -> + _ref := r ; + record := empty_record + ) + ; setrefrec= + (fun (a, b) -> + _ref := a ; + record := Got b + ) + ; record= x + ; getref= (fun () -> !_ref) + ; fields= + [ + make_field ~name:"uuid" ~get:(fun () -> (x ()).API.rate_limit_uuid) () + ; make_field ~name:"client-id" + ~get:(fun () -> (x ()).API.rate_limit_client_id) + () + ; make_field ~name:"burst-size" + ~get:(fun () -> string_of_float (x ()).API.rate_limit_burst_size) + () + ; make_field ~name:"fill-rate" + ~get:(fun 
() -> string_of_float (x ()).API.rate_limit_fill_rate) + () + ] + } diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index a67c51b0131..f27b4ec00b8 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -492,8 +492,12 @@ module Host = struct | Best_effort (** Best-effort placement. Assigns the memory of the VM to a single node, and soft-pins its VCPUs to the node, if possible. Otherwise - behaves like Any. *) + behaves like Any. + The node(s) need to have enough cores to run all the vCPUs of the VM + *) | Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *) + | Prio_mem_only + (** Prioritizes reducing memory bandwidth, ignores CPU overload *) [@@deriving rpcty] type numa_affinity_policy_opt = numa_affinity_policy option [@@deriving rpcty] @@ -851,6 +855,10 @@ module XenopsAPI (R : RPC) = struct declare "VM.resume" [] (debug_info_p @-> vm_id_p @-> disk_p @-> returning task_id_p err) + let fast_resume = + declare "VM.fast_resume" [] + (debug_info_p @-> vm_id_p @-> returning task_id_p err) + let s3suspend = declare "VM.s3suspend" [] (debug_info_p @-> vm_id_p @-> returning task_id_p err) diff --git a/ocaml/xapi/api_server_common.ml b/ocaml/xapi/api_server_common.ml index f4167c1f36a..043a3bc96c8 100644 --- a/ocaml/xapi/api_server_common.ml +++ b/ocaml/xapi/api_server_common.ml @@ -132,6 +132,7 @@ module Actions = struct module Observer = Xapi_observer module Host_driver = Xapi_host_driver module Driver_variant = Xapi_host_driver.Variant + module Rate_limit = Xapi_rate_limit end (** Use the server functor to make an XML-RPC dispatcher. *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 91bea2d25b4..d66448be62a 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -66,6 +66,9 @@ let create_localhost ~__context info = ~console_idle_timeout:Constants.default_console_idle_timeout ~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default ~secure_boot:false ~software_version:[] + ~https_only:!Xapi_globs.https_only ~numa_affinity_policy:`default_policy + ~latest_synced_updates_applied:`unknown ~pending_guidances_full:[] + ~pending_guidances_recommended:[] in () diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 8095a5c4bfc..600503b0e71 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -65,6 +65,7 @@ exnHelper rbac_static xapi_role + xapi_rate_limit xapi_extensions db) (modes best) @@ -83,6 +84,7 @@ threads.posix fmt clock + rate-limit astring stunnel sexplib0 @@ -129,6 +131,7 @@ locking_helpers exnHelper xapi_role + xapi_rate_limit xapi_extensions db)) (libraries @@ -165,6 +168,7 @@ psq ptime ptime.clock.os + rate-limit rpclib.core rpclib.json rpclib.xml diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 060195e120a..f97f6e96930 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -6707,6 +6707,8 @@ functor in Xapi_pool_helpers.call_fn_on_slaves_then_master ~__context fn end + + module Rate_limit = Xapi_rate_limit end (* for unit tests *) diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml index 30d0eb63811..e3cd13d469b 100644 --- a/ocaml/xapi/qcow_tool_wrapper.ml +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -16,14 +16,15 @@ module D = Debug.Make (struct let name = __MODULE__ end) open D -let run_qcow_tool qcow_tool ?input_fd ?output_fd (_progress_cb : int -> unit) - (args : string list) = +let run_qcow_tool qcow_tool ?(replace_fds = []) ?input_fd ?output_fd + 
(_progress_cb : int -> unit) (args : string list) = info "Executing %s %s" qcow_tool (String.concat " " args) ; let open Forkhelpers in match with_logfile_fd "qcow-tool" (fun log_fd -> let pid = - safe_close_and_exec input_fd output_fd (Some log_fd) [] qcow_tool args + safe_close_and_exec input_fd output_fd (Some log_fd) replace_fds + qcow_tool args in let _, status = waitpid pid in if status <> Unix.WEXITED 0 then ( @@ -46,14 +47,70 @@ let update_task_progress (__context : Context.t) (x : int) = let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) = - let args = [path] in + let args = ["stream_decode"; path] in let qcow_tool = !Xapi_globs.qcow_stream_tool in run_qcow_tool qcow_tool progress_cb args ~input_fd:unix_fd +let read_header qcow_path = + let args = ["read_headers"; qcow_path] in + let qcow_tool = !Xapi_globs.qcow_stream_tool in + let pipe_reader, pipe_writer = Unix.pipe ~cloexec:true () in + + let progress_cb _ = () in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> run_qcow_tool qcow_tool progress_cb args ~output_fd:pipe_writer) + (fun () -> Unix.close pipe_writer) ; + pipe_reader + +let parse_header qcow_path = + let pipe_reader = read_header qcow_path in + let ic = Unix.in_channel_of_descr pipe_reader in + let buf = Buffer.create 4096 in + let json = Yojson.Basic.from_channel ~buf ~fname:"qcow_header.json" ic in + In_channel.close ic ; + let cluster_size = + 1 lsl Yojson.Basic.Util.(member "cluster_bits" json |> to_int) + in + let cluster_list = + Yojson.Basic.Util.(member "data_clusters" json |> to_list |> List.map to_int) + in + (cluster_size, cluster_list) + let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) (_size : Int64.t) = + let qcow_of_device = + Vhd_tool_wrapper.backing_file_of_device ~driver:"qcow2" + in + let qcow_path = qcow_of_device path in + + (* If VDI is backed by QCOW, parse the header to determine nonzero clusters + to avoid reading all of the raw disk *) + let input_fd = Option.map read_header qcow_path in + + (* Parse the header of the VDI we are diffing against as well *) + let relative_to_qcow_path = Option.bind relative_to qcow_of_device in + let diff_fd = Option.map read_header relative_to_qcow_path in + + let unique_string = Uuidx.(to_string (make ())) in let args = - [path] @ match relative_to with None -> [] | Some vdi -> ["--diff"; vdi] + [path] + @ (match relative_to with None -> [] | Some vdi -> ["--diff"; vdi]) + @ ( match relative_to_qcow_path with + | None -> + [] + | Some _ -> + ["--json-header-diff"; unique_string] + ) + @ match qcow_path with None -> [] | Some _ -> ["--json-header"] in let qcow_tool = !Xapi_globs.qcow_to_stdout in - run_qcow_tool qcow_tool progress_cb args ~output_fd:unix_fd + let replace_fds = Option.map (fun fd -> [(unique_string, fd)]) diff_fd in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> + run_qcow_tool qcow_tool progress_cb args ?input_fd ~output_fd:unix_fd + ?replace_fds + ) + (fun () -> + Option.iter Unix.close input_fd ; + Option.iter Unix.close diff_fd + ) diff --git a/ocaml/xapi/qcow_tool_wrapper.mli b/ocaml/xapi/qcow_tool_wrapper.mli index 51c3c626567..c1c4a6426af 100644 --- a/ocaml/xapi/qcow_tool_wrapper.mli +++ b/ocaml/xapi/qcow_tool_wrapper.mli @@ -23,3 +23,5 @@ val send : -> string -> int64 -> unit + +val parse_header : string -> int * int list diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 0fe9383c737..b22c02b8485 100644 --- a/ocaml/xapi/server_helpers.ml +++ 
b/ocaml/xapi/server_helpers.ml @@ -179,18 +179,57 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name ~marshaller op_fn ) () - ) ; + ) (* Return task id immediately *) - Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) in - match sync_ty with - | `Sync -> - sync () - | `Async -> - let need_complete = not (Context.forwarded_task __context) in - async ~need_complete - | `InternalAsync -> - async ~need_complete:true + let user_agent_option = http_req.user_agent in + let peek_result = + Option.bind user_agent_option (fun user_agent -> + Rate_limit.Bucket_table.peek Xapi_rate_limit.bucket_table ~user_agent + ) + in + let handle_request () = + match sync_ty with + | `Sync -> + sync () + | `Async -> + let need_complete = not (Context.forwarded_task __context) in + async ~need_complete ; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) + | `InternalAsync -> + async ~need_complete:true ; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) + in + match user_agent_option with + | Some user_agent -> ( + match peek_result with + | Some tokens -> ( + D.debug + "Bucket table: Expecting to consume 1 token from user_agent %s \ + with available tokens %f" + user_agent tokens ; + match sync_ty with + | `Sync -> + Rate_limit.Bucket_table.submit_sync Xapi_rate_limit.bucket_table + ~user_agent ~callback:sync 1. + | `Async -> + let need_complete = not (Context.forwarded_task __context) in + Rate_limit.Bucket_table.submit Xapi_rate_limit.bucket_table + ~user_agent + ~callback:(fun () -> async ~need_complete) + 1. ; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) + | `InternalAsync -> + async ~need_complete:true ; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) + ) + | None -> + D.debug "%s not registered, not throttling" user_agent ; + handle_request () + ) + | None -> + D.debug "Bucket table: user_agent was None, not throttling" ; + handle_request () (* regardless of forwarding, we are expected to complete the task *) diff --git a/ocaml/xapi/vhd_tool_wrapper.ml b/ocaml/xapi/vhd_tool_wrapper.ml index 73f25785eb8..f3f791fe251 100644 --- a/ocaml/xapi/vhd_tool_wrapper.ml +++ b/ocaml/xapi/vhd_tool_wrapper.ml @@ -149,25 +149,27 @@ let find_backend_device path = raise Not_found with _ -> None -(** [vhd_of_device path] returns (Some vhd) where 'vhd' is the vhd leaf backing a particular device [path] or None. - [path] may either be a blktap2 device *or* a blkfront device backed by a blktap2 device. If the latter then - the script must be run in the same domain as blkback. *) -let vhd_of_device path = +(** [backing_file_of_device path] returns (Some backing_file) where 'backing_file' + is the leaf backing a particular device [path] (with a driver of type + [driver] or None. [path] may either be a blktap2 device *or* a blkfront + device backed by a blktap2 device. If the latter then the script must be + run in the same domain as blkback. 
*) +let backing_file_of_device path ~driver = let tapdisk_of_path path = try match Tapctl.of_device (Tapctl.create ()) path with - | _, _, Some ("vhd", vhd) -> - Some vhd + | _, _, Some (typ, backing_file) when typ = driver -> + Some backing_file | _, _, _ -> raise Not_found with | Tapctl.Not_blktap -> ( debug "Device %s is not controlled by blktap" path ; - (* Check if it is a VHD behind a NBD deivce *) + (* Check if it is a [driver] behind a NBD device *) Stream_vdi.(get_nbd_device path |> image_behind_nbd_device) |> function - | Some ("vhd", vhd) -> - debug "%s is a VHD behind NBD device %s" vhd path ; - Some vhd + | Some (typ, backing_file) when typ = driver -> + debug "%s is a %s behind NBD device %s" backing_file driver path ; + Some backing_file | _ -> None ) @@ -182,6 +184,7 @@ let vhd_of_device path = let send progress_cb ?relative_to (protocol : string) (dest_format : string) (s : Unix.file_descr) (path : string) (size : Int64.t) (prefix : string) = + let vhd_of_device = backing_file_of_device ~driver:"vhd" in let s' = Uuidx.(to_string (make ())) in let source_format, source = match (Stream_vdi.get_nbd_device path, vhd_of_device path, relative_to) with diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 785950c384e..d63844ceb59 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -884,11 +884,9 @@ let listen_unix_socket sock_path = Unixext.mkdir_safe (Filename.dirname sock_path) 0o700 ; Unixext.unlink_safe sock_path ; let domain_sock = Xapi_http.bind (Unix.ADDR_UNIX sock_path) in - ignore - (Http_svr.start - ~conn_limit:!Xapi_globs.conn_limit_unix - Xapi_http.server domain_sock - ) + Http_svr.start + ~conn_limit:!Xapi_globs.conn_limit_unix + Xapi_http.server domain_sock let set_stunnel_timeout () = try @@ -1169,6 +1167,10 @@ let server_init () = , [] , fun () -> report_tls_verification ~__context ) + ; ( "Registering rate limits" + , [Startup.OnlyMaster] + , fun () -> Xapi_rate_limit.register ~__context + ) ; ( "Remote requests" , [Startup.OnThread] , Remote_requests.handle_requests diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 5d4fe609b52..161273c83f9 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1133,6 +1133,8 @@ let xapi_requests_cgroup = let genisoimage_path = ref "/usr/bin/genisoimage" +let https_only = ref false + (* Event.{from,next} batching delays *) let make_batching name ~delay_before ~delay_between = let name = Printf.sprintf "%s_delay" name in @@ -1834,6 +1836,11 @@ let other_options = , (fun () -> string_of_int !max_span_depth) , "The maximum depth to which spans are recorded in a trace in Tracing" ) + ; ( "https-only-default" + , Arg.Set https_only + , (fun () -> string_of_bool !https_only) + , "Only expose HTTPS service, disable HTTP/80 in firewall when set to true" + ) ; ( "firewall-backend" , Arg.String (fun s -> diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index ee446592bb9..0689a00e386 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1029,7 +1029,9 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode - ~secure_boot ~software_version = + ~secure_boot ~software_version ~https_only ~numa_affinity_policy + ~latest_synced_updates_applied ~pending_guidances_full + ~pending_guidances_recommended = (* fail-safe. 
We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1064,7 +1066,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address (* no or multiple pools *) in Db.Host.create ~__context ~ref:host ~current_operations:[] - ~allowed_operations:[] ~https_only:false ~software_version ~enabled:false + ~allowed_operations:[] ~https_only ~software_version ~enabled:false ~aPI_version_major:Datamodel_common.api_version_major ~aPI_version_minor:Datamodel_common.api_version_minor ~aPI_version_vendor:Datamodel_common.api_version_vendor @@ -1073,8 +1075,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~name_label ~uuid ~other_config:[] ~capabilities:[] ~cpu_configuration:[] (* !!! FIXME hard coding *) ~cpu_info:[] ~chipset_info ~memory_overhead:0L - ~sched_policy:"credit" (* !!! FIXME hard coding *) - ~numa_affinity_policy:`default_policy + ~sched_policy:"credit" (* !!! FIXME hard coding *) ~numa_affinity_policy ~supported_bootloaders:(List.map fst Xapi_globs.supported_bootloaders) ~suspend_image_sr:Ref.null ~crash_dump_sr:Ref.null ~logging:[] ~hostname ~address ~metrics ~license_params ~boot_free_mem:0L ~ha_statefiles:[] @@ -1091,8 +1092,8 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~control_domain:Ref.null ~updates_requiring_reboot:[] ~iscsi_iqn:"" ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[] ~tls_verification_enabled ~last_software_update ~last_update_hash - ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown - ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~ssh_enabled + ~recommended_guidances:[] ~latest_synced_updates_applied + ~pending_guidances_recommended ~pending_guidances_full ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ~secure_boot ; (* If the host we're creating is us, make sure its set to live *) diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index 316ee9f6edf..3260ff3166e 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -138,6 +138,11 @@ val create : -> ssh_auto_mode:bool -> secure_boot:bool -> software_version:(string * string) list + -> https_only:bool + -> numa_affinity_policy:API.host_numa_affinity_policy + -> latest_synced_updates_applied:API.latest_synced_updates_applied_state + -> pending_guidances_full:API.update_guidances_set + -> pending_guidances_recommended:API.update_guidances_set -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_http.ml b/ocaml/xapi/xapi_http.ml index 964983d8eda..ecd644825ef 100644 --- a/ocaml/xapi/xapi_http.ml +++ b/ocaml/xapi/xapi_http.ml @@ -351,25 +351,36 @@ let add_handler (name, handler) = failwith (Printf.sprintf "Unregistered HTTP handler: %s" name) in let check_rbac = Rbac.is_rbac_enabled_for_http_action name in - let h req ic context = + let rate_limit user_agent_opt handler () = + if List.mem name Datamodel.custom_rate_limit_http_actions then + handler () + else + match user_agent_opt with + | None -> + handler () + | Some user_agent -> + debug "Rate limiting handler %s with user_agent %s" name user_agent ; + Rate_limit.Bucket_table.submit Xapi_rate_limit.bucket_table + ~user_agent ~callback:handler 1.0 + in + let h req ic () = let client = Http_svr.(client_of_req_and_fd req ic |> Option.map 
string_of_client) in + let rate_limited_handler = rate_limit req.user_agent (handler req ic) in Debug.with_thread_associated ?client name (fun () -> try if check_rbac then ( try (* session and rbac checks *) - assert_credentials_ok name req - ~fn:(fun () -> handler req ic context) - ic + assert_credentials_ok name req ~fn:rate_limited_handler ic with e -> debug "Leaving RBAC-handler in xapi_http after: %s" (ExnHelper.string_of_exn e) ; raise e ) else (* no rbac checks *) - handler req ic context + rate_limited_handler () with Api_errors.Server_error (name, params) as e -> error "Unhandled Api_errors.Server_error(%s, [ %s ])" name (String.concat "; " params) ; diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index cbb39e28adb..592f53615e5 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1033,6 +1033,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : create_or_get_sr_on_master __context rpc session_id (my_local_cache_sr, my_local_cache_sr_rec) in + debug "Creating host object on master" ; let ref = Client.Host.create ~rpc ~session_id ~uuid:my_uuid @@ -1060,6 +1061,13 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~ssh_auto_mode:host.API.host_ssh_auto_mode ~secure_boot:host.API.host_secure_boot ~software_version:host.API.host_software_version + ~https_only:host.API.host_https_only + ~numa_affinity_policy:host.API.host_numa_affinity_policy + ~latest_synced_updates_applied: + host.API.host_latest_synced_updates_applied + ~pending_guidances_full:host.API.host_pending_guidances_full + ~pending_guidances_recommended: + host.API.host_pending_guidances_recommended in (* Copy other-config into newly created host record: *) no_exn diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml new file mode 100644 index 00000000000..7a462aec316 --- /dev/null +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -0,0 +1,69 @@ +(* + * Copyright (C) Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) +module D = Debug.Make (struct let name = "xapi_rate_limit" end) + +open Rate_limit + +let bucket_table = Bucket_table.create () + +let create ~__context ~client_id ~burst_size ~fill_rate = + if Bucket_table.mem bucket_table ~user_agent:client_id then + raise + Api_errors.( + Server_error + ( map_duplicate_key + , ["client_id"; client_id; "client_id already registered"] + ) + ) ; + let uuid = Uuidx.make () in + let ref = Ref.make () in + let add_bucket_succeeded = + Bucket_table.add_bucket bucket_table ~user_agent:client_id ~burst_size + ~fill_rate + in + match add_bucket_succeeded with + | true -> + Db.Rate_limit.create ~__context ~ref ~uuid:(Uuidx.to_string uuid) + ~client_id ~burst_size ~fill_rate ; + ref + | false -> + raise + Api_errors.( + Server_error + ( invalid_value + , [ + "fill_rate" + ; string_of_float fill_rate + ; "Fill rate cannot be 0 or negative" + ] + ) + ) + +let destroy ~__context ~self = + let record = Db.Rate_limit.get_record ~__context ~self in + Bucket_table.delete_bucket bucket_table + ~user_agent:record.rate_limit_client_id ; + Db.Rate_limit.destroy ~__context ~self + +let register ~__context = + List.iter + (fun (_, bucket) -> + ignore + (Bucket_table.add_bucket bucket_table + ~fill_rate:bucket.API.rate_limit_fill_rate + ~user_agent:bucket.API.rate_limit_client_id + ~burst_size:bucket.API.rate_limit_burst_size + ) + ) + (Db.Rate_limit.get_all_records ~__context) diff --git a/ocaml/xapi/xapi_rate_limit.mli b/ocaml/xapi/xapi_rate_limit.mli new file mode 100644 index 00000000000..beacea3054e --- /dev/null +++ b/ocaml/xapi/xapi_rate_limit.mli @@ -0,0 +1,27 @@ +(* + * Copyright (C) Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +val bucket_table : Rate_limit.Bucket_table.t + +val create : + __context:Context.t + -> client_id:string + -> burst_size:float + -> fill_rate:float + -> [`Rate_limit] Ref.t + +val destroy : __context:Context.t -> self:[`Rate_limit] API.Ref.t -> unit + +val register : __context:Context.t -> unit +(** Create token buckets in the bucket table for each record in the database *) diff --git a/ocaml/xapi/xapi_session.mli b/ocaml/xapi/xapi_session.mli index 10baf03abc2..25ffa3fa366 100644 --- a/ocaml/xapi/xapi_session.mli +++ b/ocaml/xapi/xapi_session.mli @@ -15,8 +15,6 @@ * @group XenAPI functions *) -(** {2 (Fill in Title!)} *) - (* TODO: consider updating sm_exec.ml and removing login_no_password from this mli *) val login_no_password : __context:Context.t diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 14290421fb4..6d1ce9a537f 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -909,6 +909,9 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state = (* Blank the requires_reboot flag *) Db.VM.set_requires_reboot ~__context ~self ~value:false ; remove_pending_guidance ~__context ~self ~value:`restart_device_model ; + (* Always remove RestartVM guidance when VM becomes Halted: VM.start_on checks + host version via assert_host_has_highest_version_in_pool, preventing the VM + from starting on an outdated host, so it will necessarily start on an up-to-date host *) remove_pending_guidance ~__context ~self ~value:`restart_vm ) ; (* Do not clear resident_on for VM and VGPU in a checkpoint operation *) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 9b12bcec5a6..0ea29ea4cf7 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2350,8 +2350,13 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = then ( Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self ~value:`restart_device_model ; - Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self - ~value:`restart_vm + (* Only remove RestartVM guidance if host is up-to-date with coordinator *) + if + Helpers.Checks.RPU.are_host_versions_same_on_master ~__context + ~host:localhost + then + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_vm ) ) ; create_guest_metrics_if_needed () ; diff --git a/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml b/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml index 683e1174b8d..45fbb5287c1 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml @@ -29,13 +29,56 @@ let ipmitool args = (* we connect to the local /dev/ipmi0 if available to read measurements from local BMC *) ipmitool_bin :: args |> String.concat " " -type discovery_error = Devices_missing +type discovery_error = + | Devices_missing + | Power_management_unavailable + | Unknown + | Command of string list * string * int + (** command, reason, IPMI 2.0 spec error code *) let discovery_error_to_string = function | Devices_missing -> "IPMI devices are missing" + | Power_management_unavailable -> + "Power management is not available" + | Command (command, reason, code) -> + Printf.sprintf "Command %s failed because %s (%x)" + (String.concat " " command) + reason code + | Unknown -> + "unknown" + +let result_of_exec_cmd ~default = function + | [], [] -> + default + | [], err :: _ -> + Error err + | oks, _ -> + Ok oks + +let get_dcmi_power_reading () = + let command = ["power"; "reading"] in + let read_out_line line = + (* example line: 
' Instantaneous power reading: 34 Watts' *) + try Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some + with _ -> None + in + let read_err_line line = + (* example line: ' DCMI request failed because: Invalid command (c1)' *) + try + Scanf.sscanf line " DCMI request failed because: %S@(%x)" + (fun reason code -> Some (Command (command, reason, code)) + ) + with _ -> None + in + Utils.exec_cmd + (module Process.D) + ~cmdstring:(ipmitool ("dcmi" :: command)) + ~read_out_line ~read_err_line + |> result_of_exec_cmd ~default:(Error Unknown) let discover () = + let ( let* ) = Result.bind in let read_out_line line = (* this code runs once on startup, logging all the output here will be useful for debugging *) D.debug "DCMI discover: %s" line ; @@ -54,22 +97,14 @@ let discover () = else None in - Utils.exec_cmd - (module Process.D) - ~cmdstring:(ipmitool ["dcmi"; "discover"]) - ~read_out_line ~read_err_line - -let get_dcmi_power_reading () = - let read_out_line line = - (* example line: ' Instantaneous power reading: 34 Watts' *) - try Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some - with _ -> None + let* (_ : unit list) = + Utils.exec_cmd + (module Process.D) + ~cmdstring:(ipmitool ["dcmi"; "discover"]) + ~read_out_line ~read_err_line + |> result_of_exec_cmd ~default:(Error Power_management_unavailable) in - let read_err_line _ = None in - Utils.exec_cmd - (module Process.D) - ~cmdstring:(ipmitool ["dcmi"; "power"; "reading"]) - ~read_out_line ~read_err_line + get_dcmi_power_reading () let gen_dcmi_power_reading value = ( Rrd.Host @@ -81,7 +116,7 @@ let gen_dcmi_power_reading value = let generate_dss () = match get_dcmi_power_reading () with - | watts :: _, _ -> + | Ok (watts :: _) -> [gen_dcmi_power_reading watts] | _ -> [] @@ -89,15 +124,11 @@ let generate_dss () = let _ = initialise () ; match discover () with - | () :: _, _ -> + | Ok _ -> D.info "IPMI DCMI power reading is available" ; main_loop ~neg_shift:0.5 ~target:(Reporter.Local 1) ~protocol:Rrd_interface.V2 ~dss_f:generate_dss - | [], errs -> - let reason = - List.nth_opt errs 0 - |> Option.map discovery_error_to_string - |> Option.value ~default:"unknown" - in + | Error reason -> + let reason = discovery_error_to_string reason in D.warn "IPMI DCMI power readings not available, stopping. Reason: %s" reason diff --git a/ocaml/xenopsd/cli/main.ml b/ocaml/xenopsd/cli/main.ml index a8111444880..54842be4b31 100644 --- a/ocaml/xenopsd/cli/main.ml +++ b/ocaml/xenopsd/cli/main.ml @@ -317,6 +317,25 @@ let resume_cmd = , Cmd.info "resume" ~sdocs:_common_options ~doc ~man ) +let fast_resume_cmd = + let vm = vm_arg "resumed" in + let doc = "fast-resume a VM" in + let man = + [ + `S "DESCRIPTION" + ; `P "Fast-resume a VM." + ; `P + {|The suspended domain will be resumed + and the VM will be left in a Running state.|} + ; `S "ERRORS" + ; `P "Something about the current power state." 
+ ] + @ help + in + ( Term.(ret (const Xn.fast_resume $ common_options_t $ vm)) + , Cmd.info "fast-resume" ~sdocs:_common_options ~doc ~man + ) + let pause_cmd = let vm = vm_arg "paused" in let doc = "pause a VM" in @@ -491,6 +510,7 @@ let cmds = ; reboot_cmd ; suspend_cmd ; resume_cmd + ; fast_resume_cmd ; pause_cmd ; unpause_cmd ; import_cmd diff --git a/ocaml/xenopsd/cli/xn.ml b/ocaml/xenopsd/cli/xn.ml index 24fecb9cf09..03c8db2e31c 100644 --- a/ocaml/xenopsd/cli/xn.ml +++ b/ocaml/xenopsd/cli/xn.ml @@ -873,6 +873,15 @@ let suspend _copts disk x = let suspend copts disk x = diagnose_error (need_vm (suspend copts disk) x) +let fast_resume _copts x = + let open Vm in + let vm, _ = find_by_name x in + Client.VM.fast_resume dbg vm.id + |> wait_for_task dbg + |> success_task ignore_task + +let fast_resume copts x = diagnose_error (need_vm (fast_resume copts) x) + let resume _copts disk x = (* We don't currently store where the suspend image is *) let disk = diff --git a/ocaml/xenopsd/cli/xn.mli b/ocaml/xenopsd/cli/xn.mli index 0acd3551e09..615f5c868b2 100644 --- a/ocaml/xenopsd/cli/xn.mli +++ b/ocaml/xenopsd/cli/xn.mli @@ -47,6 +47,9 @@ val resume : -> string option -> [> `Error of bool * string | `Ok of unit] +val fast_resume : + 'a -> string option -> [> `Error of bool * string | `Ok of unit] + val console_connect : 'a -> string option -> [> `Error of bool * string | `Ok of unit] diff --git a/ocaml/xenopsd/lib/softaffinity.ml b/ocaml/xenopsd/lib/softaffinity.ml index 1e7231506da..10fbdbea786 100644 --- a/ocaml/xenopsd/lib/softaffinity.ml +++ b/ocaml/xenopsd/lib/softaffinity.ml @@ -39,7 +39,7 @@ let plan host nodes ~vm = (Fmt.to_to_string NUMAResource.pp_dump allocated) (Fmt.to_to_string NUMARequest.pp_dump remaining) avg ; - if remaining.NUMARequest.memory > 0L || remaining.NUMARequest.vcpus > 0 then + if not (NUMARequest.fits remaining NUMAResource.empty) then (* [vm] doesn't fit on these nodes *) None else diff --git a/ocaml/xenopsd/lib/topology.ml b/ocaml/xenopsd/lib/topology.ml index a2cd401a0cc..f4291ec06f3 100644 --- a/ocaml/xenopsd/lib/topology.ml +++ b/ocaml/xenopsd/lib/topology.ml @@ -28,19 +28,20 @@ module CPUSet = struct end module NUMAResource = struct - type t = {affinity: CPUSet.t; memfree: int64} + type t = {affinity: CPUSet.t; cores: int; memfree: int64} - let make ~affinity ~memfree = + let make ~affinity ~cores ~memfree = if memfree < 0L then invalid_arg (Printf.sprintf "NUMAResource: memory cannot be negative: %Ld" memfree) ; - {affinity; memfree} + {affinity; cores; memfree} - let empty = {affinity= CPUSet.empty; memfree= 0L} + let empty = {affinity= CPUSet.empty; cores= 0; memfree= 0L} let union a b = make ~affinity:(CPUSet.union a.affinity b.affinity) + ~cores:(a.cores + b.cores) ~memfree:(Int64.add a.memfree b.memfree) let min_memory r1 r2 = {r1 with memfree= min r1.memfree r2.memfree} @@ -50,29 +51,43 @@ module NUMAResource = struct Dump.record [ Dump.field "affinity" (fun t -> t.affinity) CPUSet.pp_dump + ; Dump.field "cores" (fun t -> t.cores) int ; Dump.field "memfree" (fun t -> t.memfree) int64 ] ) end module NUMARequest = struct - type t = {memory: int64; vcpus: int} + type t = {memory: int64; vcpus: int; cores: int} - let make ~memory ~vcpus = + let make ~memory ~vcpus ~cores = if Int64.compare memory 0L < 0 then invalid_arg (Printf.sprintf "NUMARequest: memory must be > 0: %Ld" memory) ; if vcpus < 0 then invalid_arg (Printf.sprintf "vcpus cannot be negative: %d" vcpus) ; - {memory; vcpus} + if cores < 0 then + invalid_arg (Printf.sprintf "cores cannot be 
negative: %d" cores) ; + {memory; vcpus; cores} let fits requested available = + (* this is a hard constraint: a VM cannot boot if it doesn't have + enough memory *) Int64.compare requested.memory available.NUMAResource.memfree <= 0 + (* this is a soft constraint: a VM can still boot if the (soft) affinity + constraint is not met, although if hard affinity is used this is a hard + constraint too *) && CPUSet.(cardinal available.NUMAResource.affinity >= requested.vcpus) + && (* this is an optional constraint: it is desirable to be able to leave + hyperthread siblings idle, when the system is not busy. + However requested.cores can also be 0. + *) + available.NUMAResource.cores >= requested.cores let shrink a b = make ~memory:(max 0L (Int64.sub a.memory b.NUMAResource.memfree)) ~vcpus:(max 0 (a.vcpus - CPUSet.cardinal b.NUMAResource.affinity)) + ~cores:(max 0 (a.cores - b.NUMAResource.cores)) let pp_dump = Fmt.( @@ -80,6 +95,7 @@ module NUMARequest = struct [ Dump.field "memory" (fun t -> t.memory) int64 ; Dump.field "vcpus" (fun t -> t.vcpus) int + ; Dump.field "cores" (fun t -> t.cores) int ] ) end @@ -134,6 +150,7 @@ module NUMA = struct distances: int array array ; cpu_to_node: node array ; node_cpus: CPUSet.t array + ; node_cores: int array ; all: CPUSet.t ; node_usage: int array (** Usage across nodes is meant to be balanced when choosing candidates for a VM *) @@ -203,7 +220,7 @@ module NUMA = struct |> seq_sort ~cmp:dist_cmp |> Seq.map (fun ((_, avg), nodes) -> (avg, Seq.map (fun n -> Node n) nodes)) - let make ~distances ~cpu_to_node = + let make ~distances ~cpu_to_node ~node_cores = let ( let* ) = Option.bind in let node_cpus = Array.map (fun _ -> CPUSet.empty) distances in @@ -256,6 +273,7 @@ module NUMA = struct distances ; cpu_to_node= Array.map node_of_int cpu_to_node ; node_cpus + ; node_cores ; all ; node_usage= Array.map (fun _ -> 0) distances ; candidates @@ -265,6 +283,8 @@ module NUMA = struct let cpuset_of_node t (Node i) = t.node_cpus.(i) + let coreset_of_node t (Node i) = t.node_cores.(i) + let node_of_cpu t i = t.cpu_to_node.(i) let nodes t = @@ -278,8 +298,8 @@ module NUMA = struct {t with node_cpus; all} let resource t node ~memory = - let affinity = cpuset_of_node t node in - NUMAResource.make ~affinity ~memfree:memory + let affinity = cpuset_of_node t node and cores = coreset_of_node t node in + NUMAResource.make ~affinity ~cores ~memfree:memory let candidates t = t.candidates @@ -316,6 +336,7 @@ module NUMA = struct ; Dump.field "node_cpus" (fun t -> t.node_cpus) (Dump.array CPUSet.pp_dump) + ; Dump.field "node_cores" (fun t -> t.node_cores) (Dump.array int) ] ) end diff --git a/ocaml/xenopsd/lib/topology.mli b/ocaml/xenopsd/lib/topology.mli index f1bd6f9f569..8211ffa4ec2 100644 --- a/ocaml/xenopsd/lib/topology.mli +++ b/ocaml/xenopsd/lib/topology.mli @@ -29,10 +29,21 @@ end module NUMAResource : sig (** A NUMA node providing CPU and memory resources *) - type t = private {affinity: CPUSet.t; memfree: int64} - - val make : affinity:CPUSet.t -> memfree:int64 -> t - (** [make ~affinity ~memfree] constructs a resource requiring affinity to be + type t = private { + affinity: CPUSet.t + (** logical CPUs. This is the smallest unit of scheduling available, + e.g. a hyperthread. + This can be used directly as a soft-, or hard-affinity mask. 
*) + ; cores: int + (** number of physical cores fully contained in this node, each containing threads_per_core CPUs, + although some of them may be disabled if [smt=false] *) + ; memfree: int64 + (** free (not reserved, not in use) memory available on this NUMA + node or set of NUMA nodes *) + } + + val make : affinity:CPUSet.t -> cores:int -> memfree:int64 -> t + (** [make ~affinity ~cores ~memfree] constructs a resource requiring affinity to be non-empty and memfree to be > 0. A zero request is allowed due to [shrink]. * *) @@ -51,11 +62,11 @@ end module NUMARequest : sig (** A (VM) requesting resources *) - type t = private {memory: int64; vcpus: int} + type t = private {memory: int64; vcpus: int; cores: int} - val make : memory:int64 -> vcpus:int -> t - (**[make ~memory ~vcpus] constructs a request. [memory] and [vcpus] must be - strictly positive. *) + val make : memory:int64 -> vcpus:int -> cores:int -> t + (**[make ~memory ~vcpus ~cores] constructs a request. [memory], [vcpus] and + [cores] must be strictly positive. *) val fits : t -> NUMAResource.t -> bool (** [fits requested available] checks whether the [available] resources can @@ -78,8 +89,12 @@ module NUMA : sig (** A NUMA node index. Distinct from an int to avoid mixing with CPU numbers *) type node = private Node of int - val make : distances:int array array -> cpu_to_node:int array -> t option - (** [make distances cpu_to_node] stores the topology. [distances] is a square + val make : + distances:int array array + -> cpu_to_node:int array + -> node_cores:int array + -> t option + (** [make distances cpu_to_node node_cores] stores the topology. [distances] is a square matrix [d] where [d.(i).(j)] is an approximation to how much slower it is to access memory from node [j] when running on node [i]. Distances are normalized to 10, [d.(i).(i)] must equal to 10, and all values must be >= @@ -94,6 +109,7 @@ module NUMA : sig in Xen and then to -1 by the bindings). [cpu_to_nodes.(i)] = NUMA node of CPU [i] + [node_cores.(i)] = number of cores on NUMA node [i] NUMA nodes without any CPUs are accepted (to handle hard affinities). 
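The new `cores` field makes physical-core capacity an additional, optional constraint on top of the existing memory and vCPU-affinity checks in `fits`. A small hypothetical sketch of the resulting behaviour, assuming `CPUSet` exposes the standard `of_list` from `Set.Make` (not visible in this hunk):

```ocaml
(* One NUMA node with 8 logical CPUs spread over 4 physical cores and
   16 GiB of free memory. *)
let node =
  NUMAResource.make
    ~affinity:(CPUSet.of_list [0; 1; 2; 3; 4; 5; 6; 7])
    ~cores:4 ~memfree:17_179_869_184L

(* A request that wants each of its 6 vCPUs on its own physical core. *)
let strict = NUMARequest.make ~memory:4_294_967_296L ~vcpus:6 ~cores:6

(* The same request with the core constraint disabled (cores = 0), which is
   what the Any and Prio_mem_only policies produce via
   cores_of_numa_affinity_policy in the xenops_server.ml hunk below. *)
let relaxed = NUMARequest.make ~memory:4_294_967_296L ~vcpus:6 ~cores:0

let () =
  (* 6 cores requested but only 4 available on the node *)
  assert (not (NUMARequest.fits strict node)) ;
  (* memory fits, 8 logical CPUs cover 6 vCPUs, and cores = 0 is never limiting *)
  assert (NUMARequest.fits relaxed node)
```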
diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 6a06b36ba14..54d528829ff 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -164,6 +164,7 @@ type atomic = (** takes suspend data, plus optionally vGPU state data *) | VM_restore of (Vm.id * data * data option) (** takes suspend data, plus optionally vGPU state data *) + | VM_fast_resume of Vm.id | VM_delay of (Vm.id * float) (** used to suppress fast reboot loops *) | VM_rename of (Vm.id * Vm.id * rename_when) | VM_import_metadata of (Vm.id * Metadata.t) @@ -279,6 +280,8 @@ let rec name_of_atomic = function "VM_save" | VM_restore _ -> "VM_restore" + | VM_fast_resume _ -> + "VM_fast_resume" | VM_delay _ -> "VM_delay" | VM_rename _ -> @@ -2377,6 +2380,9 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) let extras = [] in B.VM.restore t progress_callback (VM_DB.read_exn id) vbds vifs data vgpu_data extras + | VM_fast_resume id -> + debug "VM.fast_resume %s" id ; + B.VM.resume t (VM_DB.read_exn id) | VM_delay (id, t) -> debug "VM %s: waiting for %.2f before next VM action" id t ; Thread.delay t @@ -2669,6 +2675,7 @@ and trigger_cleanup_after_failure_atom op t = | VM_s3resume id | VM_save (id, _, _, _) | VM_restore (id, _, _) + | VM_fast_resume id | VM_delay (id, _) | VM_softreboot id -> immediate_operation dbg id (VM_check_state id) @@ -3622,10 +3629,17 @@ let string_of_numa_affinity_policy = "best-effort" | Best_effort_hard -> "best-effort-hard" + | Prio_mem_only -> + "prio-mem-only" let affinity_of_numa_affinity_policy = let open Xenops_interface.Host in - function Any | Best_effort -> Soft | Best_effort_hard -> Hard + function + | Any | Best_effort | Prio_mem_only -> Soft | Best_effort_hard -> Hard + +let cores_of_numa_affinity_policy policy ~vcpus = + let open Xenops_interface.Host in + match policy with Any | Prio_mem_only -> 0 | _ -> vcpus module HOST = struct let stat _ dbg = @@ -3821,6 +3835,8 @@ module VM = struct let resume _ dbg id disk = queue_operation dbg id (VM_resume (id, Disk disk)) + let fast_resume _ dbg id = queue_operation dbg id (Atomic (VM_fast_resume id)) + let s3suspend _ dbg id = queue_operation dbg id (Atomic (VM_s3suspend id)) let s3resume _ dbg id = queue_operation dbg id (Atomic (VM_s3resume id)) @@ -4402,6 +4418,7 @@ let _ = Server.VM.reboot (VM.reboot ()) ; Server.VM.suspend (VM.suspend ()) ; Server.VM.resume (VM.resume ()) ; + Server.VM.fast_resume (VM.fast_resume ()) ; Server.VM.s3suspend (VM.s3suspend ()) ; Server.VM.s3resume (VM.s3resume ()) ; Server.VM.export_metadata (VM.export_metadata ()) ; diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index e4a61bb9ac8..6cee8a58f05 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -159,6 +159,8 @@ module type S = sig -> string list -> unit + val resume : Xenops_task.task_handle -> Vm.t -> unit + val s3suspend : Xenops_task.task_handle -> Vm.t -> unit val s3resume : Xenops_task.task_handle -> Vm.t -> unit diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index 1a42aafafb4..d812910fd27 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -97,6 +97,8 @@ module VM = struct let restore _ _ _ _ _ _ _ = unimplemented __FUNCTION__ + let resume _ _ = unimplemented __FUNCTION__ + let s3suspend _ _ = unimplemented __FUNCTION__ let s3resume _ _ = unimplemented 
__FUNCTION__ diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index d4a08e92be7..c5242073237 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -70,6 +70,8 @@ let pvinpvh_xen_cmdline = ref "pv-shim console=xen" let numa_placement_compat = ref true +let numa_best_effort_prio_mem_only = ref false + (* O(N^2) operations, until we get a xenstore cache, so use a small number here *) let vm_guest_agent_xenstore_quota = ref 128 @@ -263,6 +265,13 @@ let options = , (fun () -> string_of_bool !numa_placement_compat) , "NUMA-aware placement of VMs (deprecated, use XAPI setting)" ) + ; ( "numa-best-effort-prio-mem-only" + , Arg.Bool (fun x -> numa_best_effort_prio_mem_only := x) + , (fun () -> string_of_bool !numa_best_effort_prio_mem_only) + , "Revert to the previous 'best effort' NUMA policy, where we only \ + filtered NUMA nodes based on available memory. Only use if there are \ + issues with the new best effort policy" + ) ; ( "pci-quarantine" , Arg.Bool (fun b -> pci_quarantine := b) , (fun () -> string_of_bool !pci_quarantine) diff --git a/ocaml/xenopsd/scripts/qemu-wrapper b/ocaml/xenopsd/scripts/qemu-wrapper index b1d811e7126..554590d0713 100644 --- a/ocaml/xenopsd/scripts/qemu-wrapper +++ b/ocaml/xenopsd/scripts/qemu-wrapper @@ -65,16 +65,6 @@ def restrict_fsize(): limit = 1024 * 1024 setrlimit(RLIMIT_FSIZE, (limit, limit)) -def enable_core_dumps(): - - limit = 64 * 1024 * 1024 - oldlimits = getrlimit(RLIMIT_CORE) - hardlimit = oldlimits[1] - if limit > hardlimit: - hardlimit = limit - setrlimit(RLIMIT_CORE, (limit, hardlimit)) - return limit - def xenstore_read(path): return xenstore.read("", path) @@ -117,9 +107,6 @@ def prepare_exec(): print("Warning: writing pid to '%s' cgroup.procs file: %s" \ % (cgroup_slice, e)) - core_dump_limit = enable_core_dumps() - print("core dump limit: %d" % core_dump_limit) - if not file_serial: restrict_fsize() diff --git a/ocaml/xenopsd/test/test_topology.ml b/ocaml/xenopsd/test/test_topology.ml index d9945ed8018..f3d40d0f42d 100644 --- a/ocaml/xenopsd/test/test_topology.ml +++ b/ocaml/xenopsd/test/test_topology.ml @@ -58,25 +58,35 @@ module Distances = struct (numa, distances) end -let make_numa_common ~cores_per_numa (distances : Distances.t) = +let make_numa_common ~logical_per_physical ~cores_per_numa + (distances : Distances.t) = + (* cores_per_numa refers to logical cores, i.e. 
cpus *) let numa, distances = distances in let cpu_to_node = - Array.init (cores_per_numa * numa) (fun core -> core / cores_per_numa) + Array.init (cores_per_numa * numa) (fun cpu -> cpu / cores_per_numa) + and node_cores = + (* core here refers to physical *) + Array.init numa (fun _ -> cores_per_numa / logical_per_physical) in Option.map (fun d -> (cores_per_numa * numa, d)) - (NUMA.make ~distances ~cpu_to_node) + (NUMA.make ~distances ~cpu_to_node ~node_cores) let make_numa ~numa ~cores = let cores_per_numa = cores / numa in - match make_numa_common ~cores_per_numa (Distances.example numa) with + match + make_numa_common ~logical_per_physical:2 ~cores_per_numa + (Distances.example numa) + with | None -> Alcotest.fail "Synthetic matrix can't fail to load" | Some d -> d let make_numa_amd ~cores_per_numa = - match make_numa_common ~cores_per_numa Distances.opteron with + match + make_numa_common ~cores_per_numa ~logical_per_physical:2 Distances.opteron + with | None -> Alcotest.fail "Synthetic matrix can't fail to load" | Some d -> @@ -206,7 +216,7 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () = |> List.fold_left (fun (costs_old, costs_new, plans) i -> D.debug "Planning VM %d" i ; - let vm = NUMARequest.make ~memory:mem ~vcpus:vm_cores in + let vm = NUMARequest.make ~memory:mem ~vcpus:vm_cores ~cores:0 in match Softaffinity.plan h nodes ~vm with | None -> Alcotest.fail "No NUMA plan" @@ -304,7 +314,9 @@ let distances_tests = in let test_of_spec (name, distances, expected) = let test () = - let numa_t = make_numa_common ~cores_per_numa:1 distances in + let numa_t = + make_numa_common ~logical_per_physical:1 ~cores_per_numa:1 distances + in match (expected, numa_t) with | None, None -> () diff --git a/ocaml/xenopsd/xc/device.ml b/ocaml/xenopsd/xc/device.ml index 73f136feca4..9beeecf436b 100644 --- a/ocaml/xenopsd/xc/device.ml +++ b/ocaml/xenopsd/xc/device.ml @@ -2774,7 +2774,7 @@ module Backend = struct ] (* 4 and 5 are NICs, and we can only have two, 6 is platform *) - let extra_args = ["-device"; "nvme,serial=nvme0,id=nvme0,addr=7"] + let extra_args = ["-device"; "nvme,serial=nvme0,mdts=9,id=nvme0,addr=7"] end module XenPV = struct let addr ~xs:_ ~domid:_ _ ~nics:_ = 6 end diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 4af94d7b96c..6c65d467f33 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -908,8 +908,18 @@ let numa_hierarchy = lazy (let xcext = get_handle () in let distances = (numainfo xcext).distances in - let cpu_to_node = cputopoinfo xcext |> Array.map (fun t -> t.node) in - NUMA.make ~distances ~cpu_to_node + let topoinfo = cputopoinfo xcext in + let core t = t.core and node t = t.node in + let cpu_to_node = topoinfo |> Array.map node + and node_cores = + let module IntSet = Set.Make (Int) in + let a = Array.make (Array.length distances) IntSet.empty in + Array.iter + (fun t -> a.(node t) <- IntSet.add (core t) a.(node t)) + topoinfo ; + Array.map IntSet.cardinal a + in + NUMA.make ~distances ~cpu_to_node ~node_cores ) let numa_mutex = Mutex.create () @@ -935,40 +945,48 @@ let set_affinity = function | Xenops_server.Soft -> Xenctrlext.vcpu_setaffinity_soft -let numa_placement domid ~vcpus ~memory affinity = +let numa_placement domid ~vcpus ~cores ~memory affinity = let open Xenctrlext in let open Topology in with_lock numa_mutex (fun () -> let ( let* ) = Option.bind in let xcext = get_handle () in let* host = Lazy.force numa_hierarchy in - let numa_meminfo = (numainfo xcext).memory |> Array.to_list in + 
let numa_meminfo = (numainfo xcext).memory |> Array.to_seq in let nodes = - ListLabels.map2 - (NUMA.nodes host |> List.of_seq) - numa_meminfo - ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) + Seq.map2 + (fun node m -> NUMA.resource host node ~memory:m.memfree) + (NUMA.nodes host) numa_meminfo in - let vm = NUMARequest.make ~memory ~vcpus in + let vm = NUMARequest.make ~memory ~vcpus ~cores in let nodea = match !numa_resources with | None -> - Array.of_list nodes + Array.of_seq nodes | Some a -> - Array.map2 NUMAResource.min_memory (Array.of_list nodes) a + Array.map2 NUMAResource.min_memory (Array.of_seq nodes) a in numa_resources := Some nodea ; - let memory_plan = + let cpu_affinity, memory_plan = match Softaffinity.plan ~vm host nodea with | None -> D.debug "NUMA-aware placement failed for domid %d" domid ; - [] + (None, []) | Some (cpu_affinity, mem_plan) -> + (Some cpu_affinity, mem_plan) + in + let set_vcpu_affinity = function + | None -> + D.debug "%s: not setting vcpu affinity for domain %d" __FUNCTION__ + domid + | Some cpu_affinity -> + D.debug "%s: setting vcpu affinity for domain %d: %s" __FUNCTION__ + domid + (Fmt.to_to_string CPUSet.pp_dump cpu_affinity) ; let cpus = CPUSet.to_mask cpu_affinity in for i = 0 to vcpus - 1 do set_affinity affinity xcext domid i cpus - done ; - mem_plan + done in (* Xen only allows a single node when using memory claims, or none at all. *) let* numa_node, node = @@ -985,6 +1003,7 @@ let numa_placement domid ~vcpus ~memory affinity = let nr_pages = Int64.div memory 4096L |> Int64.to_int in try Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; + set_vcpu_affinity cpu_affinity ; Some (node, memory) with | Xenctrlext.Not_available -> @@ -1080,16 +1099,17 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = match !Xenops_server.numa_placement with | Any -> None - | (Best_effort | Best_effort_hard) as pin -> + | (Best_effort | Best_effort_hard | Prio_mem_only) as pin -> log_reraise (Printf.sprintf "NUMA placement") (fun () -> if hard_affinity <> [] then ( D.debug "VM has hard affinity set, skipping NUMA optimization" ; None ) else - let affinity = - Xenops_server.affinity_of_numa_affinity_policy pin + let affinity = Xenops_server.affinity_of_numa_affinity_policy pin + and cores = + Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in - numa_placement domid ~vcpus + numa_placement domid ~vcpus ~cores ~memory:(Int64.mul memory.xen_max_mib 1048576L) affinity |> Option.map fst @@ -1353,6 +1373,19 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn store_port local_stuff vm_stuff +let resume_post ~xc:_ ~xs domid = + let dom_path = xs.Xs.getdomainpath domid in + let store_mfn_s = xs.Xs.read (dom_path ^ "/store/ring-ref") in + let store_mfn = Nativeint.of_string store_mfn_s in + let store_port = int_of_string (xs.Xs.read (dom_path ^ "/store/port")) in + xs.Xs.introduce domid store_mfn store_port + +let resume (task : Xenops_task.task_handle) ~xc ~xs ~qemu_domid ~domain_type + domid = + Xenctrl.domain_resume_fast xc domid ; + resume_post ~xc ~xs domid ; + if domain_type = `hvm then Device.Dm.resume task ~xs ~qemu_domid domid + type suspend_flag = Live | Debug let dm_flags = diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index 40f154561a3..574782fdcec 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -242,6 +242,16 @@ val build : -> unit (** Restore a domain using 
the info provided *) +val resume : + Xenops_task.Xenops_task.task_handle + -> xc:Xenctrl.handle + -> xs:Ezxenstore_core.Xenstore.Xs.xsh + -> qemu_domid:int + -> domain_type:[`hvm | `pv | `pvh] + -> domid + -> unit +(** Fast resume *) + val restore : Xenops_task.Xenops_task.task_handle -> xc:Xenctrl.handle diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 3e7bd2a3584..8b4d0a4b40a 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3021,6 +3021,26 @@ module VM = struct Domain.shutdown ~xc ~xs di.Xenctrl.domid Domain.S3Suspend ) + let resume t vm = + on_domain t vm (fun xc xs task _vm di -> + let domid = di.Xenctrl.domid in + let qemu_domid = this_domid ~xs in + let domain_type = + match get_domain_type ~xs di with + | Vm.Domain_HVM -> + `hvm + | Vm.Domain_PV -> + `pv + | Vm.Domain_PVinPVH -> + `pvh + | Vm.Domain_PVH -> + `pvh + | Vm.Domain_undefined -> + failwith "undefined domain type: cannot resume" + in + Domain.resume task ~xc ~xs ~qemu_domid ~domain_type domid + ) + let s3resume t vm = (* XXX: TODO: monitor the guest's response; track the s3 state *) on_domain t vm (fun xc _xs _task _vm di -> @@ -5385,7 +5405,13 @@ let init () = ) ; Device.Backend.init () ; Xenops_server.default_numa_affinity_policy := - if !Xenopsd.numa_placement_compat then Best_effort else Any ; + if !Xenopsd.numa_placement_compat then + if !Xenopsd.numa_best_effort_prio_mem_only then + Prio_mem_only + else + Best_effort + else + Any ; info "Default NUMA affinity policy is '%s'" Xenops_server.(string_of_numa_affinity_policy !default_numa_affinity_policy) ; Xenops_server.numa_placement := !Xenops_server.default_numa_affinity_policy ; diff --git a/opam/qcow-stream-tool.opam b/opam/qcow-stream-tool.opam index 8090aec7a3f..c4a01535780 100644 --- a/opam/qcow-stream-tool.opam +++ b/opam/qcow-stream-tool.opam @@ -10,6 +10,7 @@ depends: [ "dune" {>= "3.20"} "qcow-stream" "cmdliner" + "yojson" "odoc" {with-doc} ] build: [ diff --git a/opam/rate-limit.opam b/opam/rate-limit.opam new file mode 100644 index 00000000000..e5114dc41fb --- /dev/null +++ b/opam/rate-limit.opam @@ -0,0 +1,31 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "Simple token bucket-based rate-limiting" +maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" +homepage: "https://xapi-project.github.io/" +bug-reports: "https://github.com/xapi-project/xen-api/issues" +depends: [ + "dune" {>= "3.20"} + "ocaml" {>= "4.12"} + "xapi-log" + "xapi-stdext-unix" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/xapi-project/xen-api.git" +x-maintenance-intent: ["(latest)"] diff --git a/opam/uuid.opam b/opam/uuid.opam index 2fbe23bbd56..2cb6905f12d 100644 --- a/opam/uuid.opam +++ b/opam/uuid.opam @@ -1,30 +1,35 @@ # This file is generated by dune, edit dune-project instead -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -x-maintenance-intent: ["(latest)"] opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" +synopsis: "Library used by xapi to generate database UUIDs" +description: + "This library allows xapi to use UUIDs with phantom types to avoid mixing UUIDs from different classes of objects. It's based on `uuidm`." 
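
(Aside on the description above, not part of the opam metadata: a hedged sketch of what "UUIDs with phantom types" buys. The module and function names below are illustrative only, not the library's actual interface; the only real API used is uuidm's `Uuidm.v4_gen` and `Uuidm.to_string`.)

```ocaml
(* Sketch only: a phantom type parameter stops UUIDs of different API classes
   from being mixed up at compile time. *)
module Uuid_demo : sig
  type 'a t

  val make : unit -> 'a t

  val to_string : 'a t -> string
end = struct
  type 'a t = Uuidm.t

  let state = Random.State.make_self_init ()

  let make () = Uuidm.v4_gen state ()

  let to_string u = Uuidm.to_string u
end

type vm (* phantom tags standing for classes of objects *)

type host

let start_vm (_ : vm Uuid_demo.t) = ()

let () =
  let vm_uuid : vm Uuid_demo.t = Uuid_demo.make () in
  let host_uuid : host Uuid_demo.t = Uuid_demo.make () in
  start_vm vm_uuid ;
  print_endline (Uuid_demo.to_string host_uuid)

(* [start_vm host_uuid] would be rejected by the compiler:
   host Uuid_demo.t is not vm Uuid_demo.t *)
```
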
+maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] - -available: [ os = "linux" | os = "macos" ] +bug-reports: "https://github.com/xapi-project/xen-api/issues" depends: [ - "ocaml" - "dune" {>= "3.15"} + "dune" {>= "3.20"} "alcotest" {with-test} "fmt" {with-test} + "ptime" "uuidm" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] ] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. -No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} +dev-repo: "git+https://github.com/xapi-project/xen-api.git" +x-maintenance-intent: ["(latest)"] +available: [ os = "linux" | os = "macos" ] diff --git a/opam/uuid.opam.template b/opam/uuid.opam.template index aacc8f63c2b..ce8a4cdd441 100644 --- a/opam/uuid.opam.template +++ b/opam/uuid.opam.template @@ -1,27 +1 @@ -opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" -homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] - available: [ os = "linux" | os = "macos" ] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "alcotest" {with-test} - "fmt" {with-test} - "uuidm" -] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. -No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py index b0638bc5904..4ce1cc72b56 100755 --- a/python3/libexec/qcow2-to-stdout.py +++ b/python3/libexec/qcow2-to-stdout.py @@ -24,6 +24,7 @@ # clusters. For the sake of simplicity the code sometimes talks about # refcount tables and L1 tables when referring to those clusters. 
+import json import argparse import math import os @@ -91,7 +92,9 @@ def write_features(cluster, offset, data_file_name): def write_qcow2_content(input_file, cluster_size, refcount_bits, - data_file_name, data_file_raw, diff_file_name): + data_file_name, data_file_raw, diff_file_name, + virtual_size, nonzero_clusters, + diff_virtual_size, diff_nonzero_clusters): # Some basic values l1_entries_per_table = cluster_size // 8 l2_entries_per_table = cluster_size // 8 @@ -102,8 +105,12 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, fd = os.open(input_file, os.O_RDONLY) # Virtual disk size, number of data clusters and L1 entries - block_device_size = os.lseek(fd, 0, os.SEEK_END) - disk_size = align_up(block_device_size, 512) + if virtual_size is None: + block_device_size = os.lseek(fd, 0, os.SEEK_END) + disk_size = align_up(block_device_size, 512) + else: + block_device_size = virtual_size + disk_size = virtual_size total_data_clusters = math.ceil(disk_size / cluster_size) l1_entries = math.ceil(total_data_clusters / l2_entries_per_table) allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table) @@ -118,6 +125,28 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, allocated_l2_tables = 0 allocated_data_clusters = 0 + def allocate_cluster(idx): + nonlocal allocated_data_clusters + nonlocal allocated_l2_tables + bitmap_set(l2_bitmap, idx) + allocated_data_clusters += 1 + # Allocated data clusters also need their corresponding L1 entry and L2 table + l1_idx = math.floor(idx / l2_entries_per_table) + if not bitmap_is_set(l1_bitmap, l1_idx): + bitmap_set(l1_bitmap, l1_idx) + allocated_l2_tables += 1 + + # Allocates a cluster in the appropriate bitmaps if it's different + # from cluster_to_compare_with + def check_cluster_allocate(idx, cluster, cluster_to_compare_with): + # If the last cluster is smaller than cluster_size pad it with zeroes + if len(cluster) < cluster_size: + cluster += bytes(cluster_size - len(cluster)) + # If a cluster has different data from the cluster_to_compare_with then it + # must be allocated in the output file and its L2 entry must be set + if cluster != cluster_to_compare_with: + allocate_cluster(idx) + if data_file_raw: # If data_file_raw is set then all clusters are allocated and # we don't need to read the input file at all. 
@@ -126,26 +155,39 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, bitmap_set(l1_bitmap, idx) for idx in range(total_data_clusters): bitmap_set(l2_bitmap, idx) - else: - # Allocates a cluster in the appropriate bitmaps if it's different - # from cluster_to_compare_with - def check_cluster_allocate(idx, cluster, cluster_to_compare_with): - nonlocal allocated_data_clusters - nonlocal allocated_l2_tables - # If the last cluster is smaller than cluster_size pad it with zeroes - if len(cluster) < cluster_size: - cluster += bytes(cluster_size - len(cluster)) - # If a cluster has different data from the cluster_to_compare_with then it - # must be allocated in the output file and its L2 entry must be set - if cluster != cluster_to_compare_with: - bitmap_set(l2_bitmap, idx) - allocated_data_clusters += 1 - # Allocated data clusters also need their corresponding L1 entry and L2 table - l1_idx = math.floor(idx / l2_entries_per_table) - if not bitmap_is_set(l1_bitmap, l1_idx): - bitmap_set(l1_bitmap, l1_idx) - allocated_l2_tables += 1 + elif nonzero_clusters is not None: + if diff_file_name: + if diff_virtual_size is None or diff_nonzero_clusters is None: + sys.exit("[Error] QCOW headers for the diff file were not provided.") + # Read all the clusters that differ from the diff_file_name + diff_fd = os.open(diff_file_name, os.O_RDONLY) + last_diff_cluster = align_up(diff_virtual_size, cluster_size) // cluster_size + # In case input_file is bigger than diff_file_name, first check + # if clusters from diff_file_name differ, and then check if the + # rest contain data + diff_nonzero_clusters_set = set(diff_nonzero_clusters) + for cluster in nonzero_clusters: + if cluster >= last_diff_cluster: + allocate_cluster(cluster) + elif cluster in diff_nonzero_clusters_set: + # If a cluster has different data from the original_cluster + # then it must be allocated + cluster_data = os.pread(fd, cluster_size, cluster_size * cluster) + original_cluster = os.pread(diff_fd, cluster_size, cluster_size * cluster) + check_cluster_allocate(cluster, cluster_data, original_cluster) + diff_nonzero_clusters_set.remove(cluster) + else: + allocate_cluster(cluster) + + # These are not present in the original file + for cluster in diff_nonzero_clusters_set: + allocate_cluster(cluster) + else: + for cluster in nonzero_clusters: + allocate_cluster(cluster) + + else: zero_cluster = bytes(cluster_size) last_cluster = align_up(block_device_size, cluster_size) // cluster_size if diff_file_name: @@ -384,11 +426,54 @@ def main(): help="enable data_file_raw on the generated image (implies -d)", action="store_true", ) + parser.add_argument( + "--json-header", + dest="json_header", + help="stdin contains a JSON of pre-parsed QCOW2 information" + "(virtual_size, data_clusters, cluster_bits)", + action="store_true", + ) + parser.add_argument( + "--json-header-diff", + dest="json_header_diff", + metavar="json_header_diff", + help="File descriptor that contains a JSON of pre-parsed QCOW2 " + "information for the diff_file_name", + type=int, + default=None, + ) args = parser.parse_args() if args.data_file_raw: args.data_file = True + virtual_size = None + nonzero_clusters = None + diff_virtual_size = None + diff_nonzero_clusters = None + if args.json_header: + json_header = json.load(sys.stdin) + try: + virtual_size = json_header['virtual_size'] + source_cluster_size = 2 ** json_header['cluster_bits'] + if source_cluster_size != args.cluster_size: + args.cluster_size = source_cluster_size + nonzero_clusters = 
json_header['data_clusters'] + except KeyError as e: + raise RuntimeError(f'Incomplete JSON - missing value for {str(e)}') from e + if args.json_header_diff: + f = os.fdopen(args.json_header_diff) + json_header = json.load(f) + try: + diff_virtual_size = json_header['virtual_size'] + if 2 ** json_header['cluster_bits'] == args.cluster_size: + diff_nonzero_clusters = json_header['data_clusters'] + else: + sys.exit(f"[Error] Cluster size in the files being compared are " + f"different: {2**json_header['cluster_bits']} vs. {args.cluster_size}") + except KeyError as e: + raise RuntimeError(f'Incomplete JSON for the diff - missing value for {str(e)}') from e + if not os.path.exists(args.input_file): sys.exit(f"[Error] {args.input_file} does not exist.") @@ -413,7 +498,11 @@ def main(): args.refcount_bits, data_file_name, args.data_file_raw, - args.diff_file_name + args.diff_file_name, + virtual_size, + nonzero_clusters, + diff_virtual_size, + diff_nonzero_clusters ) diff --git a/quality-gate.sh b/quality-gate.sh index c7965c34f0e..b6c3f7df759 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -2,146 +2,145 @@ set -e -list-hd () { - N=253 - LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$LIST_HD" -eq "$N" ]; then - echo "OK counted $LIST_HD List.hd usages" - else - echo "ERROR expected $N List.hd usages, got $LIST_HD" 1>&2 - exit 1 - fi +list-hd() { + N=253 + LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$LIST_HD" -eq "$N" ]; then + echo "OK counted $LIST_HD List.hd usages" + else + echo "ERROR expected $N List.hd usages, got $LIST_HD" 1>&2 + exit 1 + fi } -verify-cert () { - N=13 - NONE=$(git grep -r --count 'verify_cert:None' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$NONE" -eq "$N" ]; then - echo "OK counted $NONE usages of verify_cert:None" - else - echo "ERROR expected $N verify_cert:None usages, got $NONE" 1>&2 - exit 1 - fi +verify-cert() { + N=13 + NONE=$(git grep -r --count 'verify_cert:None' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$NONE" -eq "$N" ]; then + echo "OK counted $NONE usages of verify_cert:None" + else + echo "ERROR expected $N verify_cert:None usages, got $NONE" 1>&2 + exit 1 + fi } -mli-files () { - N=459 - X="ocaml/tests" - X+="|ocaml/quicktest" - X+="|ocaml/message-switch/core_test" - # do not count ml files from the tests in ocaml/{tests/quicktest} - M=$(comm -23 <(git ls-files -- '**/*.ml' | sed 's/.ml$//' | sort) \ - <(git ls-files -- '**/*.mli' | sed 's/.mli$//' | sort) |\ +mli-files() { + N=460 + X="ocaml/tests" + X+="|ocaml/quicktest" + X+="|ocaml/message-switch/core_test" + # do not count ml files from the tests in ocaml/{tests/quicktest} + M=$(comm -23 <(git ls-files -- '**/*.ml' | sed 's/.ml$//' | sort) \ + <(git ls-files -- '**/*.mli' | sed 's/.mli$//' | sort) | grep -cvE "$X") - if [ "$M" -eq "$N" ]; then - echo "OK counted $M .ml files without an .mli" - else - echo "ERROR expected $N .ml files without .mlis, got $M."\ - "If you created some .ml files, they are probably missing corresponding .mli's" 1>&2 - exit 1 - fi + if [ "$M" -eq "$N" ]; then + echo "OK counted $M .ml files without an .mli" + else + echo "ERROR expected $N .ml files without .mlis, got $M." 
\ + "If you created some .ml files, they are probably missing corresponding .mli's" 1>&2 + exit 1 + fi } -structural-equality () { - N=7 - EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$EQ" -eq "$N" ]; then - echo "OK counted $EQ usages of ' == '" - else - echo "ERROR expected $N usages of ' == ', got $EQ; use = rather than ==" 1>&2 - exit 1 - fi - - if git grep -r --count ' != ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml'; then - echo "ERROR expected no usages of ' != '; use <> rather than !=" 1>&2 - exit 1 - else - echo "OK found no usages of ' != '" - fi +structural-equality() { + N=7 + EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$EQ" -eq "$N" ]; then + echo "OK counted $EQ usages of ' == '" + else + echo "ERROR expected $N usages of ' == ', got $EQ; use = rather than ==" 1>&2 + exit 1 + fi + + if git grep -r --count ' != ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml'; then + echo "ERROR expected no usages of ' != '; use <> rather than !=" 1>&2 + exit 1 + else + echo "OK found no usages of ' != '" + fi } -vtpm-unimplemented () { - N=2 - VTPM=$(git grep -r --count 'maybe_raise_vtpm_unimplemented' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$VTPM" -eq "$N" ]; then - echo "OK found $VTPM usages of vtpm unimplemented errors" - else - echo "ERROR expected $N usages of unimplemented vtpm functionality, got $VTPM." 1>&2 - exit 1 - fi +vtpm-unimplemented() { + N=2 + VTPM=$(git grep -r --count 'maybe_raise_vtpm_unimplemented' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$VTPM" -eq "$N" ]; then + echo "OK found $VTPM usages of vtpm unimplemented errors" + else + echo "ERROR expected $N usages of unimplemented vtpm functionality, got $VTPM." 1>&2 + exit 1 + fi } -vtpm-fields () { - A=$(git grep -hc "vTPM'_.*:" ocaml/xapi/importexport.ml) - B=$(git grep -hc ' field' ocaml/idl/datamodel_vtpm.ml) - case "$A/$B" in - 5/6) - echo "OK found $A/$B VTPM fields in importexport.ml datamodel_vtpm.ml" - ;; - *) - echo "ERROR have VTPM fields changed? $A/$B - check importexport.ml" 1>&2 - exit 1 - ;; - esac +vtpm-fields() { + A=$(git grep -hc "vTPM'_.*:" ocaml/xapi/importexport.ml) + B=$(git grep -hc ' field' ocaml/idl/datamodel_vtpm.ml) + case "$A/$B" in + 5/6) + echo "OK found $A/$B VTPM fields in importexport.ml datamodel_vtpm.ml" + ;; + *) + echo "ERROR have VTPM fields changed? $A/$B - check importexport.ml" 1>&2 + exit 1 + ;; + esac } -ocamlyacc () { - N=0 - OCAMLYACC=$(git grep -r -o --count "ocamlyacc" '**/dune' | wc -l) - if [ "$OCAMLYACC" -eq "$N" ]; then - echo "OK found $OCAMLYACC usages of ocamlyacc usages in dune files." - else - echo "ERROR expected $N usages of ocamlyacc in dune files, got $OCAMLYACC." 1>&2 - exit 1 - fi +ocamlyacc() { + N=0 + OCAMLYACC=$(git grep -r -o --count "ocamlyacc" '**/dune' | wc -l) + if [ "$OCAMLYACC" -eq "$N" ]; then + echo "OK found $OCAMLYACC usages of ocamlyacc usages in dune files." + else + echo "ERROR expected $N usages of ocamlyacc in dune files, got $OCAMLYACC." 1>&2 + exit 1 + fi } - -unixgetenv () { - N=0 - UNIXGETENV=$(git grep -P -r -o --count 'getenv(?!_opt)' -- **/*.ml | wc -l) - if [ "$UNIXGETENV" -eq "$N" ]; then - echo "OK found $UNIXGETENV usages of exception-raising Unix.getenv in OCaml files." 
- else - echo "ERROR expected $N usages of exception-raising Unix.getenv in OCaml files, got $UNIXGETENV" 1>&2 - exit 1 - fi +unixgetenv() { + N=0 + UNIXGETENV=$(git grep -P -r -o --count 'getenv(?!_opt)' -- **/*.ml | wc -l) + if [ "$UNIXGETENV" -eq "$N" ]; then + echo "OK found $UNIXGETENV usages of exception-raising Unix.getenv in OCaml files." + else + echo "ERROR expected $N usages of exception-raising Unix.getenv in OCaml files, got $UNIXGETENV" 1>&2 + exit 1 + fi } -hashtblfind () { - N=33 - # Looks for all .ml files except the ones using Core.Hashtbl.find, - # which already returns Option - HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$HASHTBLFIND" -eq "$N" ]; then - echo "OK counted $HASHTBLFIND usages of exception-raising Hashtbl.find" - else - echo "ERROR expected $N usages of exception-raising Hashtbl.find, got $HASHTBLFIND" 1>&2 - exit 1 - fi +hashtblfind() { + N=33 + # Looks for all .ml files except the ones using Core.Hashtbl.find, + # which already returns Option + HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$HASHTBLFIND" -eq "$N" ]; then + echo "OK counted $HASHTBLFIND usages of exception-raising Hashtbl.find" + else + echo "ERROR expected $N usages of exception-raising Hashtbl.find, got $HASHTBLFIND" 1>&2 + exit 1 + fi } -unnecessary-length () { - N=0 - local_grep () { - git grep -r -o --count "$1" -- '**/*.ml' | wc -l - } - UNNECESSARY_LENGTH=$(local_grep "List.length.*=+\s*0") - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*=+\s*List.length"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s>\s*0"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<>\s*0"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<\s*List.length"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<>\s*List.length"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<\s*1"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "1\s*>\s*List.length"))) - if [ "$UNNECESSARY_LENGTH" -eq "$N" ]; then - echo "OK found $UNNECESSARY_LENGTH unnecessary usages of List.length in OCaml files." - else - echo "ERROR expected $N unnecessary usages of List.length in OCaml files, +unnecessary-length() { + N=0 + local_grep() { + git grep -r -o --count "$1" -- '**/*.ml' | wc -l + } + UNNECESSARY_LENGTH=$(local_grep "List.length.*=+\s*0") + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "0\s*=+\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "List.length.*\s>\s*0"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "List.length.*\s<>\s*0"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "0\s*<\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "0\s*<>\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "List.length.*\s<\s*1"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "1\s*>\s*List.length"))) + if [ "$UNNECESSARY_LENGTH" -eq "$N" ]; then + echo "OK found $UNNECESSARY_LENGTH unnecessary usages of List.length in OCaml files." + else + echo "ERROR expected $N unnecessary usages of List.length in OCaml files, got $UNNECESSARY_LENGTH. Use lst =/<> [] or match statements instead." 
1>&2 - exit 1 - fi + exit 1 + fi } list-hd @@ -154,4 +153,3 @@ ocamlyacc unixgetenv hashtblfind unnecessary-length -
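
(Aside, closing out the NUMA-related changes above: a hedged sketch of what each `host_numa_affinity_policy` value turns into by the time `numa_placement` is called, using the two helpers added in xenops_server.ml. The 8-vCPU figure is illustrative.)

```ocaml
(* Sketch only: what build_pre ends up passing to numa_placement for each
   policy, derived from the helpers added above. *)
let describe policy ~vcpus =
  let affinity = Xenops_server.affinity_of_numa_affinity_policy policy
  and cores = Xenops_server.cores_of_numa_affinity_policy policy ~vcpus in
  (affinity, cores)

(* For a VM with 8 vCPUs (the Any policy skips NUMA placement entirely):
     Prio_mem_only    -> (Soft, cores = 0)  previous behaviour: nodes filtered
                                            on free memory only
     Best_effort      -> (Soft, cores = 8)  also asks for 8 idle physical cores
     Best_effort_hard -> (Hard, cores = 8)  same request, enforced with hard
                                            affinity *)
let _ = describe
```
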