c_api_experimental.h 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. ==============================================================================*/
  12. #ifndef TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_
  13. #define TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_
  14. #include "tensorflow/c/c_api.h"
  15. #include "tensorflow/c/eager/c_api.h"
  16. #ifdef __cplusplus
  17. extern "C" {
  18. #endif
  19. // Resets `op_to_reset` with `op_or_function_name` and `raw_device_name`. This
  20. // is for performance optimization by reusing an exiting unused op rather than
  21. // creating a new op every time. If `raw_device_name` is `NULL` or empty, it
  22. // does not set the device name. If it's not `NULL`, then it attempts to parse
  23. // and set the device name. It's effectively `TFE_OpSetDevice`, but it is faster
  24. // than separately calling it because if the existing op has the same
  25. // `raw_device_name`, it skips parsing and just leave as it is.
  26. TF_CAPI_EXPORT extern void TFE_OpReset(TFE_Op* op_to_reset,
  27. const char* op_or_function_name,
  28. const char* raw_device_name,
  29. TF_Status* status);
  30. // Enables only graph collection in RunMetadata on the functions executed from
  31. // this context.
  32. TF_CAPI_EXPORT extern void TFE_ContextEnableGraphCollection(TFE_Context* ctx);
  33. // Disables only graph collection in RunMetadata on the functions executed from
  34. // this context.
  35. TF_CAPI_EXPORT extern void TFE_ContextDisableGraphCollection(TFE_Context* ctx);
  36. // TODO(fishx): Move these monitoring APIs into a separate file.
  37. // -----------------------------------------------------------------------------
  38. // Monitoring Counter APIs.
  39. // These APIs de-templated monitoring Counter for swig.
  40. typedef struct TFE_MonitoringCounterCell TFE_MonitoringCounterCell;
  41. // Atomically increments the value of the cell. The value must be non-negative.
  42. TF_CAPI_EXPORT extern void TFE_MonitoringCounterCellIncrementBy(
  43. TFE_MonitoringCounterCell* cell, int64_t value);
  44. // Retrieves the current value of the cell.
  45. TF_CAPI_EXPORT extern int64_t TFE_MonitoringCounterCellValue(
  46. TFE_MonitoringCounterCell* cell);
  47. // APIs for Counter without label.
  48. typedef struct TFE_MonitoringCounter0 TFE_MonitoringCounter0;
  49. // Returns a new Counter metric object. The caller should manage lifetime of
  50. // the object. Using duplicate metric name will crash the program with fatal
  51. // error.
  52. TF_CAPI_EXPORT extern TFE_MonitoringCounter0* TFE_MonitoringNewCounter0(
  53. const char* name, TF_Status* status, const char* description);
  54. // Deletes the Counter object.
  55. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter0(
  56. TFE_MonitoringCounter0* counter);
  57. // Retrieves the cell from the Counter object. The Counter object will manage
  58. // lifetime of the cell.
  59. TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter0(
  60. TFE_MonitoringCounter0* counter);
  61. // APIs for Counter with 1 label.
  62. typedef struct TFE_MonitoringCounter1 TFE_MonitoringCounter1;
  63. TF_CAPI_EXPORT extern TFE_MonitoringCounter1* TFE_MonitoringNewCounter1(
  64. const char* name, TF_Status* status, const char* description,
  65. const char* label1);
  66. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter1(
  67. TFE_MonitoringCounter1* counter);
  68. TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter1(
  69. TFE_MonitoringCounter1* counter, const char* label1);
  70. // APIs for Counter with 2 labels.
  71. typedef struct TFE_MonitoringCounter2 TFE_MonitoringCounter2;
  72. TF_CAPI_EXPORT extern TFE_MonitoringCounter2* TFE_MonitoringNewCounter2(
  73. const char* name, TF_Status* status, const char* description,
  74. const char* label1, const char* label2);
  75. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter2(
  76. TFE_MonitoringCounter2* counter);
  77. TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter2(
  78. TFE_MonitoringCounter2* counter, const char* label1, const char* label2);
  79. // -----------------------------------------------------------------------------
  80. // Monitoring Gauge APIs.
  81. // These APIs de-templated monitoring Gauge for swig.
  82. typedef struct TFE_MonitoringIntGaugeCell TFE_MonitoringIntGaugeCell;
  83. // Atomically set the value of the cell.
  84. TF_CAPI_EXPORT extern void TFE_MonitoringIntGaugeCellSet(
  85. TFE_MonitoringIntGaugeCell* cell, int64_t value);
  86. // Retrieves the current value of the cell.
  87. TF_CAPI_EXPORT extern int64_t TFE_MonitoringIntGaugeCellValue(
  88. TFE_MonitoringIntGaugeCell* cell);
  89. // APIs for Int Gauge without label.
  90. typedef struct TFE_MonitoringIntGauge0 TFE_MonitoringIntGauge0;
  91. TF_CAPI_EXPORT extern TFE_MonitoringIntGauge0* TFE_MonitoringNewIntGauge0(
  92. const char* name, TF_Status* out_status, const char* description);
  93. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge0(
  94. TFE_MonitoringIntGauge0* gauge);
  95. TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
  96. TFE_MonitoringGetCellIntGauge0(TFE_MonitoringIntGauge0* gauge);
  97. // APIs for Int Gauge with 1 label.
  98. typedef struct TFE_MonitoringIntGauge1 TFE_MonitoringIntGauge1;
  99. TF_CAPI_EXPORT extern TFE_MonitoringIntGauge1* TFE_MonitoringNewIntGauge1(
  100. const char* name, TF_Status* out_status, const char* description,
  101. const char* label1);
  102. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge1(
  103. TFE_MonitoringIntGauge1* gauge);
  104. TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
  105. TFE_MonitoringGetCellIntGauge1(TFE_MonitoringIntGauge1* gauge,
  106. const char* label1);
  107. // APIs for Int Gauge with 2 label.
  108. typedef struct TFE_MonitoringIntGauge2 TFE_MonitoringIntGauge2;
  109. TF_CAPI_EXPORT extern TFE_MonitoringIntGauge2* TFE_MonitoringNewIntGauge2(
  110. const char* name, TF_Status* out_status, const char* description,
  111. const char* label1, const char* label2);
  112. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge2(
  113. TFE_MonitoringIntGauge2* gauge);
  114. TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
  115. TFE_MonitoringGetCellIntGauge2(TFE_MonitoringIntGauge2* gauge,
  116. const char* label1, const char* label2);
  117. typedef struct TFE_MonitoringStringGaugeCell TFE_MonitoringStringGaugeCell;
  118. TF_CAPI_EXPORT extern void TFE_MonitoringStringGaugeCellSet(
  119. TFE_MonitoringStringGaugeCell* cell, const char* value);
  120. // Retrieves the string value and saves it in buffer.
  121. TF_CAPI_EXPORT extern const void TFE_MonitoringStringGaugeCellValue(
  122. TFE_MonitoringStringGaugeCell* cell, TF_Buffer* buf);
  123. // APIs for String Gauge without label.
  124. typedef struct TFE_MonitoringStringGauge0 TFE_MonitoringStringGauge0;
  125. TF_CAPI_EXPORT extern TFE_MonitoringStringGauge0* TFE_MonitoringNewStringGauge0(
  126. const char* name, TF_Status* out_status, const char* description);
  127. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge0(
  128. TFE_MonitoringStringGauge0* gauge);
  129. TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
  130. TFE_MonitoringGetCellStringGauge0(TFE_MonitoringStringGauge0* gauge);
  131. // APIs for String Gauge with 1 label.
  132. typedef struct TFE_MonitoringStringGauge1 TFE_MonitoringStringGauge1;
  133. TF_CAPI_EXPORT extern TFE_MonitoringStringGauge1* TFE_MonitoringNewStringGauge1(
  134. const char* name, TF_Status* out_status, const char* description,
  135. const char* label1);
  136. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge1(
  137. TFE_MonitoringStringGauge1* gauge);
  138. TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
  139. TFE_MonitoringGetCellStringGauge1(TFE_MonitoringStringGauge1* gauge,
  140. const char* label1);
  141. // APIs for String Gauge with 2 label.
  142. typedef struct TFE_MonitoringStringGauge2 TFE_MonitoringStringGauge2;
  143. TF_CAPI_EXPORT extern TFE_MonitoringStringGauge2* TFE_MonitoringNewStringGauge2(
  144. const char* name, TF_Status* out_status, const char* description,
  145. const char* label1, const char* label2);
  146. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge2(
  147. TFE_MonitoringStringGauge2* gauge);
  148. TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
  149. TFE_MonitoringGetCellStringGauge2(TFE_MonitoringStringGauge2* gauge,
  150. const char* label1, const char* label2);
  151. // APIs for String Gauge with 3 labels.
  152. typedef struct TFE_MonitoringStringGauge3 TFE_MonitoringStringGauge3;
  153. TF_CAPI_EXPORT extern TFE_MonitoringStringGauge3* TFE_MonitoringNewStringGauge3(
  154. const char* name, TF_Status* out_status, const char* description,
  155. const char* label1, const char* label2, const char* label3);
  156. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge3(
  157. TFE_MonitoringStringGauge3* gauge);
  158. TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
  159. TFE_MonitoringGetCellStringGauge3(TFE_MonitoringStringGauge3* gauge,
  160. const char* label1, const char* label2,
  161. const char* label3);
  162. // APIs for String Gauge with 4 labels.
  163. typedef struct TFE_MonitoringStringGauge4 TFE_MonitoringStringGauge4;
  164. TF_CAPI_EXPORT extern TFE_MonitoringStringGauge4* TFE_MonitoringNewStringGauge4(
  165. const char* name, TF_Status* out_status, const char* description,
  166. const char* label1, const char* label2, const char* label3,
  167. const char* label4);
  168. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge4(
  169. TFE_MonitoringStringGauge4* gauge);
  170. TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
  171. TFE_MonitoringGetCellStringGauge4(TFE_MonitoringStringGauge4* gauge,
  172. const char* label1, const char* label2,
  173. const char* label3, const char* label4);
  174. typedef struct TFE_MonitoringBoolGaugeCell TFE_MonitoringBoolGaugeCell;
  175. TF_CAPI_EXPORT extern void TFE_MonitoringBoolGaugeCellSet(
  176. TFE_MonitoringBoolGaugeCell* cell, bool value);
  177. TF_CAPI_EXPORT extern bool TFE_MonitoringBoolGaugeCellValue(
  178. TFE_MonitoringBoolGaugeCell* cell);
  179. // APIs for Bool Gauge without label.
  180. typedef struct TFE_MonitoringBoolGauge0 TFE_MonitoringBoolGauge0;
  181. TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge0* TFE_MonitoringNewBoolGauge0(
  182. const char* name, TF_Status* out_status, const char* description);
  183. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge0(
  184. TFE_MonitoringBoolGauge0* gauge);
  185. TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
  186. TFE_MonitoringGetCellBoolGauge0(TFE_MonitoringBoolGauge0* gauge);
  187. // APIs for Bool Gauge with 1 label.
  188. typedef struct TFE_MonitoringBoolGauge1 TFE_MonitoringBoolGauge1;
  189. TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge1* TFE_MonitoringNewBoolGauge1(
  190. const char* name, TF_Status* out_status, const char* description,
  191. const char* label1);
  192. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge1(
  193. TFE_MonitoringBoolGauge1* gauge);
  194. TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
  195. TFE_MonitoringGetCellBoolGauge1(TFE_MonitoringBoolGauge1* gauge,
  196. const char* label1);
  197. // APIs for Bool Gauge with 2 label.
  198. typedef struct TFE_MonitoringBoolGauge2 TFE_MonitoringBoolGauge2;
  199. TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge2* TFE_MonitoringNewBoolGauge2(
  200. const char* name, TF_Status* out_status, const char* description,
  201. const char* label1, const char* label2);
  202. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge2(
  203. TFE_MonitoringBoolGauge2* gauge);
  204. TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
  205. TFE_MonitoringGetCellBoolGauge2(TFE_MonitoringBoolGauge2* gauge,
  206. const char* label1, const char* label2);
  207. // -----------------------------------------------------------------------------
  208. // Monitoring Sampler APIs.
  209. // These APIs de-templated monitoring Sampler for swig.
  210. typedef struct TFE_MonitoringSamplerCell TFE_MonitoringSamplerCell;
  211. // Atomically add the value of the cell.
  212. TF_CAPI_EXPORT extern void TFE_MonitoringSamplerCellAdd(
  213. TFE_MonitoringSamplerCell* cell, double value);
  214. // Retrieves the current value of the cell. The return value is a HistogramProto
  215. // saved in buffer.
  216. TF_CAPI_EXPORT extern void TFE_MonitoringSamplerCellValue(
  217. TFE_MonitoringSamplerCell* cell, TF_Buffer* buf);
  218. // APIs for sampler buckets
  219. typedef struct TFE_MonitoringBuckets TFE_MonitoringBuckets;
  220. TF_CAPI_EXPORT extern TFE_MonitoringBuckets*
  221. TFE_MonitoringNewExponentialBuckets(double scale, double growth_factor,
  222. int bucket_count);
  223. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBuckets(
  224. TFE_MonitoringBuckets* buckets);
  225. // APIs for Sampler without label.
  226. typedef struct TFE_MonitoringSampler0 TFE_MonitoringSampler0;
  227. TF_CAPI_EXPORT extern TFE_MonitoringSampler0* TFE_MonitoringNewSampler0(
  228. const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
  229. const char* description);
  230. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler0(
  231. TFE_MonitoringSampler0* sampler);
  232. TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler0(
  233. TFE_MonitoringSampler0* sampler);
  234. // APIs for Sampler with 1 label.
  235. typedef struct TFE_MonitoringSampler1 TFE_MonitoringSampler1;
  236. TF_CAPI_EXPORT extern TFE_MonitoringSampler1* TFE_MonitoringNewSampler1(
  237. const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
  238. const char* description, const char* label1);
  239. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler1(
  240. TFE_MonitoringSampler1* sampler);
  241. TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler1(
  242. TFE_MonitoringSampler1* sampler, const char* label1);
  243. // APIs for Sampler with 2 label.
  244. typedef struct TFE_MonitoringSampler2 TFE_MonitoringSampler2;
  245. TF_CAPI_EXPORT extern TFE_MonitoringSampler2* TFE_MonitoringNewSampler2(
  246. const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
  247. const char* description, const char* label1, const char* label2);
  248. TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler2(
  249. TFE_MonitoringSampler2* sampler);
  250. TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler2(
  251. TFE_MonitoringSampler2* sampler, const char* label1, const char* label2);
  252. // Sets whether to use TFRT
  253. TF_CAPI_EXPORT extern void TFE_ContextOptionsSetTfrt(TFE_ContextOptions*,
  254. bool use_tfrt);
  255. // Sets whether to use TFRT distributed runtime
  256. TF_CAPI_EXPORT extern void TFE_ContextOptionsSetTfrtDistributedRuntime(
  257. TFE_ContextOptions* options, bool use_tfrt_distributed_runtime);
  258. // Returns the context_id from the EagerContext which is used by the
  259. // EagerService to maintain consistency between client and worker. The
  260. // context_id is initialized with a dummy value and is later set when the worker
  261. // is initialized (either locally or remotely). The context_id can change during
  262. // the process lifetime although this should cause the worker to be
  263. // reinitialized (e.g. cleared caches) as well.
  264. TF_CAPI_EXPORT extern uint64_t TFE_GetContextId(TFE_Context* ctx);
  265. // -----------------------------------------------------------------------------
  266. // Cancellation APIs.
  267. typedef struct TFE_CancellationManager TFE_CancellationManager;
  268. TF_CAPI_EXPORT extern TFE_CancellationManager* TFE_NewCancellationManager();
  269. TF_CAPI_EXPORT extern bool TFE_CancellationManagerIsCancelled(
  270. TFE_CancellationManager*);
  271. TF_CAPI_EXPORT extern void TFE_CancellationManagerStartCancel(
  272. TFE_CancellationManager*);
  273. TF_CAPI_EXPORT extern void TFE_DeleteCancellationManager(
  274. TFE_CancellationManager*);
  275. // Associates the given `cancellation_manager` with `op`, so that invoking
  276. // `TFE_CancellationManagerStartCancel(cancellation_manager)` will cancel the
  277. // execution of `op`.
  278. typedef struct TFE_CancellationManager TFE_CancellationManager;
  279. TF_CAPI_EXPORT extern void TFE_OpSetCancellationManager(
  280. TFE_Op* op, TFE_CancellationManager* cancellation_manager,
  281. TF_Status* status);
  282. // -----------------------------------------------------------------------------
  283. // Eager Executor APIs.
  284. typedef struct TFE_Executor TFE_Executor;
  285. // Creates a new eager Executor. Nodes in one executor are guaranteed to be
  286. // executed in sequence. Assigning nodes to different executors allows executing
  287. // nodes in parallel.
  288. TF_CAPI_EXPORT extern TFE_Executor* TFE_NewExecutor(bool is_async);
  289. // Deletes the eager Executor without waiting for enqueued nodes. Please call
  290. // TFE_ExecutorWaitForAllPendingNodes before calling this API if you want to
  291. // make sure all nodes are finished.
  292. TF_CAPI_EXPORT extern void TFE_DeleteExecutor(TFE_Executor*);
  293. // Returns true if the executor is in async mode.
  294. TF_CAPI_EXPORT extern bool TFE_ExecutorIsAsync(TFE_Executor*);
  295. // Causes the calling thread to block till all ops dispatched in this executor
  296. // have been executed. Note that "execution" here refers to kernel execution /
  297. // scheduling of copies, etc. Similar to sync execution, it doesn't guarantee
  298. // that lower level device queues (like GPU streams) have been flushed.
  299. //
  300. // This call may not block for execution of ops enqueued concurrently with this
  301. // call.
  302. TF_CAPI_EXPORT extern void TFE_ExecutorWaitForAllPendingNodes(
  303. TFE_Executor*, TF_Status* status);
  304. // When an error happens, any pending operations are discarded and newly issued
  305. // ops return an error. This call clears the error state and re-enables
  306. // execution of newly issued ops.
  307. //
  308. // Note that outputs of discarded ops remain in a corrupt state and should not
  309. // be used for future calls.
  310. // TODO(agarwal): mark the affected handles and raise errors if they are used.
  311. TF_CAPI_EXPORT extern void TFE_ExecutorClearError(TFE_Executor*);
  312. // Sets a custom Executor for current thread. All nodes created by this thread
  313. // will be added to this Executor. It will override current executor.
  314. TF_CAPI_EXPORT extern void TFE_ContextSetExecutorForThread(TFE_Context*,
  315. TFE_Executor*);
  316. // Returns the Executor for current thread.
  317. TF_CAPI_EXPORT extern TFE_Executor* TFE_ContextGetExecutorForThread(
  318. TFE_Context*);
  319. // -----------------------------------------------------------------------------
  320. // Dynamic cluster API.
  321. // Update an existing context with a new set of servers defined in a ServerDef
  322. // proto. Servers can be added to and removed from the list of remote workers
  323. // in the context. New set of servers identified by the ServerDef must be up
  324. // when the context is updated.
  325. //
  326. // This API is for experimental usage and may be subject to change.
  327. TF_CAPI_EXPORT extern void TFE_ContextUpdateServerDef(TFE_Context* ctx,
  328. int keep_alive_secs,
  329. const void* proto,
  330. size_t proto_len,
  331. TF_Status* status);
  332. // Checks whether a remote worker is alive or not. This will return true even if
  333. // the context doesn't exist on the remote worker.
  334. TF_CAPI_EXPORT extern bool TFE_ContextCheckAlive(TFE_Context* ctx,
  335. const char* worker_name,
  336. TF_Status* status);
  337. // Sync pending nodes in local executors (including the context default executor
  338. // and thread executors) and streaming requests to remote executors, and get the
  339. // combined status.
  340. TF_CAPI_EXPORT extern void TFE_ContextAsyncWait(TFE_Context* ctx,
  341. TF_Status* status);
  342. // This function will block till the operation that produces `h` has
  343. // completed. This is only valid on local TFE_TensorHandles. The pointer
  344. // returned will be on the device in which the TFE_TensorHandle resides (so e.g.
  345. // for a GPU tensor this will return a pointer to GPU memory). The pointer is
  346. // only guaranteed to be valid until TFE_DeleteTensorHandle is called on this
  347. // TensorHandle. Only supports POD data types.
  348. TF_CAPI_EXPORT extern void* TFE_TensorHandleDevicePointer(TFE_TensorHandle*,
  349. TF_Status*);
  350. // This function will block till the operation that produces `h` has
  351. // completed. This is only valid on local TFE_TensorHandles. Returns the size in
  352. // bytes of the memory pointed to by the device pointer returned above.
  353. TF_CAPI_EXPORT extern size_t TFE_TensorHandleDeviceMemorySize(TFE_TensorHandle*,
  354. TF_Status*);
  355. // Creates a new TensorHandle from memory residing in the physical device
  356. // device_name. Takes ownership of the memory, and will call deleter to release
  357. // it after TF no longer needs it or in case of error.
  358. //
  359. // Custom devices must use TFE_NewCustomDeviceTensorHandle instead.
  360. TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandleFromDeviceMemory(
  361. TFE_Context* ctx, const char* device_name, TF_DataType, const int64_t* dims,
  362. int num_dims, void* data, size_t len,
  363. void (*deallocator)(void* data, size_t len, void* arg),
  364. void* deallocator_arg, TF_Status* status);
  365. // Retrieves the address space (i.e. job, replia, task) of the local host and
  366. // saves it in the buffer.
  367. TF_CAPI_EXPORT extern void TFE_HostAddressSpace(TFE_Context* ctx,
  368. TF_Buffer* buf);
  369. // APIs for generically dealing with op attributes (e.g. when forwarding them
  370. // through custom device implementations).
  371. //
  372. // TODO(allenl): Currently these are black boxes, but we should have some way to
  373. // inspect values. This would let people e.g. copy over most attributes and then
  374. // modify some based on their values.
  375. // A reference to an op's name -> attribute mapping
  376. typedef struct TFE_OpAttrs TFE_OpAttrs;
  377. // Fetch a reference to `op`'s attributes. The returned reference is only valid
  378. // while `op` is alive.
  379. TF_CAPI_EXPORT extern const TFE_OpAttrs* TFE_OpGetAttrs(const TFE_Op* op);
  380. // Add attributes in `attrs` to `op`.
  381. //
  382. // Does not overwrite or update existing attributes, but adds new ones.
  383. TF_CAPI_EXPORT extern void TFE_OpAddAttrs(TFE_Op* op, const TFE_OpAttrs* attrs);
  384. // Serialize `attrs` as a tensorflow::NameAttrList protocol buffer (into `buf`),
  385. // containing the op name and a map of its attributes.
  386. TF_CAPI_EXPORT extern void TFE_OpAttrsSerialize(const TFE_OpAttrs* attrs,
  387. TF_Buffer* buf,
  388. TF_Status* status);
  389. // Set an op's attribute from a serialized AttrValue protocol buffer.
  390. //
  391. // Analogous to TF_SetAttrValueProto for building graph operations.
  392. TF_CAPI_EXPORT extern void TFE_OpSetAttrValueProto(const TFE_Op* op,
  393. const char* attr_name,
  394. const void* proto,
  395. size_t proto_len,
  396. TF_Status* status);
  397. // TODO(b/166642410): It would be nice, for custom devices and for other users,
  398. // to have a non-string representation of devices (TF_Device) extracted from
  399. // tensors/ops/etc. and usable in APIs like OpSetDevice/ResetOp/etc.
  400. #define TFE_CUSTOM_DEVICE_VERSION 4
  401. // Struct to be filled in. Functions are required except where indicated.
  402. typedef struct TFE_CustomDevice {
  403. int version = TFE_CUSTOM_DEVICE_VERSION;
  404. // Method to copy a tensor to the custom device.
  405. TFE_TensorHandle* (*copy_tensor_to_device)(TFE_Context* context,
  406. TFE_TensorHandle* tensor,
  407. TF_Status* status,
  408. void* device_info);
  409. // Method to copy a tensor from the custom device to a target device.
  410. TFE_TensorHandle* (*copy_tensor_from_device)(TFE_Context* context,
  411. TFE_TensorHandle* tensor,
  412. const char* target_device_name,
  413. TF_Status* status,
  414. void* device_info);
  415. // Method to execute an operation.
  416. //
  417. // Arguments provide enough information to reconstruct the original `TFE_Op`,
  418. // or construct a transformed version, by inspecting the passed `op`.
  419. //
  420. // TFE_OpGetDevice(op) records the original placement of the operation. It may
  421. // be an empty string if no device was explicitly requested, but will
  422. // otherwise be the name of this custom device. Ops are placed onto a custom
  423. // device if any of their inputs are on that custom device, but custom devices
  424. // are free to set a bad status in order to require explicit placement.
  425. void (*execute)(const TFE_Op* op, int* num_outputs,
  426. TFE_TensorHandle** outputs, TF_Status* s, void* device_info);
  427. // Method to delete a device.
  428. void (*delete_device)(void* device_info);
  429. // Implements TFE_CreatePackedTensorHandle when one of `handles` is on this
  430. // custom device.
  431. //
  432. // Many devices will want to simply return an "unimplemented" status
  433. // here. This is the default behavior if `pack` is null when passed to
  434. // TFE_RegisterCustomDevice.
  435. TFE_TensorHandle* (*pack)(TFE_Context* context, TFE_TensorHandle** handles,
  436. int num_handles, TF_Status* s,
  437. void* device_info) = nullptr;
  438. } TFE_CustomDevice;
  439. // Registers a custom device for use with eager execution.
  440. //
  441. // Eager operations may be placed on this device, e.g. `with
  442. // tf.device("CUSTOM"):` from Python if `device_name` for this call is
  443. // "/job:localhost/replica:0/task:0/device:CUSTOM:0".
  444. //
  445. // The custom device defines copy operations for moving TensorHandles on and
  446. // off, and an execution operation for named operations. Often execution will
  447. // simply wrap op execution on one or more physical devices.
  448. //
  449. // device_info is an opaque caller-defined type stored with the custom device
  450. // which is passed to the functions referenced in the TFE_CustomDevice struct
  451. // `device` (execute, delete_device, etc.). It can for example contain the
  452. // names of wrapped devices.
  453. //
  454. // There are currently no graph semantics implemented for registered custom
  455. // devices, so executing tf.functions which contain operations placed on custom
  456. // devices will fail.
  457. //
  458. // `device_name` must not name an existing physical or custom device. It must
  459. // follow the format:
  460. //
  461. // /job:<name>/replica:<replica>/task:<task>/device:<type>:<device_num>
  462. //
  463. // If the device is successfully registered, `status` is set to TF_OK. Otherwise
  464. // the device is not usable. In case of a bad status, `device.delete_device` is
  465. // still called on `device_info` (i.e. the caller does not retain ownership).
  466. //
  467. // This API is highly experimental, and in particular is expected to change when
  468. // it starts supporting operations with attributes and when tf.function support
  469. // is added.
  470. TF_CAPI_EXPORT extern void TFE_RegisterCustomDevice(TFE_Context* ctx,
  471. TFE_CustomDevice device,
  472. const char* device_name,
  473. void* device_info,
  474. TF_Status* status);
  475. // Struct to be filled in to define a custom device tensor handle. Fields are
  476. // required except where indicated.
  477. typedef struct TFE_CustomDeviceTensorHandleMethods {
  478. int version = TFE_CUSTOM_DEVICE_VERSION;
  479. // Computes the rank of the tensor handle.
  480. //
  481. // Shapes are specified via callbacks because retrieving the shape of a tensor
  482. // is a blocking operation for async eager; custom devices should avoid
  483. // retrieving shapes of tensors they wrap until the custom device tensor's
  484. // shape is explicitly requested where possible.
  485. int (*num_dims)(void* data, TF_Status* status);
  486. // Computes the axis length at `dim_index`.
  487. int64_t (*dim)(void* data, int dim_index, TF_Status* status);
  488. void (*deallocator)(void* data);
  489. // Summarizes the value of this tensor. The caller takes ownership of the
  490. // returned buffer. If `status` is not TF_OK, instead returns a null pointer.
  491. //
  492. // Does not include the shape and dtype of the tensor (which is generally
  493. // appended later), but should include any information specific to this custom
  494. // device which would be useful for debugging.
  495. //
  496. // Optional. If null, defaults to resolving the TFE_TensorHandle into a
  497. // TF_Tensor and summarizing that.
  498. TF_Buffer* (*summarize)(void* data, TF_Status* status) = nullptr;
  499. } TFE_CustomDeviceTensorHandle;
  500. // Creates a new TensorHandle from memory residing in a custom device. Takes
  501. // ownership of the memory pointed to by `tensor_handle_data`, and calls
  502. // `methods.deallocator` to release it after TF no longer needs it or in case of
  503. // an error.
  504. //
  505. // This call is similar to `TFE_NewTensorHandleFromDeviceMemory`, but supports
  506. // custom devices instead of physical devices and does not require blocking
  507. // waiting for exact shapes.
  508. TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewCustomDeviceTensorHandle(
  509. TFE_Context*, const char* device_name, TF_DataType, void* data,
  510. TFE_CustomDeviceTensorHandle methods, TF_Status* status);
  511. TF_CAPI_EXPORT extern void TFE_ContextGetFunctionDef(TFE_Context* ctx,
  512. const char* function_name,
  513. TF_Buffer* buf,
  514. TF_Status* status);
  515. // Allocate and return a new Tensor on the host.
  516. //
  517. // The caller must set the Tensor values by writing them to the pointer returned
  518. // by TF_TensorData with length TF_TensorByteSize.
  519. TF_CAPI_EXPORT extern TF_Tensor* TFE_AllocateHostTensor(TFE_Context* ctx,
  520. TF_DataType dtype,
  521. const int64_t* dims,
  522. int num_dims,
  523. TF_Status* status);
  524. // Given a Tensor, wrap it with a TensorHandle
  525. //
  526. // Similar to TFE_NewTensorHandle, but includes a pointer to the TFE_Context.
  527. // The context should be identical to that of the Tensor.
  528. TF_CAPI_EXPORT TFE_TensorHandle* TFE_NewTensorHandleFromTensor(
  529. TFE_Context* ctx, TF_Tensor* t, TF_Status* status);
  530. // Create a packed TensorHandle with the given list of TensorHandles.
  531. // If `handles` are on the same device, assign the same device to the packed
  532. // handle; if `handles` are on different deivces, assign a CompositeDevice to
  533. // it.
  534. TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_CreatePackedTensorHandle(
  535. TFE_Context* ctx, TFE_TensorHandle** handles, int* num_handles,
  536. TF_Status* status);
  537. // Configure soft device placement policy for the eager executor. Note this
  538. // policy is applied to any subsequent op executions.
  539. TF_CAPI_EXPORT void TFE_ContextSetSoftDevicePlacement(TFE_Context* ctx,
  540. unsigned char enable,
  541. TF_Status* status);
  542. // Configure device placement policy logging for the eager executor. Note this
  543. // policy is applied to any subsequent op executions.
  544. TF_CAPI_EXPORT void TFE_ContextSetLogDevicePlacement(TFE_Context* ctx,
  545. unsigned char enable,
  546. TF_Status* status);
  547. // Returns the device type of the operation that produced `h`.
  548. TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceType(
  549. TFE_TensorHandle* h, TF_Status* status);
  550. // Returns the device ID of the operation that produced `h`.
  551. TF_CAPI_EXPORT extern int TFE_TensorHandleDeviceID(TFE_TensorHandle* h,
  552. TF_Status* status);
  553. // Get a comma-separated list of op names executed in graph functions dispatched
  554. // to `ctx`. This feature is currently only enabled for TFRT debug builds, for
  555. // performance and simplicity reasons.
  556. TF_CAPI_EXPORT extern void TFE_GetExecutedOpNames(TFE_Context* ctx,
  557. TF_Buffer* buf,
  558. TF_Status* status);
  559. // Set logical devices to the context's device manager.
  560. // If logical devices are already configured at context initialization
  561. // through TFE_ContextOptions, this method should not be called.
  562. TF_CAPI_EXPORT extern void TFE_SetLogicalCpuDevices(TFE_Context* ctx,
  563. int num_cpus,
  564. const char* prefix,
  565. TF_Status* status);
  566. #ifdef __cplusplus
  567. } /* end extern "C" */
  568. #endif
  569. #endif // TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_
粤ICP备19079148号