Skip to content

Commit eda875f

Browse files
committed
fix hard coded n_state in predictWith and rename variables
1 parent 41fac5f commit eda875f

12 files changed

Lines changed: 40 additions & 39 deletions

convert_whisper_encoder_to_coreml.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,13 @@
1212
encoder = model.encoder
1313
encoder.eval()
1414

15-
mel_segment = torch.ones((1, 80, 3000))
16-
traced_encoder = torch.jit.trace(encoder, mel_segment)
15+
melSegment = torch.ones((1, 80, 3000))
16+
traced_encoder = torch.jit.trace(encoder, melSegment)
1717

1818
# convert to coreml model
1919
encoder = ct.convert(
2020
traced_encoder,
21-
inputs=[ct.TensorType(name="mel_segment", shape=mel_segment.shape)],
21+
inputs=[ct.TensorType(name="melSegment", shape=melSegment.shape)],
2222
outputs=[ct.TensorType(name="output")],
2323
compute_units=ct.ComputeUnit.ALL,
2424
)
@@ -27,11 +27,11 @@
2727
encoder_fp16.save(f"encoder_{modelSize}_fp16.mlmodel")
2828

2929
# test accuracy
30-
torch_output = traced_encoder.forward(mel_segment)
30+
torch_output = traced_encoder.forward(melSegment)
3131
print("torch model output:", torch_output)
32-
mel_segment = mel_segment.cpu().detach().numpy()
32+
melSegment = melSegment.cpu().detach().numpy()
3333
coreml_output = torch.from_numpy(
34-
list(encoder_fp16.predict({'mel_segment': mel_segment}).values())[0]
34+
list(encoder_fp16.predict({'melSegment': melSegment}).values())[0]
3535
)
3636
print(f"coreml {modelSize} model output:", coreml_output)
3737
diff = torch.abs(torch_output - coreml_output).detach()

coreml/CoremlEncoder.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,10 @@ NS_ASSUME_NONNULL_BEGIN
1616
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((visibility("hidden")))
1717
@interface CoremlEncoderInput : NSObject<MLFeatureProvider>
1818

19-
/// mel_segment as 1 × 80 × 3000 3-dimensional array of floats
20-
@property (readwrite, nonatomic, strong) MLMultiArray * mel_segment;
19+
/// melSegment as 1 × 80 × 3000 3-dimensional array of floats
20+
@property (readwrite, nonatomic, strong) MLMultiArray * melSegment;
2121
- (instancetype)init NS_UNAVAILABLE;
22-
- (instancetype)initWithMel_segment:(MLMultiArray *)mel_segment NS_DESIGNATED_INITIALIZER;
22+
- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment NS_DESIGNATED_INITIALIZER;
2323

2424
@end
2525

@@ -123,11 +123,11 @@ API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0)) __attribute__((
123123

124124
/**
125125
Make a prediction using the convenience interface
126-
@param mel_segment as 1 × 80 × 3000 3-dimensional array of floats:
126+
@param melSegment as 1 × 80 × 3000 3-dimensional array of floats:
127127
@param error If an error occurs, upon return contains an NSError object that describes the problem. If you are not interested in possible errors, pass in NULL.
128128
@return the prediction as CoremlEncoderOutput
129129
*/
130-
- (nullable CoremlEncoderOutput *)predictionFromMel_segment:(MLMultiArray *)mel_segment error:(NSError * _Nullable __autoreleasing * _Nullable)error;
130+
- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error;
131131

132132
/**
133133
Batch prediction

coreml/CoremlEncoder.m

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,21 +12,21 @@
1212

1313
@implementation CoremlEncoderInput
1414

15-
- (instancetype)initWithMel_segment:(MLMultiArray *)mel_segment {
15+
- (instancetype)initWithMelSegment:(MLMultiArray *)melSegment {
1616
self = [super init];
1717
if (self) {
18-
_mel_segment = mel_segment;
18+
_melSegment = melSegment;
1919
}
2020
return self;
2121
}
2222

2323
- (NSSet<NSString *> *)featureNames {
24-
return [NSSet setWithArray:@[@"mel_segment"]];
24+
return [NSSet setWithArray:@[@"melSegment"]];
2525
}
2626

2727
- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
28-
if ([featureName isEqualToString:@"mel_segment"]) {
29-
return [MLFeatureValue featureValueWithMultiArray:self.mel_segment];
28+
if ([featureName isEqualToString:@"melSegment"]) {
29+
return [MLFeatureValue featureValueWithMultiArray:self.melSegment];
3030
}
3131
return nil;
3232
}
@@ -176,8 +176,8 @@ - (nullable CoremlEncoderOutput *)predictionFromFeatures:(CoremlEncoderInput *)i
176176
return [[CoremlEncoderOutput alloc] initWithOutput:(MLMultiArray *)[outFeatures featureValueForName:@"output"].multiArrayValue];
177177
}
178178

179-
- (nullable CoremlEncoderOutput *)predictionFromMel_segment:(MLMultiArray *)mel_segment error:(NSError * _Nullable __autoreleasing * _Nullable)error {
180-
CoremlEncoderInput *input_ = [[CoremlEncoderInput alloc] initWithMel_segment:mel_segment];
179+
- (nullable CoremlEncoderOutput *)predictionFromMelSegment:(MLMultiArray *)melSegment error:(NSError * _Nullable __autoreleasing * _Nullable)error {
180+
CoremlEncoderInput *input_ = [[CoremlEncoderInput alloc] initWithMelSegment:melSegment];
181181
return [self predictionFromFeatures:input_ error:error];
182182
}
183183

coreml/CoremlEncoder.mlmodel

-2 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.
-1 Bytes
Binary file not shown.

coreml/CoremlEncoder.mlmodelc/metadata.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@
5959
"formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
6060
"shortDescription" : "",
6161
"shape" : "[1, 80, 3000]",
62-
"name" : "mel_segment",
62+
"name" : "melSegment",
6363
"type" : "MultiArray"
6464
}
6565
],

coreml/CoremlEncoder.mlmodelc/model.espresso.net

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
"layers" : [
1414
{
1515
"size_of_axes" : 1,
16-
"bottom" : "mel_segment",
16+
"bottom" : "melSegment",
1717
"axes_0" : -2,
1818
"weights" : {
1919

coreml/CoremlEncoder.mlmodelc/model.espresso.shape

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -284,13 +284,6 @@
284284
"_rank" : 4,
285285
"h" : 1
286286
},
287-
"mel_segment" : {
288-
"k" : 1,
289-
"w" : 3000,
290-
"n" : 1,
291-
"_rank" : 3,
292-
"h" : 80
293-
},
294287
"input.17" : {
295288
"k" : 1,
296289
"w" : 1536,
@@ -720,6 +713,13 @@
720713
"_rank" : 4,
721714
"h" : 1500
722715
},
716+
"melSegment" : {
717+
"k" : 1,
718+
"w" : 3000,
719+
"n" : 1,
720+
"_rank" : 3,
721+
"h" : 80
722+
},
723723
"q.3" : {
724724
"k" : 1500,
725725
"w" : 64,

main.cpp

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,19 @@ int main() {
77
const void* coremlEncoder = loadModel("coreml/CoremlEncoder.mlmodelc");
88

99
// prepare easy test input like torch.ones
10-
float* inFloats = (float *)malloc(sizeof(float) * 80 * 3000);
10+
float* mel = (float *)malloc(sizeof(float) * 80 * 3000);
1111
for(int i=0;i<80*3000; i++) {
12-
inFloats[i] = 1.0;
12+
mel[i] = 1.0;
1313
}
1414

15-
float* outFloats = predictWith(coremlEncoder, inFloats);
15+
// alloc output buffer
16+
int n_state = 384; // tiny=384
17+
float* outFloats = (float *)malloc(sizeof(float) * 1500 * n_state);
18+
19+
predictWith(coremlEncoder, mel, outFloats);
1620

1721
// it should match
1822
// pytorch output: {'output': array([[[-0.28637695, -0.25561523, ..., -0.10253906]]], dtype=float32)
19-
cout << outFloats[0] << " " << outFloats[1] << " " << outFloats[384*1500-1];
23+
cout << outFloats[0] << " " << outFloats[1] << " " << outFloats[n_state*1500-1];
2024
closeModel(coremlEncoder);
2125
}

0 commit comments

Comments
 (0)