Skip to content Skip to sidebar Skip to footer

ValueError: Data Cardinality Is Ambiguous

I'm trying to train an LSTM network on data taken from a DataFrame. Here's the code: x_lstm = x.to_numpy().reshape(1, x.shape[0], x.shape[1]) model = keras.models.Sequential([ keras. ...

Solution 1:

As the error suggests, the first dimensions of X and y are different. The first dimension is the batch size (the number of samples), and it must be the same for both arrays.

Please ensure that y also has the shape (1, something).

I could reproduce your error with the Code shown below:

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import tensorflow as tf
import numpy as np


# Minimal reproduction of "ValueError: Data cardinality is ambiguous".
# The final model.fit() call is expected to fail: X has 1 sample along its
# first axis while y has 3.

# define sequences
sequences = [
    [1, 2, 3, 4],
    [1, 2, 3],
    [1],
]

# pad sequence (shorter sequences are zero-padded to the longest length, 4)
padded = pad_sequences(sequences)
# add a leading batch axis: (3, 4) -> (1, 3, 4)
X = np.expand_dims(padded, axis=0)
print(X.shape)  # (1, 3, 4)

y = np.array([1, 0, 1])
# Deliberately left commented out so the error is reproduced; uncommenting it
# gives y the shape (1, 3), matching X's first dimension.
#y = y.reshape(1,-1)
print(y.shape)  # (3,)

model = Sequential()
# input_shape=(None, features): any number of time steps, X.shape[2] features
model.add(LSTM(4, return_sequences=False, input_shape=(None, X.shape[2])))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.Adam(0.001))

# Raises the ValueError: X has 1 sample (first dimension) but y has 3.
model.fit(x=X, y=y)

If we look at the output of the print statements:

The shape of X is (1, 3, 4)
The shape of y is (3,)

This error can be fixed by uncommenting the line y = y.reshape(1,-1), which makes the first dimension (the batch size) equal to 1 for both X and y.

Now, the working code is shown below, along with the Output:

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import tensorflow as tf
import numpy as np


# Three integer sequences of unequal length.
sequences = [[1, 2, 3, 4], [1, 2, 3], [1]]

# Pad them to a common length, then prepend a batch axis.
padded = pad_sequences(sequences)
X = padded[np.newaxis, ...]
print('Shape of X is ', X.shape)  # (1, 3, 4)

# Give the targets a leading dimension of 1 so it matches X's batch size.
y = np.array([1, 0, 1])
y = y.reshape(1,-1)
print('Shape of y is', y.shape)  # (1, 3)

# Single LSTM layer followed by a sigmoid output unit.
model = Sequential([
    LSTM(4, return_sequences=False, input_shape=(None, X.shape[-1])),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss='mean_squared_error',
)

# Both arrays now have first dimension 1, so fit() accepts them.
model.fit(X, y)

The output of the above code is:

Shape of X is  (1, 3, 4)
Shape of y is (1, 3)
1/1 [==============================] - 0s 1ms/step - loss: 0.2588
<tensorflow.python.keras.callbacks.History at 0x7f5b0d78f4a8>

Hope this helps. Happy Learning!

Solution 2:

I am also having this problem. Help me please.

ValueError: Data cardinality is ambiguous:
  x sizes: 770
  y sizes: 771
Make sure all arrays contain the same number of samples.

Below is the code I used.

import math

# Split the data into train/test parts and reshape the inputs to the 3D
# (samples, n_steps, features) layout for the LSTM.
# `dataset`, `X`, `y` and `n_steps` are defined outside this snippet.

# split into train and test sets (first 6/8 of the rows are used for training)
train_size = int((dataset.shape[0]*(6/8)))
test_size = len(dataset) - train_size

X_train = X[0:train_size]
X_test = X[train_size:X.shape[0]]
y_train = y[0:train_size]
y_test = y[train_size:X.shape[0]]

# %%
# Reshape input to 3D
# Number of whole n_steps-long windows, computed BEFORE any rows are deleted.
train_resize = int(X_train.shape[0]/n_steps)
test_resize = int(X_test.shape[0]/n_steps)

rows_train, cols_train = X_train.shape
rows_test, cols_test = X_test.shape
# Draw 7 (train) and 16 (test) random row indices and drop those rows from
# the inputs only.
# NOTE(review): randint samples with replacement, so fewer than 7/16 distinct
# rows may be removed, and the matching rows of y_train / y_test are not
# removed, so inputs and labels no longer line up row for row.
delrows_train = np.random.randint(rows_train, size=(7,1))
delrows_test = np.random.randint(rows_test, size=(16,1))
X_train = np.delete(X_train, delrows_train, axis=0)
X_test = np.delete(X_test, delrows_test, axis=0)

# print(rows_train)
print(train_resize)
print(test_resize)

# NOTE(review): this is where the sample counts diverge. X_* is reshaped to
# exactly train_resize / test_resize samples (floor(rows / n_steps)), whereas
# y_*[::n_steps] keeps ceil(rows / n_steps) labels, i.e. one more whenever the
# original row count is not a multiple of n_steps. Slicing the labels as
# y_train[:train_resize * n_steps:n_steps] (and likewise for y_test) would
# make the counts equal.
X_train = X_train.reshape(train_resize,n_steps,X.shape[1])
y_train = y_train[::n_steps]
X_test = X_test.reshape(test_resize,n_steps,X.shape[1])
y_test = y_test[::n_steps]
#%%
print("X_train size- ", X_train.shape)
print("y_train size- ", y_train.shape)
print("X_test size- ", X_test.shape)
print("y_test size- ", y_test.shape)

# design and train LSTM network
# `Sequential`, `LSTM` and `Dense` are imported outside this snippet.
from keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
import timeit
import keras

# Wall-clock timing of model construction and training.
start = timeit.default_timer()

# Two stacked LSTM layers: the first returns the full sequence so the second
# can consume it, the second returns only its final output.
model = Sequential()
model.add(LSTM(100, input_shape=(X_train.shape[1], X_train.shape[2]),return_sequences=True))
model.add(LSTM(100,return_sequences=False))
model.add(Dropout(0.2))
# NOTE(review): a 2-unit softmax output combined with binary_crossentropy
# presumably expects one-hot labels of shape (samples, 2); confirm the
# encoding of y_train / y_test.
model.add(Dense(2, activation='softmax'))
opt = Adam(learning_rate=0.0001)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# fit() requires X_train and y_train to have the same number of samples along
# the first axis (likewise for the validation pair).
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=2, shuffle=False)

stop = timeit.default_timer()

Post a Comment for "ValueError: Data Cardinality Is Ambiguous"