# python - numpy和Pandas的零填充

csv数据格式如下。

``````
u i r c

1 1 5 1

2 2 5 1

3 3 1 0

4 4 1 1

``````

``````
1 1 1 5 2 2 5 4 4 1

0 3 3 1

``````

``````
1 1 1 5 2 2 5 0 0 0 4 4 1

0 0 0 0 0 0 0 3 3 1 0 0 0

``````

``````
# Walk every third column starting at index 1 and count the positions
# where the two rows of `arr` hold different values.
nomatch = 0
for j in range(1, cols, 3):
    if arr[0][j] != arr[1][j]:
        nomatch += 1

# Three zeros of padding for every mismatch counted above.
z = np.zeros(nomatch * 3, dtype=arr.dtype)

# Row 0 is padded on the right, row 1 on the left, and the two padded rows
# are then joined end to end into a single 1-D array.
new0 = np.concatenate((arr[0], z))
# problem: the zeros are always prepended as one run, wherever the
# mismatches were actually found.
new1 = np.concatenate((z, arr[1]))
final = np.concatenate((new0, new1))

``````

``````
# Long format: index by 'c', stack the remaining columns into one 'value'
# column, and drop the index level that held the original column labels.
u = (
    df.set_index('c')
    .stack()
    .to_frame('value')
    .reset_index(level=1, drop=True)
)

# Append a running position as a second index level, then pivot that level
# into columns; positions with no value for a given 'c' are filled with 0.
positions = np.arange(len(u))
u.set_index(positions, append=True).unstack(level=-1, fill_value=0)

``````
``````
0 1 2 3 4 5 6 7 8 9 10 11

c

0 0 0 0 0 0 0 3 3 1 0 0 0

1 1 1 5 2 2 5 0 0 0 4 4 1

``````

``````
# Long format with a fresh default integer index; after reset_index the
# stacked values sit in the column labelled 0.
u = df.set_index('c').stack().reset_index()

# One row per 'c', one column per position in the long frame. Each cell
# takes the single value found there; empty cells become 0.
table = pd.crosstab(index=u['c'], columns=u.index, values=u[0], aggfunc='first')
table.fillna(0).astype(int)

``````

``````
# Convert the frame to an ndarray and separate the value columns from the
# last column, which is used as the selector.
a_ = df.to_numpy()
a = a_[:, :-1]

# Boolean mask based on the last column: True where it equals 1.
c = a_[:, -1] == 1

# Flatten the value columns row by row.
flat_a = a.ravel()

# Two identical copies of the flat array, one per output row.
out = np.vstack((flat_a, flat_a))

# Repeat each row's mask entry once per value column so it lines up
# element-for-element with flat_a.
m = np.repeat(c, a.shape[1])

# Row 0 keeps the masked-in values, row 1 keeps the complement.
b = np.vstack((m, ~m))

# Multiplying by the boolean mask zeroes out the unselected positions.
print(out * b)
# Result (shown as the array repr):
# array([[1, 1, 5, 2, 2, 5, 0, 0, 0, 4, 4, 1],
#        [0, 0, 0, 0, 0, 0, 3, 3, 1, 0, 0, 0]], dtype=int64)

``````

``````
# Repeat df 1,000 times along the row axis to build a larger input for the timings below.
df2 = pd.concat([df]*1_000, axis=0)

def yatu(df):
    """Spread the rows of ``df`` over two zero-padded flat rows.

    The last column of ``df`` acts as the selector; all other columns are
    the values. The values are flattened row by row and duplicated into a
    two-row array. Row 0 keeps the values from rows whose last column
    equals 1, row 1 keeps the values from every other row, and all
    remaining positions are 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose last column is compared against 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2, n_rows * (n_cols - 1))``.
    """
    values = df.to_numpy()
    a = values[:, :-1]
    # True for rows whose last column equals 1.
    c = values[:, -1] == 1
    flat_a = a.ravel()
    out = np.vstack((flat_a, flat_a))
    # One mask entry per flattened value, so the mask lines up with flat_a.
    m = np.repeat(c, a.shape[1])
    b = np.vstack((m, ~m))
    # Return the product; computing it without returning would discard it.
    return out * b

def user3483203(df):
    """Pivot ``df`` into one zero-padded row per distinct value of column 'c'.

    All columns other than 'c' are stacked into a single long column. Each
    stacked value gets its own output column, numbered by its position in
    the long frame, and lands in the row of its 'c' value. Cells with no
    value are filled with 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a column named 'c'.

    Returns
    -------
    pandas.DataFrame
        Integer frame indexed by the distinct values of 'c'.
    """
    # After reset_index the stacked values sit in the column labelled 0 and
    # the new default integer index numbers every value in order.
    u = df.set_index('c').stack().reset_index()
    return pd.crosstab(
        u['c'], u.index, u[0], aggfunc='first'
    ).fillna(0).astype(int)

# IPython %timeit magics: time each approach on the enlarged frame df2.
# The comment after each call is the recorded timing.
%timeit yatu(df2)

# 161 µs ± 2.05 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

%timeit user3483203(df2)

# 16 ms ± 223 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

``````