How to group and merge rows in pandas by overlapping aliases

1. Condition:
Two rows are merged when the aliases in them intersect.
Merged values are joined with the `#` sign (rendered as `-sharp` on this page); within the `alias` column, individual aliases are likewise separated by `#`.
2. Data:
name alias
0 potato potato-sharp egg-sharp potato-sharp potato
1 potato potato-sharp potato
2 corn cob-sharp corn-sharp corn-sharp corn
3 potato son
4 cereal bag cereal-sharp corn

, potato

clipboard.png

3. Expected result:
name alias
-sharp-sharp -sharp-sharp-sharp-sharp
-sharp -sharp-sharp-sharp
clipboard.png

That is, rows 1, 2 and 4 are merged, and rows 3 and 5 are merged (the values are joined with `#` and de-duplicated).

import pandas as pd

# Sample input for the question: one row per name, with all of that name's
# aliases packed into a single separator-delimited string.
# NOTE(review): "-sharp" looks like a mangled "#" separator and the alias text
# looks machine-translated; the literals are kept exactly as found -- confirm
# against the original post before relying on them.
cols = ["name", "alias"]
data = [
    ["potato", "Didou-sharp egg-sharp potato-sharp potato"],
    ["potato", "potato-sharp potato"],
    ["corn", "stick-sharp-sharp corn-sharp corn"],
    ["potato", "potato"],
    ["Baogu", "Baogu-sharp corn"],
]
frame = pd.DataFrame(data, columns=cols)

Feb.26,2021

import pandas as pd
import operator
from collections import Counter

cols = ['name', 'alias']
# NOTE(review): these literals look like they lost their original (non-ASCII)
# text, and '-sharp' looks like a mangled '#'. They are kept exactly as found;
# confirm against the original post.
data = [
    ['', '-sharp-sharp-sharp'],
    ['', '-sharp'],
    ['', '-sharp-sharp-sharp'],
    ['', ''],
    ['', '-sharp']
]

df = pd.DataFrame(data, columns=cols)

# Token that separates the individual values packed into one cell.
SEP = '-sharp'


def merge_rows_sharing_alias(table, sep=SEP):
    """Merge rows of *table* whose ``alias`` cells share at least one token.

    Every ``name`` and ``alias`` cell is treated as a *sep*-separated list.
    For each token that occurs more than once across all alias cells, the
    rows that currently contain that token are removed and replaced by one
    row appended at the end. Its ``name`` and ``alias`` are the sorted,
    de-duplicated union of the removed rows' tokens, joined with *sep*.

    Args:
        table: DataFrame with string columns ``name`` and ``alias``.
        sep: Separator between tokens inside a cell.

    Returns:
        A new DataFrame with a fresh 0..n-1 index; *table* is not modified.
    """
    columns = list(table.columns)
    occurrences = Counter(
        token for cell in table['alias'] for token in cell.split(sep)
    )
    for token, count in occurrences.items():
        # A token seen only once cannot link two rows.
        if count == 1:
            continue

        # Compare whole tokens: a substring/regex test would also match
        # aliases that merely contain the token's text.
        hits = [token in cell.split(sep) for cell in table['alias']]
        matched = table.loc[hits]
        if matched.empty:
            continue
        remaining = table.drop(matched.index)

        # Union of the tokens of the matched rows; sorted so that the
        # output does not depend on set iteration order.
        merged = {
            'name': sep.join(sorted(
                {part for cell in matched['name'] for part in cell.split(sep)}
            )),
            'alias': sep.join(sorted(
                {part for cell in matched['alias'] for part in cell.split(sep)}
            )),
        }

        # Rebuild the frame with the merged row last and a reset index.
        table = pd.DataFrame(
            remaining.to_dict('records') + [merged], columns=columns
        )
    return table


df = merge_rows_sharing_alias(df)

print(df)

import pandas as pd
cols = ['name', 'alias']
# NOTE(review): these literals look like they lost their original (non-ASCII)
# text, and '-sharp' looks like a mangled '#'. They are kept exactly as found;
# confirm against the original post.
data = [
    ['', '-sharp-sharp-sharp'],
    ['', '-sharp'],
    ['', '-sharp-sharp-sharp'],
    ['', ''],
    ['', '-sharp'],
]
frame = pd.DataFrame(data, columns=cols)

# Token that separates the individual aliases packed into one cell.
SEP = '-sharp'


def merge_alias_groups(rows, sep=SEP):
    """Group ``(name, alias)`` rows into merged name/alias sets.

    Rows are processed in order. A row joins the first existing group whose
    alias set contains the row's *name*; otherwise it starts a new group.
    After a row joins a group, every other group that has a name among the
    grown group's aliases is absorbed into it.

    Args:
        rows: Iterable of ``(name, alias)`` string pairs, where ``alias`` is
            a *sep*-separated list of aliases.
        sep: Separator between aliases inside an ``alias`` string.

    Returns:
        A ``(names, aliases)`` pair of parallel lists of sets, one entry per
        resulting group, in order of first appearance.
    """
    names, aliases = [], []
    for name, alias in rows:
        tokens = set(alias.split(sep))

        # Index of the first group that already knows this name as an alias.
        hit = next(
            (idx for idx, group in enumerate(aliases) if name in group),
            None,
        )
        if hit is None:
            names.append({name})
            aliases.append(tokens)
            continue

        names[hit].add(name)
        aliases[hit].update(tokens)

        # The grown alias set may now connect to other groups. The target
        # keeps growing inside this loop, so later groups are tested against
        # everything absorbed so far.
        absorbed = []
        for idx in range(len(aliases)):
            if idx != hit and names[idx] & aliases[hit]:
                absorbed.append(idx)
                names[hit].update(names[idx])
                aliases[hit].update(aliases[idx])

        # Delete from the highest index down so pending indices stay valid.
        for idx in reversed(absorbed):
            del names[idx]
            del aliases[idx]
    return names, aliases


# Iterate the rows directly: going through a name -> alias dict would keep
# only the last alias string for any name that appears more than once.
k, v = merge_alias_groups(zip(frame['name'], frame['alias']))

# Tokens are sorted so the joined strings do not depend on set ordering.
frame = pd.DataFrame(
    {
        'name': [SEP.join(sorted(group)) for group in k],
        'alias': [SEP.join(sorted(group)) for group in v],
    },
    columns=cols,
)
print(frame)
             alias       name
0         -sharp-sharp      -sharp
1  -sharp-sharp-sharp-sharp  -sharp-sharp
Menu