"""Python data-analysis case study: USA government (usa.gov) access data.

Originally posted by 左手的ㄟ右手 on 2022-12-20 02:51 (98 views, 0 likes).

The script reads a file containing one JSON record per line and then:

* counts how often each time zone (``tz`` field) occurs,
* extracts the first token of every user-agent string (``a`` field),
* labels each record as "Windows" / "Not Windows" based on its user agent,
* reports the time zones with the most records, split by that label.
"""
import json
import re
from collections import Counter
from collections import defaultdict

import numpy as np
import pandas as pd

# Raw string: the original literal relied on "\e" not being a recognised
# escape sequence, which newer Python versions warn about.
path = r'D:\example.txt'


def load_records(file_path):
    """Return the list of dicts parsed from a JSON-lines file.

    Blank lines are skipped. The file is opened as UTF-8 and is closed
    as soon as parsing finishes.
    """
    with open(file_path, encoding='utf-8') as handle:
        return [json.loads(line) for line in handle if line.strip()]


def extract_timezones(records):
    """Return the ``tz`` value of every record that has a ``tz`` key."""
    return [rec['tz'] for rec in records if 'tz' in rec]


def get_counte(sequence):
    """Count how many times each item occurs in *sequence*.

    Returns a ``defaultdict(int)`` mapping item -> occurrence count; this
    is the hand-written equivalent of ``collections.Counter(sequence)``.
    """
    counts = defaultdict(int)
    for item in sequence:
        counts[item] += 1
    return counts


def browser_names(frame):
    """Return a Series holding the first token of each user-agent string.

    Rows whose ``a`` value is missing are dropped; strings that contain
    only whitespace are skipped instead of raising ``IndexError``.
    """
    tokens = (agent.split() for agent in frame['a'].dropna())
    return pd.Series([parts[0] for parts in tokens if parts])


def count_os_by_timezone(frame):
    """Return a time-zone x OS table of record counts.

    Only rows with a user agent (``a``) are considered. Each such row is
    labelled "Windows" when its agent string contains "Windows" and
    "Not Windows" otherwise. Missing combinations are filled with 0.
    """
    # Copy so that adding the 'os' column does not write into a slice of
    # the caller's frame (avoids pandas' SettingWithCopyWarning).
    with_agent = frame[frame['a'].notnull()].copy()
    with_agent['os'] = np.where(
        with_agent['a'].str.contains('Windows'), 'Windows', 'Not Windows'
    )
    return with_agent.groupby(['tz', 'os']).size().unstack().fillna(0)


def top_timezones(os_tz_count, n=10):
    """Return the *n* rows of *os_tz_count* with the largest row totals.

    Rows are ordered by ascending total, so the busiest time zone is last.
    """
    if n <= 0:
        # A slice of [-0:] would select every row, so handle it explicitly.
        return os_tz_count.iloc[0:0]
    order = os_tz_count.sum(axis=1).argsort().to_numpy()
    return os_tz_count.take(order[-n:])


def main(data_path=path):
    """Run the whole analysis on *data_path* and print the results."""
    record = load_records(data_path)
    if not record:
        print('no records found in', data_path)
        return
    print(record[0])

    timezones = extract_timezones(record)
    print(timezones[:10])

    # Same counts computed twice: once by hand, once with Counter.
    print(dict(get_counte(timezones)))
    print(Counter(timezones).most_common(10))

    f1 = pd.DataFrame(record)
    f1.info()
    print(f1['tz'][:10])
    print(f1.columns)

    r2 = browser_names(f1)
    print(r2.head(5))
    print(r2.value_counts().head(8))

    os_tz_count = count_os_by_timezone(f1)
    print(os_tz_count)
    print(top_timezones(os_tz_count, 10))
    print(os_tz_count.sum(axis=1).nlargest(10))


if __name__ == '__main__':
    main()

# Sample data: follow the WeChat official account 二进制失真处理 and reply
# "usa1" in its chat to receive the data file.
# Image from the original post (20201110100415683.png):
#   /images/20221120/ad75dbf695ef4ebc9ac3b455e0a69992.png
还没有评论,来说两句吧...