Overview
This article extends the WeChat bot implementation by analyzing friend statistics using the wxpy library. The analysis covers gender distribution, geographic distribution across provinces and cities, signature word clouds, and location-based heatmaps.
Implementation Components
Word Cloud Generation
The wordcloud library generates visual word clouds from text data.
Installation
pip install wordcloud
Usage
from wordcloud import WordCloud, ImageColorGenerator
def create_wordcloud(text_data, config):
    """Render ``text_data`` as a word cloud image and write it to disk.

    Args:
        text_data: Text handed to ``WordCloud.generate``.
        config: Mapping of optional settings read with ``.get``. Recognised
            keys and their fallbacks: ``bg_color`` ('white'), ``max_words``
            (200), ``font_path`` ('simhei.ttf'), ``min_size`` (10),
            ``max_size`` (80), ``width`` (800), ``height`` (400),
            ``mask_image`` (None) and ``output_path`` ('wordcloud.png').

    Returns:
        The object returned by ``WordCloud.generate``.
    """
    # (WordCloud keyword, key looked up in ``config``, fallback value)
    option_table = (
        ('background_color', 'bg_color', 'white'),
        ('max_words', 'max_words', 200),
        ('font_path', 'font_path', 'simhei.ttf'),
        ('min_font_size', 'min_size', 10),
        ('max_font_size', 'max_size', 80),
        ('width', 'width', 800),
        ('height', 'height', 400),
        ('mask', 'mask_image', None),
    )
    settings = {
        keyword: config.get(config_key, fallback)
        for keyword, config_key, fallback in option_table
    }

    image = WordCloud(**settings).generate(text_data)
    image.to_file(config.get('output_path', 'wordcloud.png'))
    return image
Chinese Text Segmentation
The jieba library performs Chinese word segmentation, converting continuous text into individual words.
Installation
pip install jieba
Usage
import jieba
import os
def segment_text(raw_text, custom_dict_path=None):
    """Segment ``raw_text`` with jieba and return the words space-separated.

    Args:
        raw_text: Text passed to ``jieba.cut``.
        custom_dict_path: Optional path to a jieba user dictionary. It is
            loaded only when the path is truthy and exists on disk;
            otherwise it is silently skipped.

    Returns:
        A single string with the segmented words joined by one space.
    """
    use_custom_dict = custom_dict_path and os.path.exists(custom_dict_path)
    if use_custom_dict:
        jieba.load_userdict(custom_dict_path)

    # Ask jieba to tune these terms so they are kept as whole words.
    for term in ('微信', '朋友圈'):
        jieba.suggest_freq(term, True)

    tokens = jieba.cut(raw_text)
    return ' '.join(tokens)
Chart Visualization
The matplotlib library creates various chart types including pie charts and bar charts.
Installation
pip install matplotlib
Usage
import matplotlib.pyplot as plt
import matplotlib
# Global matplotlib text settings used by the charts below: select the SimHei
# font for sans-serif text and turn off the Unicode minus sign
# (presumably so Chinese labels and negative tick labels render correctly
# with that font -- confirm against the fonts installed on the target machine).
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def plot_gender_distribution(male_count, female_count, other_count, username):
    """Save a bar chart of friend counts per gender.

    The chart is written to ``gender_distribution.png`` in the current
    working directory and the figure is closed afterwards.

    Args:
        male_count: Number shown for the 'Male' bar.
        female_count: Number shown for the 'Female' bar.
        other_count: Number shown for the 'Unknown' bar.
        username: Name interpolated into the chart title.
    """
    plt.figure(figsize=(8, 6))

    categories = ['Male', 'Female', 'Unknown']
    totals = [male_count, female_count, other_count]
    plt.bar(categories, totals, color=['#3498db', '#e74c3c', '#95a5a6'])

    plt.title(f"{username}'s Friends Gender Distribution")
    plt.ylabel('Count')

    # Write each value as a centred label one unit above its bar.
    for slot, total in enumerate(totals):
        plt.text(slot, total + 1, str(total), ha='center')

    plt.savefig('gender_distribution.png')
    plt.close()
def plot_province_distribution(province_data, username):
    """Save a pie chart of friend counts per province.

    The chart is written to ``province_distribution.png`` in the current
    working directory and the figure is closed afterwards.

    Args:
        province_data: Mapping of province name to friend count; keys become
            the slice labels and values the slice sizes.
        username: Name interpolated into the chart title.
    """
    plt.figure(figsize=(10, 8))

    slice_labels = list(province_data.keys())
    slice_sizes = list(province_data.values())
    plt.pie(
        slice_sizes,
        labels=slice_labels,
        autopct='%1.1f%%',
        startangle=90,
    )

    plt.title(f"{username}'s Friends Province Distribution")
    # Equal axis scaling keeps the pie circular instead of elliptical.
    plt.axis('equal')

    plt.savefig('province_distribution.png')
    plt.close()
Geographic Heatmap
The pyecharts library generates interactive heatmaps for geographic data.
Installation
pip install pyecharts
pip install echarts-countries-pypkg
pip install echarts-china-provinces-pypkg
pip install echarts-china-cities-pypkg
Usage
from pyecharts import Map, Page
def generate_location_heatmap(friends_data, username):
    """Render a China map coloured by friend count to ``friends_map.html``.

    Args:
        friends_data: Mapping of region name to friend count. Keys are used
            as the map's region names and values as the plotted numbers.
            An empty mapping is accepted and yields a map with no data.
        username: Name interpolated into the chart title.
    """
    regions = list(friends_data.keys())
    counts = list(friends_data.values())

    # max() raises ValueError on an empty sequence; fall back to 0 so that a
    # friend list without any location data still renders instead of crashing.
    highest_count = max(counts, default=0)

    map_chart = Map(
        f"{username}'s Friends Location Map",
        width=1200,
        height=600
    )
    map_chart.add(
        "Friends",
        regions,
        counts,
        is_label_show=True,
        is_visualmap=True,
        maptype='china',
        # Colour scale spans from zero up to the largest observed count.
        visual_range=[0, highest_count],
        visual_text_color='#FF0000',
        visual_range_text=['Low', 'High'],
        visual_range_color=['#FFFF00', '#FF0000']
    )
    map_chart.render('friends_map.html')
Core Data Processing
Collecting Friend Information
from wxpy import Bot
import re
def collect_friend_statistics(bot_instance):
    """Tally gender, province and city counts and gather signatures.

    Args:
        bot_instance: Object whose ``friends()`` method returns an iterable
            of friend objects exposing ``sex``, ``province``, ``city`` and
            ``signature`` attributes.

    Returns:
        A dict with four keys: ``'gender'``, ``'province'`` and ``'city'``
        map each observed value to its number of occurrences, and
        ``'signatures'`` is a list of the non-empty signatures in iteration
        order. Falsy provinces, cities and signatures are skipped; every
        ``sex`` value is counted.
    """
    def _tally(counter, key):
        # Increment the occurrence count stored for ``key``.
        counter[key] = counter.get(key, 0) + 1

    gender_counts = {}
    province_counts = {}
    city_counts = {}
    signature_list = []

    for contact in bot_instance.friends():
        _tally(gender_counts, contact.sex)

        home_province = contact.province
        if home_province:
            _tally(province_counts, home_province)

        home_city = contact.city
        if home_city:
            _tally(city_counts, home_city)

        if contact.signature:
            signature_list.append(contact.signature)

    return {
        'gender': gender_counts,
        'province': province_counts,
        'city': city_counts,
        'signatures': signature_list,
    }
def process_signatures(signatures):
    """Concatenate the Chinese characters found in every signature.

    Only characters in the range U+4E00..U+9FA5 are kept; everything else
    (Latin letters, digits, punctuation, whitespace, emoji) is discarded.
    The kept runs are joined with no separator, both within a signature
    and across signatures.

    Args:
        signatures: Iterable of signature strings.

    Returns:
        One string containing all retained characters in input order.
    """
    chinese_run = re.compile(r'[\u4e00-\u9fa5]+')
    return ''.join(
        fragment
        for entry in signatures
        for fragment in chinese_run.findall(entry)
    )
Displaying Statistics
def display_statistics(stats):
    """Print a gender breakdown followed by summary totals.

    For every entry in ``stats['gender']`` one line is printed with the
    label, the count and its share of the total as a percentage with two
    decimals. Codes 1, 2 and 0 are labelled 'Male', 'Female' and 'Unknown';
    any other code is also labelled 'Unknown'. Afterwards the total number
    of friends and the number of distinct provinces and cities are printed.

    Args:
        stats: Dict with ``'gender'``, ``'province'`` and ``'city'``
            mappings, as produced by ``collect_friend_statistics``.
    """
    labels = {1: 'Male', 2: 'Female', 0: 'Unknown'}
    gender_counts = stats['gender']
    friend_total = sum(gender_counts.values())

    for code in gender_counts:
        tally = gender_counts[code]
        share = (tally / friend_total) * 100
        name = labels.get(code, 'Unknown')
        print(f"{name}: {tally} ({share:.2f}%)")

    print(f"\nTotal friends: {friend_total}")
    print(f"Provinces covered: {len(stats['province'])}")
    print(f"Cities covered: {len(stats['city'])}")
Packaging Considerations
When converting the application to executable format, several dependencies require special handling:
jieba Dictionary Path
import jieba
import os
# Point jieba at a 'dict.txt' file located in the same directory as this
# source file, instead of relying on its default dictionary lookup.
# NOTE(review): 'dict.txt' must be shipped next to this file in the packaged
# build -- confirm it is included in the bundle.
jieba.set_dictionary(os.path.join(os.path.dirname(__file__), 'dict.txt'))
# Initialize jieba right away, after the dictionary path has been set.
jieba.initialize()
WordCloud Stopwords
Modify the wordcloud source file to resolve the stopwords path issue:
# Original (line 30): loads the 'stopwords' file from the directory named by
# the module's FILE constant.
STOPWORDS = set(map(str.strip, open(os.path.join(FILE, 'stopwords')).readlines()))
# Modified: loads the 'stopwords' file from the directory that contains the
# running executable (sys.executable) instead.
# NOTE(review): this relies on `sys` being imported in the patched module and
# on a 'stopwords' file being placed next to the executable -- confirm both.
STOPWORDS = set(map(str.strip, open(os.path.join(os.path.dirname(sys.executable), 'stopwords')).readlines()))
Restore the original code after packaging for development use.