标签:探查 postgresql name column 数据库 num print table self
1 import pandas as pd 2 import xlrd 3 import psycopg2 4 5 class Expedition(object): 6 def __init__(self, database, user, psd, ip, port): 7 self.database = database 8 self.user = user 9 self.psd = psd 10 self.ip = ip 11 self.port = port 12 self.tables = [] 13 self.schema = 'schema名' 14 15 def get_table_info(self): 16 dblink = psycopg2.connect(database=self.database, user=self.user, password=self.psd, host=self.ip, port=self.port) 17 print("链接成功") 18 cur = dblink.cursor() 19 print("创建游标成功") 20 for table in self.tables: 21 print(table) 22 cur.execute(f"""SELECT cast(obj_description(relfilenode,'pg_class') as varchar) FROM pg_class b 23 WHERE relname = '{table}'""") 24 table_value = cur.fetchall() 25 if table_value == []: 26 continue 27 # 获取表备注 28 table_comment = table_value[0][0] 29 print (f"获取表备注{table_comment}成功") 30 31 # 获取字段列表 32 cur.execute(f"""select column_name from information_schema.columns where table_catalog ='{self.database}' and table_name = '{table}'""") 33 columns = cur.fetchall() 34 print(f"获取字段列表:{columns}成功") 35 if columns == []: 36 continue 37 38 # 从字段列表中取出包裹在集合的字段名 39 for column in columns: 40 # 得到字段名 41 column_name = column[0] 42 print (f"获得字段成功{column_name}") 43 44 # 获得该字段的备注信息和字段类型 45 print(f"""SELECT col_description(a.attrelid,a.attnum),format_type(a.atttypid,a.atttypmod) as type 46 FROM pg_class as c,pg_attribute as a where c.relname = '{table}' and a.attrelid = c.oid and a.attnum>0 and a.attname = '{column_name}'""") 47 cur.execute(f"""SELECT cast(col_description(a.attrelid,a.attnum)as varchar),format_type(a.atttypid,a.atttypmod) as type 48 FROM pg_class as c,pg_attribute as a where c.relname = '{table}' and a.attrelid = c.oid and a.attnum>0 and a.attname = '{column_name}'""") 49 comment = cur.fetchall() 50 51 # 获得该字段备注和字段类型列表 52 print(f"获得备注类型列表:{comment}") 53 54 # 获得字段的备注和类型 55 column_comment = comment[0][0] 56 column_type = comment[0][1] 57 print(f"获得字段备注:{column_comment}成功,获得字段类型成功:{column_type}") 58 59 # 获得记录数 60 print(f"SELECT count(1),count(DISTINCT cast({column_name} as varchar)) from {self.schema}.{table}") 61 cur.execute(f"""SELECT count(1),count(DISTINCT cast("{column_name}" as varchar)) from {self.schema}.{table}""") 62 field_num = cur.fetchall() 63 all_num = field_num[0][0] 64 only_num = field_num[0][1] 65 print(f"记录数:{all_num},唯一记录数:{only_num}") 66 67 # 唯一率 68 try: 69 70 only_rate = only_num / all_num 71 except Exception as result: 72 print(result) 73 only_rate = 0 74 75 76 # 空值数量 77 try: 78 print(f"SELECT count(1) from self.schema where CAST({column_name} as varchar) != ''") 79 cur.execute(f"""SELECT count(1) from {self.schema}.{table} 80 where cast("{column_name}" as varchar) = ''or "{column_name}" is null""") 81 null_resule = cur.fetchall() 82 null_num = null_resule[0][0] 83 84 null_rate = (all_num - null_num) / all_num 85 print(f"获得null值数量;{null_num}成功") 86 except Exception as result: 87 print(result) 88 null_num = "未获得数据" 89 null_rate = "未获得数据" 90 91 # # 获取最大值/最小值 92 93 if column_type.startswith("i") or column_type.startswith("f") or column_type.startswith("do") or column_type.startswith("n") or column_type.startswith("bi"): 94 try: 95 print( 96 f"select MAX(cast({table}.{column_name} as varchar)),MIN(cast({table}.{column_name} as varchar)) from {self.schema}.{table}") 97 cur.execute( 98 f"""select MAX(cast({table}."{column_name}" as float)),MIN(cast({table}."{column_name}" as float)) from {self.schema}.{table}""") 99 extreme_result = cur.fetchall() 100 max_value = extreme_result[0][0] 101 min_value = extreme_result[0][1] 102 print(extreme_result) 103 print(f"最大值:{max_value},最小值:{min_value}") 104 except: 105 106 print("错误信息") 107 108 else: 109 try: 110 print( 111 f"select MAX(cast({table}.{column_name} as varchar)),MIN(cast({table}.{column_name} as varchar)) from {self.schema}.{table}") 112 cur.execute( 113 f"""select MAX(cast({table}."{column_name}" as varchar)),MIN(cast({table}."{column_name}" as varchar)) from {self.schema}.{table}""") 114 extreme_result = cur.fetchall() 115 max_value = extreme_result[0][0] 116 min_value = extreme_result[0][1] 117 print(extreme_result) 118 print(f"最大值:{max_value},最小值:{min_value}") 119 except: 120 121 print("错误信息") 122 123 # # 获取最大长度和最小长度 124 try: 125 cur.execute(f"""SELECT max(char_length(CAST("{column_name}" as VARCHAR))),\ 126 min(char_length(CAST("{column_name}" as VARCHAR))) from {self.schema}.{table}""") 127 column_lengtn = cur.fetchall() 128 max_lenth = column_lengtn[0][0] 129 min_lenth = column_lengtn[0][1] 130 print(f"最大长度:{max_lenth},最小长度:{min_lenth}") 131 except Exception as err: 132 max_lenth = "未获得数据" 133 min_lenth = "未获得数据" 134 135 # 获得样例数据 136 wordlist = [] 137 try: 138 print(f"""SELECT distinct cast({column_name} as varchar) from {self.schema}.{table} where CAST('{column_name}' as varchar) != '' 139 LIMIT 4""") 140 cur.execute(f"""SELECT distinct cast("{column_name}" as varchar) from {self.schema}.{table} where CAST("{column_name}" as varchar) != '' 141 LIMIT 4""") 142 words = cur.fetchall() 143 for word in words: 144 print(word) 145 wordlist.append(word[0]) 146 print(wordlist) 147 except: 148 wordlist = ['未获得数据'] 149 150 151 152 153 154 # print(table, table_comment, column_name, column_comment, column_type, all_num, null_num, only_num, only_rate, max_value, min_value, max_lenth, min_lenth, f"{wordlist}") 155 # 调用写入 156 self.write_into_excel(table, table_comment, column_name, column_comment, column_type, all_num, null_num, null_rate, only_num, \ 157 only_rate, max_value, min_value, max_lenth, min_lenth, f"{wordlist}") 158 print("写入成功") 159 160 161 162 163 164 # 从表格获取表明 165 def get_excel_value(self): 166 # 打开 167 data = xlrd.open_workbook(r"C:\Users\Administrator\Desktop\未探查总.xlsx") 168 # 获取sheet对象 169 table = data.sheet_by_name(U"Sheet1") 170 171 # row = table.row_values(0) 172 # 读取第一列数据,返回一个列表 173 col = table.col_values(0) 174 self.tables = col 175 print (f"共从excel中读取{len(self.tables)}个表名") 176 177 178 # 将数据写入表格 179 def write_into_excel(self, table_name, table_comment, column_name, column_comment, column_type, all_num, null_num, null_rate, only_num, only_rate, max_value, min_value,max_lenth,min_lenth,word_list): 180 181 df = pd.DataFrame(pd.read_excel('C:\\Users\Administrator\Desktop\探查模板.xlsx')) # 读取原数据 182 df_rows = df.shape[0] # 获取行数 183 # 增加一条数据 184 df.loc[df_rows] = [table_name, table_comment, column_name, column_comment, column_type, all_num, null_num,null_rate, only_num, only_rate, max_value, min_value, max_lenth, min_lenth, word_list] # 与原数据同格式 185 df.to_excel('C:\\Users\Administrator\Desktop\探查模板.xlsx', sheet_name='sheet1', index=False, header=True) 186 187 188 189 190 if __name__=='__main__': 191 run = Expedition("数据库名", "账户名", "密码", "ip地址", 端口号) 192 run.get_excel_value() 193 run.get_table_info()
标签:探查,postgresql,name,column,数据库,num,print,table,self 来源: https://www.cnblogs.com/luweilehei/p/12508792.html
本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享; 2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关; 3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关; 4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除; 5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。