where I can talk.

第一次面试遇到的编程题

· Read in about 3 min · (470 Words)
Python

有一个3G大小的文本文件,里面每一行存储着形如

‘hd_GwpWe6u’ + ‘\t’ + ‘Lxx7r30hStUgxY6q.jpg’的信息,

‘hd_xxxx’代表公司名称,‘xxxxx.jpg’代表图片名称。

统计文本文件中每个公司的图片数量,按数量区间将公司分组,并以如下格式存入文件中:

[20 - ] + ‘\t’ + id1 + ‘\t’ + id2….

[15 - 20] + ‘\t’ + id1 + ‘\t’ + id2….

[10 - 15] + ‘\t’ + id1 + ‘\t’ + id2….

[5 - 10] + ‘\t’ + id1 + ‘\t’ + id2….

[0 - 5] + ‘\t’ + id1 + ‘\t’ + id2….

解决:

from collections import defaultdict


class Counter:
    """Group companies by the number of images recorded for each of them.

    The input file holds one record per line: a company id and an image
    name separated by a single tab character. After construction,
    ``self.info`` maps each company id to the list of its image names.
    ``assort_file`` then sorts the companies into five count ranges and
    appends the result to an output file.
    """

    def __init__(self, path):
        """
        :param path: information file path
        """
        self.info = defaultdict(list)
        # Each bucket is the output line for one count range; company ids
        # are appended to it tab-separated. All but the first start with a
        # newline so the buckets can simply be concatenated when written.
        self.most = '[20 - ]'
        self.more = '\n[15 - 20]'
        self.normal = '\n[10 - 15]'
        self.little = '\n[5 - 10]'
        self.less = '\n[0 - 5]'
        with open(path, 'r') as f:
            # Iterate the file object lazily instead of calling readlines(),
            # which would hold every line of a multi-gigabyte file in memory.
            for line in f:
                # Split on the tab rather than on fixed offsets so that a
                # final line without a trailing newline keeps its last
                # character and ids of any length are handled.
                company, sep, jpg = line.rstrip('\n').partition('\t')
                if not sep:
                    # Blank or malformed line: no tab separator, skip it.
                    continue
                self.info[company].append(jpg)

    def assort_file(self, path):
        """
        Bucket every company by image count and append the buckets to a file.

        :param path: output file path
        """
        for company, images in self.info.items():
            self.count_num(len(images), company)
        # Write all five bucket lines with a single open/append.
        self.insert_file(path, ''.join(
            [self.most, self.more, self.normal, self.little, self.less]))

    def count_num(self, n, id):
        """
        Append a company id to the bucket matching its image count.

        Negative counts match no bucket and are ignored.

        :param n: a number (image count)
        :param id: company id
        """
        # NOTE: the parameter name ``id`` shadows the builtin; it is kept so
        # existing keyword callers continue to work.
        entry = '\t' + str(id)
        if n >= 20:
            self.most += entry
        elif n >= 15:
            self.more += entry
        elif n >= 10:
            self.normal += entry
        elif n >= 5:
            self.little += entry
        elif n >= 0:
            self.less += entry

    def insert_file(self, path, info):
        """
        Append text to a file.

        :param path: file path
        :param info: company's ids
        """
        with open(path, 'a') as f:
            f.write(info)


if __name__ == '__main__':
    import os
    # Read company.txt from the current working directory and append the
    # grouped result to count.txt in the same directory.
    file_path, output_path = (
        os.path.join(os.path.abspath('.'), name)
        for name in ('company.txt', 'count.txt')
    )
    assortfile = Counter(file_path).assort_file(output_path)

# 生成随机信息
def gen_random(length):
    """
    Build a random alphanumeric string.

    :param length: number of characters to generate
    :return: string of ``length`` characters drawn from ASCII letters
        and digits
    """
    import random
    import string
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def insert_info(path, info):
    """
    Append text to the end of a file, creating the file if needed.

    :param path: output file path
    :param info: text to append
    """
    with open(path, mode='a') as handle:
        handle.write(info)


def gen_info(path, num, same=None):
    """
    Append ``num`` random records to a file, one per line.

    Every record is a company id (``hd_`` plus 7 random characters), a tab,
    and an image name (16 random characters plus ``.jpg``).

    :param path: output file path
    :param num: the number of records to generate
    :param same: True to use one company id for all records, False to use
        a fresh company id for every record
    :return: None; nothing is written when ``same`` is neither True nor False
    """
    if same is False:
        # A new company id per record. The separators are a real tab and a
        # real newline (not the two-character sequences '/t' and '/n').
        lines = [
            'hd_' + gen_random(7) + '\t' + gen_random(16) + '.jpg' + '\n'
            for _ in range(num)
        ]
    elif same is True:
        company = 'hd_' + gen_random(7)
        lines = [
            company + '\t' + gen_random(16) + '.jpg' + '\n'
            for _ in range(num)
        ]
    else:
        return None
    if lines:
        # Write all records with one append instead of reopening the file
        # for every record.
        insert_info(path, ''.join(lines))


if __name__ == '__main__':
    import os
    # Append two batches of sample records to company.txt in the current
    # working directory; each batch shares a single company id.
    file_name = os.path.join(os.path.abspath('.'), 'company.txt')
    gen_info(file_name, num=20, same=True)
    gen_info(file_name, num=15, same=True)
    gen_info(file_name, 15, True)

二分查找变种–查找第一个大于 key 的元素

def search_first_larger(list_name, num):
    """
    Binary-search for the first element strictly greater than ``num``.

    :param list_name: sequence sorted in ascending order
    :param num: the key to compare against
    :return: index of the first element greater than ``num``, or
        ``len(list_name)`` when no element is greater (including when the
        sequence is empty)
    """
    low, high = 0, len(list_name) - 1
    while low <= high:
        # Integer division keeps the midpoint exact for very large indices.
        mid = (low + high) // 2
        if list_name[mid] <= num:
            # Everything up to and including mid is <= num; look right.
            low = mid + 1
        else:
            # list_name[mid] is a candidate; keep looking left for an
            # earlier one.
            high = mid - 1
    # low has moved past every element <= num, so it is the answer.
    return low

Comments