Skip to content

Reference

__main__.py

Entry point that passes info read from files to chain, then passes LLM result to be written

main()

Create chain, read info from files, append generated questions, then write to new file

Source code in src/chatbot_util/__main__.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def main():
    """Run the whole pipeline: read the inputs, generate questions, write the csv.

    All input and output files live at fixed locations inside
    ``~/.chatbot-util``; the ``~`` is expanded before the paths are used.
    """
    # Build the path table with "~" already expanded
    paths = {
        key: os.path.expanduser(location)
        for key, location in (
            ("readfile", "~/.chatbot-util/Chatbot FAQ - Enter Here.csv"),
            ("readfile2", "~/.chatbot-util/Other.txt"),
            ("writefile", "~/.chatbot-util/Permutated.csv"),
        )
    }

    # Step 1: load everything the generator and the writer need
    print("\nReading topics, questions, employees, and answers...")
    store, employees, phrases, answers, nums = file_io.read(paths)

    # Step 2: let the language model add similar questions to each topic
    expanded_store = chain.generate(store, phrases)

    # Step 3: pair every question with its answer and write the result
    output_path = paths["writefile"]
    print(f'\nWriting to "{output_path}"...')
    file_io.write(paths, expanded_store, employees, answers, nums)
    print("Done.\n")

chain.py

Setup language model and output parser, then generate and append new questions

generate(store, phrases)

Generate and append new questions to store

Source code in src/chatbot_util/chain.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def generate(store, phrases):
    """Ask the model for variations of every question and add them to ``store``.

    Args:
        store: Mapping of topic to its list of questions. The lists are
            extended in place with the generated questions.
        phrases: Find/replace pairs forwarded to ``invoke`` for cleaning the
            model output.

    Returns:
        The same ``store`` object, now also holding the generated questions.
    """
    # Number of original questions, used only for the progress display
    total = sum(len(questions) for questions in store.values())

    position = 0
    for questions in store.values():
        batches = []
        for question in questions:
            position += 1
            # "\r" rewrites the same terminal line on every update
            sys.stdout.write(
                f"\rGenerating similar queries for: {position}/{total}..."
            )
            sys.stdout.flush()
            batches.append(invoke(INSTRUCTION + question, phrases))
        # Extend only after the loop so generated questions are never
        # themselves fed back to the model
        for batch in batches:
            questions.extend(batch)
    return store

invoke(prompt, phrases)

Send the prompt to the Ollama "mistral" model, then parse and clean the response

Source code in src/chatbot_util/chain.py
46
47
48
49
50
51
52
53
54
55
56
def invoke(prompt, phrases):
    """Send ``prompt`` to the Ollama ``mistral`` model and return the cleaned lines.

    Args:
        prompt: Full prompt text passed to ``ollama.generate``.
        phrases: Find/replace pairs forwarded to ``parse``.

    Returns:
        Whatever ``parse`` produces from the ``"response"`` field of the reply.
    """
    # NOTE(review): the seed is fixed, presumably to keep output reproducible
    # between runs - confirm against the Ollama documentation.
    reply = ollama.generate(model="mistral", prompt=prompt, options={"seed": 39})
    return parse(reply["response"], phrases)

parse(response, phrases)

Parse lines from LLM and clean them before returning

Source code in src/chatbot_util/chain.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def parse(response, phrases):
    """Split an LLM response into cleaned question lines.

    Each line is stripped of surrounding whitespace, loses a leading list
    marker (``1.``, ``12.``, ``3)``), and then has every find/replace pair in
    ``phrases`` applied. Blank lines are dropped.

    Args:
        response: Raw text returned by the language model.
        phrases: Sequence of pairs where item ``0`` is the text to find and
            item ``1`` is its replacement.

    Returns:
        A new list with one cleaned string per non-empty response line.
    """

    def strip_numbering(line):
        """Return ``line`` without a leading numeric list marker, if it has one."""
        digit_count = len(line) - len(line.lstrip("0123456789"))
        if digit_count == 0 or line[digit_count:digit_count + 1] not in (".", ")"):
            return line
        remainder = line[digit_count + 1:]
        # Require whitespace (or nothing) after the marker so text such as
        # "3.5 GPA" is not mistaken for a numbered item.
        if remainder and not remainder[0].isspace():
            return line
        return remainder.lstrip()

    cleaned_lines = []
    for raw_line in response.strip().split("\n"):
        line = strip_numbering(raw_line.strip())
        # Build a new list instead of removing entries from the list being
        # iterated, which used to skip consecutive blank lines.
        if not line:
            continue
        for phrase in phrases:
            line = line.replace(phrase[0], phrase[1])
        cleaned_lines.append(line)
    return cleaned_lines

file_io.py

Reads csv, employees, and answers, and writes generated result to csv

read(filenames)

Read questions from csv file, read employees, phrases and answers from text files

Source code in src/chatbot_util/file_io.py
128
129
130
131
132
133
def read(filenames):
    """Load every input the pipeline needs from the two source files.

    Args:
        filenames: Mapping with a ``"readfile"`` entry (the questions csv) and
            a ``"readfile2"`` entry (the text file with employees, phrases and
            answers).

    Returns:
        The tuple ``(store, employees, phrases, answers, nums)``.
    """
    # Questions grouped by topic, plus per-topic counts
    store, nums = read_entries(filenames["readfile"])

    # Everything kept in the plain-text companion file
    other_sections = read_other(filenames["readfile2"])
    employees, phrases, answers = other_sections

    return store, employees, phrases, answers, nums

read_answers(lines)

Read and return answers for cen, robotics, instr, and reach

Source code in src/chatbot_util/file_io.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def read_answers(lines):
    """Build the answer table for the CEN, robotics, instructional and reach topics.

    Args:
        lines: Sequence of at least four line groups, in the order CEN,
            robotics, instructional, reach.

    Returns:
        A dict with the keys ``"cen_answers"``, ``"robotics_answers"``,
        ``"instr_answers"`` and ``"reach_answers"``.
    """
    # Only the CEN group uses the two-part "before:after" format; the other
    # groups are plain one-answer-per-line lists.
    return {
        "cen_answers": read_cen(lines[0]),
        "robotics_answers": read_basic(lines[1]),
        "instr_answers": read_basic(lines[2]),
        "reach_answers": read_basic(lines[3]),
    }

read_basic(lines)

Read and return basic answers for topics other than CEN

Source code in src/chatbot_util/file_io.py
83
84
85
86
87
88
89
90
def read_basic(lines):
    """Return the given answer lines with surrounding newlines removed.

    Args:
        lines: Iterable of raw answer strings.

    Returns:
        A new list holding each line with leading and trailing ``"\\n"``
        characters stripped.
    """
    return [raw_answer.strip("\n") for raw_answer in lines]

read_cen(lines)

Read and return cen_answers

Source code in src/chatbot_util/file_io.py
70
71
72
73
74
75
76
77
78
79
80
def read_cen(lines):
    """Read the two-part CEN answers.

    Each line has the form ``before:after``; the organisation name is later
    inserted between the two parts.

    Args:
        lines: Iterable of raw lines, with or without a trailing newline.

    Returns:
        A dict mapping ``"cen_<i>"`` (``i`` being the line's position) to the
        list ``[before, after]``.

    Raises:
        ValueError: If a line contains no ``":"`` separator.
    """
    cen_answers = {}
    for i, line in enumerate(lines):
        # Strip only an actual line ending: the last line of a file may have
        # none, and chopping a fixed character would lose real text.
        # Split once so the second part may itself contain a colon.
        part1, part2 = line.rstrip("\r\n").split(":", 1)
        cen_answers[f"cen_{i}"] = [part1, part2]

    return cen_answers

read_employees(lines)

Read and return employee list

Source code in src/chatbot_util/file_io.py
46
47
48
49
50
51
52
53
54
55
56
def read_employees(lines):
    """Read the employee list.

    Each line has the form ``name:role``.

    Args:
        lines: Iterable of raw lines, with or without a trailing newline.

    Returns:
        A dict mapping each employee name to their role.

    Raises:
        ValueError: If a line contains no ``":"`` separator.
    """
    employees = {}
    for line in lines:
        # Strip only an actual line ending (the last line of a file may have
        # none) and split once so a role may itself contain a colon.
        employee, role = line.rstrip("\r\n").split(":", 1)
        employees[employee] = role

    return employees

read_entries(filename)

Read and return the questions grouped by topic, plus per-topic question counts

Source code in src/chatbot_util/file_io.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def read_entries(filename):
    """Read the questions csv and return the questions grouped by topic.

    The first line is treated as a header and skipped. Every other line is
    ``topic,question``; a line with an empty topic column belongs to the most
    recent topic named above it. Lines without a question are ignored.

    Args:
        filename: Path of the csv file, read as UTF-8.

    Returns:
        A ``(store, nums)`` tuple. ``store`` maps each topic to its list of
        questions; a question is everything after the first comma, kept
        exactly as written (including any surrounding quotes). ``nums`` holds
        the number of questions read for the CEN, Robotics, Instructional and
        Edu-Reach topics.

    Raises:
        KeyError: If a question appears before any topic has been named.
    """
    counter_for_topic = {
        "CEN": "num_cen",
        "Robotics": "num_robotics",
        "Instructional": "num_instr",
        "Edu-Reach": "num_reach",
    }
    store = {}
    nums = dict.fromkeys(counter_for_topic.values(), 0)
    cur_topic = ""

    with open(filename, "r", encoding="utf-8") as f:
        # Lines are split by hand rather than with csv.reader(delimiter="\n"):
        # recent Python versions (3.13+) reject a newline as csv delimiter.
        next(f, None)  # skip the header line
        for raw_line in f:
            # partition() leaves the question empty when the line has no
            # comma, so stray text is ignored instead of becoming a topic
            # that is also its own question.
            topic, _, question = raw_line.rstrip("\r\n").partition(",")
            if question == "":
                continue
            if topic != "":
                cur_topic = topic
                # setdefault keeps earlier questions when a topic is named
                # again, so the lists stay in step with the counters below.
                store.setdefault(cur_topic, [])
            store[cur_topic].append(question)
            counter = counter_for_topic.get(cur_topic)
            if counter is not None:
                nums[counter] += 1

    return store, nums

read_other(filename)

Read and return employees, phrases, and answers

Source code in src/chatbot_util/file_io.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def read_other(filename):
    """Read the companion text file and return employees, phrases and answers.

    The file is split into at most six sections, each separated by one blank
    line, in this order: employees, phrases, then the four answer groups
    handed to ``read_answers``.

    Args:
        filename: Path of the text file, read as UTF-8.

    Returns:
        The tuple ``(employees, phrases, answers)``.

    Raises:
        IndexError: If text follows more than five blank separator lines.
    """
    sections = [[] for _ in range(6)]
    section_number = 0

    with open(filename, "r", encoding="utf-8") as handle:
        for raw_line in handle.readlines():
            if raw_line != "\n":
                sections[section_number].append(raw_line)
            else:
                # Every blank line moves on to the next section, so two blank
                # lines in a row leave an empty section between them
                section_number += 1

    return (
        read_employees(sections[0]),
        read_phrases(sections[1]),
        read_answers(sections[2:]),
    )

read_phrases(lines)

Read and return phrases to find and replace

Source code in src/chatbot_util/file_io.py
59
60
61
62
63
64
65
66
67
def read_phrases(lines):
    """Read the find/replace phrase pairs.

    Each line has the form ``find:replace``.

    Args:
        lines: Iterable of raw lines, with or without a trailing newline.

    Returns:
        A list of ``[find, replace]`` pairs in input order.

    Raises:
        ValueError: If a line contains no ``":"`` separator.
    """
    phrases = []
    for line in lines:
        # Split on the first colon only, so the replacement text may contain
        # a colon of its own instead of failing to unpack.
        find, replace = line.split(":", 1)
        phrases.append([find, replace.strip("\n")])

    return phrases

write(filenames, store, employees, answers, nums)

Format questions and topics, write to csv file

Source code in src/chatbot_util/file_io.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def write(filenames, store, employees, answers, nums):
    """Write every question with its answer to the output csv.

    Args:
        filenames: Mapping whose ``"writefile"`` entry is the output path.
        store: Mapping of topic to its list of questions.
        employees: Employee data forwarded to ``utils.create_answer``.
        answers: Answer table forwarded to ``utils.create_answer``.
        nums: Per-topic question counts forwarded to ``utils.create_answer``.
    """
    # Running per-topic positions, threaded through utils.create_answer
    indices = dict.fromkeys(
        ("cen_index", "robotics_index", "instr_index", "reach_index"), 0
    )

    with open(filenames["writefile"], "w", encoding="utf-8") as out_file:
        out_file.write('"question","answer"\n')
        for topic, questions in store.items():
            for question in questions:
                answer, indices = utils.create_answer(
                    topic, question, employees, answers, nums, indices
                )
                out_file.write(utils.clean_entry(question, answer))

utils.py

Utilities for creating and cleaning answers based on file content

clean_entry(question, answer)

Clean up unnecessary quotes

Source code in src/chatbot_util/utils.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def clean_entry(question, answer):
    """Format one ``"question","answer"`` csv row without doubling quotes.

    A quote is added on either side of the question only where the question
    does not already have one; the answer is always wrapped in quotes.

    Args:
        question: Question text, possibly already starting and/or ending with
            a double quote. May be empty.
        answer: Answer text, written as given between double quotes.

    Returns:
        The row as a string ending in a newline.
    """
    # startswith/endswith are safe on an empty question, unlike indexing.
    opening = "" if question.startswith('"') else '"'
    # A lone '"' is only an opening quote, so it still needs a closing one.
    closing = "" if len(question) > 1 and question.endswith('"') else '"'
    return f'{opening}{question}{closing},"{answer}"\n'

create_answer(topic, question, employees, answers, nums, indices)

Convert topics to answers depending on whether the topic is a person, CEN, or other

Source code in src/chatbot_util/utils.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def create_answer(topic, question, employees, answers, nums, indices):
    """Pick the answer for ``question`` according to its topic.

    ``"CEN"`` questions use the CEN answers, ``"Robotics"``,
    ``"Instructional"`` and ``"Edu-Reach"`` use their own answer lists, and
    any other topic is treated as a person's name.

    Args:
        topic: Topic the question belongs to.
        question: Question text (only used for CEN answers).
        employees: Employee data used for person topics.
        answers: Dict of answer collections keyed by ``"*_answers"``.
        nums: Dict of per-topic question counts keyed by ``"num_*"``.
        indices: Dict of running per-topic positions keyed by ``"*_index"``;
            the entry for the matched topic is updated in place.

    Returns:
        The tuple ``(answer, indices)``.
    """
    # topic -> (key in answers, key in nums, key in indices)
    basic_topics = {
        "Robotics": ("robotics_answers", "num_robotics", "robotics_index"),
        "Instructional": ("instr_answers", "num_instr", "instr_index"),
        "Edu-Reach": ("reach_answers", "num_reach", "reach_index"),
    }

    if topic == "CEN":
        answer, indices["cen_index"] = create_cen_answer(
            question,
            answers["cen_answers"],
            nums["num_cen"],
            indices["cen_index"],
        )
    elif topic in basic_topics:
        answers_key, num_key, index_key = basic_topics[topic]
        answer, indices[index_key] = create_other_answer(
            answers[answers_key],
            nums[num_key],
            indices[index_key],
        )
    else:
        answer = create_person_answer(topic, employees)
    return answer, indices

create_cen_answer(question, cen_answers, num_cen, cen_index)

Update CEN topics to be the relevant answer

Source code in src/chatbot_util/utils.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def create_cen_answer(question, cen_answers, num_cen, cen_index):
    """Select and format the CEN answer for the question at ``cen_index``.

    The first ``num_cen`` positions are the original questions, two per
    answer (an unpaired last question gets the next answer). Every position
    after that is a generated question, ten per answer.

    Args:
        question: Question text, passed on to ``create_cen_answer_helper``.
        cen_answers: Dict mapping ``"cen_<i>"`` to a two-part answer.
        num_cen: Number of original CEN questions.
        cen_index: Zero-based position of this question among all CEN
            questions.

    Returns:
        The tuple ``(answer, cen_index + 1)``.

    Raises:
        KeyError: If ``cen_answers`` has no entry for the computed position.
    """
    # Direct arithmetic replaces a linear search that never terminated for
    # the last original question when num_cen was odd and greater than one.
    if cen_index < num_cen:
        answer_number = cen_index // 2
    else:
        answer_number = (cen_index - num_cen) // 10

    cen_answer = cen_answers[f"cen_{answer_number}"]
    answer = create_cen_answer_helper(question, cen_answer)
    return answer, cen_index + 1

create_cen_answer_helper(question, cen_answer)

Format answer based on question content

Source code in src/chatbot_util/utils.py
17
18
19
20
21
22
23
24
25
26
27
def create_cen_answer_helper(question, cen_answer):
    """Join the two answer parts around the organisation name.

    The short name ``CEN`` is used when the question itself mentions ``CEN``
    and does not ask about the acronym or abbreviation; otherwise the full
    name is used.

    Args:
        question: Question text to inspect.
        cen_answer: Two-item sequence holding the text before and after the
            name.

    Returns:
        The assembled answer string.
    """
    asks_about_short_form = ("acronym" in question) or ("abbreviation" in question)
    if "CEN" in question and not asks_about_short_form:
        name = "CEN"
    else:
        name = "Collegiate Edu-Nation"
    return f"{cen_answer[0]}{name}{cen_answer[1]}"

create_other_answer(answers, num, index)

Update other topics to be the relevant answer

Source code in src/chatbot_util/utils.py
46
47
48
49
50
51
52
53
54
55
def create_other_answer(answers, num, index):
    """Select the answer for the question at position ``index``.

    The first ``num`` positions are the original questions, one per answer.
    Every position after that is a generated question, five per answer.

    Args:
        answers: Sequence of answers for the topic.
        num: Number of original questions for the topic.
        index: Zero-based position of this question within the topic.

    Returns:
        The tuple ``(answer, index + 1)``.

    Raises:
        IndexError: If ``answers`` has no entry for the computed position.
    """
    # Direct arithmetic replaces a `while not answer` search, which looped
    # forever whenever the selected answer was an empty string.
    if index < num:
        position = index
    else:
        position = (index - num) // 5
    return answers[position], index + 1

create_person_answer(topic, employees)

Update person topics to be their contact info

Source code in src/chatbot_util/utils.py
 7
 8
 9
10
11
12
13
14
def create_person_answer(topic, employees):
    """Build the contact answer for a topic that is a person's name.

    The e-mail user name is the first letter of the first word followed by
    the second word, both lower-cased.

    Args:
        topic: The person's name, with words separated by single spaces.
        employees: Dict mapping a name to that person's role.

    Returns:
        A sentence naming the person, their role and their e-mail address.
    """
    name_parts = topic.split(" ")
    role = employees[topic]
    initial = name_parts[0][0].lower()
    second_word = name_parts[1].lower()
    return (
        f"{topic}, CEN's {role}, can help with that. "
        f"Their contact is {initial}{second_word}@edu-nation.org"
    )