Status: Unknown

Assertion

Humans have fewer genes than many plants and animals.

Results

Assumptions

– The comparison is based on protein-coding genes only; non-coding regions can vary significantly among species.
– The latest human genomic data (from sources like the Human Genome Project and ENCODE) are being used as a reference point for gene count.
– The examples of plants with more genes than humans, such as Arabidopsis thaliana and certain rice species, have been selected based on well-established research findings.
===
– Protein-coding gene comparison: The statement assumes that the focus is solely on protein-coding genes to determine which organism has more, disregarding non-coding regions which can vary greatly among species and may contain regulatory elements or other functional sequences.

– Current human genomic data reliability: It is assumed that the most recent and accurate gene count estimates from reputable sources (like the Human Genome Project and ENCODE) are being used to assert the number of protein-coding genes in humans.

– Existence of higher gene counts in certain plants: The statement assumes there is substantial scientific evidence showing that some plant species have a greater number of protein-coding genes than those found in

Fact Checker Output

Explanation:

1. True – Comparisons based on protein-coding genes are common as they directly relate to the organism’s functional complexity and traits; however, this approach does not account for the entirety of genomic diversity or potential functions within non-coding regions which can vary significantly among species.

2. True – Utilizing data from authoritative sources like the Human Genome Project and ENCODE ensures that gene count estimations are based on consensus scientific research, providing a reliable reference for human genetic information.

3. Generally True – While it is true that some plants have more protein-coding genes than humans, such as Arabidopsis thaliana (approximately 27,900) and certain rice species with over 48,000 protein-coding genes, these examples are well supported by scientific research. However, the statement does not account for potential variations or updates in gene count estimates that may emerge from ongoing genomic studies.

It’s essential to note that while the above explanations indicate general truthfulness of the assumptions, specific details and context can vary with new discoveries in genom

Model Used

microsoft/Phi-3-mini-4k-instruct-gguf

Script Name

fact_checker_mongodb.py

Script Code

import os
import sys
import requests
from langchain_openai.llms import OpenAI
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from statements import get_random_statement
from mongodb_helper import insert_record  # Import MongoDB helper functions
import html

# Load environment variables from .env file
load_dotenv()

# Define category IDs
CATEGORY_IDS = {
    "True": 2,
    "False": 3,
    "Debatable": 4,
    "Unknown": 6
}

def create_wordpress_post(title, content, category):
    data = {
        "title": title,
        "content": content,
        "status": "publish",
        "categories": [CATEGORY_IDS[category]]
    }

    response = requests.post(
        os.getenv("WORDPRESS_POSTS_URL"),
        json=data,
        auth=(os.getenv("WORDPRESS_USERNAME"), os.getenv("WORDPRESS_PASSWORD"))
    )

    if response.status_code == 201:
        print("Blog post created successfully.")
    else:
        print(f"Failed to create blog post: {response.status_code} - {response.text}")

def fact_check(assertion):
    llm = OpenAI(temperature=0.7, model=os.getenv("MODEL_NAME"))

    # Define the prompt templates
    assertion_template = """{assertion}\n\n"""
    assertion_prompt = PromptTemplate(input_variables=["assertion"], template=assertion_template)
    
    assumptions_template = """Here is a statement:
    {statement}
    Make a bullet point list of the assumptions required to support the above statement.\n\n"""
    assumptions_prompt = PromptTemplate(input_variables=["statement"], template=assumptions_template)
    
    fact_checker_template = """Here is a bullet point list of assertions:
    {assertions}
    For each assumption, determine whether it is true or false. Explain your reasoning.\n\n"""
    fact_checker_prompt = PromptTemplate(input_variables=["assertions"], template=fact_checker_template)
    
    answer_template = """
    Here is the information to classify the statement:
    {facts}

    Based on the above information, how would you classify the statement? Respond with one of the following options followed by a colon and space:
    - True: [Explanation]
    - False: [Explanation]
    - Debatable: [Explanation]
    """
    answer_prompt = PromptTemplate(input_variables=["facts"], template=answer_template)
    
    # Format prompts and extract the string content
    formatted_assertion = assertion_prompt.format_prompt(assertion=assertion).text
    assertion_output = llm.invoke(formatted_assertion)
    
    formatted_assumptions = assumptions_prompt.format_prompt(statement=assertion_output).text
    assumptions_output = llm.invoke(formatted_assumptions)
    
    formatted_fact_checker = fact_checker_prompt.format_prompt(assertions=assumptions_output).text
    fact_checker_output = llm.invoke(formatted_fact_checker)
    
    formatted_answer = answer_prompt.format_prompt(facts=fact_checker_output).text
    final_output = llm.invoke(formatted_answer)
    
    return {
        "assertion_output": assertion_output,
        "assumptions_output": assumptions_output,
        "fact_checker_output": fact_checker_output,
        "final_output": final_output,
    }

def extract_status_and_reasoning(final_output):
    final_output = final_output.strip()
    if "True:" in final_output:
        status_start = final_output.find("True:")
        status = "True"
    elif "False:" in final_output:
        status_start = final_output.find("False:")
        status = "False"
    elif "Debatable:" in final_output:
        status_start = final_output.find("Debatable:")
        status = "Debatable"
    else:
        return "Unknown", final_output

    reasoning = final_output[status_start + len(status) + 1:].strip()
    return status, reasoning

if __name__ == "__main__":
    if len(sys.argv) > 1:
        assertion = sys.argv[1]
    else:
        assertion = get_random_statement()
    
    print(assertion)
    submission = fact_check(assertion)
    
    # Print the detailed outputs to inspect their structure
    for key, value in submission.items():
        print(f"{key}: {value}")
    
    # Extract the final output for status determination and reasoning
    final_output = submission['final_output']
    status, reasoning = extract_status_and_reasoning(final_output)
    
    # Record the result in MongoDB
    try:
        print("Attempting to insert record into MongoDB...")
        insert_record(
            script_name="fact_checker_mongodb.py",
            script_code=html.escape(open(__file__).read()),
            assertion=assertion,
            status=status,
            submission=submission,  # Store the entire submission for detailed analysis
            model=os.getenv("MODEL_NAME")
        )
        print("Record inserted into MongoDB successfully.")
    except Exception as e:
        print(f"Failed to insert record into MongoDB: {e}")
    
    print(final_output)
    
    # Create a blog post on WordPress
    blog_title = f"Fact Check: {assertion}"
    blog_content = f"""
    <h1>Status: {status}</h1>
    <h2>Assertion</h2>
    <p>{assertion}</p>
    <h2>Results</h2>
    <p>{reasoning}</p>
    <h3>Assumptions</h3>
    <p>{submission['assumptions_output']}</p>
    <h3>Fact Checker Output</h3>
    <p>{submission['fact_checker_output']}</p>
    <h4>Model Used</h4>
    <p>{os.getenv("MODEL_NAME")}</p>
    <h4>Script Name</h4>
    <p>fact_checker_mongodb.py</p>
    <h4>Script Code</h4>
    <pre>{html.escape(open(__file__).read())}</pre>
    """
    create_wordpress_post(blog_title, blog_content, status)

Fact Check: Humans have fewer genes than many plants and animals.