pszemraj · August 27, 2024 18:59
diff --git a/extract_comments_and_docs.py b/extract_comments_and_docs.py
 import re


 def extract_comments_and_docs(multiline_string: str) -> str:
     """Collect the comment, docstring and quoted-string text found in source code.

     Three regular-expression passes are made over ``multiline_string``:

     * full-line comments: lines whose first non-whitespace character is
       ``#`` (trailing comments that follow code are not collected);
     * triple-quoted blocks (three single or three double quotes), which
       may span several lines;
     * single- or double-quoted strings that are not directly preceded or
       followed by a word character, so prefixed literals such as
       ``f'...'`` are skipped.

     Args:
         multiline_string: Source text to scan.

     Returns:
         The comment texts, then the docstring texts (both stripped of
         surrounding whitespace), then the string contents (verbatim),
         joined by single spaces. Empty pieces are dropped, and the result
         is ``""`` when nothing is found.
     """
     # Lines where the first non-whitespace character is '#'.
     comment_pattern = r"^\s*#(.*)"

     # Any text within triple quotes; group 2 holds the body of a '''...'''
     # block and group 3 the body of a """...""" block.
     docstring_pattern = r"(\'\'\'(.*?)\'\'\'|\"\"\"(.*?)\"\"\")"

     # Text within single or double quotes; a doubled quote character inside
     # the literal is accepted as part of its content.
     string_pattern = r'(?<!\w)(\'(?:[^\']|\'\')*\'|"(?:[^"]|"")*")(?!\w)'

     comments = re.findall(comment_pattern, multiline_string, re.MULTILINE)
     docstrings = re.findall(docstring_pattern, multiline_string, re.DOTALL)

     # The string pattern on its own also matches a whole triple-quoted block
     # (it reads the extra quotes as doubled-quote escapes), which would emit
     # every docstring twice. Blank those blocks out before the string pass.
     remainder = re.sub(docstring_pattern, " ", multiline_string, flags=re.DOTALL)
     strings = re.findall(string_pattern, remainder)

     pieces = [comment.strip() for comment in comments]
     # Exactly one of the two inner groups is non-empty for a given match.
     pieces.extend((single or double).strip() for _, single, double in docstrings)
     # Drop only the delimiting quote pair; str.strip("'\"") would also eat
     # quote characters that belong to the content, e.g. "say 'hi'".
     pieces.extend(literal[1:-1] for literal in strings)

     # Skip empty pieces so a missing group never leaves a double space.
     return " ".join(piece for piece in pieces if piece).strip()


 # Example usage: run the extractor over a small sample script.
 # The sample is runtime data for the call below, so the comments and prompts
 # inside the triple-quoted string are input text, not documentation of this file.

 multiline_string = """
 #coding utf-8
 '''
 斐波那契数列-循环法
 '''
 def Fib_circle():
    msg = "This is a test string"
    while True:   # 去掉while循环，只用for循环
        num_1 = 0
        num_2 = 1
        fib_array = [0] # 用于存储计算出的FB数列值
        m = input('你想要查找的起始项：')
        n = input('你想要查找的结束项：')
        if m.isdigit() and n.isdigit():   # 在这个实现函数中，不要进行检验。每个函数只做一个事情
            m = int(m) # 将输入化为整数型
            n = int(n)
            for i in range(n):
                num_1, num_2 = num_2, num_1 + num_2
                fib_array.append(num_1)
            print(f'你要查找的数列为{list(enumerate(fib_array[m:], m))}')
            break
        else:
            print('请输入有效的正整数')

 if __name__ == '__main__':
    Fib_circle()
 """

 result = extract_comments_and_docs(multiline_string)
 # Bare expression: displays the extracted text in a notebook/REPL cell;
 # it has no visible effect when the file is run as a plain script.
 result
	import re


	def extract_comments_and_docs(multiline_string: str) -> str:
	    """Collect the comment, docstring and quoted-string text found in source code.

	    Three regular-expression passes are made over ``multiline_string``:

	    * full-line comments: lines whose first non-whitespace character is
	      ``#`` (trailing comments that follow code are not collected);
	    * triple-quoted blocks (three single or three double quotes), which
	      may span several lines;
	    * single- or double-quoted strings that are not directly preceded or
	      followed by a word character, so prefixed literals such as
	      ``f'...'`` are skipped.

	    Args:
	        multiline_string: Source text to scan.

	    Returns:
	        The comment texts, then the docstring texts (both stripped of
	        surrounding whitespace), then the string contents (verbatim),
	        joined by single spaces. Empty pieces are dropped, and the result
	        is ``""`` when nothing is found.
	    """
	    # Lines where the first non-whitespace character is '#'. The '*'
	    # quantifiers are required: without them only a one-character comment
	    # after exactly one whitespace character would match.
	    comment_pattern = r"^\s*#(.*)"

	    # Any text within triple quotes; group 2 holds the body of a '''...'''
	    # block and group 3 the body of a """...""" block. The '|' must stay
	    # unescaped so it acts as alternation rather than a literal bar.
	    docstring_pattern = r"(\'\'\'(.*?)\'\'\'|\"\"\"(.*?)\"\"\")"

	    # Text within single or double quotes; a doubled quote character inside
	    # the literal is accepted as part of its content.
	    string_pattern = r'(?<!\w)(\'(?:[^\']|\'\')*\'|"(?:[^"]|"")*")(?!\w)'

	    comments = re.findall(comment_pattern, multiline_string, re.MULTILINE)
	    docstrings = re.findall(docstring_pattern, multiline_string, re.DOTALL)

	    # The string pattern on its own also matches a whole triple-quoted block
	    # (it reads the extra quotes as doubled-quote escapes), which would emit
	    # every docstring twice. Blank those blocks out before the string pass.
	    remainder = re.sub(docstring_pattern, " ", multiline_string, flags=re.DOTALL)
	    strings = re.findall(string_pattern, remainder)

	    pieces = [comment.strip() for comment in comments]
	    # Exactly one of the two inner groups is non-empty for a given match.
	    pieces.extend((single or double).strip() for _, single, double in docstrings)
	    # Drop only the delimiting quote pair; str.strip("'\"") would also eat
	    # quote characters that belong to the content, e.g. "say 'hi'".
	    pieces.extend(literal[1:-1] for literal in strings)

	    # Skip empty pieces so a missing group never leaves a double space.
	    return " ".join(piece for piece in pieces if piece).strip()


	# Example usage: run the extractor over a small sample script.
	# The sample is runtime data for the call below, so the comments and prompts
	# inside the triple-quoted string are input text, not documentation of this file.

	multiline_string = """
	#coding utf-8
	'''
	斐波那契数列-循环法
	'''
	def Fib_circle():
	msg = "This is a test string"
	while True: # 去掉while循环，只用for循环
	num_1 = 0
	num_2 = 1
	fib_array = [0] # 用于存储计算出的FB数列值
	m = input('你想要查找的起始项：')
	n = input('你想要查找的结束项：')
	if m.isdigit() and n.isdigit(): # 在这个实现函数中，不要进行检验。每个函数只做一个事情
	m = int(m) # 将输入化为整数型
	n = int(n)
	for i in range(n):
	num_1, num_2 = num_2, num_1 + num_2
	fib_array.append(num_1)
	print(f'你要查找的数列为{list(enumerate(fib_array[m:], m))}')
	break
	else:
	print('请输入有效的正整数')

	if __name__ == '__main__':
	Fib_circle()
	"""

	result = extract_comments_and_docs(multiline_string)
	# Bare expression: displays the extracted text in a notebook/REPL cell;
	# it has no visible effect when the file is run as a plain script.
	result