admin管理员组

文章数量:1438820

聊聊Spring AI Alibaba的MySQLDocumentReader

本文主要研究一下Spring AI Alibaba的MySQLDocumentReader

MySQLDocumentReader

community/document-readers/spring-ai-alibaba-starter-document-reader-mysql/src/main/java/com/alibaba/cloud/ai/reader/mysql/MySQLDocumentReader.java

代码语言:javascript代码运行次数:0运行复制
public class MySQLDocumentReader implements DocumentReader {

	private final MySQLResource mysqlResource;

	public MySQLDocumentReader(MySQLResource mysqlResource) {
		this.mysqlResource = mysqlResource;
	}

	@Override
	public List<Document> get() {
		List<Document> documents = new ArrayList<>();
		try {
			// Register MySQL JDBC driver
			Class.forName("com.mysql.cj.jdbc.Driver");

			// Create database connection
			try (Connection connection = createConnection()) {
				documents = executeQueryAndProcessResults(connection);
			}
		}
		catch (ClassNotFoundException e) {
			throw new RuntimeException("MySQL JDBC driver not found", e);
		}
		catch (SQLException e) {
			throw new RuntimeException("Error executing MySQL query: " + e.getMessage(), e);
		}
		return documents;
	}

	/**
	 * Create database connection
	 */
	private Connection createConnection() throws SQLException {
		return DriverManager.getConnection(mysqlResource.getJdbcUrl(), mysqlResource.getUsername(),
				mysqlResource.getPassword());
	}

	/**
	 * Execute query and process results
	 */
	private List<Document> executeQueryAndProcessResults(Connection connection) throws SQLException {
		List<Document> documents = new ArrayList<>();
		try (Statement statement = connection.createStatement();
				ResultSet resultSet = statement.executeQuery(mysqlResource.getQuery())) {

			List<String> columnNames = getColumnNames(resultSet.getMetaData());
			while (resultSet.next()) {
				Map<String, Object> rowData = extractRowData(resultSet, columnNames);
				String content = buildContent(rowData);
				Map<String, Object> metadata = buildMetadata(rowData);
				documents.add(new Document(content, metadata));
			}
		}
		return documents;
	}

	/**
	 * Get list of column names
	 */
	private List<String> getColumnNames(ResultSetMetaData metaData) throws SQLException {
		List<String> columnNames = new ArrayList<>();
		int columnCount = metaData.getColumnCount();
		for (int i = 1; i <= columnCount; i++) {
			columnNames.add(metaData.getColumnName(i));
		}
		return columnNames;
	}

	/**
	 * Extract row data
	 */
	private Map<String, Object> extractRowData(ResultSet resultSet, List<String> columnNames) throws SQLException {
		Map<String, Object> rowData = new HashMap<>();
		for (int i = 0; i < columnNames.size(); i++) {
			String columnName = columnNames.get(i);
			Object value = resultSet.getObject(i + 1);
			rowData.put(columnName, value);
		}
		return rowData;
	}

	/**
	 * Build document content
	 */
	private String buildContent(Map<String, Object> rowData) {
		StringBuilder contentBuilder = new StringBuilder();
		List<String> contentColumns = mysqlResource.getTextColumns();

		if (contentColumns == null || contentColumns.isEmpty()) {
			// If no content columns specified, use all columns
			for (Map.Entry<String, Object> entry : rowData.entrySet()) {
				appendColumnContent(contentBuilder, entry.getKey(), entry.getValue());
			}
		}
		else {
			// Only use specified content columns
			for (String column : contentColumns) {
				if (rowData.containsKey(column)) {
					appendColumnContent(contentBuilder, column, rowData.get(column));
				}
			}
		}
		return contentBuilder.toString().trim();
	}

	/**
	 * Append column content
	 */
	private void appendColumnContent(StringBuilder builder, String column, Object value) {
		builder.append(column).append(": ").append(value).append("\n");
	}

	/**
	 * Build metadata
	 */
	private Map<String, Object> buildMetadata(Map<String, Object> rowData) {
		Map<String, Object> metadata = new HashMap<>();
		metadata.put(MySQLResource.SOURCE, mysqlResource.getJdbcUrl());

		List<String> metadataColumns = mysqlResource.getMetadataColumns();
		if (metadataColumns != null) {
			for (String column : metadataColumns) {
				if (rowData.containsKey(column)) {
					metadata.put(column, rowData.get(column));
				}
			}
		}
		return metadata;
	}

}

MySQLDocumentReader要求构造器传入MySQLResource,其get方法通过jdbc执行指定的query,将字段名与值通过拼接在一起作为内容,根据metadataColumns来将自定字段的值作为metadata

MySQLResource

community/document-readers/spring-ai-alibaba-starter-document-reader-mysql/src/main/java/com/alibaba/cloud/ai/reader/mysql/MySQLResource.java

代码语言:javascript代码运行次数:0运行复制
public class MySQLResource {

	// MySQL connection properties
	private final String host; // MySQL server hostname or IP address

	private final int port; // MySQL server port number, default is 3306

	private final String database; // Name of the database to connect to

	private final String username; // MySQL user name for authentication

	private final String password; // MySQL password for authentication

	// Query settings
	private final String query; // SQL query to execute

	private final List<String> contentColumns; // Columns to include in document content

	private final List<String> metadataColumns; // Columns to include in document metadata

	public static final String SOURCE = "mysql";

	// Default values for MySQL connection
	public static final String DEFAULT_HOST = "127.0.0.1";

	public static final int DEFAULT_PORT = 3306;

	public static final String DEFAULT_USERNAME = "root";

	public static final String DEFAULT_PASSWORD = "root";

	public MySQLResource(String host, int port, String database, String username, String password, String query,
			List<String> contentColumns, List<String> metadataColumns) {
		this.host = host;
		this.port = port;
		this.database = database;
		this.username = username;
		this.password = password;
		this.query = query;
		this.contentColumns = contentColumns;
		this.metadataColumns = metadataColumns;
	}

	/**
	 * Constructor with default host and port
	 * @param database Database name
	 * @param username MySQL username
	 * @param password MySQL password
	 * @param query SQL query to execute
	 * @param contentColumns Columns to include in document content
	 * @param metadataColumns Columns to include in document metadata
	 */
	public MySQLResource(String database, String username, String password, String query, List<String> contentColumns,
			List<String> metadataColumns) {
		this(DEFAULT_HOST, DEFAULT_PORT, database, username, password, query, contentColumns, metadataColumns);
	}

	/**
	 * Constructor with all default connection parameters
	 * @param database Database name
	 * @param query SQL query to execute
	 * @param contentColumns Columns to include in document content
	 * @param metadataColumns Columns to include in document metadata
	 */
	public MySQLResource(String database, String query, List<String> contentColumns, List<String> metadataColumns) {
		this(DEFAULT_HOST, DEFAULT_PORT, database, DEFAULT_USERNAME, DEFAULT_PASSWORD, query, contentColumns,
				metadataColumns);
	}

	//......
}	

MySQLResource定义了port、database、username、password、query、contentColumns、metadataColumns等属性

示例

代码语言:javascript代码运行次数:0运行复制
@EnabledIfSystemProperty(named = "mysql.host", matches = ".+")
public class MySQLDocumentReaderTest {

	private MySQLResource mysqlResource;

	private MySQLDocumentReader reader;

	@BeforeEach
	void setUp() {
		// Read MySQL connection information from system properties
		String host = System.getProperty("mysql.host", "localhost");
		int port = Integer.parseInt(System.getProperty("mysql.port", "3306"));
		String database = System.getProperty("mysql.database", "mysql"); // Use default
																			// mysql
																			// database
		String username = System.getProperty("mysql.username", "root");
		String password = System.getProperty("mysql.password", "root");
		String query = System.getProperty("mysql.query", "SELECT * FROM user LIMIT 10;"); // Use
																							// user
																							// table
																							// in
																							// mysql
																							// database

		// Read content and metadata columns from system properties
		String contentColumnsStr = System.getProperty("mysql.content.columns", "User,Host");
		String metadataColumnsStr = System.getProperty("mysql.metadata.columns", "User,Host");

		List<String> contentColumns = Arrays.asList(contentColumnsStr.split(","));
		List<String> metadataColumns = Arrays.asList(metadataColumnsStr.split(","));

		// Setup test MySQL resource
		mysqlResource = new MySQLResource(host, port, database, username, password, query, contentColumns,
				metadataColumns);

		reader = new MySQLDocumentReader(mysqlResource);
	}

	@Test
	void testGetDocuments() {
		// This test requires a running MySQL instance with test data
		// You may need to modify the connection details and query using system
		// properties:
		// -Dmysql.host=your_host -Dmysql.port=your_port -Dmysql.database=your_db
		// -Dmysql.username=your_user -Dmysql.password=your_pass

		List<Document> documents = reader.get();

		// Basic assertions
		assertNotNull(documents);
		assertFalse(documents.isEmpty());

		// Test first document
		Document firstDoc = documents.get(0);
		assertNotNull(firstDoc);

		// Test document content
		String content = firstDoc.getText();
		assertNotNull(content);
	}

	@Test
	void testInvalidConnection() {
		// Test with invalid credentials
		MySQLResource invalidResource = new MySQLResource("invalid_host", 3306, "invalid_db", "invalid_user",
				"invalid_pass", "SELECT * FROM test_table", null, null);

		MySQLDocumentReader invalidReader = new MySQLDocumentReader(invalidResource);

		// Should throw RuntimeException
		assertThrows(RuntimeException.class, invalidReader::get);
	}

}

小结

spring-ai-alibaba-starter-document-reader-mysql提供了MySQLDocumentReader用于读取mysql的表内容和metadata到document,一条记录转为一个document,其中contentColumns用于指定哪些字段作为document的内容,多个字段用\n分隔,metadataColumns用于指定哪个字段的值作为metadata。

doc

  • java2ai

本文标签: 聊聊Spring AI Alibaba的MySQLDocumentReader