Quantcast
Channel: Inserting large number of rows into database with Spring boot - Code Review Stack Exchange
Viewing all articles
Browse latest Browse all 2

Inserting large number of rows into database with Spring boot

$
0
0

I need to insert many millions of rows (many GB of data) into a database for a project that uses Spring Boot. I recreated a minimal example with a one-to-many relationship and am trying to find the fastest solution. The full code is here: https://github.com/Vuizur/springmassinsert, but the rough structure is:

// Email.java@Entity@Table(name = "emails")public class Email {    @Id    @GeneratedValue(strategy = GenerationType.IDENTITY)    private Long id;    @Column(name = "address")    private String address;    @Column(name="text")    private String text;    @ManyToOne(fetch = FetchType.LAZY)    @JoinColumn(name = "user_id")    private User user;}// User.java@Entity@Table(name = "users")public class User {    @Id    @GeneratedValue(strategy = GenerationType.IDENTITY)    private Long id;    @Column(name = "name")    private String name;    @OneToMany(mappedBy = "user", cascade = CascadeType.ALL, fetch = FetchType.LAZY)    private List<Email> emails;}

But the most important part is the insert code, where I tested 4 versions:

@Servicepublic class InsertService {    @Autowired    private JdbcTemplate jdbcTemplate;    @Autowired    private UserRepository userRepository;    private final int NUMBER_OF_USERS = 10000;    private final int NUMBER_OF_EMAILS = 10;    public void insert() {        List<User> users = new ArrayList<>();        for (int i = 0; i < NUMBER_OF_USERS; i++) {            User user = new User();            user.setName("User " + i);            List<Email> emails = new ArrayList<>();            for (int j = 0; j < NUMBER_OF_EMAILS; j++) {                Email email = new Email();                email.setAddress("email" + j +"@gmail.com");                email.setUser(user);                emails.add(email);            }            user.setEmails(emails);            users.add(user);        }        userRepository.saveAll(users);    }    public void insertBatch() {        List<User> users = new ArrayList<>();        for (int i = 0; i < NUMBER_OF_USERS; i++) {            User user = new User();            user.setName("User " + i);            List<Email> emails = new ArrayList<>();            for (int j = 0; j < NUMBER_OF_EMAILS; j++) {                Email email = new Email();                email.setAddress("email" + j +"@gmail.com");                email.setUser(user);                emails.add(email);            }            user.setEmails(emails);            users.add(user);            if (i % 1000 == 0) {                userRepository.saveAll(users);                users.clear();            }        }    }    public void insertJdbc() {        List<User> users = new ArrayList<>();        for (int i = 0; i < NUMBER_OF_USERS; i++) {            User user = new User();            user.setName("User " + i);            List<Email> emails = new ArrayList<>();            for (int j = 0; j < NUMBER_OF_EMAILS; j++) {                Email email = new Email();                email.setAddress("email" + j +"@gmail.com");                email.setUser(user);                
emails.add(email);            }            user.setEmails(emails);            users.add(user);        }        for (User user : users) {            jdbcTemplate.update("insert into users (name) values (?)", user.getName());            for (Email email : user.getEmails()) {                jdbcTemplate.update("insert into emails (address, text, user_id) values (?, ?, ?)", email.getAddress(),                        email.getText(), user.getId());            }        }    }    public void insertJdbcBatch() {        List<User> users = new ArrayList<>();        for (int i = 0; i < NUMBER_OF_USERS; i++) {            User user = new User();            user.setName("User " + i);            List<Email> emails = new ArrayList<>();            for (int j = 0; j < NUMBER_OF_EMAILS; j++) {                Email email = new Email();                email.setAddress("email" + j +"@gmail.com");                email.setUser(user);                emails.add(email);            }            user.setEmails(emails);            users.add(user);        }            try {            // Create a prepared statement for inserting users            PreparedStatement userPs = jdbcTemplate.getDataSource().getConnection()                    .prepareStatement("insert into users (name) values (?)", Statement.RETURN_GENERATED_KEYS);            // Create a prepared statement for inserting emails            PreparedStatement emailPs = jdbcTemplate.getDataSource().getConnection()                    .prepareStatement("insert into emails (address, text, user_id) values (?, ?, ?)");            for (User user : users) {                // Set the user name parameter and add to the batch                userPs.setString(1, user.getName());                userPs.addBatch();            }            // Execute the batch update for users and get the generated ids            userPs.executeBatch();            ResultSet rs = userPs.getGeneratedKeys();            int index = 0;            while (rs.next()) {               
 // Set the user id from the result set                users.get(index).setId(rs.getLong(1));                index++;            }            for (User user : users) {                for (Email email : user.getEmails()) {                    // Set the email parameters and add to the batch                    emailPs.setString(1, email.getAddress());                    emailPs.setString(2, email.getText());                    emailPs.setLong(3, user.getId());                    emailPs.addBatch();                }            }            // Execute the batch update for emails            emailPs.executeBatch();        } catch (Exception e) {            System.out.println(e);        }    }    @PostConstruct    public void benchmark() {        long startTime = System.currentTimeMillis();        insertJdbcBatch();        long endTime = System.currentTimeMillis();        System.out.println("Inserting " + NUMBER_OF_USERS +" users with " + NUMBER_OF_EMAILS +" emails each took "+ (endTime - startTime) +" milliseconds");        // Print insert per seconds        System.out                .println((NUMBER_OF_USERS * NUMBER_OF_EMAILS * 1.0 / ((endTime - startTime) / 1000.0)) +" inserts per second");    }}

The performance results are (on a bad external HDD):

| Approach | Inserts/second |
| --- | --- |
| JPA (insert) | 4247 |
| JPA with save all 1000 | 4401 |
| JDBC | 1842 |
| JDBC with prepared statements | 13005 |

So the difference between the first two versions is not significant and the naive JDBC version is really slow, but JDBC with batched prepared statements is the clear winner. Is there anything else I can do to speed up the inserts even more?


Viewing all articles
Browse latest Browse all 2

Latest Images

Trending Articles





Latest Images

<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>
<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596344.js" async> </script>